voice-router-dev 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +40 -0
- package/dist/constants.d.mts +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/{field-configs-DYiUtRUz.d.mts → field-configs-FbtCPxzs.d.mts} +5730 -5786
- package/dist/{field-configs-DYiUtRUz.d.ts → field-configs-FbtCPxzs.d.ts} +5730 -5786
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +42 -51
- package/dist/field-configs.mjs +42 -51
- package/dist/index.d.mts +840 -1161
- package/dist/index.d.ts +840 -1161
- package/dist/index.js +613 -701
- package/dist/index.mjs +613 -701
- package/dist/{speechToTextChunkResponseModel-CI-Aqxcr.d.ts → speechToTextChunkResponseModel-BY2lGyZ3.d.ts} +319 -1
- package/dist/{speechToTextChunkResponseModel-D8VJ-wz6.d.mts → speechToTextChunkResponseModel-KayxDiZ7.d.mts} +319 -1
- package/dist/webhooks.d.mts +1 -1
- package/dist/webhooks.d.ts +1 -1
- package/package.json +2 -1
package/dist/index.js
CHANGED
|
@@ -82,7 +82,7 @@ __export(src_exports, {
|
|
|
82
82
|
DeepgramTTSSampleRate: () => DeepgramTTSSampleRate,
|
|
83
83
|
DeepgramTopicMode: () => DeepgramTopicMode,
|
|
84
84
|
DeepgramTranscriptionSchema: () => DeepgramTranscriptionSchema,
|
|
85
|
-
DeepgramTypes: () =>
|
|
85
|
+
DeepgramTypes: () => schema_exports5,
|
|
86
86
|
DeepgramZodSchemas: () => deepgramAPI_zod_exports,
|
|
87
87
|
ElevenLabsAdapter: () => ElevenLabsAdapter,
|
|
88
88
|
ElevenLabsCapabilities: () => ElevenLabsCapabilities,
|
|
@@ -119,7 +119,7 @@ __export(src_exports, {
|
|
|
119
119
|
OpenAIResponseFormat: () => OpenAIResponseFormat,
|
|
120
120
|
OpenAIStreamingTypes: () => streaming_types_exports,
|
|
121
121
|
OpenAITranscriptionSchema: () => OpenAITranscriptionSchema,
|
|
122
|
-
OpenAITypes: () =>
|
|
122
|
+
OpenAITypes: () => schema_exports6,
|
|
123
123
|
OpenAIWhisperAdapter: () => OpenAIWhisperAdapter,
|
|
124
124
|
OpenAIZodSchemas: () => openAIAudioRealtimeAPI_zod_exports,
|
|
125
125
|
ProfanityFilterMode: () => ProfanityFilterMode,
|
|
@@ -148,7 +148,7 @@ __export(src_exports, {
|
|
|
148
148
|
SonioxStreamingUpdateSchema: () => SonioxStreamingUpdateSchema,
|
|
149
149
|
SonioxStreamingZodSchemas: () => streaming_types_zod_exports,
|
|
150
150
|
SonioxTranscriptionSchema: () => SonioxTranscriptionSchema,
|
|
151
|
-
SonioxTypes: () =>
|
|
151
|
+
SonioxTypes: () => schema_exports4,
|
|
152
152
|
SpeakV1ContainerParameter: () => SpeakV1ContainerParameter,
|
|
153
153
|
SpeakV1EncodingParameter: () => SpeakV1EncodingParameter,
|
|
154
154
|
SpeakV1SampleRateParameter: () => SpeakV1SampleRateParameter,
|
|
@@ -163,7 +163,7 @@ __export(src_exports, {
|
|
|
163
163
|
SpeechmaticsStreamingSchema: () => SpeechmaticsStreamingSchema,
|
|
164
164
|
SpeechmaticsStreamingUpdateSchema: () => SpeechmaticsStreamingUpdateSchema,
|
|
165
165
|
SpeechmaticsTranscriptionSchema: () => SpeechmaticsTranscriptionSchema,
|
|
166
|
-
SpeechmaticsTypes: () =>
|
|
166
|
+
SpeechmaticsTypes: () => schema_exports7,
|
|
167
167
|
SpeechmaticsZodSchemas: () => speechmaticsASRRESTAPI_zod_exports,
|
|
168
168
|
StreamingProviders: () => StreamingProviders,
|
|
169
169
|
StreamingSupportedBitDepthEnum: () => StreamingSupportedBitDepthEnum,
|
|
@@ -6064,23 +6064,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
6064
6064
|
"AssemblyAI adapter currently only supports URL-based audio input. Use audio.type='url'"
|
|
6065
6065
|
);
|
|
6066
6066
|
}
|
|
6067
|
-
const
|
|
6068
|
-
|
|
6069
|
-
|
|
6070
|
-
|
|
6071
|
-
|
|
6072
|
-
|
|
6067
|
+
const passthrough = options?.assemblyai;
|
|
6068
|
+
let speechModels;
|
|
6069
|
+
if (passthrough?.speech_model != null && !passthrough.speech_models) {
|
|
6070
|
+
speechModels = [passthrough.speech_model];
|
|
6071
|
+
} else if (passthrough?.speech_models) {
|
|
6072
|
+
speechModels = passthrough.speech_models;
|
|
6073
6073
|
}
|
|
6074
|
+
const { speech_model: _deprecated, ...typedOpts } = passthrough ?? {};
|
|
6074
6075
|
const request = {
|
|
6075
|
-
...
|
|
6076
|
+
...typedOpts,
|
|
6076
6077
|
audio_url: audioUrl,
|
|
6077
6078
|
// speech_models is required — default to universal-3-pro
|
|
6078
|
-
speech_models:
|
|
6079
|
-
"universal-3-pro"
|
|
6080
|
-
],
|
|
6079
|
+
speech_models: speechModels ?? ["universal-3-pro"],
|
|
6081
6080
|
// Enable punctuation and formatting by default
|
|
6082
|
-
punctuate:
|
|
6083
|
-
format_text:
|
|
6081
|
+
punctuate: typedOpts.punctuate ?? true,
|
|
6082
|
+
format_text: typedOpts.format_text ?? true
|
|
6084
6083
|
};
|
|
6085
6084
|
if (options) {
|
|
6086
6085
|
if (options.model) {
|
|
@@ -6128,22 +6127,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
6128
6127
|
normalizeResponse(response) {
|
|
6129
6128
|
let status;
|
|
6130
6129
|
switch (response.status) {
|
|
6131
|
-
case
|
|
6130
|
+
case "queued":
|
|
6132
6131
|
status = "queued";
|
|
6133
6132
|
break;
|
|
6134
|
-
case
|
|
6133
|
+
case "processing":
|
|
6135
6134
|
status = "processing";
|
|
6136
6135
|
break;
|
|
6137
|
-
case
|
|
6136
|
+
case "completed":
|
|
6138
6137
|
status = "completed";
|
|
6139
6138
|
break;
|
|
6140
|
-
case
|
|
6139
|
+
case "error":
|
|
6141
6140
|
status = "error";
|
|
6142
6141
|
break;
|
|
6143
6142
|
default:
|
|
6144
6143
|
status = "queued";
|
|
6145
6144
|
}
|
|
6146
|
-
if (response.status ===
|
|
6145
|
+
if (response.status === "error") {
|
|
6147
6146
|
return {
|
|
6148
6147
|
success: false,
|
|
6149
6148
|
provider: this.name,
|
|
@@ -6795,8 +6794,14 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
6795
6794
|
/**
|
|
6796
6795
|
* Submit audio for transcription
|
|
6797
6796
|
*
|
|
6798
|
-
* Sends audio to Deepgram API for transcription. Deepgram processes
|
|
6799
|
-
* synchronously and returns results immediately
|
|
6797
|
+
* Sends audio to Deepgram API for transcription. Deepgram normally processes
|
|
6798
|
+
* synchronously and returns results immediately.
|
|
6799
|
+
*
|
|
6800
|
+
* **Callback mode:** When `webhookUrl` is set, Deepgram returns immediately
|
|
6801
|
+
* with a `request_id` (status `"queued"`). The full transcript is POSTed to
|
|
6802
|
+
* the webhook URL — this is the primary delivery mechanism. `getTranscript()`
|
|
6803
|
+
* can attempt to retrieve the result later via request history, but that
|
|
6804
|
+
* endpoint is best-effort and not a guaranteed durable store.
|
|
6800
6805
|
*
|
|
6801
6806
|
* @param audio - Audio input (URL or file buffer)
|
|
6802
6807
|
* @param options - Transcription options
|
|
@@ -6847,47 +6852,81 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
6847
6852
|
{ params }
|
|
6848
6853
|
).then((res) => res.data);
|
|
6849
6854
|
} else if (audio.type === "file") {
|
|
6850
|
-
response = await this.client.post(
|
|
6851
|
-
|
|
6852
|
-
|
|
6853
|
-
|
|
6855
|
+
response = await this.client.post(
|
|
6856
|
+
"/listen",
|
|
6857
|
+
audio.file,
|
|
6858
|
+
{
|
|
6859
|
+
params,
|
|
6860
|
+
headers: {
|
|
6861
|
+
"Content-Type": "audio/*"
|
|
6862
|
+
}
|
|
6854
6863
|
}
|
|
6855
|
-
|
|
6864
|
+
).then((res) => res.data);
|
|
6856
6865
|
} else {
|
|
6857
6866
|
throw new Error(
|
|
6858
6867
|
"Deepgram adapter does not support stream type for pre-recorded transcription. Use transcribeStream() for real-time streaming."
|
|
6859
6868
|
);
|
|
6860
6869
|
}
|
|
6870
|
+
if (options?.webhookUrl) {
|
|
6871
|
+
const requestId = ("request_id" in response ? response.request_id : void 0) || ("metadata" in response ? response.metadata?.request_id : void 0);
|
|
6872
|
+
if (!requestId) {
|
|
6873
|
+
return {
|
|
6874
|
+
success: false,
|
|
6875
|
+
provider: this.name,
|
|
6876
|
+
error: {
|
|
6877
|
+
code: "MISSING_REQUEST_ID",
|
|
6878
|
+
message: "Deepgram callback mode did not return a request ID"
|
|
6879
|
+
},
|
|
6880
|
+
raw: response
|
|
6881
|
+
};
|
|
6882
|
+
}
|
|
6883
|
+
return {
|
|
6884
|
+
success: true,
|
|
6885
|
+
provider: this.name,
|
|
6886
|
+
data: {
|
|
6887
|
+
id: requestId,
|
|
6888
|
+
text: "",
|
|
6889
|
+
status: "queued"
|
|
6890
|
+
},
|
|
6891
|
+
tracking: {
|
|
6892
|
+
requestId
|
|
6893
|
+
},
|
|
6894
|
+
raw: response
|
|
6895
|
+
};
|
|
6896
|
+
}
|
|
6897
|
+
if (!("results" in response) || !("metadata" in response)) {
|
|
6898
|
+
return {
|
|
6899
|
+
success: false,
|
|
6900
|
+
provider: this.name,
|
|
6901
|
+
error: {
|
|
6902
|
+
code: "INVALID_RESPONSE",
|
|
6903
|
+
message: "Deepgram did not return a synchronous transcription payload"
|
|
6904
|
+
},
|
|
6905
|
+
raw: response
|
|
6906
|
+
};
|
|
6907
|
+
}
|
|
6861
6908
|
return this.normalizeResponse(response);
|
|
6862
6909
|
} catch (error) {
|
|
6863
6910
|
return this.createErrorResponse(error);
|
|
6864
6911
|
}
|
|
6865
6912
|
}
|
|
6866
6913
|
/**
|
|
6867
|
-
* Get transcription result by ID
|
|
6914
|
+
* Get transcription result by ID (best-effort)
|
|
6868
6915
|
*
|
|
6869
|
-
* Retrieves a previous transcription from Deepgram's request history.
|
|
6870
|
-
*
|
|
6871
|
-
* Unlike the list endpoint, getting a single request DOES include the full
|
|
6872
|
-
* transcript response. Requires `projectId` to be set during initialization.
|
|
6916
|
+
* Retrieves a previous transcription from Deepgram's request history API.
|
|
6917
|
+
* Requires `projectId` to be set during initialization.
|
|
6873
6918
|
*
|
|
6874
|
-
*
|
|
6875
|
-
*
|
|
6919
|
+
* **Important:** Deepgram's request history is best-effort. Requests may
|
|
6920
|
+
* expire or be unavailable depending on your plan and retention settings.
|
|
6921
|
+
* This is NOT a durable transcript store — for reliable retrieval, use
|
|
6922
|
+
* callback mode (`webhookUrl`) and persist the webhook payload yourself.
|
|
6876
6923
|
*
|
|
6877
|
-
*
|
|
6878
|
-
*
|
|
6879
|
-
*
|
|
6880
|
-
* adapter.initialize({
|
|
6881
|
-
* apiKey: process.env.DEEPGRAM_API_KEY,
|
|
6882
|
-
* projectId: process.env.DEEPGRAM_PROJECT_ID
|
|
6883
|
-
* })
|
|
6924
|
+
* The response field on the request history entry is cast to
|
|
6925
|
+
* `ListenV1Response` — this appears to work in practice but is not
|
|
6926
|
+
* explicitly documented by Deepgram as a guaranteed contract.
|
|
6884
6927
|
*
|
|
6885
|
-
*
|
|
6886
|
-
* if
|
|
6887
|
-
* console.log(result.data?.text)
|
|
6888
|
-
* console.log(result.data?.words)
|
|
6889
|
-
* }
|
|
6890
|
-
* ```
|
|
6928
|
+
* @param transcriptId - Request ID from a previous transcription
|
|
6929
|
+
* @returns Transcript response if still available in request history
|
|
6891
6930
|
*
|
|
6892
6931
|
* @see https://developers.deepgram.com/reference/get-request
|
|
6893
6932
|
*/
|
|
@@ -7518,7 +7557,8 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
7518
7557
|
break;
|
|
7519
7558
|
}
|
|
7520
7559
|
case "Metadata": {
|
|
7521
|
-
|
|
7560
|
+
const { type: _, ...metadata } = message;
|
|
7561
|
+
callbacks?.onMetadata?.(metadata);
|
|
7522
7562
|
break;
|
|
7523
7563
|
}
|
|
7524
7564
|
case "Error": {
|
|
@@ -7954,10 +7994,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
|
|
|
7954
7994
|
contentUrls: [audio.url],
|
|
7955
7995
|
properties: this.buildTranscriptionProperties(options)
|
|
7956
7996
|
};
|
|
7957
|
-
const response = await transcriptionsCreate(
|
|
7958
|
-
transcriptionRequest,
|
|
7959
|
-
this.getAxiosConfig()
|
|
7960
|
-
);
|
|
7997
|
+
const response = await transcriptionsCreate(transcriptionRequest, this.getAxiosConfig());
|
|
7961
7998
|
const transcription = response.data;
|
|
7962
7999
|
const transcriptId = transcription.self?.split("/").pop() || "";
|
|
7963
8000
|
return await this.pollForCompletion(transcriptId);
|
|
@@ -8497,7 +8534,6 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
|
|
|
8497
8534
|
const request = {
|
|
8498
8535
|
...options?.openai,
|
|
8499
8536
|
file: audioData,
|
|
8500
|
-
// Buffer/Blob both accepted at runtime; generated type expects Blob
|
|
8501
8537
|
model
|
|
8502
8538
|
};
|
|
8503
8539
|
if (options?.language) {
|
|
@@ -8517,11 +8553,7 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
|
|
|
8517
8553
|
request.response_format = OpenAIResponseFormat.json;
|
|
8518
8554
|
}
|
|
8519
8555
|
const response = await createTranscription(request, this.getAxiosConfig());
|
|
8520
|
-
return this.normalizeResponse(
|
|
8521
|
-
response.data,
|
|
8522
|
-
model,
|
|
8523
|
-
isDiarization
|
|
8524
|
-
);
|
|
8556
|
+
return this.normalizeResponse(response.data, model, isDiarization);
|
|
8525
8557
|
} catch (error) {
|
|
8526
8558
|
return this.createErrorResponse(error);
|
|
8527
8559
|
}
|
|
@@ -8928,7 +8960,6 @@ function createOpenAIWhisperAdapter(config) {
|
|
|
8928
8960
|
|
|
8929
8961
|
// src/adapters/speechmatics-adapter.ts
|
|
8930
8962
|
var import_axios8 = __toESM(require("axios"));
|
|
8931
|
-
var import_ws5 = __toESM(require("ws"));
|
|
8932
8963
|
|
|
8933
8964
|
// src/generated/speechmatics/schema/notificationConfigContentsItem.ts
|
|
8934
8965
|
var NotificationConfigContentsItem = {
|
|
@@ -9113,16 +9144,13 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
9113
9144
|
jobConfig.fetch_data = {
|
|
9114
9145
|
url: audio.url
|
|
9115
9146
|
};
|
|
9116
|
-
|
|
9117
|
-
|
|
9118
|
-
requestBody = formData;
|
|
9119
|
-
headers = { "Content-Type": "multipart/form-data" };
|
|
9147
|
+
requestBody = { config: JSON.stringify(jobConfig) };
|
|
9148
|
+
headers = { "Content-Type": "application/json" };
|
|
9120
9149
|
} else if (audio.type === "file") {
|
|
9121
|
-
|
|
9122
|
-
|
|
9123
|
-
|
|
9124
|
-
|
|
9125
|
-
requestBody = formData;
|
|
9150
|
+
requestBody = {
|
|
9151
|
+
config: JSON.stringify(jobConfig),
|
|
9152
|
+
data_file: audio.file
|
|
9153
|
+
};
|
|
9126
9154
|
headers = { "Content-Type": "multipart/form-data" };
|
|
9127
9155
|
} else {
|
|
9128
9156
|
return {
|
|
@@ -9228,216 +9256,224 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
9228
9256
|
}
|
|
9229
9257
|
}
|
|
9230
9258
|
/**
|
|
9231
|
-
*
|
|
9259
|
+
* Get the regional WebSocket host for real-time streaming
|
|
9232
9260
|
*
|
|
9233
|
-
*
|
|
9234
|
-
* - Batch: {region}.asr.api.speechmatics.com
|
|
9235
|
-
* - Real-time: {region}.rt.speechmatics.com
|
|
9236
|
-
*
|
|
9237
|
-
* @param region - Regional endpoint identifier
|
|
9238
|
-
* @returns WebSocket URL for real-time API
|
|
9261
|
+
* Speechmatics RT uses a different host pattern: {region}.rt.speechmatics.com
|
|
9239
9262
|
*/
|
|
9240
|
-
|
|
9241
|
-
|
|
9242
|
-
|
|
9243
|
-
}
|
|
9244
|
-
const rtRegionMap = {
|
|
9245
|
-
eu1: "eu",
|
|
9246
|
-
eu2: "eu",
|
|
9247
|
-
us1: "us",
|
|
9248
|
-
us2: "us",
|
|
9249
|
-
au1: "eu"
|
|
9250
|
-
// No AU RT endpoint — fall back to EU
|
|
9251
|
-
};
|
|
9252
|
-
const rtPrefix = rtRegionMap[region || ""] || "eu";
|
|
9253
|
-
return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
|
|
9263
|
+
getRegionalWsHost(region) {
|
|
9264
|
+
const regionPrefix = region || "eu1";
|
|
9265
|
+
return `${regionPrefix}.rt.speechmatics.com`;
|
|
9254
9266
|
}
|
|
9255
9267
|
/**
|
|
9256
|
-
* Stream audio for real-time transcription
|
|
9257
|
-
*
|
|
9258
|
-
* Connects to Speechmatics' real-time API and sends audio chunks
|
|
9259
|
-
* for transcription with results returned via callbacks.
|
|
9268
|
+
* Stream audio for real-time transcription
|
|
9260
9269
|
*
|
|
9261
|
-
*
|
|
9262
|
-
*
|
|
9263
|
-
*
|
|
9270
|
+
* Creates a WebSocket connection to the Speechmatics Real-Time API.
|
|
9271
|
+
* Protocol: send StartRecognition config, then AddAudio binary frames,
|
|
9272
|
+
* receive AddPartialTranscript/AddTranscript/EndOfUtterance messages.
|
|
9264
9273
|
*
|
|
9265
|
-
* @
|
|
9266
|
-
*
|
|
9267
|
-
*
|
|
9268
|
-
* language: 'en',
|
|
9269
|
-
* speechmaticsStreaming: {
|
|
9270
|
-
* enablePartials: true,
|
|
9271
|
-
* operatingPoint: 'enhanced'
|
|
9272
|
-
* }
|
|
9273
|
-
* }, {
|
|
9274
|
-
* onTranscript: (event) => console.log(event.text),
|
|
9275
|
-
* onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
|
|
9276
|
-
* onError: (error) => console.error(error)
|
|
9277
|
-
* });
|
|
9274
|
+
* @param options - Streaming configuration
|
|
9275
|
+
* @param callbacks - Event callbacks
|
|
9276
|
+
* @returns StreamingSession for sending audio and closing
|
|
9278
9277
|
*
|
|
9279
|
-
*
|
|
9280
|
-
* await session.close();
|
|
9281
|
-
* ```
|
|
9278
|
+
* @see https://docs.speechmatics.com/rt-api-ref
|
|
9282
9279
|
*/
|
|
9283
9280
|
async transcribeStream(options, callbacks) {
|
|
9284
9281
|
this.validateConfig();
|
|
9285
|
-
const
|
|
9286
|
-
const
|
|
9287
|
-
const
|
|
9288
|
-
const
|
|
9289
|
-
|
|
9290
|
-
|
|
9291
|
-
|
|
9292
|
-
|
|
9293
|
-
|
|
9294
|
-
const
|
|
9295
|
-
|
|
9296
|
-
|
|
9297
|
-
|
|
9298
|
-
|
|
9299
|
-
|
|
9300
|
-
|
|
9301
|
-
|
|
9302
|
-
|
|
9303
|
-
|
|
9304
|
-
|
|
9305
|
-
}
|
|
9306
|
-
|
|
9307
|
-
|
|
9308
|
-
|
|
9309
|
-
|
|
9310
|
-
const startMsg = {
|
|
9311
|
-
message: "StartRecognition",
|
|
9312
|
-
audio_format: {
|
|
9313
|
-
type: "raw",
|
|
9314
|
-
encoding,
|
|
9315
|
-
sample_rate: sampleRate
|
|
9316
|
-
},
|
|
9317
|
-
transcription_config: {
|
|
9318
|
-
language: smOpts.language || options?.language || "en",
|
|
9319
|
-
enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
|
|
9320
|
-
}
|
|
9321
|
-
};
|
|
9322
|
-
const txConfig = startMsg.transcription_config;
|
|
9323
|
-
if (smOpts.domain) txConfig.domain = smOpts.domain;
|
|
9324
|
-
if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
|
|
9325
|
-
if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
|
|
9326
|
-
if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
|
|
9327
|
-
if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
|
|
9328
|
-
if (smOpts.diarization === "speaker" || options?.diarization) {
|
|
9329
|
-
txConfig.diarization = "speaker";
|
|
9330
|
-
if (smOpts.maxSpeakers) {
|
|
9331
|
-
txConfig.speaker_diarization_config = {
|
|
9332
|
-
max_speakers: smOpts.maxSpeakers
|
|
9333
|
-
};
|
|
9334
|
-
} else if (options?.speakersExpected) {
|
|
9335
|
-
txConfig.speaker_diarization_config = {
|
|
9336
|
-
max_speakers: options.speakersExpected
|
|
9337
|
-
};
|
|
9338
|
-
}
|
|
9339
|
-
}
|
|
9340
|
-
if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
|
|
9341
|
-
txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
|
|
9342
|
-
content: word
|
|
9343
|
-
}));
|
|
9344
|
-
} else if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9345
|
-
txConfig.additional_vocab = options.customVocabulary.map((word) => ({
|
|
9346
|
-
content: word
|
|
9347
|
-
}));
|
|
9348
|
-
}
|
|
9349
|
-
if (smOpts.conversationConfig) {
|
|
9350
|
-
txConfig.conversation_config = {
|
|
9351
|
-
end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
|
|
9352
|
-
};
|
|
9353
|
-
}
|
|
9354
|
-
const startPayload = JSON.stringify(startMsg);
|
|
9355
|
-
if (callbacks?.onRawMessage) {
|
|
9356
|
-
callbacks.onRawMessage({
|
|
9357
|
-
provider: "speechmatics",
|
|
9358
|
-
direction: "outgoing",
|
|
9359
|
-
timestamp: Date.now(),
|
|
9360
|
-
payload: startPayload,
|
|
9361
|
-
messageType: "StartRecognition"
|
|
9362
|
-
});
|
|
9282
|
+
const sessionId = `speechmatics_${Date.now()}_${Math.random().toString(36).substring(7)}`;
|
|
9283
|
+
const createdAt = /* @__PURE__ */ new Date();
|
|
9284
|
+
const smOpts = options?.speechmaticsStreaming;
|
|
9285
|
+
const region = smOpts?.region || this.config?.region;
|
|
9286
|
+
const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost(region)}`);
|
|
9287
|
+
const wsUrl = `${wsBase}/v2`;
|
|
9288
|
+
let status = "connecting";
|
|
9289
|
+
let recognitionStarted = false;
|
|
9290
|
+
const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : require("ws");
|
|
9291
|
+
const ws = new WebSocketImpl(wsUrl);
|
|
9292
|
+
const language = smOpts?.language || options?.language || "en";
|
|
9293
|
+
const transcriptionConfig = {
|
|
9294
|
+
language,
|
|
9295
|
+
enable_entities: smOpts?.enableEntities ?? options?.entityDetection ?? false,
|
|
9296
|
+
enable_partials: smOpts?.enablePartials ?? options?.interimResults !== false,
|
|
9297
|
+
operating_point: smOpts?.operatingPoint || OperatingPoint.enhanced,
|
|
9298
|
+
...smOpts?.maxDelay !== void 0 && { max_delay: smOpts.maxDelay },
|
|
9299
|
+
...smOpts?.maxDelayMode && {
|
|
9300
|
+
max_delay_mode: smOpts.maxDelayMode
|
|
9301
|
+
},
|
|
9302
|
+
...smOpts?.domain && { domain: smOpts.domain },
|
|
9303
|
+
...(options?.diarization || smOpts?.diarization === TranscriptionConfigDiarization.speaker) && {
|
|
9304
|
+
diarization: TranscriptionConfigDiarization.speaker,
|
|
9305
|
+
...smOpts?.maxSpeakers !== void 0 && {
|
|
9306
|
+
speaker_diarization_config: { max_speakers: smOpts.maxSpeakers }
|
|
9363
9307
|
}
|
|
9364
|
-
|
|
9365
|
-
|
|
9366
|
-
|
|
9367
|
-
|
|
9368
|
-
|
|
9369
|
-
|
|
9370
|
-
|
|
9371
|
-
|
|
9372
|
-
|
|
9373
|
-
|
|
9374
|
-
|
|
9375
|
-
|
|
9376
|
-
|
|
9377
|
-
|
|
9378
|
-
|
|
9379
|
-
|
|
9380
|
-
|
|
9308
|
+
},
|
|
9309
|
+
...(options?.customVocabulary?.length || smOpts?.additionalVocab?.length) && {
|
|
9310
|
+
additional_vocab: (smOpts?.additionalVocab || options?.customVocabulary || []).map(
|
|
9311
|
+
(term) => ({ content: term })
|
|
9312
|
+
)
|
|
9313
|
+
}
|
|
9314
|
+
};
|
|
9315
|
+
const startRecognition = {
|
|
9316
|
+
message: "StartRecognition",
|
|
9317
|
+
audio_format: {
|
|
9318
|
+
type: "raw",
|
|
9319
|
+
encoding: smOpts?.encoding || "pcm_s16le",
|
|
9320
|
+
sample_rate: smOpts?.sampleRate || options?.sampleRate || 16e3
|
|
9321
|
+
},
|
|
9322
|
+
transcription_config: transcriptionConfig,
|
|
9323
|
+
...smOpts?.conversationConfig && {
|
|
9324
|
+
conversation_config: {
|
|
9325
|
+
end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
|
|
9381
9326
|
}
|
|
9382
|
-
}
|
|
9383
|
-
|
|
9384
|
-
|
|
9385
|
-
|
|
9386
|
-
const
|
|
9327
|
+
}
|
|
9328
|
+
};
|
|
9329
|
+
ws.onopen = () => {
|
|
9330
|
+
status = "open";
|
|
9331
|
+
const msg = JSON.stringify(startRecognition);
|
|
9332
|
+
if (callbacks?.onRawMessage) {
|
|
9333
|
+
callbacks.onRawMessage({
|
|
9334
|
+
provider: this.name,
|
|
9335
|
+
direction: "outgoing",
|
|
9336
|
+
timestamp: Date.now(),
|
|
9337
|
+
payload: msg,
|
|
9338
|
+
messageType: "StartRecognition"
|
|
9339
|
+
});
|
|
9340
|
+
}
|
|
9341
|
+
ws.send(msg);
|
|
9342
|
+
};
|
|
9343
|
+
ws.onmessage = (event) => {
|
|
9344
|
+
const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
|
|
9387
9345
|
try {
|
|
9388
|
-
const
|
|
9346
|
+
const data = JSON.parse(rawPayload);
|
|
9347
|
+
const messageType = data.message;
|
|
9389
9348
|
if (callbacks?.onRawMessage) {
|
|
9390
9349
|
callbacks.onRawMessage({
|
|
9391
|
-
provider:
|
|
9350
|
+
provider: this.name,
|
|
9392
9351
|
direction: "incoming",
|
|
9393
9352
|
timestamp: Date.now(),
|
|
9394
9353
|
payload: rawPayload,
|
|
9395
|
-
messageType
|
|
9354
|
+
messageType
|
|
9396
9355
|
});
|
|
9397
9356
|
}
|
|
9398
|
-
|
|
9399
|
-
|
|
9400
|
-
|
|
9401
|
-
|
|
9402
|
-
|
|
9403
|
-
|
|
9404
|
-
|
|
9405
|
-
|
|
9406
|
-
|
|
9407
|
-
}
|
|
9357
|
+
switch (messageType) {
|
|
9358
|
+
case "RecognitionStarted": {
|
|
9359
|
+
recognitionStarted = true;
|
|
9360
|
+
callbacks?.onOpen?.();
|
|
9361
|
+
callbacks?.onMetadata?.({
|
|
9362
|
+
id: data.id,
|
|
9363
|
+
languagePackInfo: data.language_pack_info
|
|
9364
|
+
});
|
|
9365
|
+
break;
|
|
9366
|
+
}
|
|
9367
|
+
case "AddPartialTranscript": {
|
|
9368
|
+
const partial = data;
|
|
9369
|
+
const words = this.resultsToWords(partial.results);
|
|
9370
|
+
callbacks?.onTranscript?.({
|
|
9371
|
+
type: "transcript",
|
|
9372
|
+
text: partial.metadata.transcript,
|
|
9373
|
+
isFinal: false,
|
|
9374
|
+
words,
|
|
9375
|
+
speaker: words[0]?.speaker,
|
|
9376
|
+
confidence: partial.results[0]?.alternatives?.[0]?.confidence,
|
|
9377
|
+
channel: partial.channel ? parseInt(partial.channel) : void 0
|
|
9378
|
+
});
|
|
9379
|
+
break;
|
|
9380
|
+
}
|
|
9381
|
+
case "AddTranscript": {
|
|
9382
|
+
const final = data;
|
|
9383
|
+
const words = this.resultsToWords(final.results);
|
|
9384
|
+
callbacks?.onTranscript?.({
|
|
9385
|
+
type: "transcript",
|
|
9386
|
+
text: final.metadata.transcript,
|
|
9387
|
+
isFinal: true,
|
|
9388
|
+
words,
|
|
9389
|
+
speaker: words[0]?.speaker,
|
|
9390
|
+
confidence: final.results[0]?.alternatives?.[0]?.confidence,
|
|
9391
|
+
channel: final.channel ? parseInt(final.channel) : void 0
|
|
9392
|
+
});
|
|
9393
|
+
if (options?.diarization || smOpts?.diarization === "speaker") {
|
|
9394
|
+
const utterances = buildUtterancesFromWords(words);
|
|
9395
|
+
for (const utterance of utterances) {
|
|
9396
|
+
callbacks?.onUtterance?.(utterance);
|
|
9397
|
+
}
|
|
9398
|
+
}
|
|
9399
|
+
break;
|
|
9400
|
+
}
|
|
9401
|
+
case "EndOfUtterance": {
|
|
9402
|
+
break;
|
|
9403
|
+
}
|
|
9404
|
+
case "EndOfTranscript": {
|
|
9405
|
+
callbacks?.onClose?.(1e3, "Transcription complete");
|
|
9406
|
+
break;
|
|
9407
|
+
}
|
|
9408
|
+
case "Error": {
|
|
9409
|
+
const err = data;
|
|
9410
|
+
callbacks?.onError?.({
|
|
9411
|
+
code: err.type || "SPEECHMATICS_ERROR",
|
|
9412
|
+
message: err.reason || "Unknown error"
|
|
9413
|
+
});
|
|
9414
|
+
break;
|
|
9415
|
+
}
|
|
9416
|
+
case "Warning": {
|
|
9417
|
+
const warn = data;
|
|
9418
|
+
callbacks?.onMetadata?.({
|
|
9419
|
+
warning: warn.type,
|
|
9420
|
+
reason: warn.reason
|
|
9421
|
+
});
|
|
9422
|
+
break;
|
|
9423
|
+
}
|
|
9424
|
+
case "Info": {
|
|
9425
|
+
callbacks?.onMetadata?.(data);
|
|
9426
|
+
break;
|
|
9427
|
+
}
|
|
9428
|
+
case "AudioAdded":
|
|
9429
|
+
case "ChannelAudioAdded":
|
|
9430
|
+
break;
|
|
9431
|
+
default:
|
|
9432
|
+
callbacks?.onMetadata?.(data);
|
|
9433
|
+
break;
|
|
9408
9434
|
}
|
|
9435
|
+
} catch (error) {
|
|
9409
9436
|
callbacks?.onError?.({
|
|
9410
9437
|
code: "PARSE_ERROR",
|
|
9411
|
-
message:
|
|
9412
|
-
details: error
|
|
9438
|
+
message: `Failed to parse message: ${error}`
|
|
9413
9439
|
});
|
|
9414
9440
|
}
|
|
9415
|
-
}
|
|
9416
|
-
ws.
|
|
9441
|
+
};
|
|
9442
|
+
ws.onerror = () => {
|
|
9417
9443
|
callbacks?.onError?.({
|
|
9418
9444
|
code: "WEBSOCKET_ERROR",
|
|
9419
|
-
message: error
|
|
9420
|
-
details: error
|
|
9445
|
+
message: "WebSocket error occurred"
|
|
9421
9446
|
});
|
|
9447
|
+
};
|
|
9448
|
+
ws.onclose = (event) => {
|
|
9449
|
+
status = "closed";
|
|
9450
|
+
callbacks?.onClose?.(event.code, event.reason);
|
|
9451
|
+
};
|
|
9452
|
+
await new Promise((resolve, reject) => {
|
|
9453
|
+
const timeout = setTimeout(() => {
|
|
9454
|
+
reject(new Error("WebSocket connection timeout"));
|
|
9455
|
+
}, 1e4);
|
|
9456
|
+
const checkReady = () => {
|
|
9457
|
+
if (recognitionStarted) {
|
|
9458
|
+
clearTimeout(timeout);
|
|
9459
|
+
resolve();
|
|
9460
|
+
} else if (status === "closed") {
|
|
9461
|
+
clearTimeout(timeout);
|
|
9462
|
+
reject(new Error("WebSocket connection failed"));
|
|
9463
|
+
} else {
|
|
9464
|
+
setTimeout(checkReady, 100);
|
|
9465
|
+
}
|
|
9466
|
+
};
|
|
9467
|
+
checkReady();
|
|
9422
9468
|
});
|
|
9423
|
-
ws.on("close", (code, reason) => {
|
|
9424
|
-
sessionStatus = "closed";
|
|
9425
|
-
callbacks?.onClose?.(code, reason.toString());
|
|
9426
|
-
});
|
|
9427
|
-
await sessionReady;
|
|
9428
|
-
sessionStatus = "open";
|
|
9429
|
-
callbacks?.onOpen?.();
|
|
9430
9469
|
return {
|
|
9431
9470
|
id: sessionId,
|
|
9432
9471
|
provider: this.name,
|
|
9433
|
-
createdAt
|
|
9434
|
-
getStatus: () =>
|
|
9472
|
+
createdAt,
|
|
9473
|
+
getStatus: () => status,
|
|
9435
9474
|
sendAudio: async (chunk) => {
|
|
9436
|
-
if (
|
|
9437
|
-
throw new Error(
|
|
9438
|
-
}
|
|
9439
|
-
if (ws.readyState !== import_ws5.default.OPEN) {
|
|
9440
|
-
throw new Error("WebSocket is not open");
|
|
9475
|
+
if (status !== "open") {
|
|
9476
|
+
throw new Error("Session is not open");
|
|
9441
9477
|
}
|
|
9442
9478
|
if (callbacks?.onRawMessage) {
|
|
9443
9479
|
const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
|
|
@@ -9453,12 +9489,11 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
9453
9489
|
});
|
|
9454
9490
|
}
|
|
9455
9491
|
ws.send(chunk.data);
|
|
9456
|
-
|
|
9457
|
-
|
|
9458
|
-
|
|
9459
|
-
|
|
9460
|
-
|
|
9461
|
-
});
|
|
9492
|
+
},
|
|
9493
|
+
close: async () => {
|
|
9494
|
+
if (status === "open") {
|
|
9495
|
+
status = "closing";
|
|
9496
|
+
const endMsg = JSON.stringify({ message: "EndOfStream", last_seq_no: 0 });
|
|
9462
9497
|
if (callbacks?.onRawMessage) {
|
|
9463
9498
|
callbacks.onRawMessage({
|
|
9464
9499
|
provider: this.name,
|
|
@@ -9470,144 +9505,19 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
9470
9505
|
}
|
|
9471
9506
|
ws.send(endMsg);
|
|
9472
9507
|
}
|
|
9473
|
-
},
|
|
9474
|
-
close: async () => {
|
|
9475
|
-
if (sessionStatus === "closed" || sessionStatus === "closing") {
|
|
9476
|
-
return;
|
|
9477
|
-
}
|
|
9478
|
-
sessionStatus = "closing";
|
|
9479
|
-
if (ws.readyState === import_ws5.default.OPEN) {
|
|
9480
|
-
seqNo++;
|
|
9481
|
-
ws.send(
|
|
9482
|
-
JSON.stringify({
|
|
9483
|
-
message: "EndOfStream",
|
|
9484
|
-
last_seq_no: seqNo
|
|
9485
|
-
})
|
|
9486
|
-
);
|
|
9487
|
-
}
|
|
9488
|
-
return new Promise((resolve) => {
|
|
9489
|
-
const timeout = setTimeout(() => {
|
|
9490
|
-
ws.terminate();
|
|
9491
|
-
sessionStatus = "closed";
|
|
9492
|
-
resolve();
|
|
9493
|
-
}, 5e3);
|
|
9494
|
-
const onMsg = (data) => {
|
|
9495
|
-
try {
|
|
9496
|
-
const msg = JSON.parse(data.toString());
|
|
9497
|
-
if (msg.message === "EndOfTranscript") {
|
|
9498
|
-
ws.removeListener("message", onMsg);
|
|
9499
|
-
clearTimeout(timeout);
|
|
9500
|
-
ws.close();
|
|
9501
|
-
}
|
|
9502
|
-
} catch {
|
|
9503
|
-
}
|
|
9504
|
-
};
|
|
9505
|
-
ws.on("message", onMsg);
|
|
9506
|
-
ws.once("close", () => {
|
|
9507
|
-
clearTimeout(timeout);
|
|
9508
|
-
sessionStatus = "closed";
|
|
9509
|
-
resolve();
|
|
9510
|
-
});
|
|
9511
|
-
});
|
|
9512
9508
|
}
|
|
9513
9509
|
};
|
|
9514
9510
|
}
|
|
9515
9511
|
/**
|
|
9516
|
-
*
|
|
9517
|
-
*/
|
|
9518
|
-
handleStreamingMessage(message, callbacks, utteranceResults) {
|
|
9519
|
-
switch (message.message) {
|
|
9520
|
-
case "RecognitionStarted": {
|
|
9521
|
-
break;
|
|
9522
|
-
}
|
|
9523
|
-
case "AddPartialTranscript": {
|
|
9524
|
-
const results = message.results || [];
|
|
9525
|
-
const text = buildTextFromSpeechmaticsResults(results);
|
|
9526
|
-
if (text) {
|
|
9527
|
-
callbacks?.onTranscript?.({
|
|
9528
|
-
type: "transcript",
|
|
9529
|
-
text,
|
|
9530
|
-
isFinal: false,
|
|
9531
|
-
words: this.extractWordsFromResults(results),
|
|
9532
|
-
data: message
|
|
9533
|
-
});
|
|
9534
|
-
}
|
|
9535
|
-
break;
|
|
9536
|
-
}
|
|
9537
|
-
case "AddTranscript": {
|
|
9538
|
-
const results = message.results || [];
|
|
9539
|
-
const text = buildTextFromSpeechmaticsResults(results);
|
|
9540
|
-
if (utteranceResults) {
|
|
9541
|
-
utteranceResults.push(...results);
|
|
9542
|
-
}
|
|
9543
|
-
if (text) {
|
|
9544
|
-
callbacks?.onTranscript?.({
|
|
9545
|
-
type: "transcript",
|
|
9546
|
-
text,
|
|
9547
|
-
isFinal: true,
|
|
9548
|
-
words: this.extractWordsFromResults(results),
|
|
9549
|
-
data: message
|
|
9550
|
-
});
|
|
9551
|
-
}
|
|
9552
|
-
break;
|
|
9553
|
-
}
|
|
9554
|
-
case "EndOfUtterance": {
|
|
9555
|
-
if (utteranceResults && utteranceResults.length > 0) {
|
|
9556
|
-
const text = buildTextFromSpeechmaticsResults(utteranceResults);
|
|
9557
|
-
const words = this.extractWordsFromResults(utteranceResults);
|
|
9558
|
-
const utterances = buildUtterancesFromWords(words);
|
|
9559
|
-
if (utterances.length > 0) {
|
|
9560
|
-
for (const utt of utterances) {
|
|
9561
|
-
callbacks?.onUtterance?.(utt);
|
|
9562
|
-
}
|
|
9563
|
-
} else if (text) {
|
|
9564
|
-
callbacks?.onUtterance?.({
|
|
9565
|
-
text,
|
|
9566
|
-
start: words.length > 0 ? words[0].start : 0,
|
|
9567
|
-
end: words.length > 0 ? words[words.length - 1].end : 0,
|
|
9568
|
-
words
|
|
9569
|
-
});
|
|
9570
|
-
}
|
|
9571
|
-
utteranceResults.length = 0;
|
|
9572
|
-
}
|
|
9573
|
-
break;
|
|
9574
|
-
}
|
|
9575
|
-
case "AudioAdded": {
|
|
9576
|
-
break;
|
|
9577
|
-
}
|
|
9578
|
-
case "EndOfTranscript": {
|
|
9579
|
-
break;
|
|
9580
|
-
}
|
|
9581
|
-
case "Info":
|
|
9582
|
-
case "Warning": {
|
|
9583
|
-
callbacks?.onMetadata?.(message);
|
|
9584
|
-
break;
|
|
9585
|
-
}
|
|
9586
|
-
case "Error": {
|
|
9587
|
-
const errMsg = message;
|
|
9588
|
-
callbacks?.onError?.({
|
|
9589
|
-
code: errMsg.type || "SPEECHMATICS_ERROR",
|
|
9590
|
-
message: errMsg.reason || "Unknown error",
|
|
9591
|
-
details: message
|
|
9592
|
-
});
|
|
9593
|
-
break;
|
|
9594
|
-
}
|
|
9595
|
-
default: {
|
|
9596
|
-
callbacks?.onMetadata?.(message);
|
|
9597
|
-
break;
|
|
9598
|
-
}
|
|
9599
|
-
}
|
|
9600
|
-
}
|
|
9601
|
-
/**
|
|
9602
|
-
* Extract unified Word[] from Speechmatics recognition results
|
|
9512
|
+
* Convert Speechmatics RecognitionResult[] to unified Word[]
|
|
9603
9513
|
*/
|
|
9604
|
-
|
|
9605
|
-
return results.filter((r) => r.type === "word"
|
|
9606
|
-
word:
|
|
9607
|
-
start:
|
|
9608
|
-
end:
|
|
9609
|
-
confidence:
|
|
9610
|
-
speaker:
|
|
9514
|
+
resultsToWords(results) {
|
|
9515
|
+
return results.filter((r) => r.type === "word").map((r) => ({
|
|
9516
|
+
word: r.alternatives?.[0]?.content || "",
|
|
9517
|
+
start: r.start_time,
|
|
9518
|
+
end: r.end_time,
|
|
9519
|
+
confidence: r.alternatives?.[0]?.confidence,
|
|
9520
|
+
speaker: r.alternatives?.[0]?.speaker
|
|
9611
9521
|
}));
|
|
9612
9522
|
}
|
|
9613
9523
|
/**
|
|
@@ -9678,9 +9588,6 @@ function createSpeechmaticsAdapter(config) {
|
|
|
9678
9588
|
return adapter;
|
|
9679
9589
|
}
|
|
9680
9590
|
|
|
9681
|
-
// src/adapters/soniox-adapter.ts
|
|
9682
|
-
var import_axios9 = __toESM(require("axios"));
|
|
9683
|
-
|
|
9684
9591
|
// src/generated/soniox/schema/transcriptionStatus.ts
|
|
9685
9592
|
var TranscriptionStatus = {
|
|
9686
9593
|
queued: "queued",
|
|
@@ -9689,6 +9596,57 @@ var TranscriptionStatus = {
|
|
|
9689
9596
|
error: "error"
|
|
9690
9597
|
};
|
|
9691
9598
|
|
|
9599
|
+
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9600
|
+
var import_axios9 = __toESM(require("axios"));
|
|
9601
|
+
|
|
9602
|
+
// src/generated/soniox/schema/index.ts
|
|
9603
|
+
var schema_exports4 = {};
|
|
9604
|
+
__export(schema_exports4, {
|
|
9605
|
+
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
9606
|
+
TranscriptionMode: () => TranscriptionMode,
|
|
9607
|
+
TranscriptionStatus: () => TranscriptionStatus,
|
|
9608
|
+
TranslationConfigType: () => TranslationConfigType
|
|
9609
|
+
});
|
|
9610
|
+
|
|
9611
|
+
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
9612
|
+
var TemporaryApiKeyUsageType = {
|
|
9613
|
+
transcribe_websocket: "transcribe_websocket"
|
|
9614
|
+
};
|
|
9615
|
+
|
|
9616
|
+
// src/generated/soniox/schema/transcriptionMode.ts
|
|
9617
|
+
var TranscriptionMode = {
|
|
9618
|
+
real_time: "real_time",
|
|
9619
|
+
async: "async"
|
|
9620
|
+
};
|
|
9621
|
+
|
|
9622
|
+
// src/generated/soniox/schema/translationConfigType.ts
|
|
9623
|
+
var TranslationConfigType = {
|
|
9624
|
+
one_way: "one_way",
|
|
9625
|
+
two_way: "two_way"
|
|
9626
|
+
};
|
|
9627
|
+
|
|
9628
|
+
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9629
|
+
var uploadFile = (uploadFileBody2, options) => {
|
|
9630
|
+
const formData = new FormData();
|
|
9631
|
+
if (uploadFileBody2.client_reference_id !== void 0 && uploadFileBody2.client_reference_id !== null) {
|
|
9632
|
+
formData.append("client_reference_id", uploadFileBody2.client_reference_id);
|
|
9633
|
+
}
|
|
9634
|
+
formData.append("file", uploadFileBody2.file);
|
|
9635
|
+
return import_axios9.default.post("/v1/files", formData, options);
|
|
9636
|
+
};
|
|
9637
|
+
var createTranscription2 = (createTranscriptionPayload, options) => {
|
|
9638
|
+
return import_axios9.default.post("/v1/transcriptions", createTranscriptionPayload, options);
|
|
9639
|
+
};
|
|
9640
|
+
var getTranscription = (transcriptionId, options) => {
|
|
9641
|
+
return import_axios9.default.get(`/v1/transcriptions/${transcriptionId}`, options);
|
|
9642
|
+
};
|
|
9643
|
+
var getTranscriptionTranscript = (transcriptionId, options) => {
|
|
9644
|
+
return import_axios9.default.get(`/v1/transcriptions/${transcriptionId}/transcript`, options);
|
|
9645
|
+
};
|
|
9646
|
+
var getModels = (options) => {
|
|
9647
|
+
return import_axios9.default.get("/v1/models", options);
|
|
9648
|
+
};
|
|
9649
|
+
|
|
9692
9650
|
// src/adapters/soniox-adapter.ts
|
|
9693
9651
|
var SonioxAdapter = class extends BaseAdapter {
|
|
9694
9652
|
constructor() {
|
|
@@ -9743,11 +9701,17 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9743
9701
|
}
|
|
9744
9702
|
}
|
|
9745
9703
|
/**
|
|
9746
|
-
* Get the base URL for API requests
|
|
9704
|
+
* Get the base URL for API requests (no /v1 suffix — generated functions include /v1 in paths)
|
|
9747
9705
|
*/
|
|
9748
9706
|
get baseUrl() {
|
|
9749
9707
|
if (this.config?.baseUrl) return this.config.baseUrl;
|
|
9750
|
-
return `https://${this.getRegionalHost()}
|
|
9708
|
+
return `https://${this.getRegionalHost()}`;
|
|
9709
|
+
}
|
|
9710
|
+
/**
|
|
9711
|
+
* Build axios config with Soniox Bearer auth
|
|
9712
|
+
*/
|
|
9713
|
+
getAxiosConfig() {
|
|
9714
|
+
return super.getAxiosConfig("Authorization", (key) => `Bearer ${key}`);
|
|
9751
9715
|
}
|
|
9752
9716
|
initialize(config) {
|
|
9753
9717
|
super.initialize(config);
|
|
@@ -9757,15 +9721,6 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9757
9721
|
if (config.model) {
|
|
9758
9722
|
this.defaultModel = config.model;
|
|
9759
9723
|
}
|
|
9760
|
-
this.client = import_axios9.default.create({
|
|
9761
|
-
baseURL: this.baseUrl,
|
|
9762
|
-
timeout: config.timeout || 12e4,
|
|
9763
|
-
headers: {
|
|
9764
|
-
Authorization: `Bearer ${config.apiKey}`,
|
|
9765
|
-
"Content-Type": "application/json",
|
|
9766
|
-
...config.headers
|
|
9767
|
-
}
|
|
9768
|
-
});
|
|
9769
9724
|
}
|
|
9770
9725
|
/**
|
|
9771
9726
|
* Get current region
|
|
@@ -9795,23 +9750,12 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9795
9750
|
*/
|
|
9796
9751
|
setRegion(region) {
|
|
9797
9752
|
this.region = region;
|
|
9798
|
-
if (this.config?.apiKey) {
|
|
9799
|
-
this.client = import_axios9.default.create({
|
|
9800
|
-
baseURL: this.baseUrl,
|
|
9801
|
-
timeout: this.config.timeout || 12e4,
|
|
9802
|
-
headers: {
|
|
9803
|
-
Authorization: `Bearer ${this.config.apiKey}`,
|
|
9804
|
-
"Content-Type": "application/json",
|
|
9805
|
-
...this.config.headers
|
|
9806
|
-
}
|
|
9807
|
-
});
|
|
9808
|
-
}
|
|
9809
9753
|
}
|
|
9810
9754
|
/**
|
|
9811
9755
|
* Submit audio for transcription
|
|
9812
9756
|
*
|
|
9813
|
-
*
|
|
9814
|
-
*
|
|
9757
|
+
* Uses the async v1 API: createTranscription returns status `queued`,
|
|
9758
|
+
* then polls until completed (or returns immediately if webhook is set).
|
|
9815
9759
|
*
|
|
9816
9760
|
* @param audio - Audio input (URL or file)
|
|
9817
9761
|
* @param options - Transcription options
|
|
@@ -9820,21 +9764,44 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9820
9764
|
async transcribe(audio, options) {
|
|
9821
9765
|
this.validateConfig();
|
|
9822
9766
|
try {
|
|
9823
|
-
const
|
|
9824
|
-
|
|
9825
|
-
};
|
|
9826
|
-
if (audio.type === "url") {
|
|
9827
|
-
requestBody.audio_url = audio.url;
|
|
9828
|
-
} else if (audio.type === "file") {
|
|
9829
|
-
const formData = new FormData();
|
|
9767
|
+
const sonioxOpts = options?.soniox;
|
|
9768
|
+
if (audio.type === "file") {
|
|
9830
9769
|
const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
|
|
9831
|
-
|
|
9832
|
-
const
|
|
9833
|
-
|
|
9834
|
-
|
|
9835
|
-
|
|
9836
|
-
|
|
9837
|
-
|
|
9770
|
+
const uploadBody = { file: audioBlob };
|
|
9771
|
+
const fileResp = await uploadFile(uploadBody, this.getAxiosConfig());
|
|
9772
|
+
const payload = {
|
|
9773
|
+
...sonioxOpts,
|
|
9774
|
+
model: options?.model || this.defaultModel,
|
|
9775
|
+
file_id: fileResp.data.id,
|
|
9776
|
+
language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
|
|
9777
|
+
enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
|
|
9778
|
+
enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
|
|
9779
|
+
context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
|
|
9780
|
+
webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
|
|
9781
|
+
};
|
|
9782
|
+
const createResp = await createTranscription2(payload, this.getAxiosConfig());
|
|
9783
|
+
const meta = createResp.data;
|
|
9784
|
+
if (options?.webhookUrl || sonioxOpts?.webhook_url) {
|
|
9785
|
+
return this.normalizeTranscription(meta);
|
|
9786
|
+
}
|
|
9787
|
+
return this.pollForCompletion(meta.id);
|
|
9788
|
+
} else if (audio.type === "url") {
|
|
9789
|
+
const payload = {
|
|
9790
|
+
...sonioxOpts,
|
|
9791
|
+
model: options?.model || this.defaultModel,
|
|
9792
|
+
audio_url: audio.url,
|
|
9793
|
+
language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
|
|
9794
|
+
enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
|
|
9795
|
+
enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
|
|
9796
|
+
context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
|
|
9797
|
+
webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
|
|
9798
|
+
};
|
|
9799
|
+
const createResp = await createTranscription2(payload, this.getAxiosConfig());
|
|
9800
|
+
const meta = createResp.data;
|
|
9801
|
+
if (options?.webhookUrl || sonioxOpts?.webhook_url) {
|
|
9802
|
+
return this.normalizeTranscription(meta);
|
|
9803
|
+
}
|
|
9804
|
+
return this.pollForCompletion(meta.id);
|
|
9838
9805
|
} else {
|
|
9839
9806
|
return {
|
|
9840
9807
|
success: false,
|
|
@@ -9845,38 +9812,6 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9845
9812
|
}
|
|
9846
9813
|
};
|
|
9847
9814
|
}
|
|
9848
|
-
if (options?.language) {
|
|
9849
|
-
requestBody.language_hints = [options.language];
|
|
9850
|
-
}
|
|
9851
|
-
if (options?.diarization) {
|
|
9852
|
-
requestBody.enable_speaker_diarization = true;
|
|
9853
|
-
}
|
|
9854
|
-
if (options?.languageDetection) {
|
|
9855
|
-
requestBody.enable_language_identification = true;
|
|
9856
|
-
}
|
|
9857
|
-
if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9858
|
-
requestBody.context = {
|
|
9859
|
-
terms: options.customVocabulary
|
|
9860
|
-
};
|
|
9861
|
-
}
|
|
9862
|
-
if (options?.webhookUrl) {
|
|
9863
|
-
requestBody.webhook_url = options.webhookUrl;
|
|
9864
|
-
}
|
|
9865
|
-
const response = await this.client.post("/transcriptions", requestBody);
|
|
9866
|
-
const transcriptionId = response.data.id;
|
|
9867
|
-
if (options?.webhookUrl) {
|
|
9868
|
-
return {
|
|
9869
|
-
success: true,
|
|
9870
|
-
provider: this.name,
|
|
9871
|
-
data: {
|
|
9872
|
-
id: transcriptionId,
|
|
9873
|
-
text: "",
|
|
9874
|
-
status: "queued"
|
|
9875
|
-
},
|
|
9876
|
-
raw: response.data
|
|
9877
|
-
};
|
|
9878
|
-
}
|
|
9879
|
-
return await this.pollForCompletion(transcriptionId);
|
|
9880
9815
|
} catch (error) {
|
|
9881
9816
|
return this.createErrorResponse(error);
|
|
9882
9817
|
}
|
|
@@ -9884,9 +9819,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9884
9819
|
/**
|
|
9885
9820
|
* Get transcription result by ID
|
|
9886
9821
|
*
|
|
9887
|
-
*
|
|
9888
|
-
*
|
|
9889
|
-
* when completed.
|
|
9822
|
+
* Fetches transcription metadata and, if completed, the transcript text/tokens.
|
|
9823
|
+
* Used by pollForCompletion() for async polling.
|
|
9890
9824
|
*
|
|
9891
9825
|
* @param transcriptId - Transcript ID
|
|
9892
9826
|
* @returns Transcription response
|
|
@@ -9894,39 +9828,20 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9894
9828
|
async getTranscript(transcriptId) {
|
|
9895
9829
|
this.validateConfig();
|
|
9896
9830
|
try {
|
|
9897
|
-
const
|
|
9898
|
-
const
|
|
9899
|
-
if (
|
|
9900
|
-
|
|
9901
|
-
|
|
9902
|
-
|
|
9903
|
-
|
|
9904
|
-
|
|
9905
|
-
|
|
9906
|
-
|
|
9907
|
-
|
|
9908
|
-
|
|
9909
|
-
if (job.status !== "completed") {
|
|
9910
|
-
return {
|
|
9911
|
-
success: true,
|
|
9912
|
-
provider: this.name,
|
|
9913
|
-
data: {
|
|
9914
|
-
id: job.id,
|
|
9915
|
-
text: "",
|
|
9916
|
-
status: job.status
|
|
9917
|
-
},
|
|
9918
|
-
raw: job
|
|
9919
|
-
};
|
|
9831
|
+
const metaResp = await getTranscription(transcriptId, this.getAxiosConfig());
|
|
9832
|
+
const meta = metaResp.data;
|
|
9833
|
+
if (meta.status === TranscriptionStatus.completed) {
|
|
9834
|
+
try {
|
|
9835
|
+
const transcriptResp = await getTranscriptionTranscript(
|
|
9836
|
+
transcriptId,
|
|
9837
|
+
this.getAxiosConfig()
|
|
9838
|
+
);
|
|
9839
|
+
return this.normalizeTranscription(meta, transcriptResp.data);
|
|
9840
|
+
} catch (transcriptError) {
|
|
9841
|
+
return this.createErrorResponse(transcriptError);
|
|
9842
|
+
}
|
|
9920
9843
|
}
|
|
9921
|
-
|
|
9922
|
-
`/transcriptions/${transcriptId}/transcript`
|
|
9923
|
-
);
|
|
9924
|
-
return this.normalizeResponse({
|
|
9925
|
-
...transcriptResponse.data,
|
|
9926
|
-
// Carry over job metadata
|
|
9927
|
-
id: job.id,
|
|
9928
|
-
audio_duration_ms: job.audio_duration_ms
|
|
9929
|
-
});
|
|
9844
|
+
return this.normalizeTranscription(meta);
|
|
9930
9845
|
} catch (error) {
|
|
9931
9846
|
return this.createErrorResponse(error);
|
|
9932
9847
|
}
|
|
@@ -9946,51 +9861,50 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9946
9861
|
const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
|
|
9947
9862
|
const createdAt = /* @__PURE__ */ new Date();
|
|
9948
9863
|
const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
|
|
9949
|
-
const wsUrl = `${wsBase}/transcribe-websocket
|
|
9950
|
-
|
|
9951
|
-
const
|
|
9952
|
-
|
|
9953
|
-
|
|
9954
|
-
model: modelId
|
|
9955
|
-
};
|
|
9956
|
-
if (sonioxOpts?.audioFormat) {
|
|
9957
|
-
initMessage.audio_format = sonioxOpts.audioFormat;
|
|
9958
|
-
} else if (options?.encoding) {
|
|
9864
|
+
const wsUrl = new URL(`${wsBase}/transcribe-websocket`);
|
|
9865
|
+
wsUrl.searchParams.set("api_key", this.config.apiKey);
|
|
9866
|
+
const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-preview";
|
|
9867
|
+
wsUrl.searchParams.set("model", modelId);
|
|
9868
|
+
if (options?.encoding) {
|
|
9959
9869
|
const encodingMap = {
|
|
9960
9870
|
linear16: "pcm_s16le",
|
|
9961
9871
|
pcm: "pcm_s16le",
|
|
9962
9872
|
mulaw: "mulaw",
|
|
9963
9873
|
alaw: "alaw"
|
|
9964
9874
|
};
|
|
9965
|
-
|
|
9875
|
+
wsUrl.searchParams.set("audio_format", encodingMap[options.encoding] || options.encoding);
|
|
9966
9876
|
}
|
|
9967
|
-
if (
|
|
9968
|
-
|
|
9877
|
+
if (options?.sampleRate) {
|
|
9878
|
+
wsUrl.searchParams.set("sample_rate", options.sampleRate.toString());
|
|
9969
9879
|
}
|
|
9970
|
-
if (
|
|
9971
|
-
|
|
9880
|
+
if (options?.channels) {
|
|
9881
|
+
wsUrl.searchParams.set("num_channels", options.channels.toString());
|
|
9972
9882
|
}
|
|
9883
|
+
const sonioxOpts = options?.sonioxStreaming;
|
|
9973
9884
|
if (sonioxOpts) {
|
|
9974
9885
|
if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
|
|
9975
|
-
|
|
9886
|
+
wsUrl.searchParams.set("language_hints", JSON.stringify(sonioxOpts.languageHints));
|
|
9976
9887
|
}
|
|
9977
9888
|
if (sonioxOpts.enableLanguageIdentification) {
|
|
9978
|
-
|
|
9889
|
+
wsUrl.searchParams.set("enable_language_identification", "true");
|
|
9979
9890
|
}
|
|
9980
9891
|
if (sonioxOpts.enableEndpointDetection) {
|
|
9981
|
-
|
|
9892
|
+
wsUrl.searchParams.set("enable_endpoint_detection", "true");
|
|
9982
9893
|
}
|
|
9983
9894
|
if (sonioxOpts.enableSpeakerDiarization) {
|
|
9984
|
-
|
|
9895
|
+
wsUrl.searchParams.set("enable_speaker_diarization", "true");
|
|
9985
9896
|
}
|
|
9986
9897
|
if (sonioxOpts.context) {
|
|
9987
|
-
|
|
9898
|
+
wsUrl.searchParams.set(
|
|
9899
|
+
"context",
|
|
9900
|
+
typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
|
|
9901
|
+
);
|
|
9988
9902
|
}
|
|
9989
9903
|
if (sonioxOpts.translation) {
|
|
9990
|
-
|
|
9904
|
+
wsUrl.searchParams.set("translation", JSON.stringify(sonioxOpts.translation));
|
|
9991
9905
|
}
|
|
9992
9906
|
if (sonioxOpts.clientReferenceId) {
|
|
9993
|
-
|
|
9907
|
+
wsUrl.searchParams.set("client_reference_id", sonioxOpts.clientReferenceId);
|
|
9994
9908
|
}
|
|
9995
9909
|
}
|
|
9996
9910
|
if (!sonioxOpts?.languageHints && options?.language) {
|
|
@@ -9999,33 +9913,24 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9999
9913
|
`[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
|
|
10000
9914
|
);
|
|
10001
9915
|
}
|
|
10002
|
-
|
|
9916
|
+
wsUrl.searchParams.set("language_hints", JSON.stringify([options.language]));
|
|
10003
9917
|
}
|
|
10004
9918
|
if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
|
|
10005
|
-
|
|
9919
|
+
wsUrl.searchParams.set("enable_speaker_diarization", "true");
|
|
10006
9920
|
}
|
|
10007
9921
|
if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
|
|
10008
|
-
|
|
9922
|
+
wsUrl.searchParams.set("enable_language_identification", "true");
|
|
9923
|
+
}
|
|
9924
|
+
if (options?.interimResults !== false) {
|
|
10009
9925
|
}
|
|
10010
9926
|
let status = "connecting";
|
|
10011
9927
|
let openedAt = null;
|
|
10012
9928
|
let receivedData = false;
|
|
10013
9929
|
const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : require("ws");
|
|
10014
|
-
const ws = new WebSocketImpl(wsUrl);
|
|
9930
|
+
const ws = new WebSocketImpl(wsUrl.toString());
|
|
10015
9931
|
ws.onopen = () => {
|
|
10016
|
-
openedAt = Date.now();
|
|
10017
|
-
const initPayload = JSON.stringify(initMessage);
|
|
10018
|
-
if (callbacks?.onRawMessage) {
|
|
10019
|
-
callbacks.onRawMessage({
|
|
10020
|
-
provider: this.name,
|
|
10021
|
-
direction: "outgoing",
|
|
10022
|
-
timestamp: Date.now(),
|
|
10023
|
-
payload: initPayload,
|
|
10024
|
-
messageType: "init"
|
|
10025
|
-
});
|
|
10026
|
-
}
|
|
10027
|
-
ws.send(initPayload);
|
|
10028
9932
|
status = "open";
|
|
9933
|
+
openedAt = Date.now();
|
|
10029
9934
|
callbacks?.onOpen?.();
|
|
10030
9935
|
};
|
|
10031
9936
|
ws.onmessage = (event) => {
|
|
@@ -10034,7 +9939,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10034
9939
|
let messageType;
|
|
10035
9940
|
try {
|
|
10036
9941
|
const data = JSON.parse(rawPayload);
|
|
10037
|
-
|
|
9942
|
+
const errorMessage = data.error_message;
|
|
9943
|
+
if (errorMessage) {
|
|
10038
9944
|
messageType = "error";
|
|
10039
9945
|
} else if (data.finished) {
|
|
10040
9946
|
messageType = "finished";
|
|
@@ -10050,10 +9956,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10050
9956
|
messageType
|
|
10051
9957
|
});
|
|
10052
9958
|
}
|
|
10053
|
-
if (
|
|
9959
|
+
if (errorMessage) {
|
|
10054
9960
|
callbacks?.onError?.({
|
|
10055
9961
|
code: data.error_code?.toString() || "STREAM_ERROR",
|
|
10056
|
-
message:
|
|
9962
|
+
message: errorMessage
|
|
10057
9963
|
});
|
|
10058
9964
|
return;
|
|
10059
9965
|
}
|
|
@@ -10067,7 +9973,7 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10067
9973
|
start: token.start_ms ? token.start_ms / 1e3 : 0,
|
|
10068
9974
|
end: token.end_ms ? token.end_ms / 1e3 : 0,
|
|
10069
9975
|
confidence: token.confidence,
|
|
10070
|
-
speaker: token.speaker
|
|
9976
|
+
speaker: token.speaker ?? void 0
|
|
10071
9977
|
}));
|
|
10072
9978
|
const text = data.text || data.tokens.map((t) => t.text).join("");
|
|
10073
9979
|
const isFinal = data.tokens.every((t) => t.is_final);
|
|
@@ -10076,8 +9982,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10076
9982
|
text,
|
|
10077
9983
|
isFinal,
|
|
10078
9984
|
words,
|
|
10079
|
-
speaker: data.tokens[0]?.speaker,
|
|
10080
|
-
language: data.tokens[0]?.language,
|
|
9985
|
+
speaker: data.tokens[0]?.speaker ?? void 0,
|
|
9986
|
+
language: data.tokens[0]?.language ?? void 0,
|
|
10081
9987
|
confidence: data.tokens[0]?.confidence
|
|
10082
9988
|
};
|
|
10083
9989
|
callbacks?.onTranscript?.(event2);
|
|
@@ -10104,10 +10010,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10104
10010
|
ws.onclose = (event) => {
|
|
10105
10011
|
status = "closed";
|
|
10106
10012
|
const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
|
|
10107
|
-
const
|
|
10108
|
-
if (
|
|
10013
|
+
const isImmediateClose = timeSinceOpen !== null && timeSinceOpen < 1e3 && !receivedData;
|
|
10014
|
+
if (isImmediateClose && event.code === 1e3) {
|
|
10109
10015
|
const errorMessage = [
|
|
10110
|
-
"Soniox closed connection
|
|
10016
|
+
"Soniox closed connection immediately after opening.",
|
|
10111
10017
|
`Current config: region=${this.region}, model=${modelId}`,
|
|
10112
10018
|
"Likely causes:",
|
|
10113
10019
|
" - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
|
|
@@ -10193,7 +10099,7 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10193
10099
|
async getModels() {
|
|
10194
10100
|
this.validateConfig();
|
|
10195
10101
|
try {
|
|
10196
|
-
const response = await this.
|
|
10102
|
+
const response = await getModels(this.getAxiosConfig());
|
|
10197
10103
|
return response.data.models || [];
|
|
10198
10104
|
} catch (error) {
|
|
10199
10105
|
console.error("Failed to fetch Soniox models:", error);
|
|
@@ -10225,11 +10131,44 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10225
10131
|
return buildUtterancesFromWords(words);
|
|
10226
10132
|
}
|
|
10227
10133
|
/**
|
|
10228
|
-
* Normalize
|
|
10134
|
+
* Normalize v1 API response to unified format
|
|
10135
|
+
*
|
|
10136
|
+
* @param meta - Transcription metadata from getTranscription/createTranscription
|
|
10137
|
+
* @param transcript - Transcript data (text/tokens), only present when status is completed
|
|
10229
10138
|
*/
|
|
10230
|
-
|
|
10231
|
-
|
|
10232
|
-
|
|
10139
|
+
normalizeTranscription(meta, transcript) {
|
|
10140
|
+
if (meta.status === TranscriptionStatus.error) {
|
|
10141
|
+
return {
|
|
10142
|
+
success: false,
|
|
10143
|
+
provider: this.name,
|
|
10144
|
+
data: {
|
|
10145
|
+
id: meta.id,
|
|
10146
|
+
text: "",
|
|
10147
|
+
status: "error"
|
|
10148
|
+
},
|
|
10149
|
+
error: {
|
|
10150
|
+
code: meta.error_type || "TRANSCRIPTION_ERROR",
|
|
10151
|
+
message: meta.error_message || "Transcription failed"
|
|
10152
|
+
},
|
|
10153
|
+
raw: { meta, transcript }
|
|
10154
|
+
};
|
|
10155
|
+
}
|
|
10156
|
+
if (!transcript) {
|
|
10157
|
+
return {
|
|
10158
|
+
success: true,
|
|
10159
|
+
provider: this.name,
|
|
10160
|
+
data: {
|
|
10161
|
+
id: meta.id,
|
|
10162
|
+
text: "",
|
|
10163
|
+
status: meta.status,
|
|
10164
|
+
duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0
|
|
10165
|
+
},
|
|
10166
|
+
raw: { meta }
|
|
10167
|
+
};
|
|
10168
|
+
}
|
|
10169
|
+
const tokens = transcript.tokens || [];
|
|
10170
|
+
const text = transcript.text || tokens.map((t) => t.text).join("");
|
|
10171
|
+
const words = tokens.filter((t) => t.start_ms !== void 0 && t.end_ms !== void 0).map((token) => ({
|
|
10233
10172
|
word: token.text,
|
|
10234
10173
|
start: token.start_ms / 1e3,
|
|
10235
10174
|
end: token.end_ms / 1e3,
|
|
@@ -10237,33 +10176,32 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10237
10176
|
speaker: token.speaker ?? void 0
|
|
10238
10177
|
}));
|
|
10239
10178
|
const speakerSet = /* @__PURE__ */ new Set();
|
|
10240
|
-
|
|
10241
|
-
if (
|
|
10242
|
-
}
|
|
10179
|
+
tokens.forEach((t) => {
|
|
10180
|
+
if (t.speaker) speakerSet.add(String(t.speaker));
|
|
10181
|
+
});
|
|
10243
10182
|
const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
|
|
10244
10183
|
id,
|
|
10245
10184
|
label: `Speaker ${id}`
|
|
10246
10185
|
})) : void 0;
|
|
10247
|
-
const utterances =
|
|
10186
|
+
const utterances = this.buildUtterancesFromTokens(tokens);
|
|
10248
10187
|
const language = tokens.find((t) => t.language)?.language ?? void 0;
|
|
10249
10188
|
return {
|
|
10250
10189
|
success: true,
|
|
10251
10190
|
provider: this.name,
|
|
10252
10191
|
data: {
|
|
10253
|
-
id:
|
|
10192
|
+
id: meta.id,
|
|
10254
10193
|
text,
|
|
10255
10194
|
status: TranscriptionStatus.completed,
|
|
10256
10195
|
language,
|
|
10257
|
-
duration:
|
|
10196
|
+
duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0,
|
|
10258
10197
|
speakers,
|
|
10259
10198
|
words: words.length > 0 ? words : void 0,
|
|
10260
10199
|
utterances: utterances.length > 0 ? utterances : void 0
|
|
10261
10200
|
},
|
|
10262
10201
|
tracking: {
|
|
10263
|
-
requestId:
|
|
10264
|
-
processingTimeMs: response.total_audio_proc_ms
|
|
10202
|
+
requestId: meta.id
|
|
10265
10203
|
},
|
|
10266
|
-
raw:
|
|
10204
|
+
raw: { meta, transcript }
|
|
10267
10205
|
};
|
|
10268
10206
|
}
|
|
10269
10207
|
};
|
|
@@ -10360,7 +10298,15 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10360
10298
|
/**
|
|
10361
10299
|
* Submit audio for transcription
|
|
10362
10300
|
*
|
|
10363
|
-
* ElevenLabs batch is synchronous
|
|
10301
|
+
* ElevenLabs batch is normally synchronous — the API returns results directly.
|
|
10302
|
+
*
|
|
10303
|
+
* **Webhook mode:** When `webhookUrl` is set (or `elevenlabs.webhook` is true),
|
|
10304
|
+
* the request is processed asynchronously. ElevenLabs returns a 202 with a
|
|
10305
|
+
* `request_id` and delivers results to a webhook configured in the ElevenLabs
|
|
10306
|
+
* dashboard. The unified `webhookUrl` acts as an intent flag to enable async
|
|
10307
|
+
* mode — the actual delivery destination must be pre-configured in your
|
|
10308
|
+
* ElevenLabs dashboard. Use `elevenlabs.webhook_id` to target a specific
|
|
10309
|
+
* webhook endpoint.
|
|
10364
10310
|
*/
|
|
10365
10311
|
async transcribe(audio, options) {
|
|
10366
10312
|
this.validateConfig();
|
|
@@ -10383,6 +10329,11 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10383
10329
|
}
|
|
10384
10330
|
};
|
|
10385
10331
|
}
|
|
10332
|
+
const elevenlabsOpts = options?.elevenlabs;
|
|
10333
|
+
const useWebhook = options?.webhookUrl || elevenlabsOpts?.webhook;
|
|
10334
|
+
if (useWebhook) {
|
|
10335
|
+
formData.append("webhook", "true");
|
|
10336
|
+
}
|
|
10386
10337
|
if (options?.language) {
|
|
10387
10338
|
formData.append("language_code", options.language);
|
|
10388
10339
|
}
|
|
@@ -10401,7 +10352,6 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10401
10352
|
if (options?.entityDetection) {
|
|
10402
10353
|
formData.append("entity_detection", "all");
|
|
10403
10354
|
}
|
|
10404
|
-
const elevenlabsOpts = options?.elevenlabs;
|
|
10405
10355
|
if (elevenlabsOpts) {
|
|
10406
10356
|
for (const [key, value] of Object.entries(elevenlabsOpts)) {
|
|
10407
10357
|
if (value === void 0 || value === null) continue;
|
|
@@ -10419,26 +10369,24 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10419
10369
|
}
|
|
10420
10370
|
}
|
|
10421
10371
|
}
|
|
10422
|
-
if (options?.webhookUrl) {
|
|
10423
|
-
if (!formData.has("webhook")) {
|
|
10424
|
-
formData.append("webhook", "true");
|
|
10425
|
-
}
|
|
10426
|
-
}
|
|
10427
10372
|
const response = await this.client.post("/v1/speech-to-text", formData, {
|
|
10428
10373
|
headers: {
|
|
10429
10374
|
"Content-Type": "multipart/form-data"
|
|
10430
10375
|
}
|
|
10431
10376
|
});
|
|
10432
|
-
if (
|
|
10433
|
-
const
|
|
10377
|
+
if (useWebhook) {
|
|
10378
|
+
const ack = response.data;
|
|
10434
10379
|
return {
|
|
10435
10380
|
success: true,
|
|
10436
10381
|
provider: this.name,
|
|
10437
10382
|
data: {
|
|
10438
|
-
id:
|
|
10383
|
+
id: ack.request_id || ack.transcription_id || `elevenlabs_${Date.now()}`,
|
|
10439
10384
|
text: "",
|
|
10440
10385
|
status: "queued"
|
|
10441
10386
|
},
|
|
10387
|
+
tracking: {
|
|
10388
|
+
requestId: ack.request_id
|
|
10389
|
+
},
|
|
10442
10390
|
raw: response.data
|
|
10443
10391
|
};
|
|
10444
10392
|
}
|
|
@@ -10534,20 +10482,9 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10534
10482
|
ws.onmessage = (event) => {
|
|
10535
10483
|
receivedData = true;
|
|
10536
10484
|
const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
|
|
10537
|
-
let messageType;
|
|
10538
10485
|
try {
|
|
10539
10486
|
const data = JSON.parse(rawPayload);
|
|
10540
|
-
|
|
10541
|
-
messageType = "error";
|
|
10542
|
-
} else if (data.message_type === "session_started") {
|
|
10543
|
-
messageType = "session_started";
|
|
10544
|
-
} else if (data.message_type === "partial_transcript") {
|
|
10545
|
-
messageType = "partial_transcript";
|
|
10546
|
-
} else if (data.message_type === "committed_transcript") {
|
|
10547
|
-
messageType = "committed_transcript";
|
|
10548
|
-
} else if (data.message_type === "committed_transcript_with_timestamps") {
|
|
10549
|
-
messageType = "committed_transcript_with_timestamps";
|
|
10550
|
-
}
|
|
10487
|
+
const messageType = "error" in data ? "error" : data.message_type;
|
|
10551
10488
|
if (callbacks?.onRawMessage) {
|
|
10552
10489
|
callbacks.onRawMessage({
|
|
10553
10490
|
provider: this.name,
|
|
@@ -10557,50 +10494,62 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10557
10494
|
messageType
|
|
10558
10495
|
});
|
|
10559
10496
|
}
|
|
10560
|
-
if (data
|
|
10497
|
+
if ("error" in data) {
|
|
10561
10498
|
callbacks?.onError?.({
|
|
10562
|
-
code: data.
|
|
10499
|
+
code: data.message_type || "STREAM_ERROR",
|
|
10563
10500
|
message: data.error
|
|
10564
10501
|
});
|
|
10565
10502
|
return;
|
|
10566
10503
|
}
|
|
10567
|
-
|
|
10568
|
-
|
|
10569
|
-
|
|
10570
|
-
|
|
10571
|
-
|
|
10572
|
-
|
|
10573
|
-
|
|
10574
|
-
|
|
10575
|
-
|
|
10576
|
-
|
|
10577
|
-
|
|
10578
|
-
|
|
10579
|
-
|
|
10580
|
-
|
|
10581
|
-
|
|
10582
|
-
|
|
10583
|
-
|
|
10584
|
-
|
|
10585
|
-
|
|
10586
|
-
|
|
10587
|
-
|
|
10588
|
-
|
|
10589
|
-
|
|
10590
|
-
|
|
10591
|
-
|
|
10592
|
-
|
|
10593
|
-
|
|
10594
|
-
|
|
10595
|
-
|
|
10596
|
-
|
|
10597
|
-
|
|
10598
|
-
|
|
10599
|
-
|
|
10600
|
-
|
|
10601
|
-
|
|
10602
|
-
|
|
10504
|
+
switch (data.message_type) {
|
|
10505
|
+
case "session_started":
|
|
10506
|
+
break;
|
|
10507
|
+
case "partial_transcript": {
|
|
10508
|
+
const streamEvent = {
|
|
10509
|
+
type: "transcript",
|
|
10510
|
+
text: data.text || "",
|
|
10511
|
+
isFinal: false,
|
|
10512
|
+
confidence: void 0
|
|
10513
|
+
};
|
|
10514
|
+
callbacks?.onTranscript?.(streamEvent);
|
|
10515
|
+
break;
|
|
10516
|
+
}
|
|
10517
|
+
case "committed_transcript": {
|
|
10518
|
+
const streamEvent = {
|
|
10519
|
+
type: "transcript",
|
|
10520
|
+
text: data.text || "",
|
|
10521
|
+
isFinal: true,
|
|
10522
|
+
confidence: void 0
|
|
10523
|
+
};
|
|
10524
|
+
callbacks?.onTranscript?.(streamEvent);
|
|
10525
|
+
break;
|
|
10526
|
+
}
|
|
10527
|
+
case "committed_transcript_with_timestamps": {
|
|
10528
|
+
const tsData = data;
|
|
10529
|
+
const words = tsData.words ? tsData.words.map((w) => ({
|
|
10530
|
+
word: w.text || "",
|
|
10531
|
+
start: w.start || 0,
|
|
10532
|
+
end: w.end || 0,
|
|
10533
|
+
confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
|
|
10534
|
+
speaker: w.speaker_id
|
|
10535
|
+
})) : [];
|
|
10536
|
+
const streamEvent = {
|
|
10537
|
+
type: "transcript",
|
|
10538
|
+
text: tsData.text || "",
|
|
10539
|
+
isFinal: true,
|
|
10540
|
+
words: words.length > 0 ? words : void 0,
|
|
10541
|
+
speaker: words[0]?.speaker,
|
|
10542
|
+
language: tsData.language_code,
|
|
10543
|
+
confidence: void 0
|
|
10544
|
+
};
|
|
10545
|
+
callbacks?.onTranscript?.(streamEvent);
|
|
10546
|
+
if (options?.diarization && words.length > 0) {
|
|
10547
|
+
const utterances = buildUtterancesFromWords(words);
|
|
10548
|
+
for (const utterance of utterances) {
|
|
10549
|
+
callbacks?.onUtterance?.(utterance);
|
|
10550
|
+
}
|
|
10603
10551
|
}
|
|
10552
|
+
break;
|
|
10604
10553
|
}
|
|
10605
10554
|
}
|
|
10606
10555
|
} catch (error) {
|
|
@@ -10755,7 +10704,7 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10755
10704
|
}
|
|
10756
10705
|
}
|
|
10757
10706
|
}
|
|
10758
|
-
const transcriptionId =
|
|
10707
|
+
const transcriptionId = response.transcription_id || chunks[0]?.transcription_id || `elevenlabs_${Date.now()}`;
|
|
10759
10708
|
return {
|
|
10760
10709
|
success: true,
|
|
10761
10710
|
provider: this.name,
|
|
@@ -36673,12 +36622,10 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
|
|
|
36673
36622
|
var streaming_types_zod_exports = {};
|
|
36674
36623
|
__export(streaming_types_zod_exports, {
|
|
36675
36624
|
sonioxAudioFormatSchema: () => sonioxAudioFormatSchema,
|
|
36676
|
-
sonioxAutoDetectedAudioFormatSchema: () => sonioxAutoDetectedAudioFormatSchema,
|
|
36677
36625
|
sonioxContextGeneralItemSchema: () => sonioxContextGeneralItemSchema,
|
|
36678
36626
|
sonioxContextSchema: () => sonioxContextSchema,
|
|
36679
36627
|
sonioxErrorStatusSchema: () => sonioxErrorStatusSchema,
|
|
36680
36628
|
sonioxOneWayTranslationSchema: () => sonioxOneWayTranslationSchema,
|
|
36681
|
-
sonioxPcmAudioEncodingSchema: () => sonioxPcmAudioEncodingSchema,
|
|
36682
36629
|
sonioxRealtimeModelSchema: () => sonioxRealtimeModelSchema,
|
|
36683
36630
|
sonioxRecorderStateSchema: () => sonioxRecorderStateSchema,
|
|
36684
36631
|
sonioxStreamingResponseSchema: () => sonioxStreamingResponseSchema,
|
|
@@ -36692,7 +36639,7 @@ __export(streaming_types_zod_exports, {
|
|
|
36692
36639
|
streamingUpdateConfigParams: () => streamingUpdateConfigParams3
|
|
36693
36640
|
});
|
|
36694
36641
|
var import_zod11 = require("zod");
|
|
36695
|
-
var
|
|
36642
|
+
var sonioxAudioFormatSchema = import_zod11.z.enum([
|
|
36696
36643
|
"auto",
|
|
36697
36644
|
"aac",
|
|
36698
36645
|
"aiff",
|
|
@@ -36702,10 +36649,7 @@ var sonioxAutoDetectedAudioFormatSchema = import_zod11.z.enum([
|
|
|
36702
36649
|
"mp3",
|
|
36703
36650
|
"ogg",
|
|
36704
36651
|
"wav",
|
|
36705
|
-
"webm"
|
|
36706
|
-
]);
|
|
36707
|
-
var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
|
|
36708
|
-
// Signed PCM
|
|
36652
|
+
"webm",
|
|
36709
36653
|
"pcm_s8",
|
|
36710
36654
|
"pcm_s16le",
|
|
36711
36655
|
"pcm_s16be",
|
|
@@ -36713,7 +36657,6 @@ var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
|
|
|
36713
36657
|
"pcm_s24be",
|
|
36714
36658
|
"pcm_s32le",
|
|
36715
36659
|
"pcm_s32be",
|
|
36716
|
-
// Unsigned PCM
|
|
36717
36660
|
"pcm_u8",
|
|
36718
36661
|
"pcm_u16le",
|
|
36719
36662
|
"pcm_u16be",
|
|
@@ -36721,86 +36664,81 @@ var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
|
|
|
36721
36664
|
"pcm_u24be",
|
|
36722
36665
|
"pcm_u32le",
|
|
36723
36666
|
"pcm_u32be",
|
|
36724
|
-
// Float PCM
|
|
36725
36667
|
"pcm_f32le",
|
|
36726
36668
|
"pcm_f32be",
|
|
36727
36669
|
"pcm_f64le",
|
|
36728
36670
|
"pcm_f64be",
|
|
36729
|
-
// Companded
|
|
36730
36671
|
"mulaw",
|
|
36731
36672
|
"alaw"
|
|
36732
36673
|
]);
|
|
36733
|
-
var sonioxAudioFormatSchema = import_zod11.z.union([
|
|
36734
|
-
sonioxAutoDetectedAudioFormatSchema,
|
|
36735
|
-
sonioxPcmAudioEncodingSchema
|
|
36736
|
-
]);
|
|
36737
36674
|
var sonioxOneWayTranslationSchema = import_zod11.z.object({
|
|
36738
36675
|
type: import_zod11.z.literal("one_way"),
|
|
36739
|
-
target_language: import_zod11.z.string()
|
|
36676
|
+
target_language: import_zod11.z.string()
|
|
36740
36677
|
});
|
|
36741
36678
|
var sonioxTwoWayTranslationSchema = import_zod11.z.object({
|
|
36742
36679
|
type: import_zod11.z.literal("two_way"),
|
|
36743
|
-
language_a: import_zod11.z.string()
|
|
36744
|
-
language_b: import_zod11.z.string()
|
|
36680
|
+
language_a: import_zod11.z.string(),
|
|
36681
|
+
language_b: import_zod11.z.string()
|
|
36745
36682
|
});
|
|
36746
36683
|
var sonioxTranslationConfigSchema = import_zod11.z.union([
|
|
36747
36684
|
sonioxOneWayTranslationSchema,
|
|
36748
36685
|
sonioxTwoWayTranslationSchema
|
|
36749
36686
|
]);
|
|
36750
36687
|
var sonioxContextGeneralItemSchema = import_zod11.z.object({
|
|
36751
|
-
key: import_zod11.z.string()
|
|
36752
|
-
value: import_zod11.z.string()
|
|
36688
|
+
key: import_zod11.z.string(),
|
|
36689
|
+
value: import_zod11.z.string()
|
|
36753
36690
|
});
|
|
36754
36691
|
var sonioxTranslationTermSchema = import_zod11.z.object({
|
|
36755
|
-
source: import_zod11.z.string()
|
|
36756
|
-
target: import_zod11.z.string()
|
|
36692
|
+
source: import_zod11.z.string(),
|
|
36693
|
+
target: import_zod11.z.string()
|
|
36757
36694
|
});
|
|
36758
36695
|
var sonioxStructuredContextSchema = import_zod11.z.object({
|
|
36759
|
-
general: import_zod11.z.array(sonioxContextGeneralItemSchema).optional()
|
|
36760
|
-
text: import_zod11.z.string().optional()
|
|
36761
|
-
terms: import_zod11.z.array(import_zod11.z.string()).optional()
|
|
36762
|
-
translation_terms: import_zod11.z.array(sonioxTranslationTermSchema).optional()
|
|
36696
|
+
general: import_zod11.z.array(sonioxContextGeneralItemSchema).optional(),
|
|
36697
|
+
text: import_zod11.z.string().optional(),
|
|
36698
|
+
terms: import_zod11.z.array(import_zod11.z.string()).optional(),
|
|
36699
|
+
translation_terms: import_zod11.z.array(sonioxTranslationTermSchema).optional()
|
|
36763
36700
|
});
|
|
36764
36701
|
var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
|
|
36765
36702
|
var sonioxRealtimeModelSchema = import_zod11.z.enum([
|
|
36703
|
+
"stt-rt-v4",
|
|
36766
36704
|
"stt-rt-v3",
|
|
36767
36705
|
"stt-rt-preview",
|
|
36768
36706
|
"stt-rt-v3-preview",
|
|
36769
36707
|
"stt-rt-preview-v2"
|
|
36770
36708
|
]);
|
|
36771
36709
|
var streamingTranscriberParams3 = import_zod11.z.object({
|
|
36772
|
-
model: sonioxRealtimeModelSchema
|
|
36773
|
-
audioFormat: sonioxAudioFormatSchema.optional()
|
|
36774
|
-
sampleRate: import_zod11.z.number().optional()
|
|
36775
|
-
numChannels: import_zod11.z.number().
|
|
36776
|
-
languageHints: import_zod11.z.array(import_zod11.z.string()).optional()
|
|
36777
|
-
context: sonioxContextSchema.optional()
|
|
36778
|
-
enableSpeakerDiarization: import_zod11.z.boolean().optional()
|
|
36779
|
-
enableLanguageIdentification: import_zod11.z.boolean().optional()
|
|
36780
|
-
enableEndpointDetection: import_zod11.z.boolean().optional()
|
|
36781
|
-
translation: sonioxTranslationConfigSchema.optional()
|
|
36782
|
-
clientReferenceId: import_zod11.z.string().optional()
|
|
36783
|
-
});
|
|
36784
|
-
var sonioxTranslationStatusSchema = import_zod11.z.enum(["
|
|
36710
|
+
model: sonioxRealtimeModelSchema,
|
|
36711
|
+
audioFormat: sonioxAudioFormatSchema.optional(),
|
|
36712
|
+
sampleRate: import_zod11.z.number().optional(),
|
|
36713
|
+
numChannels: import_zod11.z.number().optional(),
|
|
36714
|
+
languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
|
|
36715
|
+
context: sonioxContextSchema.optional(),
|
|
36716
|
+
enableSpeakerDiarization: import_zod11.z.boolean().optional(),
|
|
36717
|
+
enableLanguageIdentification: import_zod11.z.boolean().optional(),
|
|
36718
|
+
enableEndpointDetection: import_zod11.z.boolean().optional(),
|
|
36719
|
+
translation: sonioxTranslationConfigSchema.optional(),
|
|
36720
|
+
clientReferenceId: import_zod11.z.string().optional()
|
|
36721
|
+
});
|
|
36722
|
+
var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
|
|
36785
36723
|
var sonioxTokenSchema = import_zod11.z.object({
|
|
36786
|
-
text: import_zod11.z.string()
|
|
36787
|
-
start_ms: import_zod11.z.number().optional()
|
|
36788
|
-
end_ms: import_zod11.z.number().optional()
|
|
36789
|
-
confidence: import_zod11.z.number()
|
|
36790
|
-
is_final: import_zod11.z.boolean()
|
|
36791
|
-
speaker: import_zod11.z.string().optional()
|
|
36792
|
-
|
|
36793
|
-
|
|
36794
|
-
|
|
36724
|
+
text: import_zod11.z.string(),
|
|
36725
|
+
start_ms: import_zod11.z.number().optional(),
|
|
36726
|
+
end_ms: import_zod11.z.number().optional(),
|
|
36727
|
+
confidence: import_zod11.z.number(),
|
|
36728
|
+
is_final: import_zod11.z.boolean(),
|
|
36729
|
+
speaker: import_zod11.z.string().optional(),
|
|
36730
|
+
translation_status: sonioxTranslationStatusSchema.optional(),
|
|
36731
|
+
language: import_zod11.z.string().optional(),
|
|
36732
|
+
source_language: import_zod11.z.string().optional()
|
|
36795
36733
|
});
|
|
36796
36734
|
var sonioxStreamingResponseSchema = import_zod11.z.object({
|
|
36797
|
-
text: import_zod11.z.string()
|
|
36798
|
-
tokens: import_zod11.z.array(sonioxTokenSchema)
|
|
36799
|
-
final_audio_proc_ms: import_zod11.z.number()
|
|
36800
|
-
total_audio_proc_ms: import_zod11.z.number()
|
|
36801
|
-
finished: import_zod11.z.boolean().optional()
|
|
36802
|
-
|
|
36803
|
-
|
|
36735
|
+
text: import_zod11.z.string(),
|
|
36736
|
+
tokens: import_zod11.z.array(sonioxTokenSchema),
|
|
36737
|
+
final_audio_proc_ms: import_zod11.z.number(),
|
|
36738
|
+
total_audio_proc_ms: import_zod11.z.number(),
|
|
36739
|
+
finished: import_zod11.z.boolean().optional(),
|
|
36740
|
+
error_code: import_zod11.z.number().optional(),
|
|
36741
|
+
error_message: import_zod11.z.string().optional()
|
|
36804
36742
|
});
|
|
36805
36743
|
var sonioxRecorderStateSchema = import_zod11.z.enum([
|
|
36806
36744
|
"Init",
|
|
@@ -37366,8 +37304,8 @@ var BatchOnlyProviders = AllProviders.filter(
|
|
|
37366
37304
|
);
|
|
37367
37305
|
|
|
37368
37306
|
// src/generated/deepgram/schema/index.ts
|
|
37369
|
-
var
|
|
37370
|
-
__export(
|
|
37307
|
+
var schema_exports5 = {};
|
|
37308
|
+
__export(schema_exports5, {
|
|
37371
37309
|
V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
|
|
37372
37310
|
V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
|
|
37373
37311
|
V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
|
|
@@ -37622,8 +37560,8 @@ var V1SpeakPostParametersSampleRate = {
|
|
|
37622
37560
|
};
|
|
37623
37561
|
|
|
37624
37562
|
// src/generated/openai/schema/index.ts
|
|
37625
|
-
var
|
|
37626
|
-
__export(
|
|
37563
|
+
var schema_exports6 = {};
|
|
37564
|
+
__export(schema_exports6, {
|
|
37627
37565
|
AudioResponseFormat: () => AudioResponseFormat,
|
|
37628
37566
|
CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
|
|
37629
37567
|
CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
|
|
@@ -37963,8 +37901,8 @@ var VoiceResourceObject = {
|
|
|
37963
37901
|
};
|
|
37964
37902
|
|
|
37965
37903
|
// src/generated/speechmatics/schema/index.ts
|
|
37966
|
-
var
|
|
37967
|
-
__export(
|
|
37904
|
+
var schema_exports7 = {};
|
|
37905
|
+
__export(schema_exports7, {
|
|
37968
37906
|
AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
|
|
37969
37907
|
ErrorResponseError: () => ErrorResponseError,
|
|
37970
37908
|
GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
|
|
@@ -38153,32 +38091,6 @@ var WrittenFormRecognitionResultType = {
|
|
|
38153
38091
|
word: "word"
|
|
38154
38092
|
};
|
|
38155
38093
|
|
|
38156
|
-
// src/generated/soniox/schema/index.ts
|
|
38157
|
-
var schema_exports7 = {};
|
|
38158
|
-
__export(schema_exports7, {
|
|
38159
|
-
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
38160
|
-
TranscriptionMode: () => TranscriptionMode,
|
|
38161
|
-
TranscriptionStatus: () => TranscriptionStatus,
|
|
38162
|
-
TranslationConfigType: () => TranslationConfigType
|
|
38163
|
-
});
|
|
38164
|
-
|
|
38165
|
-
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
38166
|
-
var TemporaryApiKeyUsageType = {
|
|
38167
|
-
transcribe_websocket: "transcribe_websocket"
|
|
38168
|
-
};
|
|
38169
|
-
|
|
38170
|
-
// src/generated/soniox/schema/transcriptionMode.ts
|
|
38171
|
-
var TranscriptionMode = {
|
|
38172
|
-
real_time: "real_time",
|
|
38173
|
-
async: "async"
|
|
38174
|
-
};
|
|
38175
|
-
|
|
38176
|
-
// src/generated/soniox/schema/translationConfigType.ts
|
|
38177
|
-
var TranslationConfigType = {
|
|
38178
|
-
one_way: "one_way",
|
|
38179
|
-
two_way: "two_way"
|
|
38180
|
-
};
|
|
38181
|
-
|
|
38182
38094
|
// src/generated/elevenlabs/schema/index.ts
|
|
38183
38095
|
var schema_exports8 = {};
|
|
38184
38096
|
__export(schema_exports8, {
|