voice-router-dev 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/constants.d.mts +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/{field-configs-DYiUtRUz.d.mts → field-configs-CH0lgAe8.d.mts} +5665 -5721
- package/dist/{field-configs-DYiUtRUz.d.ts → field-configs-CH0lgAe8.d.ts} +5665 -5721
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +42 -51
- package/dist/field-configs.mjs +42 -51
- package/dist/index.d.mts +921 -1270
- package/dist/index.d.ts +921 -1270
- package/dist/index.js +330 -707
- package/dist/index.mjs +330 -707
- package/dist/{speechToTextChunkResponseModel-CI-Aqxcr.d.ts → speechToTextChunkResponseModel-BY2lGyZ3.d.ts} +319 -1
- package/dist/{speechToTextChunkResponseModel-D8VJ-wz6.d.mts → speechToTextChunkResponseModel-KayxDiZ7.d.mts} +319 -1
- package/dist/webhooks.d.mts +1 -1
- package/dist/webhooks.d.ts +1 -1
- package/package.json +2 -1
package/dist/index.js
CHANGED
|
@@ -82,7 +82,7 @@ __export(src_exports, {
|
|
|
82
82
|
DeepgramTTSSampleRate: () => DeepgramTTSSampleRate,
|
|
83
83
|
DeepgramTopicMode: () => DeepgramTopicMode,
|
|
84
84
|
DeepgramTranscriptionSchema: () => DeepgramTranscriptionSchema,
|
|
85
|
-
DeepgramTypes: () =>
|
|
85
|
+
DeepgramTypes: () => schema_exports5,
|
|
86
86
|
DeepgramZodSchemas: () => deepgramAPI_zod_exports,
|
|
87
87
|
ElevenLabsAdapter: () => ElevenLabsAdapter,
|
|
88
88
|
ElevenLabsCapabilities: () => ElevenLabsCapabilities,
|
|
@@ -119,7 +119,7 @@ __export(src_exports, {
|
|
|
119
119
|
OpenAIResponseFormat: () => OpenAIResponseFormat,
|
|
120
120
|
OpenAIStreamingTypes: () => streaming_types_exports,
|
|
121
121
|
OpenAITranscriptionSchema: () => OpenAITranscriptionSchema,
|
|
122
|
-
OpenAITypes: () =>
|
|
122
|
+
OpenAITypes: () => schema_exports6,
|
|
123
123
|
OpenAIWhisperAdapter: () => OpenAIWhisperAdapter,
|
|
124
124
|
OpenAIZodSchemas: () => openAIAudioRealtimeAPI_zod_exports,
|
|
125
125
|
ProfanityFilterMode: () => ProfanityFilterMode,
|
|
@@ -148,7 +148,7 @@ __export(src_exports, {
|
|
|
148
148
|
SonioxStreamingUpdateSchema: () => SonioxStreamingUpdateSchema,
|
|
149
149
|
SonioxStreamingZodSchemas: () => streaming_types_zod_exports,
|
|
150
150
|
SonioxTranscriptionSchema: () => SonioxTranscriptionSchema,
|
|
151
|
-
SonioxTypes: () =>
|
|
151
|
+
SonioxTypes: () => schema_exports4,
|
|
152
152
|
SpeakV1ContainerParameter: () => SpeakV1ContainerParameter,
|
|
153
153
|
SpeakV1EncodingParameter: () => SpeakV1EncodingParameter,
|
|
154
154
|
SpeakV1SampleRateParameter: () => SpeakV1SampleRateParameter,
|
|
@@ -163,7 +163,7 @@ __export(src_exports, {
|
|
|
163
163
|
SpeechmaticsStreamingSchema: () => SpeechmaticsStreamingSchema,
|
|
164
164
|
SpeechmaticsStreamingUpdateSchema: () => SpeechmaticsStreamingUpdateSchema,
|
|
165
165
|
SpeechmaticsTranscriptionSchema: () => SpeechmaticsTranscriptionSchema,
|
|
166
|
-
SpeechmaticsTypes: () =>
|
|
166
|
+
SpeechmaticsTypes: () => schema_exports7,
|
|
167
167
|
SpeechmaticsZodSchemas: () => speechmaticsASRRESTAPI_zod_exports,
|
|
168
168
|
StreamingProviders: () => StreamingProviders,
|
|
169
169
|
StreamingSupportedBitDepthEnum: () => StreamingSupportedBitDepthEnum,
|
|
@@ -6064,23 +6064,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
6064
6064
|
"AssemblyAI adapter currently only supports URL-based audio input. Use audio.type='url'"
|
|
6065
6065
|
);
|
|
6066
6066
|
}
|
|
6067
|
-
const
|
|
6068
|
-
|
|
6069
|
-
|
|
6070
|
-
|
|
6071
|
-
|
|
6072
|
-
|
|
6067
|
+
const passthrough = options?.assemblyai;
|
|
6068
|
+
let speechModels;
|
|
6069
|
+
if (passthrough?.speech_model != null && !passthrough.speech_models) {
|
|
6070
|
+
speechModels = [passthrough.speech_model];
|
|
6071
|
+
} else if (passthrough?.speech_models) {
|
|
6072
|
+
speechModels = passthrough.speech_models;
|
|
6073
6073
|
}
|
|
6074
|
+
const { speech_model: _deprecated, ...typedOpts } = passthrough ?? {};
|
|
6074
6075
|
const request = {
|
|
6075
|
-
...
|
|
6076
|
+
...typedOpts,
|
|
6076
6077
|
audio_url: audioUrl,
|
|
6077
6078
|
// speech_models is required — default to universal-3-pro
|
|
6078
|
-
speech_models:
|
|
6079
|
-
"universal-3-pro"
|
|
6080
|
-
],
|
|
6079
|
+
speech_models: speechModels ?? ["universal-3-pro"],
|
|
6081
6080
|
// Enable punctuation and formatting by default
|
|
6082
|
-
punctuate:
|
|
6083
|
-
format_text:
|
|
6081
|
+
punctuate: typedOpts.punctuate ?? true,
|
|
6082
|
+
format_text: typedOpts.format_text ?? true
|
|
6084
6083
|
};
|
|
6085
6084
|
if (options) {
|
|
6086
6085
|
if (options.model) {
|
|
@@ -6128,22 +6127,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
6128
6127
|
normalizeResponse(response) {
|
|
6129
6128
|
let status;
|
|
6130
6129
|
switch (response.status) {
|
|
6131
|
-
case
|
|
6130
|
+
case "queued":
|
|
6132
6131
|
status = "queued";
|
|
6133
6132
|
break;
|
|
6134
|
-
case
|
|
6133
|
+
case "processing":
|
|
6135
6134
|
status = "processing";
|
|
6136
6135
|
break;
|
|
6137
|
-
case
|
|
6136
|
+
case "completed":
|
|
6138
6137
|
status = "completed";
|
|
6139
6138
|
break;
|
|
6140
|
-
case
|
|
6139
|
+
case "error":
|
|
6141
6140
|
status = "error";
|
|
6142
6141
|
break;
|
|
6143
6142
|
default:
|
|
6144
6143
|
status = "queued";
|
|
6145
6144
|
}
|
|
6146
|
-
if (response.status ===
|
|
6145
|
+
if (response.status === "error") {
|
|
6147
6146
|
return {
|
|
6148
6147
|
success: false,
|
|
6149
6148
|
provider: this.name,
|
|
@@ -6795,8 +6794,10 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
6795
6794
|
/**
|
|
6796
6795
|
* Submit audio for transcription
|
|
6797
6796
|
*
|
|
6798
|
-
* Sends audio to Deepgram API for transcription. Deepgram processes
|
|
6799
|
-
* synchronously and returns results immediately
|
|
6797
|
+
* Sends audio to Deepgram API for transcription. Deepgram normally processes
|
|
6798
|
+
* synchronously and returns results immediately. When `webhookUrl` is set,
|
|
6799
|
+
* Deepgram can instead return an async callback acknowledgment containing a
|
|
6800
|
+
* request ID.
|
|
6800
6801
|
*
|
|
6801
6802
|
* @param audio - Audio input (URL or file buffer)
|
|
6802
6803
|
* @param options - Transcription options
|
|
@@ -6847,17 +6848,59 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
6847
6848
|
{ params }
|
|
6848
6849
|
).then((res) => res.data);
|
|
6849
6850
|
} else if (audio.type === "file") {
|
|
6850
|
-
response = await this.client.post(
|
|
6851
|
-
|
|
6852
|
-
|
|
6853
|
-
|
|
6851
|
+
response = await this.client.post(
|
|
6852
|
+
"/listen",
|
|
6853
|
+
audio.file,
|
|
6854
|
+
{
|
|
6855
|
+
params,
|
|
6856
|
+
headers: {
|
|
6857
|
+
"Content-Type": "audio/*"
|
|
6858
|
+
}
|
|
6854
6859
|
}
|
|
6855
|
-
|
|
6860
|
+
).then((res) => res.data);
|
|
6856
6861
|
} else {
|
|
6857
6862
|
throw new Error(
|
|
6858
6863
|
"Deepgram adapter does not support stream type for pre-recorded transcription. Use transcribeStream() for real-time streaming."
|
|
6859
6864
|
);
|
|
6860
6865
|
}
|
|
6866
|
+
if (options?.webhookUrl) {
|
|
6867
|
+
const requestId = ("request_id" in response ? response.request_id : void 0) || ("metadata" in response ? response.metadata?.request_id : void 0);
|
|
6868
|
+
if (!requestId) {
|
|
6869
|
+
return {
|
|
6870
|
+
success: false,
|
|
6871
|
+
provider: this.name,
|
|
6872
|
+
error: {
|
|
6873
|
+
code: "MISSING_REQUEST_ID",
|
|
6874
|
+
message: "Deepgram callback mode did not return a request ID"
|
|
6875
|
+
},
|
|
6876
|
+
raw: response
|
|
6877
|
+
};
|
|
6878
|
+
}
|
|
6879
|
+
return {
|
|
6880
|
+
success: true,
|
|
6881
|
+
provider: this.name,
|
|
6882
|
+
data: {
|
|
6883
|
+
id: requestId,
|
|
6884
|
+
text: "",
|
|
6885
|
+
status: "queued"
|
|
6886
|
+
},
|
|
6887
|
+
tracking: {
|
|
6888
|
+
requestId
|
|
6889
|
+
},
|
|
6890
|
+
raw: response
|
|
6891
|
+
};
|
|
6892
|
+
}
|
|
6893
|
+
if (!("results" in response) || !("metadata" in response)) {
|
|
6894
|
+
return {
|
|
6895
|
+
success: false,
|
|
6896
|
+
provider: this.name,
|
|
6897
|
+
error: {
|
|
6898
|
+
code: "INVALID_RESPONSE",
|
|
6899
|
+
message: "Deepgram did not return a synchronous transcription payload"
|
|
6900
|
+
},
|
|
6901
|
+
raw: response
|
|
6902
|
+
};
|
|
6903
|
+
}
|
|
6861
6904
|
return this.normalizeResponse(response);
|
|
6862
6905
|
} catch (error) {
|
|
6863
6906
|
return this.createErrorResponse(error);
|
|
@@ -7518,7 +7561,8 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
7518
7561
|
break;
|
|
7519
7562
|
}
|
|
7520
7563
|
case "Metadata": {
|
|
7521
|
-
|
|
7564
|
+
const { type: _, ...metadata } = message;
|
|
7565
|
+
callbacks?.onMetadata?.(metadata);
|
|
7522
7566
|
break;
|
|
7523
7567
|
}
|
|
7524
7568
|
case "Error": {
|
|
@@ -7954,10 +7998,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
|
|
|
7954
7998
|
contentUrls: [audio.url],
|
|
7955
7999
|
properties: this.buildTranscriptionProperties(options)
|
|
7956
8000
|
};
|
|
7957
|
-
const response = await transcriptionsCreate(
|
|
7958
|
-
transcriptionRequest,
|
|
7959
|
-
this.getAxiosConfig()
|
|
7960
|
-
);
|
|
8001
|
+
const response = await transcriptionsCreate(transcriptionRequest, this.getAxiosConfig());
|
|
7961
8002
|
const transcription = response.data;
|
|
7962
8003
|
const transcriptId = transcription.self?.split("/").pop() || "";
|
|
7963
8004
|
return await this.pollForCompletion(transcriptId);
|
|
@@ -8497,7 +8538,6 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
|
|
|
8497
8538
|
const request = {
|
|
8498
8539
|
...options?.openai,
|
|
8499
8540
|
file: audioData,
|
|
8500
|
-
// Buffer/Blob both accepted at runtime; generated type expects Blob
|
|
8501
8541
|
model
|
|
8502
8542
|
};
|
|
8503
8543
|
if (options?.language) {
|
|
@@ -8517,11 +8557,7 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
|
|
|
8517
8557
|
request.response_format = OpenAIResponseFormat.json;
|
|
8518
8558
|
}
|
|
8519
8559
|
const response = await createTranscription(request, this.getAxiosConfig());
|
|
8520
|
-
return this.normalizeResponse(
|
|
8521
|
-
response.data,
|
|
8522
|
-
model,
|
|
8523
|
-
isDiarization
|
|
8524
|
-
);
|
|
8560
|
+
return this.normalizeResponse(response.data, model, isDiarization);
|
|
8525
8561
|
} catch (error) {
|
|
8526
8562
|
return this.createErrorResponse(error);
|
|
8527
8563
|
}
|
|
@@ -8928,7 +8964,6 @@ function createOpenAIWhisperAdapter(config) {
|
|
|
8928
8964
|
|
|
8929
8965
|
// src/adapters/speechmatics-adapter.ts
|
|
8930
8966
|
var import_axios8 = __toESM(require("axios"));
|
|
8931
|
-
var import_ws5 = __toESM(require("ws"));
|
|
8932
8967
|
|
|
8933
8968
|
// src/generated/speechmatics/schema/notificationConfigContentsItem.ts
|
|
8934
8969
|
var NotificationConfigContentsItem = {
|
|
@@ -8978,7 +9013,8 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8978
9013
|
super(...arguments);
|
|
8979
9014
|
this.name = "speechmatics";
|
|
8980
9015
|
this.capabilities = {
|
|
8981
|
-
streaming:
|
|
9016
|
+
streaming: false,
|
|
9017
|
+
// Batch only (streaming available via separate WebSocket API)
|
|
8982
9018
|
diarization: true,
|
|
8983
9019
|
wordTimestamps: true,
|
|
8984
9020
|
languageDetection: false,
|
|
@@ -9113,16 +9149,13 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
9113
9149
|
jobConfig.fetch_data = {
|
|
9114
9150
|
url: audio.url
|
|
9115
9151
|
};
|
|
9116
|
-
|
|
9117
|
-
|
|
9118
|
-
requestBody = formData;
|
|
9119
|
-
headers = { "Content-Type": "multipart/form-data" };
|
|
9152
|
+
requestBody = { config: JSON.stringify(jobConfig) };
|
|
9153
|
+
headers = { "Content-Type": "application/json" };
|
|
9120
9154
|
} else if (audio.type === "file") {
|
|
9121
|
-
|
|
9122
|
-
|
|
9123
|
-
|
|
9124
|
-
|
|
9125
|
-
requestBody = formData;
|
|
9155
|
+
requestBody = {
|
|
9156
|
+
config: JSON.stringify(jobConfig),
|
|
9157
|
+
data_file: audio.file
|
|
9158
|
+
};
|
|
9126
9159
|
headers = { "Content-Type": "multipart/form-data" };
|
|
9127
9160
|
} else {
|
|
9128
9161
|
return {
|
|
@@ -9227,389 +9260,6 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
9227
9260
|
throw error;
|
|
9228
9261
|
}
|
|
9229
9262
|
}
|
|
9230
|
-
/**
|
|
9231
|
-
* Build WebSocket URL for real-time streaming
|
|
9232
|
-
*
|
|
9233
|
-
* Note: Real-time API uses a different host from the batch API:
|
|
9234
|
-
* - Batch: {region}.asr.api.speechmatics.com
|
|
9235
|
-
* - Real-time: {region}.rt.speechmatics.com
|
|
9236
|
-
*
|
|
9237
|
-
* @param region - Regional endpoint identifier
|
|
9238
|
-
* @returns WebSocket URL for real-time API
|
|
9239
|
-
*/
|
|
9240
|
-
getRegionalWsUrl(region) {
|
|
9241
|
-
if (this.config?.wsBaseUrl) {
|
|
9242
|
-
return this.config.wsBaseUrl;
|
|
9243
|
-
}
|
|
9244
|
-
const rtRegionMap = {
|
|
9245
|
-
eu1: "eu",
|
|
9246
|
-
eu2: "eu",
|
|
9247
|
-
us1: "us",
|
|
9248
|
-
us2: "us",
|
|
9249
|
-
au1: "eu"
|
|
9250
|
-
// No AU RT endpoint — fall back to EU
|
|
9251
|
-
};
|
|
9252
|
-
const rtPrefix = rtRegionMap[region || ""] || "eu";
|
|
9253
|
-
return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
|
|
9254
|
-
}
|
|
9255
|
-
/**
|
|
9256
|
-
* Stream audio for real-time transcription via WebSocket
|
|
9257
|
-
*
|
|
9258
|
-
* Connects to Speechmatics' real-time API and sends audio chunks
|
|
9259
|
-
* for transcription with results returned via callbacks.
|
|
9260
|
-
*
|
|
9261
|
-
* @param options - Streaming configuration options
|
|
9262
|
-
* @param callbacks - Event callbacks for transcription results
|
|
9263
|
-
* @returns Promise that resolves with a StreamingSession
|
|
9264
|
-
*
|
|
9265
|
-
* @example Basic streaming
|
|
9266
|
-
* ```typescript
|
|
9267
|
-
* const session = await adapter.transcribeStream({
|
|
9268
|
-
* language: 'en',
|
|
9269
|
-
* speechmaticsStreaming: {
|
|
9270
|
-
* enablePartials: true,
|
|
9271
|
-
* operatingPoint: 'enhanced'
|
|
9272
|
-
* }
|
|
9273
|
-
* }, {
|
|
9274
|
-
* onTranscript: (event) => console.log(event.text),
|
|
9275
|
-
* onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
|
|
9276
|
-
* onError: (error) => console.error(error)
|
|
9277
|
-
* });
|
|
9278
|
-
*
|
|
9279
|
-
* await session.sendAudio({ data: audioBuffer });
|
|
9280
|
-
* await session.close();
|
|
9281
|
-
* ```
|
|
9282
|
-
*/
|
|
9283
|
-
async transcribeStream(options, callbacks) {
|
|
9284
|
-
this.validateConfig();
|
|
9285
|
-
const smOpts = options?.speechmaticsStreaming || {};
|
|
9286
|
-
const region = smOpts.region || this.config?.region;
|
|
9287
|
-
const wsUrl = this.getRegionalWsUrl(region);
|
|
9288
|
-
const ws = new import_ws5.default(wsUrl, {
|
|
9289
|
-
headers: {
|
|
9290
|
-
Authorization: `Bearer ${this.config.apiKey}`
|
|
9291
|
-
}
|
|
9292
|
-
});
|
|
9293
|
-
let sessionStatus = "connecting";
|
|
9294
|
-
const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
|
|
9295
|
-
let seqNo = 0;
|
|
9296
|
-
let utteranceResults = [];
|
|
9297
|
-
const sessionReady = new Promise((resolve, reject) => {
|
|
9298
|
-
const timeout = setTimeout(() => {
|
|
9299
|
-
reject(new Error("WebSocket connection timeout"));
|
|
9300
|
-
}, 1e4);
|
|
9301
|
-
let wsOpen = false;
|
|
9302
|
-
ws.once("error", (error) => {
|
|
9303
|
-
clearTimeout(timeout);
|
|
9304
|
-
reject(error);
|
|
9305
|
-
});
|
|
9306
|
-
ws.once("open", () => {
|
|
9307
|
-
wsOpen = true;
|
|
9308
|
-
const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
|
|
9309
|
-
const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
|
|
9310
|
-
const startMsg = {
|
|
9311
|
-
message: "StartRecognition",
|
|
9312
|
-
audio_format: {
|
|
9313
|
-
type: "raw",
|
|
9314
|
-
encoding,
|
|
9315
|
-
sample_rate: sampleRate
|
|
9316
|
-
},
|
|
9317
|
-
transcription_config: {
|
|
9318
|
-
language: smOpts.language || options?.language || "en",
|
|
9319
|
-
enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
|
|
9320
|
-
}
|
|
9321
|
-
};
|
|
9322
|
-
const txConfig = startMsg.transcription_config;
|
|
9323
|
-
if (smOpts.domain) txConfig.domain = smOpts.domain;
|
|
9324
|
-
if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
|
|
9325
|
-
if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
|
|
9326
|
-
if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
|
|
9327
|
-
if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
|
|
9328
|
-
if (smOpts.diarization === "speaker" || options?.diarization) {
|
|
9329
|
-
txConfig.diarization = "speaker";
|
|
9330
|
-
if (smOpts.maxSpeakers) {
|
|
9331
|
-
txConfig.speaker_diarization_config = {
|
|
9332
|
-
max_speakers: smOpts.maxSpeakers
|
|
9333
|
-
};
|
|
9334
|
-
} else if (options?.speakersExpected) {
|
|
9335
|
-
txConfig.speaker_diarization_config = {
|
|
9336
|
-
max_speakers: options.speakersExpected
|
|
9337
|
-
};
|
|
9338
|
-
}
|
|
9339
|
-
}
|
|
9340
|
-
if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
|
|
9341
|
-
txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
|
|
9342
|
-
content: word
|
|
9343
|
-
}));
|
|
9344
|
-
} else if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9345
|
-
txConfig.additional_vocab = options.customVocabulary.map((word) => ({
|
|
9346
|
-
content: word
|
|
9347
|
-
}));
|
|
9348
|
-
}
|
|
9349
|
-
if (smOpts.conversationConfig) {
|
|
9350
|
-
txConfig.conversation_config = {
|
|
9351
|
-
end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
|
|
9352
|
-
};
|
|
9353
|
-
}
|
|
9354
|
-
const startPayload = JSON.stringify(startMsg);
|
|
9355
|
-
if (callbacks?.onRawMessage) {
|
|
9356
|
-
callbacks.onRawMessage({
|
|
9357
|
-
provider: "speechmatics",
|
|
9358
|
-
direction: "outgoing",
|
|
9359
|
-
timestamp: Date.now(),
|
|
9360
|
-
payload: startPayload,
|
|
9361
|
-
messageType: "StartRecognition"
|
|
9362
|
-
});
|
|
9363
|
-
}
|
|
9364
|
-
ws.send(startPayload);
|
|
9365
|
-
});
|
|
9366
|
-
const onMessage = (data) => {
|
|
9367
|
-
const rawPayload = data.toString();
|
|
9368
|
-
try {
|
|
9369
|
-
const msg = JSON.parse(rawPayload);
|
|
9370
|
-
if (msg.message === "RecognitionStarted") {
|
|
9371
|
-
clearTimeout(timeout);
|
|
9372
|
-
ws.removeListener("message", onMessage);
|
|
9373
|
-
ws.emit("message", data);
|
|
9374
|
-
resolve();
|
|
9375
|
-
} else if (msg.message === "Error") {
|
|
9376
|
-
clearTimeout(timeout);
|
|
9377
|
-
ws.removeListener("message", onMessage);
|
|
9378
|
-
reject(new Error(msg.reason || "Recognition failed to start"));
|
|
9379
|
-
}
|
|
9380
|
-
} catch {
|
|
9381
|
-
}
|
|
9382
|
-
};
|
|
9383
|
-
ws.on("message", onMessage);
|
|
9384
|
-
});
|
|
9385
|
-
ws.on("message", (data) => {
|
|
9386
|
-
const rawPayload = data.toString();
|
|
9387
|
-
try {
|
|
9388
|
-
const message = JSON.parse(rawPayload);
|
|
9389
|
-
if (callbacks?.onRawMessage) {
|
|
9390
|
-
callbacks.onRawMessage({
|
|
9391
|
-
provider: "speechmatics",
|
|
9392
|
-
direction: "incoming",
|
|
9393
|
-
timestamp: Date.now(),
|
|
9394
|
-
payload: rawPayload,
|
|
9395
|
-
messageType: message.message
|
|
9396
|
-
});
|
|
9397
|
-
}
|
|
9398
|
-
this.handleStreamingMessage(message, callbacks, utteranceResults);
|
|
9399
|
-
} catch (error) {
|
|
9400
|
-
if (callbacks?.onRawMessage) {
|
|
9401
|
-
callbacks.onRawMessage({
|
|
9402
|
-
provider: "speechmatics",
|
|
9403
|
-
direction: "incoming",
|
|
9404
|
-
timestamp: Date.now(),
|
|
9405
|
-
payload: rawPayload,
|
|
9406
|
-
messageType: "parse_error"
|
|
9407
|
-
});
|
|
9408
|
-
}
|
|
9409
|
-
callbacks?.onError?.({
|
|
9410
|
-
code: "PARSE_ERROR",
|
|
9411
|
-
message: "Failed to parse WebSocket message",
|
|
9412
|
-
details: error
|
|
9413
|
-
});
|
|
9414
|
-
}
|
|
9415
|
-
});
|
|
9416
|
-
ws.on("error", (error) => {
|
|
9417
|
-
callbacks?.onError?.({
|
|
9418
|
-
code: "WEBSOCKET_ERROR",
|
|
9419
|
-
message: error.message,
|
|
9420
|
-
details: error
|
|
9421
|
-
});
|
|
9422
|
-
});
|
|
9423
|
-
ws.on("close", (code, reason) => {
|
|
9424
|
-
sessionStatus = "closed";
|
|
9425
|
-
callbacks?.onClose?.(code, reason.toString());
|
|
9426
|
-
});
|
|
9427
|
-
await sessionReady;
|
|
9428
|
-
sessionStatus = "open";
|
|
9429
|
-
callbacks?.onOpen?.();
|
|
9430
|
-
return {
|
|
9431
|
-
id: sessionId,
|
|
9432
|
-
provider: this.name,
|
|
9433
|
-
createdAt: /* @__PURE__ */ new Date(),
|
|
9434
|
-
getStatus: () => sessionStatus,
|
|
9435
|
-
sendAudio: async (chunk) => {
|
|
9436
|
-
if (sessionStatus !== "open") {
|
|
9437
|
-
throw new Error(`Cannot send audio: session is ${sessionStatus}`);
|
|
9438
|
-
}
|
|
9439
|
-
if (ws.readyState !== import_ws5.default.OPEN) {
|
|
9440
|
-
throw new Error("WebSocket is not open");
|
|
9441
|
-
}
|
|
9442
|
-
if (callbacks?.onRawMessage) {
|
|
9443
|
-
const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
|
|
9444
|
-
chunk.data.byteOffset,
|
|
9445
|
-
chunk.data.byteOffset + chunk.data.byteLength
|
|
9446
|
-
);
|
|
9447
|
-
callbacks.onRawMessage({
|
|
9448
|
-
provider: this.name,
|
|
9449
|
-
direction: "outgoing",
|
|
9450
|
-
timestamp: Date.now(),
|
|
9451
|
-
payload: audioPayload,
|
|
9452
|
-
messageType: "audio"
|
|
9453
|
-
});
|
|
9454
|
-
}
|
|
9455
|
-
ws.send(chunk.data);
|
|
9456
|
-
seqNo++;
|
|
9457
|
-
if (chunk.isLast) {
|
|
9458
|
-
const endMsg = JSON.stringify({
|
|
9459
|
-
message: "EndOfStream",
|
|
9460
|
-
last_seq_no: seqNo
|
|
9461
|
-
});
|
|
9462
|
-
if (callbacks?.onRawMessage) {
|
|
9463
|
-
callbacks.onRawMessage({
|
|
9464
|
-
provider: this.name,
|
|
9465
|
-
direction: "outgoing",
|
|
9466
|
-
timestamp: Date.now(),
|
|
9467
|
-
payload: endMsg,
|
|
9468
|
-
messageType: "EndOfStream"
|
|
9469
|
-
});
|
|
9470
|
-
}
|
|
9471
|
-
ws.send(endMsg);
|
|
9472
|
-
}
|
|
9473
|
-
},
|
|
9474
|
-
close: async () => {
|
|
9475
|
-
if (sessionStatus === "closed" || sessionStatus === "closing") {
|
|
9476
|
-
return;
|
|
9477
|
-
}
|
|
9478
|
-
sessionStatus = "closing";
|
|
9479
|
-
if (ws.readyState === import_ws5.default.OPEN) {
|
|
9480
|
-
seqNo++;
|
|
9481
|
-
ws.send(
|
|
9482
|
-
JSON.stringify({
|
|
9483
|
-
message: "EndOfStream",
|
|
9484
|
-
last_seq_no: seqNo
|
|
9485
|
-
})
|
|
9486
|
-
);
|
|
9487
|
-
}
|
|
9488
|
-
return new Promise((resolve) => {
|
|
9489
|
-
const timeout = setTimeout(() => {
|
|
9490
|
-
ws.terminate();
|
|
9491
|
-
sessionStatus = "closed";
|
|
9492
|
-
resolve();
|
|
9493
|
-
}, 5e3);
|
|
9494
|
-
const onMsg = (data) => {
|
|
9495
|
-
try {
|
|
9496
|
-
const msg = JSON.parse(data.toString());
|
|
9497
|
-
if (msg.message === "EndOfTranscript") {
|
|
9498
|
-
ws.removeListener("message", onMsg);
|
|
9499
|
-
clearTimeout(timeout);
|
|
9500
|
-
ws.close();
|
|
9501
|
-
}
|
|
9502
|
-
} catch {
|
|
9503
|
-
}
|
|
9504
|
-
};
|
|
9505
|
-
ws.on("message", onMsg);
|
|
9506
|
-
ws.once("close", () => {
|
|
9507
|
-
clearTimeout(timeout);
|
|
9508
|
-
sessionStatus = "closed";
|
|
9509
|
-
resolve();
|
|
9510
|
-
});
|
|
9511
|
-
});
|
|
9512
|
-
}
|
|
9513
|
-
};
|
|
9514
|
-
}
|
|
9515
|
-
/**
|
|
9516
|
-
* Handle incoming Speechmatics real-time WebSocket messages
|
|
9517
|
-
*/
|
|
9518
|
-
handleStreamingMessage(message, callbacks, utteranceResults) {
|
|
9519
|
-
switch (message.message) {
|
|
9520
|
-
case "RecognitionStarted": {
|
|
9521
|
-
break;
|
|
9522
|
-
}
|
|
9523
|
-
case "AddPartialTranscript": {
|
|
9524
|
-
const results = message.results || [];
|
|
9525
|
-
const text = buildTextFromSpeechmaticsResults(results);
|
|
9526
|
-
if (text) {
|
|
9527
|
-
callbacks?.onTranscript?.({
|
|
9528
|
-
type: "transcript",
|
|
9529
|
-
text,
|
|
9530
|
-
isFinal: false,
|
|
9531
|
-
words: this.extractWordsFromResults(results),
|
|
9532
|
-
data: message
|
|
9533
|
-
});
|
|
9534
|
-
}
|
|
9535
|
-
break;
|
|
9536
|
-
}
|
|
9537
|
-
case "AddTranscript": {
|
|
9538
|
-
const results = message.results || [];
|
|
9539
|
-
const text = buildTextFromSpeechmaticsResults(results);
|
|
9540
|
-
if (utteranceResults) {
|
|
9541
|
-
utteranceResults.push(...results);
|
|
9542
|
-
}
|
|
9543
|
-
if (text) {
|
|
9544
|
-
callbacks?.onTranscript?.({
|
|
9545
|
-
type: "transcript",
|
|
9546
|
-
text,
|
|
9547
|
-
isFinal: true,
|
|
9548
|
-
words: this.extractWordsFromResults(results),
|
|
9549
|
-
data: message
|
|
9550
|
-
});
|
|
9551
|
-
}
|
|
9552
|
-
break;
|
|
9553
|
-
}
|
|
9554
|
-
case "EndOfUtterance": {
|
|
9555
|
-
if (utteranceResults && utteranceResults.length > 0) {
|
|
9556
|
-
const text = buildTextFromSpeechmaticsResults(utteranceResults);
|
|
9557
|
-
const words = this.extractWordsFromResults(utteranceResults);
|
|
9558
|
-
const utterances = buildUtterancesFromWords(words);
|
|
9559
|
-
if (utterances.length > 0) {
|
|
9560
|
-
for (const utt of utterances) {
|
|
9561
|
-
callbacks?.onUtterance?.(utt);
|
|
9562
|
-
}
|
|
9563
|
-
} else if (text) {
|
|
9564
|
-
callbacks?.onUtterance?.({
|
|
9565
|
-
text,
|
|
9566
|
-
start: words.length > 0 ? words[0].start : 0,
|
|
9567
|
-
end: words.length > 0 ? words[words.length - 1].end : 0,
|
|
9568
|
-
words
|
|
9569
|
-
});
|
|
9570
|
-
}
|
|
9571
|
-
utteranceResults.length = 0;
|
|
9572
|
-
}
|
|
9573
|
-
break;
|
|
9574
|
-
}
|
|
9575
|
-
case "AudioAdded": {
|
|
9576
|
-
break;
|
|
9577
|
-
}
|
|
9578
|
-
case "EndOfTranscript": {
|
|
9579
|
-
break;
|
|
9580
|
-
}
|
|
9581
|
-
case "Info":
|
|
9582
|
-
case "Warning": {
|
|
9583
|
-
callbacks?.onMetadata?.(message);
|
|
9584
|
-
break;
|
|
9585
|
-
}
|
|
9586
|
-
case "Error": {
|
|
9587
|
-
const errMsg = message;
|
|
9588
|
-
callbacks?.onError?.({
|
|
9589
|
-
code: errMsg.type || "SPEECHMATICS_ERROR",
|
|
9590
|
-
message: errMsg.reason || "Unknown error",
|
|
9591
|
-
details: message
|
|
9592
|
-
});
|
|
9593
|
-
break;
|
|
9594
|
-
}
|
|
9595
|
-
default: {
|
|
9596
|
-
callbacks?.onMetadata?.(message);
|
|
9597
|
-
break;
|
|
9598
|
-
}
|
|
9599
|
-
}
|
|
9600
|
-
}
|
|
9601
|
-
/**
|
|
9602
|
-
* Extract unified Word[] from Speechmatics recognition results
|
|
9603
|
-
*/
|
|
9604
|
-
extractWordsFromResults(results) {
|
|
9605
|
-
return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
|
|
9606
|
-
word: result.alternatives?.[0]?.content || "",
|
|
9607
|
-
start: result.start_time,
|
|
9608
|
-
end: result.end_time,
|
|
9609
|
-
confidence: result.alternatives?.[0]?.confidence,
|
|
9610
|
-
speaker: result.alternatives?.[0]?.speaker
|
|
9611
|
-
}));
|
|
9612
|
-
}
|
|
9613
9263
|
/**
|
|
9614
9264
|
* Normalize Speechmatics status to unified status
|
|
9615
9265
|
* Uses generated JobDetailsStatus enum values
|
|
@@ -9678,9 +9328,6 @@ function createSpeechmaticsAdapter(config) {
|
|
|
9678
9328
|
return adapter;
|
|
9679
9329
|
}
|
|
9680
9330
|
|
|
9681
|
-
// src/adapters/soniox-adapter.ts
|
|
9682
|
-
var import_axios9 = __toESM(require("axios"));
|
|
9683
|
-
|
|
9684
9331
|
// src/generated/soniox/schema/transcriptionStatus.ts
|
|
9685
9332
|
var TranscriptionStatus = {
|
|
9686
9333
|
queued: "queued",
|
|
@@ -9689,6 +9336,57 @@ var TranscriptionStatus = {
|
|
|
9689
9336
|
error: "error"
|
|
9690
9337
|
};
|
|
9691
9338
|
|
|
9339
|
+
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9340
|
+
var import_axios9 = __toESM(require("axios"));
|
|
9341
|
+
|
|
9342
|
+
// src/generated/soniox/schema/index.ts
|
|
9343
|
+
var schema_exports4 = {};
|
|
9344
|
+
__export(schema_exports4, {
|
|
9345
|
+
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
9346
|
+
TranscriptionMode: () => TranscriptionMode,
|
|
9347
|
+
TranscriptionStatus: () => TranscriptionStatus,
|
|
9348
|
+
TranslationConfigType: () => TranslationConfigType
|
|
9349
|
+
});
|
|
9350
|
+
|
|
9351
|
+
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
9352
|
+
var TemporaryApiKeyUsageType = {
|
|
9353
|
+
transcribe_websocket: "transcribe_websocket"
|
|
9354
|
+
};
|
|
9355
|
+
|
|
9356
|
+
// src/generated/soniox/schema/transcriptionMode.ts
|
|
9357
|
+
var TranscriptionMode = {
|
|
9358
|
+
real_time: "real_time",
|
|
9359
|
+
async: "async"
|
|
9360
|
+
};
|
|
9361
|
+
|
|
9362
|
+
// src/generated/soniox/schema/translationConfigType.ts
|
|
9363
|
+
var TranslationConfigType = {
|
|
9364
|
+
one_way: "one_way",
|
|
9365
|
+
two_way: "two_way"
|
|
9366
|
+
};
|
|
9367
|
+
|
|
9368
|
+
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9369
|
+
var uploadFile = (uploadFileBody2, options) => {
|
|
9370
|
+
const formData = new FormData();
|
|
9371
|
+
if (uploadFileBody2.client_reference_id !== void 0 && uploadFileBody2.client_reference_id !== null) {
|
|
9372
|
+
formData.append("client_reference_id", uploadFileBody2.client_reference_id);
|
|
9373
|
+
}
|
|
9374
|
+
formData.append("file", uploadFileBody2.file);
|
|
9375
|
+
return import_axios9.default.post("/v1/files", formData, options);
|
|
9376
|
+
};
|
|
9377
|
+
var createTranscription2 = (createTranscriptionPayload, options) => {
|
|
9378
|
+
return import_axios9.default.post("/v1/transcriptions", createTranscriptionPayload, options);
|
|
9379
|
+
};
|
|
9380
|
+
var getTranscription = (transcriptionId, options) => {
|
|
9381
|
+
return import_axios9.default.get(`/v1/transcriptions/${transcriptionId}`, options);
|
|
9382
|
+
};
|
|
9383
|
+
var getTranscriptionTranscript = (transcriptionId, options) => {
|
|
9384
|
+
return import_axios9.default.get(`/v1/transcriptions/${transcriptionId}/transcript`, options);
|
|
9385
|
+
};
|
|
9386
|
+
var getModels = (options) => {
|
|
9387
|
+
return import_axios9.default.get("/v1/models", options);
|
|
9388
|
+
};
|
|
9389
|
+
|
|
9692
9390
|
// src/adapters/soniox-adapter.ts
|
|
9693
9391
|
var SonioxAdapter = class extends BaseAdapter {
|
|
9694
9392
|
constructor() {
|
|
@@ -9743,11 +9441,17 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9743
9441
|
}
|
|
9744
9442
|
}
|
|
9745
9443
|
/**
|
|
9746
|
-
* Get the base URL for API requests
|
|
9444
|
+
* Get the base URL for API requests (no /v1 suffix — generated functions include /v1 in paths)
|
|
9747
9445
|
*/
|
|
9748
9446
|
get baseUrl() {
|
|
9749
9447
|
if (this.config?.baseUrl) return this.config.baseUrl;
|
|
9750
|
-
return `https://${this.getRegionalHost()}
|
|
9448
|
+
return `https://${this.getRegionalHost()}`;
|
|
9449
|
+
}
|
|
9450
|
+
/**
|
|
9451
|
+
* Build axios config with Soniox Bearer auth
|
|
9452
|
+
*/
|
|
9453
|
+
getAxiosConfig() {
|
|
9454
|
+
return super.getAxiosConfig("Authorization", (key) => `Bearer ${key}`);
|
|
9751
9455
|
}
|
|
9752
9456
|
initialize(config) {
|
|
9753
9457
|
super.initialize(config);
|
|
@@ -9757,15 +9461,6 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9757
9461
|
if (config.model) {
|
|
9758
9462
|
this.defaultModel = config.model;
|
|
9759
9463
|
}
|
|
9760
|
-
this.client = import_axios9.default.create({
|
|
9761
|
-
baseURL: this.baseUrl,
|
|
9762
|
-
timeout: config.timeout || 12e4,
|
|
9763
|
-
headers: {
|
|
9764
|
-
Authorization: `Bearer ${config.apiKey}`,
|
|
9765
|
-
"Content-Type": "application/json",
|
|
9766
|
-
...config.headers
|
|
9767
|
-
}
|
|
9768
|
-
});
|
|
9769
9464
|
}
|
|
9770
9465
|
/**
|
|
9771
9466
|
* Get current region
|
|
@@ -9795,23 +9490,12 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9795
9490
|
*/
|
|
9796
9491
|
setRegion(region) {
|
|
9797
9492
|
this.region = region;
|
|
9798
|
-
if (this.config?.apiKey) {
|
|
9799
|
-
this.client = import_axios9.default.create({
|
|
9800
|
-
baseURL: this.baseUrl,
|
|
9801
|
-
timeout: this.config.timeout || 12e4,
|
|
9802
|
-
headers: {
|
|
9803
|
-
Authorization: `Bearer ${this.config.apiKey}`,
|
|
9804
|
-
"Content-Type": "application/json",
|
|
9805
|
-
...this.config.headers
|
|
9806
|
-
}
|
|
9807
|
-
});
|
|
9808
|
-
}
|
|
9809
9493
|
}
|
|
9810
9494
|
/**
|
|
9811
9495
|
* Submit audio for transcription
|
|
9812
9496
|
*
|
|
9813
|
-
*
|
|
9814
|
-
*
|
|
9497
|
+
* Uses the async v1 API: createTranscription returns status `queued`,
|
|
9498
|
+
* then polls until completed (or returns immediately if webhook is set).
|
|
9815
9499
|
*
|
|
9816
9500
|
* @param audio - Audio input (URL or file)
|
|
9817
9501
|
* @param options - Transcription options
|
|
@@ -9820,21 +9504,44 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9820
9504
|
async transcribe(audio, options) {
|
|
9821
9505
|
this.validateConfig();
|
|
9822
9506
|
try {
|
|
9823
|
-
const
|
|
9824
|
-
|
|
9825
|
-
};
|
|
9826
|
-
if (audio.type === "url") {
|
|
9827
|
-
requestBody.audio_url = audio.url;
|
|
9828
|
-
} else if (audio.type === "file") {
|
|
9829
|
-
const formData = new FormData();
|
|
9507
|
+
const sonioxOpts = options?.soniox;
|
|
9508
|
+
if (audio.type === "file") {
|
|
9830
9509
|
const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
|
|
9831
|
-
|
|
9832
|
-
const
|
|
9833
|
-
|
|
9834
|
-
|
|
9835
|
-
|
|
9836
|
-
|
|
9837
|
-
|
|
9510
|
+
const uploadBody = { file: audioBlob };
|
|
9511
|
+
const fileResp = await uploadFile(uploadBody, this.getAxiosConfig());
|
|
9512
|
+
const payload = {
|
|
9513
|
+
...sonioxOpts,
|
|
9514
|
+
model: options?.model || this.defaultModel,
|
|
9515
|
+
file_id: fileResp.data.id,
|
|
9516
|
+
language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
|
|
9517
|
+
enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
|
|
9518
|
+
enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
|
|
9519
|
+
context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
|
|
9520
|
+
webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
|
|
9521
|
+
};
|
|
9522
|
+
const createResp = await createTranscription2(payload, this.getAxiosConfig());
|
|
9523
|
+
const meta = createResp.data;
|
|
9524
|
+
if (options?.webhookUrl || sonioxOpts?.webhook_url) {
|
|
9525
|
+
return this.normalizeTranscription(meta);
|
|
9526
|
+
}
|
|
9527
|
+
return this.pollForCompletion(meta.id);
|
|
9528
|
+
} else if (audio.type === "url") {
|
|
9529
|
+
const payload = {
|
|
9530
|
+
...sonioxOpts,
|
|
9531
|
+
model: options?.model || this.defaultModel,
|
|
9532
|
+
audio_url: audio.url,
|
|
9533
|
+
language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
|
|
9534
|
+
enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
|
|
9535
|
+
enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
|
|
9536
|
+
context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
|
|
9537
|
+
webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
|
|
9538
|
+
};
|
|
9539
|
+
const createResp = await createTranscription2(payload, this.getAxiosConfig());
|
|
9540
|
+
const meta = createResp.data;
|
|
9541
|
+
if (options?.webhookUrl || sonioxOpts?.webhook_url) {
|
|
9542
|
+
return this.normalizeTranscription(meta);
|
|
9543
|
+
}
|
|
9544
|
+
return this.pollForCompletion(meta.id);
|
|
9838
9545
|
} else {
|
|
9839
9546
|
return {
|
|
9840
9547
|
success: false,
|
|
@@ -9845,38 +9552,6 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9845
9552
|
}
|
|
9846
9553
|
};
|
|
9847
9554
|
}
|
|
9848
|
-
if (options?.language) {
|
|
9849
|
-
requestBody.language_hints = [options.language];
|
|
9850
|
-
}
|
|
9851
|
-
if (options?.diarization) {
|
|
9852
|
-
requestBody.enable_speaker_diarization = true;
|
|
9853
|
-
}
|
|
9854
|
-
if (options?.languageDetection) {
|
|
9855
|
-
requestBody.enable_language_identification = true;
|
|
9856
|
-
}
|
|
9857
|
-
if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9858
|
-
requestBody.context = {
|
|
9859
|
-
terms: options.customVocabulary
|
|
9860
|
-
};
|
|
9861
|
-
}
|
|
9862
|
-
if (options?.webhookUrl) {
|
|
9863
|
-
requestBody.webhook_url = options.webhookUrl;
|
|
9864
|
-
}
|
|
9865
|
-
const response = await this.client.post("/transcriptions", requestBody);
|
|
9866
|
-
const transcriptionId = response.data.id;
|
|
9867
|
-
if (options?.webhookUrl) {
|
|
9868
|
-
return {
|
|
9869
|
-
success: true,
|
|
9870
|
-
provider: this.name,
|
|
9871
|
-
data: {
|
|
9872
|
-
id: transcriptionId,
|
|
9873
|
-
text: "",
|
|
9874
|
-
status: "queued"
|
|
9875
|
-
},
|
|
9876
|
-
raw: response.data
|
|
9877
|
-
};
|
|
9878
|
-
}
|
|
9879
|
-
return await this.pollForCompletion(transcriptionId);
|
|
9880
9555
|
} catch (error) {
|
|
9881
9556
|
return this.createErrorResponse(error);
|
|
9882
9557
|
}
|
|
@@ -9884,9 +9559,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9884
9559
|
/**
|
|
9885
9560
|
* Get transcription result by ID
|
|
9886
9561
|
*
|
|
9887
|
-
*
|
|
9888
|
-
*
|
|
9889
|
-
* when completed.
|
|
9562
|
+
* Fetches transcription metadata and, if completed, the transcript text/tokens.
|
|
9563
|
+
* Used by pollForCompletion() for async polling.
|
|
9890
9564
|
*
|
|
9891
9565
|
* @param transcriptId - Transcript ID
|
|
9892
9566
|
* @returns Transcription response
|
|
@@ -9894,39 +9568,20 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9894
9568
|
async getTranscript(transcriptId) {
|
|
9895
9569
|
this.validateConfig();
|
|
9896
9570
|
try {
|
|
9897
|
-
const
|
|
9898
|
-
const
|
|
9899
|
-
if (
|
|
9900
|
-
|
|
9901
|
-
|
|
9902
|
-
|
|
9903
|
-
|
|
9904
|
-
|
|
9905
|
-
|
|
9906
|
-
|
|
9907
|
-
|
|
9908
|
-
|
|
9909
|
-
if (job.status !== "completed") {
|
|
9910
|
-
return {
|
|
9911
|
-
success: true,
|
|
9912
|
-
provider: this.name,
|
|
9913
|
-
data: {
|
|
9914
|
-
id: job.id,
|
|
9915
|
-
text: "",
|
|
9916
|
-
status: job.status
|
|
9917
|
-
},
|
|
9918
|
-
raw: job
|
|
9919
|
-
};
|
|
9571
|
+
const metaResp = await getTranscription(transcriptId, this.getAxiosConfig());
|
|
9572
|
+
const meta = metaResp.data;
|
|
9573
|
+
if (meta.status === TranscriptionStatus.completed) {
|
|
9574
|
+
try {
|
|
9575
|
+
const transcriptResp = await getTranscriptionTranscript(
|
|
9576
|
+
transcriptId,
|
|
9577
|
+
this.getAxiosConfig()
|
|
9578
|
+
);
|
|
9579
|
+
return this.normalizeTranscription(meta, transcriptResp.data);
|
|
9580
|
+
} catch (transcriptError) {
|
|
9581
|
+
return this.createErrorResponse(transcriptError);
|
|
9582
|
+
}
|
|
9920
9583
|
}
|
|
9921
|
-
|
|
9922
|
-
`/transcriptions/${transcriptId}/transcript`
|
|
9923
|
-
);
|
|
9924
|
-
return this.normalizeResponse({
|
|
9925
|
-
...transcriptResponse.data,
|
|
9926
|
-
// Carry over job metadata
|
|
9927
|
-
id: job.id,
|
|
9928
|
-
audio_duration_ms: job.audio_duration_ms
|
|
9929
|
-
});
|
|
9584
|
+
return this.normalizeTranscription(meta);
|
|
9930
9585
|
} catch (error) {
|
|
9931
9586
|
return this.createErrorResponse(error);
|
|
9932
9587
|
}
|
|
@@ -9946,51 +9601,50 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9946
9601
|
const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
|
|
9947
9602
|
const createdAt = /* @__PURE__ */ new Date();
|
|
9948
9603
|
const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
|
|
9949
|
-
const wsUrl = `${wsBase}/transcribe-websocket
|
|
9950
|
-
|
|
9951
|
-
const
|
|
9952
|
-
|
|
9953
|
-
|
|
9954
|
-
model: modelId
|
|
9955
|
-
};
|
|
9956
|
-
if (sonioxOpts?.audioFormat) {
|
|
9957
|
-
initMessage.audio_format = sonioxOpts.audioFormat;
|
|
9958
|
-
} else if (options?.encoding) {
|
|
9604
|
+
const wsUrl = new URL(`${wsBase}/transcribe-websocket`);
|
|
9605
|
+
wsUrl.searchParams.set("api_key", this.config.apiKey);
|
|
9606
|
+
const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-preview";
|
|
9607
|
+
wsUrl.searchParams.set("model", modelId);
|
|
9608
|
+
if (options?.encoding) {
|
|
9959
9609
|
const encodingMap = {
|
|
9960
9610
|
linear16: "pcm_s16le",
|
|
9961
9611
|
pcm: "pcm_s16le",
|
|
9962
9612
|
mulaw: "mulaw",
|
|
9963
9613
|
alaw: "alaw"
|
|
9964
9614
|
};
|
|
9965
|
-
|
|
9615
|
+
wsUrl.searchParams.set("audio_format", encodingMap[options.encoding] || options.encoding);
|
|
9966
9616
|
}
|
|
9967
|
-
if (
|
|
9968
|
-
|
|
9617
|
+
if (options?.sampleRate) {
|
|
9618
|
+
wsUrl.searchParams.set("sample_rate", options.sampleRate.toString());
|
|
9969
9619
|
}
|
|
9970
|
-
if (
|
|
9971
|
-
|
|
9620
|
+
if (options?.channels) {
|
|
9621
|
+
wsUrl.searchParams.set("num_channels", options.channels.toString());
|
|
9972
9622
|
}
|
|
9623
|
+
const sonioxOpts = options?.sonioxStreaming;
|
|
9973
9624
|
if (sonioxOpts) {
|
|
9974
9625
|
if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
|
|
9975
|
-
|
|
9626
|
+
wsUrl.searchParams.set("language_hints", JSON.stringify(sonioxOpts.languageHints));
|
|
9976
9627
|
}
|
|
9977
9628
|
if (sonioxOpts.enableLanguageIdentification) {
|
|
9978
|
-
|
|
9629
|
+
wsUrl.searchParams.set("enable_language_identification", "true");
|
|
9979
9630
|
}
|
|
9980
9631
|
if (sonioxOpts.enableEndpointDetection) {
|
|
9981
|
-
|
|
9632
|
+
wsUrl.searchParams.set("enable_endpoint_detection", "true");
|
|
9982
9633
|
}
|
|
9983
9634
|
if (sonioxOpts.enableSpeakerDiarization) {
|
|
9984
|
-
|
|
9635
|
+
wsUrl.searchParams.set("enable_speaker_diarization", "true");
|
|
9985
9636
|
}
|
|
9986
9637
|
if (sonioxOpts.context) {
|
|
9987
|
-
|
|
9638
|
+
wsUrl.searchParams.set(
|
|
9639
|
+
"context",
|
|
9640
|
+
typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
|
|
9641
|
+
);
|
|
9988
9642
|
}
|
|
9989
9643
|
if (sonioxOpts.translation) {
|
|
9990
|
-
|
|
9644
|
+
wsUrl.searchParams.set("translation", JSON.stringify(sonioxOpts.translation));
|
|
9991
9645
|
}
|
|
9992
9646
|
if (sonioxOpts.clientReferenceId) {
|
|
9993
|
-
|
|
9647
|
+
wsUrl.searchParams.set("client_reference_id", sonioxOpts.clientReferenceId);
|
|
9994
9648
|
}
|
|
9995
9649
|
}
|
|
9996
9650
|
if (!sonioxOpts?.languageHints && options?.language) {
|
|
@@ -9999,33 +9653,24 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9999
9653
|
`[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
|
|
10000
9654
|
);
|
|
10001
9655
|
}
|
|
10002
|
-
|
|
9656
|
+
wsUrl.searchParams.set("language_hints", JSON.stringify([options.language]));
|
|
10003
9657
|
}
|
|
10004
9658
|
if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
|
|
10005
|
-
|
|
9659
|
+
wsUrl.searchParams.set("enable_speaker_diarization", "true");
|
|
10006
9660
|
}
|
|
10007
9661
|
if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
|
|
10008
|
-
|
|
9662
|
+
wsUrl.searchParams.set("enable_language_identification", "true");
|
|
9663
|
+
}
|
|
9664
|
+
if (options?.interimResults !== false) {
|
|
10009
9665
|
}
|
|
10010
9666
|
let status = "connecting";
|
|
10011
9667
|
let openedAt = null;
|
|
10012
9668
|
let receivedData = false;
|
|
10013
9669
|
const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : require("ws");
|
|
10014
|
-
const ws = new WebSocketImpl(wsUrl);
|
|
9670
|
+
const ws = new WebSocketImpl(wsUrl.toString());
|
|
10015
9671
|
ws.onopen = () => {
|
|
10016
|
-
openedAt = Date.now();
|
|
10017
|
-
const initPayload = JSON.stringify(initMessage);
|
|
10018
|
-
if (callbacks?.onRawMessage) {
|
|
10019
|
-
callbacks.onRawMessage({
|
|
10020
|
-
provider: this.name,
|
|
10021
|
-
direction: "outgoing",
|
|
10022
|
-
timestamp: Date.now(),
|
|
10023
|
-
payload: initPayload,
|
|
10024
|
-
messageType: "init"
|
|
10025
|
-
});
|
|
10026
|
-
}
|
|
10027
|
-
ws.send(initPayload);
|
|
10028
9672
|
status = "open";
|
|
9673
|
+
openedAt = Date.now();
|
|
10029
9674
|
callbacks?.onOpen?.();
|
|
10030
9675
|
};
|
|
10031
9676
|
ws.onmessage = (event) => {
|
|
@@ -10034,7 +9679,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10034
9679
|
let messageType;
|
|
10035
9680
|
try {
|
|
10036
9681
|
const data = JSON.parse(rawPayload);
|
|
10037
|
-
|
|
9682
|
+
const errorMessage = data.error_message || data.error;
|
|
9683
|
+
if (errorMessage) {
|
|
10038
9684
|
messageType = "error";
|
|
10039
9685
|
} else if (data.finished) {
|
|
10040
9686
|
messageType = "finished";
|
|
@@ -10050,10 +9696,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10050
9696
|
messageType
|
|
10051
9697
|
});
|
|
10052
9698
|
}
|
|
10053
|
-
if (
|
|
9699
|
+
if (errorMessage) {
|
|
10054
9700
|
callbacks?.onError?.({
|
|
10055
9701
|
code: data.error_code?.toString() || "STREAM_ERROR",
|
|
10056
|
-
message:
|
|
9702
|
+
message: errorMessage
|
|
10057
9703
|
});
|
|
10058
9704
|
return;
|
|
10059
9705
|
}
|
|
@@ -10067,7 +9713,7 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10067
9713
|
start: token.start_ms ? token.start_ms / 1e3 : 0,
|
|
10068
9714
|
end: token.end_ms ? token.end_ms / 1e3 : 0,
|
|
10069
9715
|
confidence: token.confidence,
|
|
10070
|
-
speaker: token.speaker
|
|
9716
|
+
speaker: token.speaker ?? void 0
|
|
10071
9717
|
}));
|
|
10072
9718
|
const text = data.text || data.tokens.map((t) => t.text).join("");
|
|
10073
9719
|
const isFinal = data.tokens.every((t) => t.is_final);
|
|
@@ -10076,8 +9722,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10076
9722
|
text,
|
|
10077
9723
|
isFinal,
|
|
10078
9724
|
words,
|
|
10079
|
-
speaker: data.tokens[0]?.speaker,
|
|
10080
|
-
language: data.tokens[0]?.language,
|
|
9725
|
+
speaker: data.tokens[0]?.speaker ?? void 0,
|
|
9726
|
+
language: data.tokens[0]?.language ?? void 0,
|
|
10081
9727
|
confidence: data.tokens[0]?.confidence
|
|
10082
9728
|
};
|
|
10083
9729
|
callbacks?.onTranscript?.(event2);
|
|
@@ -10104,10 +9750,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10104
9750
|
ws.onclose = (event) => {
|
|
10105
9751
|
status = "closed";
|
|
10106
9752
|
const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
|
|
10107
|
-
const
|
|
10108
|
-
if (
|
|
9753
|
+
const isImmediateClose = timeSinceOpen !== null && timeSinceOpen < 1e3 && !receivedData;
|
|
9754
|
+
if (isImmediateClose && event.code === 1e3) {
|
|
10109
9755
|
const errorMessage = [
|
|
10110
|
-
"Soniox closed connection
|
|
9756
|
+
"Soniox closed connection immediately after opening.",
|
|
10111
9757
|
`Current config: region=${this.region}, model=${modelId}`,
|
|
10112
9758
|
"Likely causes:",
|
|
10113
9759
|
" - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
|
|
@@ -10193,7 +9839,7 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10193
9839
|
async getModels() {
|
|
10194
9840
|
this.validateConfig();
|
|
10195
9841
|
try {
|
|
10196
|
-
const response = await this.
|
|
9842
|
+
const response = await getModels(this.getAxiosConfig());
|
|
10197
9843
|
return response.data.models || [];
|
|
10198
9844
|
} catch (error) {
|
|
10199
9845
|
console.error("Failed to fetch Soniox models:", error);
|
|
@@ -10225,11 +9871,44 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10225
9871
|
return buildUtterancesFromWords(words);
|
|
10226
9872
|
}
|
|
10227
9873
|
/**
|
|
10228
|
-
* Normalize
|
|
9874
|
+
* Normalize v1 API response to unified format
|
|
9875
|
+
*
|
|
9876
|
+
* @param meta - Transcription metadata from getTranscription/createTranscription
|
|
9877
|
+
* @param transcript - Transcript data (text/tokens), only present when status is completed
|
|
10229
9878
|
*/
|
|
10230
|
-
|
|
10231
|
-
|
|
10232
|
-
|
|
9879
|
+
normalizeTranscription(meta, transcript) {
|
|
9880
|
+
if (meta.status === TranscriptionStatus.error) {
|
|
9881
|
+
return {
|
|
9882
|
+
success: false,
|
|
9883
|
+
provider: this.name,
|
|
9884
|
+
data: {
|
|
9885
|
+
id: meta.id,
|
|
9886
|
+
text: "",
|
|
9887
|
+
status: "error"
|
|
9888
|
+
},
|
|
9889
|
+
error: {
|
|
9890
|
+
code: meta.error_type || "TRANSCRIPTION_ERROR",
|
|
9891
|
+
message: meta.error_message || "Transcription failed"
|
|
9892
|
+
},
|
|
9893
|
+
raw: { meta, transcript }
|
|
9894
|
+
};
|
|
9895
|
+
}
|
|
9896
|
+
if (!transcript) {
|
|
9897
|
+
return {
|
|
9898
|
+
success: true,
|
|
9899
|
+
provider: this.name,
|
|
9900
|
+
data: {
|
|
9901
|
+
id: meta.id,
|
|
9902
|
+
text: "",
|
|
9903
|
+
status: meta.status,
|
|
9904
|
+
duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0
|
|
9905
|
+
},
|
|
9906
|
+
raw: { meta }
|
|
9907
|
+
};
|
|
9908
|
+
}
|
|
9909
|
+
const tokens = transcript.tokens || [];
|
|
9910
|
+
const text = transcript.text || tokens.map((t) => t.text).join("");
|
|
9911
|
+
const words = tokens.filter((t) => t.start_ms !== void 0 && t.end_ms !== void 0).map((token) => ({
|
|
10233
9912
|
word: token.text,
|
|
10234
9913
|
start: token.start_ms / 1e3,
|
|
10235
9914
|
end: token.end_ms / 1e3,
|
|
@@ -10237,33 +9916,32 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10237
9916
|
speaker: token.speaker ?? void 0
|
|
10238
9917
|
}));
|
|
10239
9918
|
const speakerSet = /* @__PURE__ */ new Set();
|
|
10240
|
-
|
|
10241
|
-
if (
|
|
10242
|
-
}
|
|
9919
|
+
tokens.forEach((t) => {
|
|
9920
|
+
if (t.speaker) speakerSet.add(String(t.speaker));
|
|
9921
|
+
});
|
|
10243
9922
|
const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
|
|
10244
9923
|
id,
|
|
10245
9924
|
label: `Speaker ${id}`
|
|
10246
9925
|
})) : void 0;
|
|
10247
|
-
const utterances =
|
|
9926
|
+
const utterances = this.buildUtterancesFromTokens(tokens);
|
|
10248
9927
|
const language = tokens.find((t) => t.language)?.language ?? void 0;
|
|
10249
9928
|
return {
|
|
10250
9929
|
success: true,
|
|
10251
9930
|
provider: this.name,
|
|
10252
9931
|
data: {
|
|
10253
|
-
id:
|
|
9932
|
+
id: meta.id,
|
|
10254
9933
|
text,
|
|
10255
9934
|
status: TranscriptionStatus.completed,
|
|
10256
9935
|
language,
|
|
10257
|
-
duration:
|
|
9936
|
+
duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0,
|
|
10258
9937
|
speakers,
|
|
10259
9938
|
words: words.length > 0 ? words : void 0,
|
|
10260
9939
|
utterances: utterances.length > 0 ? utterances : void 0
|
|
10261
9940
|
},
|
|
10262
9941
|
tracking: {
|
|
10263
|
-
requestId:
|
|
10264
|
-
processingTimeMs: response.total_audio_proc_ms
|
|
9942
|
+
requestId: meta.id
|
|
10265
9943
|
},
|
|
10266
|
-
raw:
|
|
9944
|
+
raw: { meta, transcript }
|
|
10267
9945
|
};
|
|
10268
9946
|
}
|
|
10269
9947
|
};
|
|
@@ -10419,29 +10097,11 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10419
10097
|
}
|
|
10420
10098
|
}
|
|
10421
10099
|
}
|
|
10422
|
-
if (options?.webhookUrl) {
|
|
10423
|
-
if (!formData.has("webhook")) {
|
|
10424
|
-
formData.append("webhook", "true");
|
|
10425
|
-
}
|
|
10426
|
-
}
|
|
10427
10100
|
const response = await this.client.post("/v1/speech-to-text", formData, {
|
|
10428
10101
|
headers: {
|
|
10429
10102
|
"Content-Type": "multipart/form-data"
|
|
10430
10103
|
}
|
|
10431
10104
|
});
|
|
10432
|
-
if (options?.webhookUrl) {
|
|
10433
|
-
const transcriptionId = response.data.transcription_id || response.data.id || `elevenlabs_${Date.now()}`;
|
|
10434
|
-
return {
|
|
10435
|
-
success: true,
|
|
10436
|
-
provider: this.name,
|
|
10437
|
-
data: {
|
|
10438
|
-
id: transcriptionId,
|
|
10439
|
-
text: "",
|
|
10440
|
-
status: "queued"
|
|
10441
|
-
},
|
|
10442
|
-
raw: response.data
|
|
10443
|
-
};
|
|
10444
|
-
}
|
|
10445
10105
|
return this.normalizeResponse(response.data);
|
|
10446
10106
|
} catch (error) {
|
|
10447
10107
|
return this.createErrorResponse(error);
|
|
@@ -10755,7 +10415,7 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10755
10415
|
}
|
|
10756
10416
|
}
|
|
10757
10417
|
}
|
|
10758
|
-
const transcriptionId =
|
|
10418
|
+
const transcriptionId = response.transcription_id || chunks[0]?.transcription_id || `elevenlabs_${Date.now()}`;
|
|
10759
10419
|
return {
|
|
10760
10420
|
success: true,
|
|
10761
10421
|
provider: this.name,
|
|
@@ -36673,12 +36333,10 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
|
|
|
36673
36333
|
var streaming_types_zod_exports = {};
|
|
36674
36334
|
__export(streaming_types_zod_exports, {
|
|
36675
36335
|
sonioxAudioFormatSchema: () => sonioxAudioFormatSchema,
|
|
36676
|
-
sonioxAutoDetectedAudioFormatSchema: () => sonioxAutoDetectedAudioFormatSchema,
|
|
36677
36336
|
sonioxContextGeneralItemSchema: () => sonioxContextGeneralItemSchema,
|
|
36678
36337
|
sonioxContextSchema: () => sonioxContextSchema,
|
|
36679
36338
|
sonioxErrorStatusSchema: () => sonioxErrorStatusSchema,
|
|
36680
36339
|
sonioxOneWayTranslationSchema: () => sonioxOneWayTranslationSchema,
|
|
36681
|
-
sonioxPcmAudioEncodingSchema: () => sonioxPcmAudioEncodingSchema,
|
|
36682
36340
|
sonioxRealtimeModelSchema: () => sonioxRealtimeModelSchema,
|
|
36683
36341
|
sonioxRecorderStateSchema: () => sonioxRecorderStateSchema,
|
|
36684
36342
|
sonioxStreamingResponseSchema: () => sonioxStreamingResponseSchema,
|
|
@@ -36692,7 +36350,7 @@ __export(streaming_types_zod_exports, {
|
|
|
36692
36350
|
streamingUpdateConfigParams: () => streamingUpdateConfigParams3
|
|
36693
36351
|
});
|
|
36694
36352
|
var import_zod11 = require("zod");
|
|
36695
|
-
var
|
|
36353
|
+
var sonioxAudioFormatSchema = import_zod11.z.enum([
|
|
36696
36354
|
"auto",
|
|
36697
36355
|
"aac",
|
|
36698
36356
|
"aiff",
|
|
@@ -36702,10 +36360,7 @@ var sonioxAutoDetectedAudioFormatSchema = import_zod11.z.enum([
|
|
|
36702
36360
|
"mp3",
|
|
36703
36361
|
"ogg",
|
|
36704
36362
|
"wav",
|
|
36705
|
-
"webm"
|
|
36706
|
-
]);
|
|
36707
|
-
var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
|
|
36708
|
-
// Signed PCM
|
|
36363
|
+
"webm",
|
|
36709
36364
|
"pcm_s8",
|
|
36710
36365
|
"pcm_s16le",
|
|
36711
36366
|
"pcm_s16be",
|
|
@@ -36713,7 +36368,6 @@ var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
|
|
|
36713
36368
|
"pcm_s24be",
|
|
36714
36369
|
"pcm_s32le",
|
|
36715
36370
|
"pcm_s32be",
|
|
36716
|
-
// Unsigned PCM
|
|
36717
36371
|
"pcm_u8",
|
|
36718
36372
|
"pcm_u16le",
|
|
36719
36373
|
"pcm_u16be",
|
|
@@ -36721,86 +36375,81 @@ var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
|
|
|
36721
36375
|
"pcm_u24be",
|
|
36722
36376
|
"pcm_u32le",
|
|
36723
36377
|
"pcm_u32be",
|
|
36724
|
-
// Float PCM
|
|
36725
36378
|
"pcm_f32le",
|
|
36726
36379
|
"pcm_f32be",
|
|
36727
36380
|
"pcm_f64le",
|
|
36728
36381
|
"pcm_f64be",
|
|
36729
|
-
// Companded
|
|
36730
36382
|
"mulaw",
|
|
36731
36383
|
"alaw"
|
|
36732
36384
|
]);
|
|
36733
|
-
var sonioxAudioFormatSchema = import_zod11.z.union([
|
|
36734
|
-
sonioxAutoDetectedAudioFormatSchema,
|
|
36735
|
-
sonioxPcmAudioEncodingSchema
|
|
36736
|
-
]);
|
|
36737
36385
|
var sonioxOneWayTranslationSchema = import_zod11.z.object({
|
|
36738
36386
|
type: import_zod11.z.literal("one_way"),
|
|
36739
|
-
target_language: import_zod11.z.string()
|
|
36387
|
+
target_language: import_zod11.z.string()
|
|
36740
36388
|
});
|
|
36741
36389
|
var sonioxTwoWayTranslationSchema = import_zod11.z.object({
|
|
36742
36390
|
type: import_zod11.z.literal("two_way"),
|
|
36743
|
-
language_a: import_zod11.z.string()
|
|
36744
|
-
language_b: import_zod11.z.string()
|
|
36391
|
+
language_a: import_zod11.z.string(),
|
|
36392
|
+
language_b: import_zod11.z.string()
|
|
36745
36393
|
});
|
|
36746
36394
|
var sonioxTranslationConfigSchema = import_zod11.z.union([
|
|
36747
36395
|
sonioxOneWayTranslationSchema,
|
|
36748
36396
|
sonioxTwoWayTranslationSchema
|
|
36749
36397
|
]);
|
|
36750
36398
|
var sonioxContextGeneralItemSchema = import_zod11.z.object({
|
|
36751
|
-
key: import_zod11.z.string()
|
|
36752
|
-
value: import_zod11.z.string()
|
|
36399
|
+
key: import_zod11.z.string(),
|
|
36400
|
+
value: import_zod11.z.string()
|
|
36753
36401
|
});
|
|
36754
36402
|
var sonioxTranslationTermSchema = import_zod11.z.object({
|
|
36755
|
-
source: import_zod11.z.string()
|
|
36756
|
-
target: import_zod11.z.string()
|
|
36403
|
+
source: import_zod11.z.string(),
|
|
36404
|
+
target: import_zod11.z.string()
|
|
36757
36405
|
});
|
|
36758
36406
|
var sonioxStructuredContextSchema = import_zod11.z.object({
|
|
36759
|
-
general: import_zod11.z.array(sonioxContextGeneralItemSchema).optional()
|
|
36760
|
-
text: import_zod11.z.string().optional()
|
|
36761
|
-
terms: import_zod11.z.array(import_zod11.z.string()).optional()
|
|
36762
|
-
translation_terms: import_zod11.z.array(sonioxTranslationTermSchema).optional()
|
|
36407
|
+
general: import_zod11.z.array(sonioxContextGeneralItemSchema).optional(),
|
|
36408
|
+
text: import_zod11.z.string().optional(),
|
|
36409
|
+
terms: import_zod11.z.array(import_zod11.z.string()).optional(),
|
|
36410
|
+
translation_terms: import_zod11.z.array(sonioxTranslationTermSchema).optional()
|
|
36763
36411
|
});
|
|
36764
36412
|
var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
|
|
36765
36413
|
var sonioxRealtimeModelSchema = import_zod11.z.enum([
|
|
36414
|
+
"stt-rt-v4",
|
|
36766
36415
|
"stt-rt-v3",
|
|
36767
36416
|
"stt-rt-preview",
|
|
36768
36417
|
"stt-rt-v3-preview",
|
|
36769
36418
|
"stt-rt-preview-v2"
|
|
36770
36419
|
]);
|
|
36771
36420
|
var streamingTranscriberParams3 = import_zod11.z.object({
|
|
36772
|
-
model: sonioxRealtimeModelSchema
|
|
36773
|
-
audioFormat: sonioxAudioFormatSchema.optional()
|
|
36774
|
-
sampleRate: import_zod11.z.number().optional()
|
|
36775
|
-
numChannels: import_zod11.z.number().
|
|
36776
|
-
languageHints: import_zod11.z.array(import_zod11.z.string()).optional()
|
|
36777
|
-
context: sonioxContextSchema.optional()
|
|
36778
|
-
enableSpeakerDiarization: import_zod11.z.boolean().optional()
|
|
36779
|
-
enableLanguageIdentification: import_zod11.z.boolean().optional()
|
|
36780
|
-
enableEndpointDetection: import_zod11.z.boolean().optional()
|
|
36781
|
-
translation: sonioxTranslationConfigSchema.optional()
|
|
36782
|
-
clientReferenceId: import_zod11.z.string().optional()
|
|
36783
|
-
});
|
|
36784
|
-
var sonioxTranslationStatusSchema = import_zod11.z.enum(["
|
|
36421
|
+
model: sonioxRealtimeModelSchema,
|
|
36422
|
+
audioFormat: sonioxAudioFormatSchema.optional(),
|
|
36423
|
+
sampleRate: import_zod11.z.number().optional(),
|
|
36424
|
+
numChannels: import_zod11.z.number().optional(),
|
|
36425
|
+
languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
|
|
36426
|
+
context: sonioxContextSchema.optional(),
|
|
36427
|
+
enableSpeakerDiarization: import_zod11.z.boolean().optional(),
|
|
36428
|
+
enableLanguageIdentification: import_zod11.z.boolean().optional(),
|
|
36429
|
+
enableEndpointDetection: import_zod11.z.boolean().optional(),
|
|
36430
|
+
translation: sonioxTranslationConfigSchema.optional(),
|
|
36431
|
+
clientReferenceId: import_zod11.z.string().optional()
|
|
36432
|
+
});
|
|
36433
|
+
var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
|
|
36785
36434
|
var sonioxTokenSchema = import_zod11.z.object({
|
|
36786
|
-
text: import_zod11.z.string()
|
|
36787
|
-
start_ms: import_zod11.z.number().optional()
|
|
36788
|
-
end_ms: import_zod11.z.number().optional()
|
|
36789
|
-
confidence: import_zod11.z.number()
|
|
36790
|
-
is_final: import_zod11.z.boolean()
|
|
36791
|
-
speaker: import_zod11.z.string().optional()
|
|
36792
|
-
|
|
36793
|
-
|
|
36794
|
-
|
|
36435
|
+
text: import_zod11.z.string(),
|
|
36436
|
+
start_ms: import_zod11.z.number().optional(),
|
|
36437
|
+
end_ms: import_zod11.z.number().optional(),
|
|
36438
|
+
confidence: import_zod11.z.number(),
|
|
36439
|
+
is_final: import_zod11.z.boolean(),
|
|
36440
|
+
speaker: import_zod11.z.string().optional(),
|
|
36441
|
+
translation_status: sonioxTranslationStatusSchema.optional(),
|
|
36442
|
+
language: import_zod11.z.string().optional(),
|
|
36443
|
+
source_language: import_zod11.z.string().optional()
|
|
36795
36444
|
});
|
|
36796
36445
|
var sonioxStreamingResponseSchema = import_zod11.z.object({
|
|
36797
|
-
text: import_zod11.z.string()
|
|
36798
|
-
tokens: import_zod11.z.array(sonioxTokenSchema)
|
|
36799
|
-
final_audio_proc_ms: import_zod11.z.number()
|
|
36800
|
-
total_audio_proc_ms: import_zod11.z.number()
|
|
36801
|
-
finished: import_zod11.z.boolean().optional()
|
|
36802
|
-
|
|
36803
|
-
|
|
36446
|
+
text: import_zod11.z.string(),
|
|
36447
|
+
tokens: import_zod11.z.array(sonioxTokenSchema),
|
|
36448
|
+
final_audio_proc_ms: import_zod11.z.number(),
|
|
36449
|
+
total_audio_proc_ms: import_zod11.z.number(),
|
|
36450
|
+
finished: import_zod11.z.boolean().optional(),
|
|
36451
|
+
error_code: import_zod11.z.number().optional(),
|
|
36452
|
+
error_message: import_zod11.z.string().optional()
|
|
36804
36453
|
});
|
|
36805
36454
|
var sonioxRecorderStateSchema = import_zod11.z.enum([
|
|
36806
36455
|
"Init",
|
|
@@ -37366,8 +37015,8 @@ var BatchOnlyProviders = AllProviders.filter(
|
|
|
37366
37015
|
);
|
|
37367
37016
|
|
|
37368
37017
|
// src/generated/deepgram/schema/index.ts
|
|
37369
|
-
var
|
|
37370
|
-
__export(
|
|
37018
|
+
var schema_exports5 = {};
|
|
37019
|
+
__export(schema_exports5, {
|
|
37371
37020
|
V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
|
|
37372
37021
|
V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
|
|
37373
37022
|
V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
|
|
@@ -37622,8 +37271,8 @@ var V1SpeakPostParametersSampleRate = {
|
|
|
37622
37271
|
};
|
|
37623
37272
|
|
|
37624
37273
|
// src/generated/openai/schema/index.ts
|
|
37625
|
-
var
|
|
37626
|
-
__export(
|
|
37274
|
+
var schema_exports6 = {};
|
|
37275
|
+
__export(schema_exports6, {
|
|
37627
37276
|
AudioResponseFormat: () => AudioResponseFormat,
|
|
37628
37277
|
CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
|
|
37629
37278
|
CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
|
|
@@ -37963,8 +37612,8 @@ var VoiceResourceObject = {
|
|
|
37963
37612
|
};
|
|
37964
37613
|
|
|
37965
37614
|
// src/generated/speechmatics/schema/index.ts
|
|
37966
|
-
var
|
|
37967
|
-
__export(
|
|
37615
|
+
var schema_exports7 = {};
|
|
37616
|
+
__export(schema_exports7, {
|
|
37968
37617
|
AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
|
|
37969
37618
|
ErrorResponseError: () => ErrorResponseError,
|
|
37970
37619
|
GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
|
|
@@ -38153,32 +37802,6 @@ var WrittenFormRecognitionResultType = {
|
|
|
38153
37802
|
word: "word"
|
|
38154
37803
|
};
|
|
38155
37804
|
|
|
38156
|
-
// src/generated/soniox/schema/index.ts
|
|
38157
|
-
var schema_exports7 = {};
|
|
38158
|
-
__export(schema_exports7, {
|
|
38159
|
-
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
38160
|
-
TranscriptionMode: () => TranscriptionMode,
|
|
38161
|
-
TranscriptionStatus: () => TranscriptionStatus,
|
|
38162
|
-
TranslationConfigType: () => TranslationConfigType
|
|
38163
|
-
});
|
|
38164
|
-
|
|
38165
|
-
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
38166
|
-
var TemporaryApiKeyUsageType = {
|
|
38167
|
-
transcribe_websocket: "transcribe_websocket"
|
|
38168
|
-
};
|
|
38169
|
-
|
|
38170
|
-
// src/generated/soniox/schema/transcriptionMode.ts
|
|
38171
|
-
var TranscriptionMode = {
|
|
38172
|
-
real_time: "real_time",
|
|
38173
|
-
async: "async"
|
|
38174
|
-
};
|
|
38175
|
-
|
|
38176
|
-
// src/generated/soniox/schema/translationConfigType.ts
|
|
38177
|
-
var TranslationConfigType = {
|
|
38178
|
-
one_way: "one_way",
|
|
38179
|
-
two_way: "two_way"
|
|
38180
|
-
};
|
|
38181
|
-
|
|
38182
37805
|
// src/generated/elevenlabs/schema/index.ts
|
|
38183
37806
|
var schema_exports8 = {};
|
|
38184
37807
|
__export(schema_exports8, {
|