voice-router-dev 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/constants.d.mts +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/{field-configs-DYiUtRUz.d.mts → field-configs-CH0lgAe8.d.mts} +5665 -5721
- package/dist/{field-configs-DYiUtRUz.d.ts → field-configs-CH0lgAe8.d.ts} +5665 -5721
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +42 -51
- package/dist/field-configs.mjs +42 -51
- package/dist/index.d.mts +921 -1270
- package/dist/index.d.ts +921 -1270
- package/dist/index.js +330 -707
- package/dist/index.mjs +330 -707
- package/dist/{speechToTextChunkResponseModel-CI-Aqxcr.d.ts → speechToTextChunkResponseModel-BY2lGyZ3.d.ts} +319 -1
- package/dist/{speechToTextChunkResponseModel-D8VJ-wz6.d.mts → speechToTextChunkResponseModel-KayxDiZ7.d.mts} +319 -1
- package/dist/webhooks.d.mts +1 -1
- package/dist/webhooks.d.ts +1 -1
- package/package.json +2 -1
package/dist/index.mjs
CHANGED
|
@@ -5835,23 +5835,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
5835
5835
|
"AssemblyAI adapter currently only supports URL-based audio input. Use audio.type='url'"
|
|
5836
5836
|
);
|
|
5837
5837
|
}
|
|
5838
|
-
const
|
|
5839
|
-
|
|
5840
|
-
|
|
5841
|
-
|
|
5842
|
-
|
|
5843
|
-
|
|
5838
|
+
const passthrough = options?.assemblyai;
|
|
5839
|
+
let speechModels;
|
|
5840
|
+
if (passthrough?.speech_model != null && !passthrough.speech_models) {
|
|
5841
|
+
speechModels = [passthrough.speech_model];
|
|
5842
|
+
} else if (passthrough?.speech_models) {
|
|
5843
|
+
speechModels = passthrough.speech_models;
|
|
5844
5844
|
}
|
|
5845
|
+
const { speech_model: _deprecated, ...typedOpts } = passthrough ?? {};
|
|
5845
5846
|
const request = {
|
|
5846
|
-
...
|
|
5847
|
+
...typedOpts,
|
|
5847
5848
|
audio_url: audioUrl,
|
|
5848
5849
|
// speech_models is required — default to universal-3-pro
|
|
5849
|
-
speech_models:
|
|
5850
|
-
"universal-3-pro"
|
|
5851
|
-
],
|
|
5850
|
+
speech_models: speechModels ?? ["universal-3-pro"],
|
|
5852
5851
|
// Enable punctuation and formatting by default
|
|
5853
|
-
punctuate:
|
|
5854
|
-
format_text:
|
|
5852
|
+
punctuate: typedOpts.punctuate ?? true,
|
|
5853
|
+
format_text: typedOpts.format_text ?? true
|
|
5855
5854
|
};
|
|
5856
5855
|
if (options) {
|
|
5857
5856
|
if (options.model) {
|
|
@@ -5899,22 +5898,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
5899
5898
|
normalizeResponse(response) {
|
|
5900
5899
|
let status;
|
|
5901
5900
|
switch (response.status) {
|
|
5902
|
-
case
|
|
5901
|
+
case "queued":
|
|
5903
5902
|
status = "queued";
|
|
5904
5903
|
break;
|
|
5905
|
-
case
|
|
5904
|
+
case "processing":
|
|
5906
5905
|
status = "processing";
|
|
5907
5906
|
break;
|
|
5908
|
-
case
|
|
5907
|
+
case "completed":
|
|
5909
5908
|
status = "completed";
|
|
5910
5909
|
break;
|
|
5911
|
-
case
|
|
5910
|
+
case "error":
|
|
5912
5911
|
status = "error";
|
|
5913
5912
|
break;
|
|
5914
5913
|
default:
|
|
5915
5914
|
status = "queued";
|
|
5916
5915
|
}
|
|
5917
|
-
if (response.status ===
|
|
5916
|
+
if (response.status === "error") {
|
|
5918
5917
|
return {
|
|
5919
5918
|
success: false,
|
|
5920
5919
|
provider: this.name,
|
|
@@ -6566,8 +6565,10 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
6566
6565
|
/**
|
|
6567
6566
|
* Submit audio for transcription
|
|
6568
6567
|
*
|
|
6569
|
-
* Sends audio to Deepgram API for transcription. Deepgram processes
|
|
6570
|
-
* synchronously and returns results immediately
|
|
6568
|
+
* Sends audio to Deepgram API for transcription. Deepgram normally processes
|
|
6569
|
+
* synchronously and returns results immediately. When `webhookUrl` is set,
|
|
6570
|
+
* Deepgram can instead return an async callback acknowledgment containing a
|
|
6571
|
+
* request ID.
|
|
6571
6572
|
*
|
|
6572
6573
|
* @param audio - Audio input (URL or file buffer)
|
|
6573
6574
|
* @param options - Transcription options
|
|
@@ -6618,17 +6619,59 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
6618
6619
|
{ params }
|
|
6619
6620
|
).then((res) => res.data);
|
|
6620
6621
|
} else if (audio.type === "file") {
|
|
6621
|
-
response = await this.client.post(
|
|
6622
|
-
|
|
6623
|
-
|
|
6624
|
-
|
|
6622
|
+
response = await this.client.post(
|
|
6623
|
+
"/listen",
|
|
6624
|
+
audio.file,
|
|
6625
|
+
{
|
|
6626
|
+
params,
|
|
6627
|
+
headers: {
|
|
6628
|
+
"Content-Type": "audio/*"
|
|
6629
|
+
}
|
|
6625
6630
|
}
|
|
6626
|
-
|
|
6631
|
+
).then((res) => res.data);
|
|
6627
6632
|
} else {
|
|
6628
6633
|
throw new Error(
|
|
6629
6634
|
"Deepgram adapter does not support stream type for pre-recorded transcription. Use transcribeStream() for real-time streaming."
|
|
6630
6635
|
);
|
|
6631
6636
|
}
|
|
6637
|
+
if (options?.webhookUrl) {
|
|
6638
|
+
const requestId = ("request_id" in response ? response.request_id : void 0) || ("metadata" in response ? response.metadata?.request_id : void 0);
|
|
6639
|
+
if (!requestId) {
|
|
6640
|
+
return {
|
|
6641
|
+
success: false,
|
|
6642
|
+
provider: this.name,
|
|
6643
|
+
error: {
|
|
6644
|
+
code: "MISSING_REQUEST_ID",
|
|
6645
|
+
message: "Deepgram callback mode did not return a request ID"
|
|
6646
|
+
},
|
|
6647
|
+
raw: response
|
|
6648
|
+
};
|
|
6649
|
+
}
|
|
6650
|
+
return {
|
|
6651
|
+
success: true,
|
|
6652
|
+
provider: this.name,
|
|
6653
|
+
data: {
|
|
6654
|
+
id: requestId,
|
|
6655
|
+
text: "",
|
|
6656
|
+
status: "queued"
|
|
6657
|
+
},
|
|
6658
|
+
tracking: {
|
|
6659
|
+
requestId
|
|
6660
|
+
},
|
|
6661
|
+
raw: response
|
|
6662
|
+
};
|
|
6663
|
+
}
|
|
6664
|
+
if (!("results" in response) || !("metadata" in response)) {
|
|
6665
|
+
return {
|
|
6666
|
+
success: false,
|
|
6667
|
+
provider: this.name,
|
|
6668
|
+
error: {
|
|
6669
|
+
code: "INVALID_RESPONSE",
|
|
6670
|
+
message: "Deepgram did not return a synchronous transcription payload"
|
|
6671
|
+
},
|
|
6672
|
+
raw: response
|
|
6673
|
+
};
|
|
6674
|
+
}
|
|
6632
6675
|
return this.normalizeResponse(response);
|
|
6633
6676
|
} catch (error) {
|
|
6634
6677
|
return this.createErrorResponse(error);
|
|
@@ -7289,7 +7332,8 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
7289
7332
|
break;
|
|
7290
7333
|
}
|
|
7291
7334
|
case "Metadata": {
|
|
7292
|
-
|
|
7335
|
+
const { type: _, ...metadata } = message;
|
|
7336
|
+
callbacks?.onMetadata?.(metadata);
|
|
7293
7337
|
break;
|
|
7294
7338
|
}
|
|
7295
7339
|
case "Error": {
|
|
@@ -7725,10 +7769,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
|
|
|
7725
7769
|
contentUrls: [audio.url],
|
|
7726
7770
|
properties: this.buildTranscriptionProperties(options)
|
|
7727
7771
|
};
|
|
7728
|
-
const response = await transcriptionsCreate(
|
|
7729
|
-
transcriptionRequest,
|
|
7730
|
-
this.getAxiosConfig()
|
|
7731
|
-
);
|
|
7772
|
+
const response = await transcriptionsCreate(transcriptionRequest, this.getAxiosConfig());
|
|
7732
7773
|
const transcription = response.data;
|
|
7733
7774
|
const transcriptId = transcription.self?.split("/").pop() || "";
|
|
7734
7775
|
return await this.pollForCompletion(transcriptId);
|
|
@@ -8268,7 +8309,6 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
|
|
|
8268
8309
|
const request = {
|
|
8269
8310
|
...options?.openai,
|
|
8270
8311
|
file: audioData,
|
|
8271
|
-
// Buffer/Blob both accepted at runtime; generated type expects Blob
|
|
8272
8312
|
model
|
|
8273
8313
|
};
|
|
8274
8314
|
if (options?.language) {
|
|
@@ -8288,11 +8328,7 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
|
|
|
8288
8328
|
request.response_format = OpenAIResponseFormat.json;
|
|
8289
8329
|
}
|
|
8290
8330
|
const response = await createTranscription(request, this.getAxiosConfig());
|
|
8291
|
-
return this.normalizeResponse(
|
|
8292
|
-
response.data,
|
|
8293
|
-
model,
|
|
8294
|
-
isDiarization
|
|
8295
|
-
);
|
|
8331
|
+
return this.normalizeResponse(response.data, model, isDiarization);
|
|
8296
8332
|
} catch (error) {
|
|
8297
8333
|
return this.createErrorResponse(error);
|
|
8298
8334
|
}
|
|
@@ -8699,7 +8735,6 @@ function createOpenAIWhisperAdapter(config) {
|
|
|
8699
8735
|
|
|
8700
8736
|
// src/adapters/speechmatics-adapter.ts
|
|
8701
8737
|
import axios8 from "axios";
|
|
8702
|
-
import WebSocket6 from "ws";
|
|
8703
8738
|
|
|
8704
8739
|
// src/generated/speechmatics/schema/notificationConfigContentsItem.ts
|
|
8705
8740
|
var NotificationConfigContentsItem = {
|
|
@@ -8749,7 +8784,8 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8749
8784
|
super(...arguments);
|
|
8750
8785
|
this.name = "speechmatics";
|
|
8751
8786
|
this.capabilities = {
|
|
8752
|
-
streaming:
|
|
8787
|
+
streaming: false,
|
|
8788
|
+
// Batch only (streaming available via separate WebSocket API)
|
|
8753
8789
|
diarization: true,
|
|
8754
8790
|
wordTimestamps: true,
|
|
8755
8791
|
languageDetection: false,
|
|
@@ -8884,16 +8920,13 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8884
8920
|
jobConfig.fetch_data = {
|
|
8885
8921
|
url: audio.url
|
|
8886
8922
|
};
|
|
8887
|
-
|
|
8888
|
-
|
|
8889
|
-
requestBody = formData;
|
|
8890
|
-
headers = { "Content-Type": "multipart/form-data" };
|
|
8923
|
+
requestBody = { config: JSON.stringify(jobConfig) };
|
|
8924
|
+
headers = { "Content-Type": "application/json" };
|
|
8891
8925
|
} else if (audio.type === "file") {
|
|
8892
|
-
|
|
8893
|
-
|
|
8894
|
-
|
|
8895
|
-
|
|
8896
|
-
requestBody = formData;
|
|
8926
|
+
requestBody = {
|
|
8927
|
+
config: JSON.stringify(jobConfig),
|
|
8928
|
+
data_file: audio.file
|
|
8929
|
+
};
|
|
8897
8930
|
headers = { "Content-Type": "multipart/form-data" };
|
|
8898
8931
|
} else {
|
|
8899
8932
|
return {
|
|
@@ -8998,389 +9031,6 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8998
9031
|
throw error;
|
|
8999
9032
|
}
|
|
9000
9033
|
}
|
|
9001
|
-
/**
|
|
9002
|
-
* Build WebSocket URL for real-time streaming
|
|
9003
|
-
*
|
|
9004
|
-
* Note: Real-time API uses a different host from the batch API:
|
|
9005
|
-
* - Batch: {region}.asr.api.speechmatics.com
|
|
9006
|
-
* - Real-time: {region}.rt.speechmatics.com
|
|
9007
|
-
*
|
|
9008
|
-
* @param region - Regional endpoint identifier
|
|
9009
|
-
* @returns WebSocket URL for real-time API
|
|
9010
|
-
*/
|
|
9011
|
-
getRegionalWsUrl(region) {
|
|
9012
|
-
if (this.config?.wsBaseUrl) {
|
|
9013
|
-
return this.config.wsBaseUrl;
|
|
9014
|
-
}
|
|
9015
|
-
const rtRegionMap = {
|
|
9016
|
-
eu1: "eu",
|
|
9017
|
-
eu2: "eu",
|
|
9018
|
-
us1: "us",
|
|
9019
|
-
us2: "us",
|
|
9020
|
-
au1: "eu"
|
|
9021
|
-
// No AU RT endpoint — fall back to EU
|
|
9022
|
-
};
|
|
9023
|
-
const rtPrefix = rtRegionMap[region || ""] || "eu";
|
|
9024
|
-
return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
|
|
9025
|
-
}
|
|
9026
|
-
/**
|
|
9027
|
-
* Stream audio for real-time transcription via WebSocket
|
|
9028
|
-
*
|
|
9029
|
-
* Connects to Speechmatics' real-time API and sends audio chunks
|
|
9030
|
-
* for transcription with results returned via callbacks.
|
|
9031
|
-
*
|
|
9032
|
-
* @param options - Streaming configuration options
|
|
9033
|
-
* @param callbacks - Event callbacks for transcription results
|
|
9034
|
-
* @returns Promise that resolves with a StreamingSession
|
|
9035
|
-
*
|
|
9036
|
-
* @example Basic streaming
|
|
9037
|
-
* ```typescript
|
|
9038
|
-
* const session = await adapter.transcribeStream({
|
|
9039
|
-
* language: 'en',
|
|
9040
|
-
* speechmaticsStreaming: {
|
|
9041
|
-
* enablePartials: true,
|
|
9042
|
-
* operatingPoint: 'enhanced'
|
|
9043
|
-
* }
|
|
9044
|
-
* }, {
|
|
9045
|
-
* onTranscript: (event) => console.log(event.text),
|
|
9046
|
-
* onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
|
|
9047
|
-
* onError: (error) => console.error(error)
|
|
9048
|
-
* });
|
|
9049
|
-
*
|
|
9050
|
-
* await session.sendAudio({ data: audioBuffer });
|
|
9051
|
-
* await session.close();
|
|
9052
|
-
* ```
|
|
9053
|
-
*/
|
|
9054
|
-
async transcribeStream(options, callbacks) {
|
|
9055
|
-
this.validateConfig();
|
|
9056
|
-
const smOpts = options?.speechmaticsStreaming || {};
|
|
9057
|
-
const region = smOpts.region || this.config?.region;
|
|
9058
|
-
const wsUrl = this.getRegionalWsUrl(region);
|
|
9059
|
-
const ws = new WebSocket6(wsUrl, {
|
|
9060
|
-
headers: {
|
|
9061
|
-
Authorization: `Bearer ${this.config.apiKey}`
|
|
9062
|
-
}
|
|
9063
|
-
});
|
|
9064
|
-
let sessionStatus = "connecting";
|
|
9065
|
-
const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
|
|
9066
|
-
let seqNo = 0;
|
|
9067
|
-
let utteranceResults = [];
|
|
9068
|
-
const sessionReady = new Promise((resolve, reject) => {
|
|
9069
|
-
const timeout = setTimeout(() => {
|
|
9070
|
-
reject(new Error("WebSocket connection timeout"));
|
|
9071
|
-
}, 1e4);
|
|
9072
|
-
let wsOpen = false;
|
|
9073
|
-
ws.once("error", (error) => {
|
|
9074
|
-
clearTimeout(timeout);
|
|
9075
|
-
reject(error);
|
|
9076
|
-
});
|
|
9077
|
-
ws.once("open", () => {
|
|
9078
|
-
wsOpen = true;
|
|
9079
|
-
const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
|
|
9080
|
-
const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
|
|
9081
|
-
const startMsg = {
|
|
9082
|
-
message: "StartRecognition",
|
|
9083
|
-
audio_format: {
|
|
9084
|
-
type: "raw",
|
|
9085
|
-
encoding,
|
|
9086
|
-
sample_rate: sampleRate
|
|
9087
|
-
},
|
|
9088
|
-
transcription_config: {
|
|
9089
|
-
language: smOpts.language || options?.language || "en",
|
|
9090
|
-
enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
|
|
9091
|
-
}
|
|
9092
|
-
};
|
|
9093
|
-
const txConfig = startMsg.transcription_config;
|
|
9094
|
-
if (smOpts.domain) txConfig.domain = smOpts.domain;
|
|
9095
|
-
if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
|
|
9096
|
-
if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
|
|
9097
|
-
if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
|
|
9098
|
-
if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
|
|
9099
|
-
if (smOpts.diarization === "speaker" || options?.diarization) {
|
|
9100
|
-
txConfig.diarization = "speaker";
|
|
9101
|
-
if (smOpts.maxSpeakers) {
|
|
9102
|
-
txConfig.speaker_diarization_config = {
|
|
9103
|
-
max_speakers: smOpts.maxSpeakers
|
|
9104
|
-
};
|
|
9105
|
-
} else if (options?.speakersExpected) {
|
|
9106
|
-
txConfig.speaker_diarization_config = {
|
|
9107
|
-
max_speakers: options.speakersExpected
|
|
9108
|
-
};
|
|
9109
|
-
}
|
|
9110
|
-
}
|
|
9111
|
-
if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
|
|
9112
|
-
txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
|
|
9113
|
-
content: word
|
|
9114
|
-
}));
|
|
9115
|
-
} else if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9116
|
-
txConfig.additional_vocab = options.customVocabulary.map((word) => ({
|
|
9117
|
-
content: word
|
|
9118
|
-
}));
|
|
9119
|
-
}
|
|
9120
|
-
if (smOpts.conversationConfig) {
|
|
9121
|
-
txConfig.conversation_config = {
|
|
9122
|
-
end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
|
|
9123
|
-
};
|
|
9124
|
-
}
|
|
9125
|
-
const startPayload = JSON.stringify(startMsg);
|
|
9126
|
-
if (callbacks?.onRawMessage) {
|
|
9127
|
-
callbacks.onRawMessage({
|
|
9128
|
-
provider: "speechmatics",
|
|
9129
|
-
direction: "outgoing",
|
|
9130
|
-
timestamp: Date.now(),
|
|
9131
|
-
payload: startPayload,
|
|
9132
|
-
messageType: "StartRecognition"
|
|
9133
|
-
});
|
|
9134
|
-
}
|
|
9135
|
-
ws.send(startPayload);
|
|
9136
|
-
});
|
|
9137
|
-
const onMessage = (data) => {
|
|
9138
|
-
const rawPayload = data.toString();
|
|
9139
|
-
try {
|
|
9140
|
-
const msg = JSON.parse(rawPayload);
|
|
9141
|
-
if (msg.message === "RecognitionStarted") {
|
|
9142
|
-
clearTimeout(timeout);
|
|
9143
|
-
ws.removeListener("message", onMessage);
|
|
9144
|
-
ws.emit("message", data);
|
|
9145
|
-
resolve();
|
|
9146
|
-
} else if (msg.message === "Error") {
|
|
9147
|
-
clearTimeout(timeout);
|
|
9148
|
-
ws.removeListener("message", onMessage);
|
|
9149
|
-
reject(new Error(msg.reason || "Recognition failed to start"));
|
|
9150
|
-
}
|
|
9151
|
-
} catch {
|
|
9152
|
-
}
|
|
9153
|
-
};
|
|
9154
|
-
ws.on("message", onMessage);
|
|
9155
|
-
});
|
|
9156
|
-
ws.on("message", (data) => {
|
|
9157
|
-
const rawPayload = data.toString();
|
|
9158
|
-
try {
|
|
9159
|
-
const message = JSON.parse(rawPayload);
|
|
9160
|
-
if (callbacks?.onRawMessage) {
|
|
9161
|
-
callbacks.onRawMessage({
|
|
9162
|
-
provider: "speechmatics",
|
|
9163
|
-
direction: "incoming",
|
|
9164
|
-
timestamp: Date.now(),
|
|
9165
|
-
payload: rawPayload,
|
|
9166
|
-
messageType: message.message
|
|
9167
|
-
});
|
|
9168
|
-
}
|
|
9169
|
-
this.handleStreamingMessage(message, callbacks, utteranceResults);
|
|
9170
|
-
} catch (error) {
|
|
9171
|
-
if (callbacks?.onRawMessage) {
|
|
9172
|
-
callbacks.onRawMessage({
|
|
9173
|
-
provider: "speechmatics",
|
|
9174
|
-
direction: "incoming",
|
|
9175
|
-
timestamp: Date.now(),
|
|
9176
|
-
payload: rawPayload,
|
|
9177
|
-
messageType: "parse_error"
|
|
9178
|
-
});
|
|
9179
|
-
}
|
|
9180
|
-
callbacks?.onError?.({
|
|
9181
|
-
code: "PARSE_ERROR",
|
|
9182
|
-
message: "Failed to parse WebSocket message",
|
|
9183
|
-
details: error
|
|
9184
|
-
});
|
|
9185
|
-
}
|
|
9186
|
-
});
|
|
9187
|
-
ws.on("error", (error) => {
|
|
9188
|
-
callbacks?.onError?.({
|
|
9189
|
-
code: "WEBSOCKET_ERROR",
|
|
9190
|
-
message: error.message,
|
|
9191
|
-
details: error
|
|
9192
|
-
});
|
|
9193
|
-
});
|
|
9194
|
-
ws.on("close", (code, reason) => {
|
|
9195
|
-
sessionStatus = "closed";
|
|
9196
|
-
callbacks?.onClose?.(code, reason.toString());
|
|
9197
|
-
});
|
|
9198
|
-
await sessionReady;
|
|
9199
|
-
sessionStatus = "open";
|
|
9200
|
-
callbacks?.onOpen?.();
|
|
9201
|
-
return {
|
|
9202
|
-
id: sessionId,
|
|
9203
|
-
provider: this.name,
|
|
9204
|
-
createdAt: /* @__PURE__ */ new Date(),
|
|
9205
|
-
getStatus: () => sessionStatus,
|
|
9206
|
-
sendAudio: async (chunk) => {
|
|
9207
|
-
if (sessionStatus !== "open") {
|
|
9208
|
-
throw new Error(`Cannot send audio: session is ${sessionStatus}`);
|
|
9209
|
-
}
|
|
9210
|
-
if (ws.readyState !== WebSocket6.OPEN) {
|
|
9211
|
-
throw new Error("WebSocket is not open");
|
|
9212
|
-
}
|
|
9213
|
-
if (callbacks?.onRawMessage) {
|
|
9214
|
-
const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
|
|
9215
|
-
chunk.data.byteOffset,
|
|
9216
|
-
chunk.data.byteOffset + chunk.data.byteLength
|
|
9217
|
-
);
|
|
9218
|
-
callbacks.onRawMessage({
|
|
9219
|
-
provider: this.name,
|
|
9220
|
-
direction: "outgoing",
|
|
9221
|
-
timestamp: Date.now(),
|
|
9222
|
-
payload: audioPayload,
|
|
9223
|
-
messageType: "audio"
|
|
9224
|
-
});
|
|
9225
|
-
}
|
|
9226
|
-
ws.send(chunk.data);
|
|
9227
|
-
seqNo++;
|
|
9228
|
-
if (chunk.isLast) {
|
|
9229
|
-
const endMsg = JSON.stringify({
|
|
9230
|
-
message: "EndOfStream",
|
|
9231
|
-
last_seq_no: seqNo
|
|
9232
|
-
});
|
|
9233
|
-
if (callbacks?.onRawMessage) {
|
|
9234
|
-
callbacks.onRawMessage({
|
|
9235
|
-
provider: this.name,
|
|
9236
|
-
direction: "outgoing",
|
|
9237
|
-
timestamp: Date.now(),
|
|
9238
|
-
payload: endMsg,
|
|
9239
|
-
messageType: "EndOfStream"
|
|
9240
|
-
});
|
|
9241
|
-
}
|
|
9242
|
-
ws.send(endMsg);
|
|
9243
|
-
}
|
|
9244
|
-
},
|
|
9245
|
-
close: async () => {
|
|
9246
|
-
if (sessionStatus === "closed" || sessionStatus === "closing") {
|
|
9247
|
-
return;
|
|
9248
|
-
}
|
|
9249
|
-
sessionStatus = "closing";
|
|
9250
|
-
if (ws.readyState === WebSocket6.OPEN) {
|
|
9251
|
-
seqNo++;
|
|
9252
|
-
ws.send(
|
|
9253
|
-
JSON.stringify({
|
|
9254
|
-
message: "EndOfStream",
|
|
9255
|
-
last_seq_no: seqNo
|
|
9256
|
-
})
|
|
9257
|
-
);
|
|
9258
|
-
}
|
|
9259
|
-
return new Promise((resolve) => {
|
|
9260
|
-
const timeout = setTimeout(() => {
|
|
9261
|
-
ws.terminate();
|
|
9262
|
-
sessionStatus = "closed";
|
|
9263
|
-
resolve();
|
|
9264
|
-
}, 5e3);
|
|
9265
|
-
const onMsg = (data) => {
|
|
9266
|
-
try {
|
|
9267
|
-
const msg = JSON.parse(data.toString());
|
|
9268
|
-
if (msg.message === "EndOfTranscript") {
|
|
9269
|
-
ws.removeListener("message", onMsg);
|
|
9270
|
-
clearTimeout(timeout);
|
|
9271
|
-
ws.close();
|
|
9272
|
-
}
|
|
9273
|
-
} catch {
|
|
9274
|
-
}
|
|
9275
|
-
};
|
|
9276
|
-
ws.on("message", onMsg);
|
|
9277
|
-
ws.once("close", () => {
|
|
9278
|
-
clearTimeout(timeout);
|
|
9279
|
-
sessionStatus = "closed";
|
|
9280
|
-
resolve();
|
|
9281
|
-
});
|
|
9282
|
-
});
|
|
9283
|
-
}
|
|
9284
|
-
};
|
|
9285
|
-
}
|
|
9286
|
-
/**
|
|
9287
|
-
* Handle incoming Speechmatics real-time WebSocket messages
|
|
9288
|
-
*/
|
|
9289
|
-
handleStreamingMessage(message, callbacks, utteranceResults) {
|
|
9290
|
-
switch (message.message) {
|
|
9291
|
-
case "RecognitionStarted": {
|
|
9292
|
-
break;
|
|
9293
|
-
}
|
|
9294
|
-
case "AddPartialTranscript": {
|
|
9295
|
-
const results = message.results || [];
|
|
9296
|
-
const text = buildTextFromSpeechmaticsResults(results);
|
|
9297
|
-
if (text) {
|
|
9298
|
-
callbacks?.onTranscript?.({
|
|
9299
|
-
type: "transcript",
|
|
9300
|
-
text,
|
|
9301
|
-
isFinal: false,
|
|
9302
|
-
words: this.extractWordsFromResults(results),
|
|
9303
|
-
data: message
|
|
9304
|
-
});
|
|
9305
|
-
}
|
|
9306
|
-
break;
|
|
9307
|
-
}
|
|
9308
|
-
case "AddTranscript": {
|
|
9309
|
-
const results = message.results || [];
|
|
9310
|
-
const text = buildTextFromSpeechmaticsResults(results);
|
|
9311
|
-
if (utteranceResults) {
|
|
9312
|
-
utteranceResults.push(...results);
|
|
9313
|
-
}
|
|
9314
|
-
if (text) {
|
|
9315
|
-
callbacks?.onTranscript?.({
|
|
9316
|
-
type: "transcript",
|
|
9317
|
-
text,
|
|
9318
|
-
isFinal: true,
|
|
9319
|
-
words: this.extractWordsFromResults(results),
|
|
9320
|
-
data: message
|
|
9321
|
-
});
|
|
9322
|
-
}
|
|
9323
|
-
break;
|
|
9324
|
-
}
|
|
9325
|
-
case "EndOfUtterance": {
|
|
9326
|
-
if (utteranceResults && utteranceResults.length > 0) {
|
|
9327
|
-
const text = buildTextFromSpeechmaticsResults(utteranceResults);
|
|
9328
|
-
const words = this.extractWordsFromResults(utteranceResults);
|
|
9329
|
-
const utterances = buildUtterancesFromWords(words);
|
|
9330
|
-
if (utterances.length > 0) {
|
|
9331
|
-
for (const utt of utterances) {
|
|
9332
|
-
callbacks?.onUtterance?.(utt);
|
|
9333
|
-
}
|
|
9334
|
-
} else if (text) {
|
|
9335
|
-
callbacks?.onUtterance?.({
|
|
9336
|
-
text,
|
|
9337
|
-
start: words.length > 0 ? words[0].start : 0,
|
|
9338
|
-
end: words.length > 0 ? words[words.length - 1].end : 0,
|
|
9339
|
-
words
|
|
9340
|
-
});
|
|
9341
|
-
}
|
|
9342
|
-
utteranceResults.length = 0;
|
|
9343
|
-
}
|
|
9344
|
-
break;
|
|
9345
|
-
}
|
|
9346
|
-
case "AudioAdded": {
|
|
9347
|
-
break;
|
|
9348
|
-
}
|
|
9349
|
-
case "EndOfTranscript": {
|
|
9350
|
-
break;
|
|
9351
|
-
}
|
|
9352
|
-
case "Info":
|
|
9353
|
-
case "Warning": {
|
|
9354
|
-
callbacks?.onMetadata?.(message);
|
|
9355
|
-
break;
|
|
9356
|
-
}
|
|
9357
|
-
case "Error": {
|
|
9358
|
-
const errMsg = message;
|
|
9359
|
-
callbacks?.onError?.({
|
|
9360
|
-
code: errMsg.type || "SPEECHMATICS_ERROR",
|
|
9361
|
-
message: errMsg.reason || "Unknown error",
|
|
9362
|
-
details: message
|
|
9363
|
-
});
|
|
9364
|
-
break;
|
|
9365
|
-
}
|
|
9366
|
-
default: {
|
|
9367
|
-
callbacks?.onMetadata?.(message);
|
|
9368
|
-
break;
|
|
9369
|
-
}
|
|
9370
|
-
}
|
|
9371
|
-
}
|
|
9372
|
-
/**
|
|
9373
|
-
* Extract unified Word[] from Speechmatics recognition results
|
|
9374
|
-
*/
|
|
9375
|
-
extractWordsFromResults(results) {
|
|
9376
|
-
return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
|
|
9377
|
-
word: result.alternatives?.[0]?.content || "",
|
|
9378
|
-
start: result.start_time,
|
|
9379
|
-
end: result.end_time,
|
|
9380
|
-
confidence: result.alternatives?.[0]?.confidence,
|
|
9381
|
-
speaker: result.alternatives?.[0]?.speaker
|
|
9382
|
-
}));
|
|
9383
|
-
}
|
|
9384
9034
|
/**
|
|
9385
9035
|
* Normalize Speechmatics status to unified status
|
|
9386
9036
|
* Uses generated JobDetailsStatus enum values
|
|
@@ -9449,9 +9099,6 @@ function createSpeechmaticsAdapter(config) {
|
|
|
9449
9099
|
return adapter;
|
|
9450
9100
|
}
|
|
9451
9101
|
|
|
9452
|
-
// src/adapters/soniox-adapter.ts
|
|
9453
|
-
import axios9 from "axios";
|
|
9454
|
-
|
|
9455
9102
|
// src/generated/soniox/schema/transcriptionStatus.ts
|
|
9456
9103
|
var TranscriptionStatus = {
|
|
9457
9104
|
queued: "queued",
|
|
@@ -9460,6 +9107,57 @@ var TranscriptionStatus = {
|
|
|
9460
9107
|
error: "error"
|
|
9461
9108
|
};
|
|
9462
9109
|
|
|
9110
|
+
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9111
|
+
import axios9 from "axios";
|
|
9112
|
+
|
|
9113
|
+
// src/generated/soniox/schema/index.ts
|
|
9114
|
+
var schema_exports4 = {};
|
|
9115
|
+
__export(schema_exports4, {
|
|
9116
|
+
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
9117
|
+
TranscriptionMode: () => TranscriptionMode,
|
|
9118
|
+
TranscriptionStatus: () => TranscriptionStatus,
|
|
9119
|
+
TranslationConfigType: () => TranslationConfigType
|
|
9120
|
+
});
|
|
9121
|
+
|
|
9122
|
+
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
9123
|
+
var TemporaryApiKeyUsageType = {
|
|
9124
|
+
transcribe_websocket: "transcribe_websocket"
|
|
9125
|
+
};
|
|
9126
|
+
|
|
9127
|
+
// src/generated/soniox/schema/transcriptionMode.ts
|
|
9128
|
+
var TranscriptionMode = {
|
|
9129
|
+
real_time: "real_time",
|
|
9130
|
+
async: "async"
|
|
9131
|
+
};
|
|
9132
|
+
|
|
9133
|
+
// src/generated/soniox/schema/translationConfigType.ts
|
|
9134
|
+
var TranslationConfigType = {
|
|
9135
|
+
one_way: "one_way",
|
|
9136
|
+
two_way: "two_way"
|
|
9137
|
+
};
|
|
9138
|
+
|
|
9139
|
+
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9140
|
+
var uploadFile = (uploadFileBody2, options) => {
|
|
9141
|
+
const formData = new FormData();
|
|
9142
|
+
if (uploadFileBody2.client_reference_id !== void 0 && uploadFileBody2.client_reference_id !== null) {
|
|
9143
|
+
formData.append("client_reference_id", uploadFileBody2.client_reference_id);
|
|
9144
|
+
}
|
|
9145
|
+
formData.append("file", uploadFileBody2.file);
|
|
9146
|
+
return axios9.post("/v1/files", formData, options);
|
|
9147
|
+
};
|
|
9148
|
+
var createTranscription2 = (createTranscriptionPayload, options) => {
|
|
9149
|
+
return axios9.post("/v1/transcriptions", createTranscriptionPayload, options);
|
|
9150
|
+
};
|
|
9151
|
+
var getTranscription = (transcriptionId, options) => {
|
|
9152
|
+
return axios9.get(`/v1/transcriptions/${transcriptionId}`, options);
|
|
9153
|
+
};
|
|
9154
|
+
var getTranscriptionTranscript = (transcriptionId, options) => {
|
|
9155
|
+
return axios9.get(`/v1/transcriptions/${transcriptionId}/transcript`, options);
|
|
9156
|
+
};
|
|
9157
|
+
var getModels = (options) => {
|
|
9158
|
+
return axios9.get("/v1/models", options);
|
|
9159
|
+
};
|
|
9160
|
+
|
|
9463
9161
|
// src/adapters/soniox-adapter.ts
|
|
9464
9162
|
var SonioxAdapter = class extends BaseAdapter {
|
|
9465
9163
|
constructor() {
|
|
@@ -9514,11 +9212,17 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9514
9212
|
}
|
|
9515
9213
|
}
|
|
9516
9214
|
/**
|
|
9517
|
-
* Get the base URL for API requests
|
|
9215
|
+
* Get the base URL for API requests (no /v1 suffix — generated functions include /v1 in paths)
|
|
9518
9216
|
*/
|
|
9519
9217
|
get baseUrl() {
|
|
9520
9218
|
if (this.config?.baseUrl) return this.config.baseUrl;
|
|
9521
|
-
return `https://${this.getRegionalHost()}
|
|
9219
|
+
return `https://${this.getRegionalHost()}`;
|
|
9220
|
+
}
|
|
9221
|
+
/**
|
|
9222
|
+
* Build axios config with Soniox Bearer auth
|
|
9223
|
+
*/
|
|
9224
|
+
getAxiosConfig() {
|
|
9225
|
+
return super.getAxiosConfig("Authorization", (key) => `Bearer ${key}`);
|
|
9522
9226
|
}
|
|
9523
9227
|
initialize(config) {
|
|
9524
9228
|
super.initialize(config);
|
|
@@ -9528,15 +9232,6 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9528
9232
|
if (config.model) {
|
|
9529
9233
|
this.defaultModel = config.model;
|
|
9530
9234
|
}
|
|
9531
|
-
this.client = axios9.create({
|
|
9532
|
-
baseURL: this.baseUrl,
|
|
9533
|
-
timeout: config.timeout || 12e4,
|
|
9534
|
-
headers: {
|
|
9535
|
-
Authorization: `Bearer ${config.apiKey}`,
|
|
9536
|
-
"Content-Type": "application/json",
|
|
9537
|
-
...config.headers
|
|
9538
|
-
}
|
|
9539
|
-
});
|
|
9540
9235
|
}
|
|
9541
9236
|
/**
|
|
9542
9237
|
* Get current region
|
|
@@ -9566,23 +9261,12 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9566
9261
|
*/
|
|
9567
9262
|
setRegion(region) {
|
|
9568
9263
|
this.region = region;
|
|
9569
|
-
if (this.config?.apiKey) {
|
|
9570
|
-
this.client = axios9.create({
|
|
9571
|
-
baseURL: this.baseUrl,
|
|
9572
|
-
timeout: this.config.timeout || 12e4,
|
|
9573
|
-
headers: {
|
|
9574
|
-
Authorization: `Bearer ${this.config.apiKey}`,
|
|
9575
|
-
"Content-Type": "application/json",
|
|
9576
|
-
...this.config.headers
|
|
9577
|
-
}
|
|
9578
|
-
});
|
|
9579
|
-
}
|
|
9580
9264
|
}
|
|
9581
9265
|
/**
|
|
9582
9266
|
* Submit audio for transcription
|
|
9583
9267
|
*
|
|
9584
|
-
*
|
|
9585
|
-
*
|
|
9268
|
+
* Uses the async v1 API: createTranscription returns status `queued`,
|
|
9269
|
+
* then polls until completed (or returns immediately if webhook is set).
|
|
9586
9270
|
*
|
|
9587
9271
|
* @param audio - Audio input (URL or file)
|
|
9588
9272
|
* @param options - Transcription options
|
|
@@ -9591,21 +9275,44 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9591
9275
|
async transcribe(audio, options) {
|
|
9592
9276
|
this.validateConfig();
|
|
9593
9277
|
try {
|
|
9594
|
-
const
|
|
9595
|
-
|
|
9596
|
-
};
|
|
9597
|
-
if (audio.type === "url") {
|
|
9598
|
-
requestBody.audio_url = audio.url;
|
|
9599
|
-
} else if (audio.type === "file") {
|
|
9600
|
-
const formData = new FormData();
|
|
9278
|
+
const sonioxOpts = options?.soniox;
|
|
9279
|
+
if (audio.type === "file") {
|
|
9601
9280
|
const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
|
|
9602
|
-
|
|
9603
|
-
const
|
|
9604
|
-
|
|
9605
|
-
|
|
9606
|
-
|
|
9607
|
-
|
|
9608
|
-
|
|
9281
|
+
const uploadBody = { file: audioBlob };
|
|
9282
|
+
const fileResp = await uploadFile(uploadBody, this.getAxiosConfig());
|
|
9283
|
+
const payload = {
|
|
9284
|
+
...sonioxOpts,
|
|
9285
|
+
model: options?.model || this.defaultModel,
|
|
9286
|
+
file_id: fileResp.data.id,
|
|
9287
|
+
language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
|
|
9288
|
+
enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
|
|
9289
|
+
enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
|
|
9290
|
+
context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
|
|
9291
|
+
webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
|
|
9292
|
+
};
|
|
9293
|
+
const createResp = await createTranscription2(payload, this.getAxiosConfig());
|
|
9294
|
+
const meta = createResp.data;
|
|
9295
|
+
if (options?.webhookUrl || sonioxOpts?.webhook_url) {
|
|
9296
|
+
return this.normalizeTranscription(meta);
|
|
9297
|
+
}
|
|
9298
|
+
return this.pollForCompletion(meta.id);
|
|
9299
|
+
} else if (audio.type === "url") {
|
|
9300
|
+
const payload = {
|
|
9301
|
+
...sonioxOpts,
|
|
9302
|
+
model: options?.model || this.defaultModel,
|
|
9303
|
+
audio_url: audio.url,
|
|
9304
|
+
language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
|
|
9305
|
+
enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
|
|
9306
|
+
enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
|
|
9307
|
+
context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
|
|
9308
|
+
webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
|
|
9309
|
+
};
|
|
9310
|
+
const createResp = await createTranscription2(payload, this.getAxiosConfig());
|
|
9311
|
+
const meta = createResp.data;
|
|
9312
|
+
if (options?.webhookUrl || sonioxOpts?.webhook_url) {
|
|
9313
|
+
return this.normalizeTranscription(meta);
|
|
9314
|
+
}
|
|
9315
|
+
return this.pollForCompletion(meta.id);
|
|
9609
9316
|
} else {
|
|
9610
9317
|
return {
|
|
9611
9318
|
success: false,
|
|
@@ -9616,38 +9323,6 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9616
9323
|
}
|
|
9617
9324
|
};
|
|
9618
9325
|
}
|
|
9619
|
-
if (options?.language) {
|
|
9620
|
-
requestBody.language_hints = [options.language];
|
|
9621
|
-
}
|
|
9622
|
-
if (options?.diarization) {
|
|
9623
|
-
requestBody.enable_speaker_diarization = true;
|
|
9624
|
-
}
|
|
9625
|
-
if (options?.languageDetection) {
|
|
9626
|
-
requestBody.enable_language_identification = true;
|
|
9627
|
-
}
|
|
9628
|
-
if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9629
|
-
requestBody.context = {
|
|
9630
|
-
terms: options.customVocabulary
|
|
9631
|
-
};
|
|
9632
|
-
}
|
|
9633
|
-
if (options?.webhookUrl) {
|
|
9634
|
-
requestBody.webhook_url = options.webhookUrl;
|
|
9635
|
-
}
|
|
9636
|
-
const response = await this.client.post("/transcriptions", requestBody);
|
|
9637
|
-
const transcriptionId = response.data.id;
|
|
9638
|
-
if (options?.webhookUrl) {
|
|
9639
|
-
return {
|
|
9640
|
-
success: true,
|
|
9641
|
-
provider: this.name,
|
|
9642
|
-
data: {
|
|
9643
|
-
id: transcriptionId,
|
|
9644
|
-
text: "",
|
|
9645
|
-
status: "queued"
|
|
9646
|
-
},
|
|
9647
|
-
raw: response.data
|
|
9648
|
-
};
|
|
9649
|
-
}
|
|
9650
|
-
return await this.pollForCompletion(transcriptionId);
|
|
9651
9326
|
} catch (error) {
|
|
9652
9327
|
return this.createErrorResponse(error);
|
|
9653
9328
|
}
|
|
@@ -9655,9 +9330,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9655
9330
|
/**
|
|
9656
9331
|
* Get transcription result by ID
|
|
9657
9332
|
*
|
|
9658
|
-
*
|
|
9659
|
-
*
|
|
9660
|
-
* when completed.
|
|
9333
|
+
* Fetches transcription metadata and, if completed, the transcript text/tokens.
|
|
9334
|
+
* Used by pollForCompletion() for async polling.
|
|
9661
9335
|
*
|
|
9662
9336
|
* @param transcriptId - Transcript ID
|
|
9663
9337
|
* @returns Transcription response
|
|
@@ -9665,39 +9339,20 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9665
9339
|
async getTranscript(transcriptId) {
|
|
9666
9340
|
this.validateConfig();
|
|
9667
9341
|
try {
|
|
9668
|
-
const
|
|
9669
|
-
const
|
|
9670
|
-
if (
|
|
9671
|
-
|
|
9672
|
-
|
|
9673
|
-
|
|
9674
|
-
|
|
9675
|
-
|
|
9676
|
-
|
|
9677
|
-
|
|
9678
|
-
|
|
9679
|
-
|
|
9680
|
-
if (job.status !== "completed") {
|
|
9681
|
-
return {
|
|
9682
|
-
success: true,
|
|
9683
|
-
provider: this.name,
|
|
9684
|
-
data: {
|
|
9685
|
-
id: job.id,
|
|
9686
|
-
text: "",
|
|
9687
|
-
status: job.status
|
|
9688
|
-
},
|
|
9689
|
-
raw: job
|
|
9690
|
-
};
|
|
9342
|
+
const metaResp = await getTranscription(transcriptId, this.getAxiosConfig());
|
|
9343
|
+
const meta = metaResp.data;
|
|
9344
|
+
if (meta.status === TranscriptionStatus.completed) {
|
|
9345
|
+
try {
|
|
9346
|
+
const transcriptResp = await getTranscriptionTranscript(
|
|
9347
|
+
transcriptId,
|
|
9348
|
+
this.getAxiosConfig()
|
|
9349
|
+
);
|
|
9350
|
+
return this.normalizeTranscription(meta, transcriptResp.data);
|
|
9351
|
+
} catch (transcriptError) {
|
|
9352
|
+
return this.createErrorResponse(transcriptError);
|
|
9353
|
+
}
|
|
9691
9354
|
}
|
|
9692
|
-
|
|
9693
|
-
`/transcriptions/${transcriptId}/transcript`
|
|
9694
|
-
);
|
|
9695
|
-
return this.normalizeResponse({
|
|
9696
|
-
...transcriptResponse.data,
|
|
9697
|
-
// Carry over job metadata
|
|
9698
|
-
id: job.id,
|
|
9699
|
-
audio_duration_ms: job.audio_duration_ms
|
|
9700
|
-
});
|
|
9355
|
+
return this.normalizeTranscription(meta);
|
|
9701
9356
|
} catch (error) {
|
|
9702
9357
|
return this.createErrorResponse(error);
|
|
9703
9358
|
}
|
|
@@ -9717,51 +9372,50 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9717
9372
|
const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
|
|
9718
9373
|
const createdAt = /* @__PURE__ */ new Date();
|
|
9719
9374
|
const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
|
|
9720
|
-
const wsUrl = `${wsBase}/transcribe-websocket
|
|
9721
|
-
|
|
9722
|
-
const
|
|
9723
|
-
|
|
9724
|
-
|
|
9725
|
-
model: modelId
|
|
9726
|
-
};
|
|
9727
|
-
if (sonioxOpts?.audioFormat) {
|
|
9728
|
-
initMessage.audio_format = sonioxOpts.audioFormat;
|
|
9729
|
-
} else if (options?.encoding) {
|
|
9375
|
+
const wsUrl = new URL(`${wsBase}/transcribe-websocket`);
|
|
9376
|
+
wsUrl.searchParams.set("api_key", this.config.apiKey);
|
|
9377
|
+
const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-preview";
|
|
9378
|
+
wsUrl.searchParams.set("model", modelId);
|
|
9379
|
+
if (options?.encoding) {
|
|
9730
9380
|
const encodingMap = {
|
|
9731
9381
|
linear16: "pcm_s16le",
|
|
9732
9382
|
pcm: "pcm_s16le",
|
|
9733
9383
|
mulaw: "mulaw",
|
|
9734
9384
|
alaw: "alaw"
|
|
9735
9385
|
};
|
|
9736
|
-
|
|
9386
|
+
wsUrl.searchParams.set("audio_format", encodingMap[options.encoding] || options.encoding);
|
|
9737
9387
|
}
|
|
9738
|
-
if (
|
|
9739
|
-
|
|
9388
|
+
if (options?.sampleRate) {
|
|
9389
|
+
wsUrl.searchParams.set("sample_rate", options.sampleRate.toString());
|
|
9740
9390
|
}
|
|
9741
|
-
if (
|
|
9742
|
-
|
|
9391
|
+
if (options?.channels) {
|
|
9392
|
+
wsUrl.searchParams.set("num_channels", options.channels.toString());
|
|
9743
9393
|
}
|
|
9394
|
+
const sonioxOpts = options?.sonioxStreaming;
|
|
9744
9395
|
if (sonioxOpts) {
|
|
9745
9396
|
if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
|
|
9746
|
-
|
|
9397
|
+
wsUrl.searchParams.set("language_hints", JSON.stringify(sonioxOpts.languageHints));
|
|
9747
9398
|
}
|
|
9748
9399
|
if (sonioxOpts.enableLanguageIdentification) {
|
|
9749
|
-
|
|
9400
|
+
wsUrl.searchParams.set("enable_language_identification", "true");
|
|
9750
9401
|
}
|
|
9751
9402
|
if (sonioxOpts.enableEndpointDetection) {
|
|
9752
|
-
|
|
9403
|
+
wsUrl.searchParams.set("enable_endpoint_detection", "true");
|
|
9753
9404
|
}
|
|
9754
9405
|
if (sonioxOpts.enableSpeakerDiarization) {
|
|
9755
|
-
|
|
9406
|
+
wsUrl.searchParams.set("enable_speaker_diarization", "true");
|
|
9756
9407
|
}
|
|
9757
9408
|
if (sonioxOpts.context) {
|
|
9758
|
-
|
|
9409
|
+
wsUrl.searchParams.set(
|
|
9410
|
+
"context",
|
|
9411
|
+
typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
|
|
9412
|
+
);
|
|
9759
9413
|
}
|
|
9760
9414
|
if (sonioxOpts.translation) {
|
|
9761
|
-
|
|
9415
|
+
wsUrl.searchParams.set("translation", JSON.stringify(sonioxOpts.translation));
|
|
9762
9416
|
}
|
|
9763
9417
|
if (sonioxOpts.clientReferenceId) {
|
|
9764
|
-
|
|
9418
|
+
wsUrl.searchParams.set("client_reference_id", sonioxOpts.clientReferenceId);
|
|
9765
9419
|
}
|
|
9766
9420
|
}
|
|
9767
9421
|
if (!sonioxOpts?.languageHints && options?.language) {
|
|
@@ -9770,33 +9424,24 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9770
9424
|
`[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
|
|
9771
9425
|
);
|
|
9772
9426
|
}
|
|
9773
|
-
|
|
9427
|
+
wsUrl.searchParams.set("language_hints", JSON.stringify([options.language]));
|
|
9774
9428
|
}
|
|
9775
9429
|
if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
|
|
9776
|
-
|
|
9430
|
+
wsUrl.searchParams.set("enable_speaker_diarization", "true");
|
|
9777
9431
|
}
|
|
9778
9432
|
if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
|
|
9779
|
-
|
|
9433
|
+
wsUrl.searchParams.set("enable_language_identification", "true");
|
|
9434
|
+
}
|
|
9435
|
+
if (options?.interimResults !== false) {
|
|
9780
9436
|
}
|
|
9781
9437
|
let status = "connecting";
|
|
9782
9438
|
let openedAt = null;
|
|
9783
9439
|
let receivedData = false;
|
|
9784
9440
|
const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : __require("ws");
|
|
9785
|
-
const ws = new WebSocketImpl(wsUrl);
|
|
9441
|
+
const ws = new WebSocketImpl(wsUrl.toString());
|
|
9786
9442
|
ws.onopen = () => {
|
|
9787
|
-
openedAt = Date.now();
|
|
9788
|
-
const initPayload = JSON.stringify(initMessage);
|
|
9789
|
-
if (callbacks?.onRawMessage) {
|
|
9790
|
-
callbacks.onRawMessage({
|
|
9791
|
-
provider: this.name,
|
|
9792
|
-
direction: "outgoing",
|
|
9793
|
-
timestamp: Date.now(),
|
|
9794
|
-
payload: initPayload,
|
|
9795
|
-
messageType: "init"
|
|
9796
|
-
});
|
|
9797
|
-
}
|
|
9798
|
-
ws.send(initPayload);
|
|
9799
9443
|
status = "open";
|
|
9444
|
+
openedAt = Date.now();
|
|
9800
9445
|
callbacks?.onOpen?.();
|
|
9801
9446
|
};
|
|
9802
9447
|
ws.onmessage = (event) => {
|
|
@@ -9805,7 +9450,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9805
9450
|
let messageType;
|
|
9806
9451
|
try {
|
|
9807
9452
|
const data = JSON.parse(rawPayload);
|
|
9808
|
-
|
|
9453
|
+
const errorMessage = data.error_message || data.error;
|
|
9454
|
+
if (errorMessage) {
|
|
9809
9455
|
messageType = "error";
|
|
9810
9456
|
} else if (data.finished) {
|
|
9811
9457
|
messageType = "finished";
|
|
@@ -9821,10 +9467,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9821
9467
|
messageType
|
|
9822
9468
|
});
|
|
9823
9469
|
}
|
|
9824
|
-
if (
|
|
9470
|
+
if (errorMessage) {
|
|
9825
9471
|
callbacks?.onError?.({
|
|
9826
9472
|
code: data.error_code?.toString() || "STREAM_ERROR",
|
|
9827
|
-
message:
|
|
9473
|
+
message: errorMessage
|
|
9828
9474
|
});
|
|
9829
9475
|
return;
|
|
9830
9476
|
}
|
|
@@ -9838,7 +9484,7 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9838
9484
|
start: token.start_ms ? token.start_ms / 1e3 : 0,
|
|
9839
9485
|
end: token.end_ms ? token.end_ms / 1e3 : 0,
|
|
9840
9486
|
confidence: token.confidence,
|
|
9841
|
-
speaker: token.speaker
|
|
9487
|
+
speaker: token.speaker ?? void 0
|
|
9842
9488
|
}));
|
|
9843
9489
|
const text = data.text || data.tokens.map((t) => t.text).join("");
|
|
9844
9490
|
const isFinal = data.tokens.every((t) => t.is_final);
|
|
@@ -9847,8 +9493,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9847
9493
|
text,
|
|
9848
9494
|
isFinal,
|
|
9849
9495
|
words,
|
|
9850
|
-
speaker: data.tokens[0]?.speaker,
|
|
9851
|
-
language: data.tokens[0]?.language,
|
|
9496
|
+
speaker: data.tokens[0]?.speaker ?? void 0,
|
|
9497
|
+
language: data.tokens[0]?.language ?? void 0,
|
|
9852
9498
|
confidence: data.tokens[0]?.confidence
|
|
9853
9499
|
};
|
|
9854
9500
|
callbacks?.onTranscript?.(event2);
|
|
@@ -9875,10 +9521,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9875
9521
|
ws.onclose = (event) => {
|
|
9876
9522
|
status = "closed";
|
|
9877
9523
|
const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
|
|
9878
|
-
const
|
|
9879
|
-
if (
|
|
9524
|
+
const isImmediateClose = timeSinceOpen !== null && timeSinceOpen < 1e3 && !receivedData;
|
|
9525
|
+
if (isImmediateClose && event.code === 1e3) {
|
|
9880
9526
|
const errorMessage = [
|
|
9881
|
-
"Soniox closed connection
|
|
9527
|
+
"Soniox closed connection immediately after opening.",
|
|
9882
9528
|
`Current config: region=${this.region}, model=${modelId}`,
|
|
9883
9529
|
"Likely causes:",
|
|
9884
9530
|
" - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
|
|
@@ -9964,7 +9610,7 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9964
9610
|
async getModels() {
|
|
9965
9611
|
this.validateConfig();
|
|
9966
9612
|
try {
|
|
9967
|
-
const response = await this.
|
|
9613
|
+
const response = await getModels(this.getAxiosConfig());
|
|
9968
9614
|
return response.data.models || [];
|
|
9969
9615
|
} catch (error) {
|
|
9970
9616
|
console.error("Failed to fetch Soniox models:", error);
|
|
@@ -9996,11 +9642,44 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9996
9642
|
return buildUtterancesFromWords(words);
|
|
9997
9643
|
}
|
|
9998
9644
|
/**
|
|
9999
|
-
* Normalize
|
|
9645
|
+
* Normalize v1 API response to unified format
|
|
9646
|
+
*
|
|
9647
|
+
* @param meta - Transcription metadata from getTranscription/createTranscription
|
|
9648
|
+
* @param transcript - Transcript data (text/tokens), only present when status is completed
|
|
10000
9649
|
*/
|
|
10001
|
-
|
|
10002
|
-
|
|
10003
|
-
|
|
9650
|
+
normalizeTranscription(meta, transcript) {
|
|
9651
|
+
if (meta.status === TranscriptionStatus.error) {
|
|
9652
|
+
return {
|
|
9653
|
+
success: false,
|
|
9654
|
+
provider: this.name,
|
|
9655
|
+
data: {
|
|
9656
|
+
id: meta.id,
|
|
9657
|
+
text: "",
|
|
9658
|
+
status: "error"
|
|
9659
|
+
},
|
|
9660
|
+
error: {
|
|
9661
|
+
code: meta.error_type || "TRANSCRIPTION_ERROR",
|
|
9662
|
+
message: meta.error_message || "Transcription failed"
|
|
9663
|
+
},
|
|
9664
|
+
raw: { meta, transcript }
|
|
9665
|
+
};
|
|
9666
|
+
}
|
|
9667
|
+
if (!transcript) {
|
|
9668
|
+
return {
|
|
9669
|
+
success: true,
|
|
9670
|
+
provider: this.name,
|
|
9671
|
+
data: {
|
|
9672
|
+
id: meta.id,
|
|
9673
|
+
text: "",
|
|
9674
|
+
status: meta.status,
|
|
9675
|
+
duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0
|
|
9676
|
+
},
|
|
9677
|
+
raw: { meta }
|
|
9678
|
+
};
|
|
9679
|
+
}
|
|
9680
|
+
const tokens = transcript.tokens || [];
|
|
9681
|
+
const text = transcript.text || tokens.map((t) => t.text).join("");
|
|
9682
|
+
const words = tokens.filter((t) => t.start_ms !== void 0 && t.end_ms !== void 0).map((token) => ({
|
|
10004
9683
|
word: token.text,
|
|
10005
9684
|
start: token.start_ms / 1e3,
|
|
10006
9685
|
end: token.end_ms / 1e3,
|
|
@@ -10008,33 +9687,32 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10008
9687
|
speaker: token.speaker ?? void 0
|
|
10009
9688
|
}));
|
|
10010
9689
|
const speakerSet = /* @__PURE__ */ new Set();
|
|
10011
|
-
|
|
10012
|
-
if (
|
|
10013
|
-
}
|
|
9690
|
+
tokens.forEach((t) => {
|
|
9691
|
+
if (t.speaker) speakerSet.add(String(t.speaker));
|
|
9692
|
+
});
|
|
10014
9693
|
const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
|
|
10015
9694
|
id,
|
|
10016
9695
|
label: `Speaker ${id}`
|
|
10017
9696
|
})) : void 0;
|
|
10018
|
-
const utterances =
|
|
9697
|
+
const utterances = this.buildUtterancesFromTokens(tokens);
|
|
10019
9698
|
const language = tokens.find((t) => t.language)?.language ?? void 0;
|
|
10020
9699
|
return {
|
|
10021
9700
|
success: true,
|
|
10022
9701
|
provider: this.name,
|
|
10023
9702
|
data: {
|
|
10024
|
-
id:
|
|
9703
|
+
id: meta.id,
|
|
10025
9704
|
text,
|
|
10026
9705
|
status: TranscriptionStatus.completed,
|
|
10027
9706
|
language,
|
|
10028
|
-
duration:
|
|
9707
|
+
duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0,
|
|
10029
9708
|
speakers,
|
|
10030
9709
|
words: words.length > 0 ? words : void 0,
|
|
10031
9710
|
utterances: utterances.length > 0 ? utterances : void 0
|
|
10032
9711
|
},
|
|
10033
9712
|
tracking: {
|
|
10034
|
-
requestId:
|
|
10035
|
-
processingTimeMs: response.total_audio_proc_ms
|
|
9713
|
+
requestId: meta.id
|
|
10036
9714
|
},
|
|
10037
|
-
raw:
|
|
9715
|
+
raw: { meta, transcript }
|
|
10038
9716
|
};
|
|
10039
9717
|
}
|
|
10040
9718
|
};
|
|
@@ -10190,29 +9868,11 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10190
9868
|
}
|
|
10191
9869
|
}
|
|
10192
9870
|
}
|
|
10193
|
-
if (options?.webhookUrl) {
|
|
10194
|
-
if (!formData.has("webhook")) {
|
|
10195
|
-
formData.append("webhook", "true");
|
|
10196
|
-
}
|
|
10197
|
-
}
|
|
10198
9871
|
const response = await this.client.post("/v1/speech-to-text", formData, {
|
|
10199
9872
|
headers: {
|
|
10200
9873
|
"Content-Type": "multipart/form-data"
|
|
10201
9874
|
}
|
|
10202
9875
|
});
|
|
10203
|
-
if (options?.webhookUrl) {
|
|
10204
|
-
const transcriptionId = response.data.transcription_id || response.data.id || `elevenlabs_${Date.now()}`;
|
|
10205
|
-
return {
|
|
10206
|
-
success: true,
|
|
10207
|
-
provider: this.name,
|
|
10208
|
-
data: {
|
|
10209
|
-
id: transcriptionId,
|
|
10210
|
-
text: "",
|
|
10211
|
-
status: "queued"
|
|
10212
|
-
},
|
|
10213
|
-
raw: response.data
|
|
10214
|
-
};
|
|
10215
|
-
}
|
|
10216
9876
|
return this.normalizeResponse(response.data);
|
|
10217
9877
|
} catch (error) {
|
|
10218
9878
|
return this.createErrorResponse(error);
|
|
@@ -10526,7 +10186,7 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10526
10186
|
}
|
|
10527
10187
|
}
|
|
10528
10188
|
}
|
|
10529
|
-
const transcriptionId =
|
|
10189
|
+
const transcriptionId = response.transcription_id || chunks[0]?.transcription_id || `elevenlabs_${Date.now()}`;
|
|
10530
10190
|
return {
|
|
10531
10191
|
success: true,
|
|
10532
10192
|
provider: this.name,
|
|
@@ -36444,12 +36104,10 @@ var createTemporaryApiKeyBody = zod10.object({
|
|
|
36444
36104
|
var streaming_types_zod_exports = {};
|
|
36445
36105
|
__export(streaming_types_zod_exports, {
|
|
36446
36106
|
sonioxAudioFormatSchema: () => sonioxAudioFormatSchema,
|
|
36447
|
-
sonioxAutoDetectedAudioFormatSchema: () => sonioxAutoDetectedAudioFormatSchema,
|
|
36448
36107
|
sonioxContextGeneralItemSchema: () => sonioxContextGeneralItemSchema,
|
|
36449
36108
|
sonioxContextSchema: () => sonioxContextSchema,
|
|
36450
36109
|
sonioxErrorStatusSchema: () => sonioxErrorStatusSchema,
|
|
36451
36110
|
sonioxOneWayTranslationSchema: () => sonioxOneWayTranslationSchema,
|
|
36452
|
-
sonioxPcmAudioEncodingSchema: () => sonioxPcmAudioEncodingSchema,
|
|
36453
36111
|
sonioxRealtimeModelSchema: () => sonioxRealtimeModelSchema,
|
|
36454
36112
|
sonioxRecorderStateSchema: () => sonioxRecorderStateSchema,
|
|
36455
36113
|
sonioxStreamingResponseSchema: () => sonioxStreamingResponseSchema,
|
|
@@ -36463,7 +36121,7 @@ __export(streaming_types_zod_exports, {
|
|
|
36463
36121
|
streamingUpdateConfigParams: () => streamingUpdateConfigParams3
|
|
36464
36122
|
});
|
|
36465
36123
|
import { z as zod11 } from "zod";
|
|
36466
|
-
var
|
|
36124
|
+
var sonioxAudioFormatSchema = zod11.enum([
|
|
36467
36125
|
"auto",
|
|
36468
36126
|
"aac",
|
|
36469
36127
|
"aiff",
|
|
@@ -36473,10 +36131,7 @@ var sonioxAutoDetectedAudioFormatSchema = zod11.enum([
|
|
|
36473
36131
|
"mp3",
|
|
36474
36132
|
"ogg",
|
|
36475
36133
|
"wav",
|
|
36476
|
-
"webm"
|
|
36477
|
-
]);
|
|
36478
|
-
var sonioxPcmAudioEncodingSchema = zod11.enum([
|
|
36479
|
-
// Signed PCM
|
|
36134
|
+
"webm",
|
|
36480
36135
|
"pcm_s8",
|
|
36481
36136
|
"pcm_s16le",
|
|
36482
36137
|
"pcm_s16be",
|
|
@@ -36484,7 +36139,6 @@ var sonioxPcmAudioEncodingSchema = zod11.enum([
|
|
|
36484
36139
|
"pcm_s24be",
|
|
36485
36140
|
"pcm_s32le",
|
|
36486
36141
|
"pcm_s32be",
|
|
36487
|
-
// Unsigned PCM
|
|
36488
36142
|
"pcm_u8",
|
|
36489
36143
|
"pcm_u16le",
|
|
36490
36144
|
"pcm_u16be",
|
|
@@ -36492,86 +36146,81 @@ var sonioxPcmAudioEncodingSchema = zod11.enum([
|
|
|
36492
36146
|
"pcm_u24be",
|
|
36493
36147
|
"pcm_u32le",
|
|
36494
36148
|
"pcm_u32be",
|
|
36495
|
-
// Float PCM
|
|
36496
36149
|
"pcm_f32le",
|
|
36497
36150
|
"pcm_f32be",
|
|
36498
36151
|
"pcm_f64le",
|
|
36499
36152
|
"pcm_f64be",
|
|
36500
|
-
// Companded
|
|
36501
36153
|
"mulaw",
|
|
36502
36154
|
"alaw"
|
|
36503
36155
|
]);
|
|
36504
|
-
var sonioxAudioFormatSchema = zod11.union([
|
|
36505
|
-
sonioxAutoDetectedAudioFormatSchema,
|
|
36506
|
-
sonioxPcmAudioEncodingSchema
|
|
36507
|
-
]);
|
|
36508
36156
|
var sonioxOneWayTranslationSchema = zod11.object({
|
|
36509
36157
|
type: zod11.literal("one_way"),
|
|
36510
|
-
target_language: zod11.string()
|
|
36158
|
+
target_language: zod11.string()
|
|
36511
36159
|
});
|
|
36512
36160
|
var sonioxTwoWayTranslationSchema = zod11.object({
|
|
36513
36161
|
type: zod11.literal("two_way"),
|
|
36514
|
-
language_a: zod11.string()
|
|
36515
|
-
language_b: zod11.string()
|
|
36162
|
+
language_a: zod11.string(),
|
|
36163
|
+
language_b: zod11.string()
|
|
36516
36164
|
});
|
|
36517
36165
|
var sonioxTranslationConfigSchema = zod11.union([
|
|
36518
36166
|
sonioxOneWayTranslationSchema,
|
|
36519
36167
|
sonioxTwoWayTranslationSchema
|
|
36520
36168
|
]);
|
|
36521
36169
|
var sonioxContextGeneralItemSchema = zod11.object({
|
|
36522
|
-
key: zod11.string()
|
|
36523
|
-
value: zod11.string()
|
|
36170
|
+
key: zod11.string(),
|
|
36171
|
+
value: zod11.string()
|
|
36524
36172
|
});
|
|
36525
36173
|
var sonioxTranslationTermSchema = zod11.object({
|
|
36526
|
-
source: zod11.string()
|
|
36527
|
-
target: zod11.string()
|
|
36174
|
+
source: zod11.string(),
|
|
36175
|
+
target: zod11.string()
|
|
36528
36176
|
});
|
|
36529
36177
|
var sonioxStructuredContextSchema = zod11.object({
|
|
36530
|
-
general: zod11.array(sonioxContextGeneralItemSchema).optional()
|
|
36531
|
-
text: zod11.string().optional()
|
|
36532
|
-
terms: zod11.array(zod11.string()).optional()
|
|
36533
|
-
translation_terms: zod11.array(sonioxTranslationTermSchema).optional()
|
|
36178
|
+
general: zod11.array(sonioxContextGeneralItemSchema).optional(),
|
|
36179
|
+
text: zod11.string().optional(),
|
|
36180
|
+
terms: zod11.array(zod11.string()).optional(),
|
|
36181
|
+
translation_terms: zod11.array(sonioxTranslationTermSchema).optional()
|
|
36534
36182
|
});
|
|
36535
36183
|
var sonioxContextSchema = zod11.union([sonioxStructuredContextSchema, zod11.string()]);
|
|
36536
36184
|
var sonioxRealtimeModelSchema = zod11.enum([
|
|
36185
|
+
"stt-rt-v4",
|
|
36537
36186
|
"stt-rt-v3",
|
|
36538
36187
|
"stt-rt-preview",
|
|
36539
36188
|
"stt-rt-v3-preview",
|
|
36540
36189
|
"stt-rt-preview-v2"
|
|
36541
36190
|
]);
|
|
36542
36191
|
var streamingTranscriberParams3 = zod11.object({
|
|
36543
|
-
model: sonioxRealtimeModelSchema
|
|
36544
|
-
audioFormat: sonioxAudioFormatSchema.optional()
|
|
36545
|
-
sampleRate: zod11.number().optional()
|
|
36546
|
-
numChannels: zod11.number().
|
|
36547
|
-
languageHints: zod11.array(zod11.string()).optional()
|
|
36548
|
-
context: sonioxContextSchema.optional()
|
|
36549
|
-
enableSpeakerDiarization: zod11.boolean().optional()
|
|
36550
|
-
enableLanguageIdentification: zod11.boolean().optional()
|
|
36551
|
-
enableEndpointDetection: zod11.boolean().optional()
|
|
36552
|
-
translation: sonioxTranslationConfigSchema.optional()
|
|
36553
|
-
clientReferenceId: zod11.string().optional()
|
|
36554
|
-
});
|
|
36555
|
-
var sonioxTranslationStatusSchema = zod11.enum(["
|
|
36192
|
+
model: sonioxRealtimeModelSchema,
|
|
36193
|
+
audioFormat: sonioxAudioFormatSchema.optional(),
|
|
36194
|
+
sampleRate: zod11.number().optional(),
|
|
36195
|
+
numChannels: zod11.number().optional(),
|
|
36196
|
+
languageHints: zod11.array(zod11.string()).optional(),
|
|
36197
|
+
context: sonioxContextSchema.optional(),
|
|
36198
|
+
enableSpeakerDiarization: zod11.boolean().optional(),
|
|
36199
|
+
enableLanguageIdentification: zod11.boolean().optional(),
|
|
36200
|
+
enableEndpointDetection: zod11.boolean().optional(),
|
|
36201
|
+
translation: sonioxTranslationConfigSchema.optional(),
|
|
36202
|
+
clientReferenceId: zod11.string().optional()
|
|
36203
|
+
});
|
|
36204
|
+
var sonioxTranslationStatusSchema = zod11.enum(["original", "translation", "none"]);
|
|
36556
36205
|
var sonioxTokenSchema = zod11.object({
|
|
36557
|
-
text: zod11.string()
|
|
36558
|
-
start_ms: zod11.number().optional()
|
|
36559
|
-
end_ms: zod11.number().optional()
|
|
36560
|
-
confidence: zod11.number()
|
|
36561
|
-
is_final: zod11.boolean()
|
|
36562
|
-
speaker: zod11.string().optional()
|
|
36563
|
-
|
|
36564
|
-
|
|
36565
|
-
|
|
36206
|
+
text: zod11.string(),
|
|
36207
|
+
start_ms: zod11.number().optional(),
|
|
36208
|
+
end_ms: zod11.number().optional(),
|
|
36209
|
+
confidence: zod11.number(),
|
|
36210
|
+
is_final: zod11.boolean(),
|
|
36211
|
+
speaker: zod11.string().optional(),
|
|
36212
|
+
translation_status: sonioxTranslationStatusSchema.optional(),
|
|
36213
|
+
language: zod11.string().optional(),
|
|
36214
|
+
source_language: zod11.string().optional()
|
|
36566
36215
|
});
|
|
36567
36216
|
var sonioxStreamingResponseSchema = zod11.object({
|
|
36568
|
-
text: zod11.string()
|
|
36569
|
-
tokens: zod11.array(sonioxTokenSchema)
|
|
36570
|
-
final_audio_proc_ms: zod11.number()
|
|
36571
|
-
total_audio_proc_ms: zod11.number()
|
|
36572
|
-
finished: zod11.boolean().optional()
|
|
36573
|
-
|
|
36574
|
-
|
|
36217
|
+
text: zod11.string(),
|
|
36218
|
+
tokens: zod11.array(sonioxTokenSchema),
|
|
36219
|
+
final_audio_proc_ms: zod11.number(),
|
|
36220
|
+
total_audio_proc_ms: zod11.number(),
|
|
36221
|
+
finished: zod11.boolean().optional(),
|
|
36222
|
+
error_code: zod11.number().optional(),
|
|
36223
|
+
error_message: zod11.string().optional()
|
|
36575
36224
|
});
|
|
36576
36225
|
var sonioxRecorderStateSchema = zod11.enum([
|
|
36577
36226
|
"Init",
|
|
@@ -37137,8 +36786,8 @@ var BatchOnlyProviders = AllProviders.filter(
|
|
|
37137
36786
|
);
|
|
37138
36787
|
|
|
37139
36788
|
// src/generated/deepgram/schema/index.ts
|
|
37140
|
-
var
|
|
37141
|
-
__export(
|
|
36789
|
+
var schema_exports5 = {};
|
|
36790
|
+
__export(schema_exports5, {
|
|
37142
36791
|
V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
|
|
37143
36792
|
V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
|
|
37144
36793
|
V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
|
|
@@ -37393,8 +37042,8 @@ var V1SpeakPostParametersSampleRate = {
|
|
|
37393
37042
|
};
|
|
37394
37043
|
|
|
37395
37044
|
// src/generated/openai/schema/index.ts
|
|
37396
|
-
var
|
|
37397
|
-
__export(
|
|
37045
|
+
var schema_exports6 = {};
|
|
37046
|
+
__export(schema_exports6, {
|
|
37398
37047
|
AudioResponseFormat: () => AudioResponseFormat,
|
|
37399
37048
|
CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
|
|
37400
37049
|
CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
|
|
@@ -37734,8 +37383,8 @@ var VoiceResourceObject = {
|
|
|
37734
37383
|
};
|
|
37735
37384
|
|
|
37736
37385
|
// src/generated/speechmatics/schema/index.ts
|
|
37737
|
-
var
|
|
37738
|
-
__export(
|
|
37386
|
+
var schema_exports7 = {};
|
|
37387
|
+
__export(schema_exports7, {
|
|
37739
37388
|
AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
|
|
37740
37389
|
ErrorResponseError: () => ErrorResponseError,
|
|
37741
37390
|
GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
|
|
@@ -37924,32 +37573,6 @@ var WrittenFormRecognitionResultType = {
|
|
|
37924
37573
|
word: "word"
|
|
37925
37574
|
};
|
|
37926
37575
|
|
|
37927
|
-
// src/generated/soniox/schema/index.ts
|
|
37928
|
-
var schema_exports7 = {};
|
|
37929
|
-
__export(schema_exports7, {
|
|
37930
|
-
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
37931
|
-
TranscriptionMode: () => TranscriptionMode,
|
|
37932
|
-
TranscriptionStatus: () => TranscriptionStatus,
|
|
37933
|
-
TranslationConfigType: () => TranslationConfigType
|
|
37934
|
-
});
|
|
37935
|
-
|
|
37936
|
-
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
37937
|
-
var TemporaryApiKeyUsageType = {
|
|
37938
|
-
transcribe_websocket: "transcribe_websocket"
|
|
37939
|
-
};
|
|
37940
|
-
|
|
37941
|
-
// src/generated/soniox/schema/transcriptionMode.ts
|
|
37942
|
-
var TranscriptionMode = {
|
|
37943
|
-
real_time: "real_time",
|
|
37944
|
-
async: "async"
|
|
37945
|
-
};
|
|
37946
|
-
|
|
37947
|
-
// src/generated/soniox/schema/translationConfigType.ts
|
|
37948
|
-
var TranslationConfigType = {
|
|
37949
|
-
one_way: "one_way",
|
|
37950
|
-
two_way: "two_way"
|
|
37951
|
-
};
|
|
37952
|
-
|
|
37953
37576
|
// src/generated/elevenlabs/schema/index.ts
|
|
37954
37577
|
var schema_exports8 = {};
|
|
37955
37578
|
__export(schema_exports8, {
|
|
@@ -39653,7 +39276,7 @@ export {
|
|
|
39653
39276
|
DeepgramTTSSampleRate,
|
|
39654
39277
|
DeepgramTopicMode,
|
|
39655
39278
|
DeepgramTranscriptionSchema,
|
|
39656
|
-
|
|
39279
|
+
schema_exports5 as DeepgramTypes,
|
|
39657
39280
|
deepgramAPI_zod_exports as DeepgramZodSchemas,
|
|
39658
39281
|
ElevenLabsAdapter,
|
|
39659
39282
|
ElevenLabsCapabilities,
|
|
@@ -39690,7 +39313,7 @@ export {
|
|
|
39690
39313
|
OpenAIResponseFormat,
|
|
39691
39314
|
streaming_types_exports as OpenAIStreamingTypes,
|
|
39692
39315
|
OpenAITranscriptionSchema,
|
|
39693
|
-
|
|
39316
|
+
schema_exports6 as OpenAITypes,
|
|
39694
39317
|
OpenAIWhisperAdapter,
|
|
39695
39318
|
openAIAudioRealtimeAPI_zod_exports as OpenAIZodSchemas,
|
|
39696
39319
|
ProfanityFilterMode,
|
|
@@ -39719,7 +39342,7 @@ export {
|
|
|
39719
39342
|
SonioxStreamingUpdateSchema,
|
|
39720
39343
|
streaming_types_zod_exports as SonioxStreamingZodSchemas,
|
|
39721
39344
|
SonioxTranscriptionSchema,
|
|
39722
|
-
|
|
39345
|
+
schema_exports4 as SonioxTypes,
|
|
39723
39346
|
SpeakV1ContainerParameter,
|
|
39724
39347
|
SpeakV1EncodingParameter,
|
|
39725
39348
|
SpeakV1SampleRateParameter,
|
|
@@ -39734,7 +39357,7 @@ export {
|
|
|
39734
39357
|
SpeechmaticsStreamingSchema,
|
|
39735
39358
|
SpeechmaticsStreamingUpdateSchema,
|
|
39736
39359
|
SpeechmaticsTranscriptionSchema,
|
|
39737
|
-
|
|
39360
|
+
schema_exports7 as SpeechmaticsTypes,
|
|
39738
39361
|
speechmaticsASRRESTAPI_zod_exports as SpeechmaticsZodSchemas,
|
|
39739
39362
|
StreamingProviders,
|
|
39740
39363
|
StreamingSupportedBitDepthEnum,
|