voice-router-dev 0.9.0 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +40 -0
- package/dist/constants.d.mts +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/{field-configs-DYiUtRUz.d.mts → field-configs-FbtCPxzs.d.mts} +5730 -5786
- package/dist/{field-configs-DYiUtRUz.d.ts → field-configs-FbtCPxzs.d.ts} +5730 -5786
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +42 -51
- package/dist/field-configs.mjs +42 -51
- package/dist/index.d.mts +840 -1161
- package/dist/index.d.ts +840 -1161
- package/dist/index.js +613 -701
- package/dist/index.mjs +613 -701
- package/dist/{speechToTextChunkResponseModel-CI-Aqxcr.d.ts → speechToTextChunkResponseModel-BY2lGyZ3.d.ts} +319 -1
- package/dist/{speechToTextChunkResponseModel-D8VJ-wz6.d.mts → speechToTextChunkResponseModel-KayxDiZ7.d.mts} +319 -1
- package/dist/webhooks.d.mts +1 -1
- package/dist/webhooks.d.ts +1 -1
- package/package.json +2 -1
package/dist/index.mjs
CHANGED
|
@@ -5835,23 +5835,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
5835
5835
|
"AssemblyAI adapter currently only supports URL-based audio input. Use audio.type='url'"
|
|
5836
5836
|
);
|
|
5837
5837
|
}
|
|
5838
|
-
const
|
|
5839
|
-
|
|
5840
|
-
|
|
5841
|
-
|
|
5842
|
-
|
|
5843
|
-
|
|
5838
|
+
const passthrough = options?.assemblyai;
|
|
5839
|
+
let speechModels;
|
|
5840
|
+
if (passthrough?.speech_model != null && !passthrough.speech_models) {
|
|
5841
|
+
speechModels = [passthrough.speech_model];
|
|
5842
|
+
} else if (passthrough?.speech_models) {
|
|
5843
|
+
speechModels = passthrough.speech_models;
|
|
5844
5844
|
}
|
|
5845
|
+
const { speech_model: _deprecated, ...typedOpts } = passthrough ?? {};
|
|
5845
5846
|
const request = {
|
|
5846
|
-
...
|
|
5847
|
+
...typedOpts,
|
|
5847
5848
|
audio_url: audioUrl,
|
|
5848
5849
|
// speech_models is required — default to universal-3-pro
|
|
5849
|
-
speech_models:
|
|
5850
|
-
"universal-3-pro"
|
|
5851
|
-
],
|
|
5850
|
+
speech_models: speechModels ?? ["universal-3-pro"],
|
|
5852
5851
|
// Enable punctuation and formatting by default
|
|
5853
|
-
punctuate:
|
|
5854
|
-
format_text:
|
|
5852
|
+
punctuate: typedOpts.punctuate ?? true,
|
|
5853
|
+
format_text: typedOpts.format_text ?? true
|
|
5855
5854
|
};
|
|
5856
5855
|
if (options) {
|
|
5857
5856
|
if (options.model) {
|
|
@@ -5899,22 +5898,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
|
|
|
5899
5898
|
normalizeResponse(response) {
|
|
5900
5899
|
let status;
|
|
5901
5900
|
switch (response.status) {
|
|
5902
|
-
case
|
|
5901
|
+
case "queued":
|
|
5903
5902
|
status = "queued";
|
|
5904
5903
|
break;
|
|
5905
|
-
case
|
|
5904
|
+
case "processing":
|
|
5906
5905
|
status = "processing";
|
|
5907
5906
|
break;
|
|
5908
|
-
case
|
|
5907
|
+
case "completed":
|
|
5909
5908
|
status = "completed";
|
|
5910
5909
|
break;
|
|
5911
|
-
case
|
|
5910
|
+
case "error":
|
|
5912
5911
|
status = "error";
|
|
5913
5912
|
break;
|
|
5914
5913
|
default:
|
|
5915
5914
|
status = "queued";
|
|
5916
5915
|
}
|
|
5917
|
-
if (response.status ===
|
|
5916
|
+
if (response.status === "error") {
|
|
5918
5917
|
return {
|
|
5919
5918
|
success: false,
|
|
5920
5919
|
provider: this.name,
|
|
@@ -6566,8 +6565,14 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
6566
6565
|
/**
|
|
6567
6566
|
* Submit audio for transcription
|
|
6568
6567
|
*
|
|
6569
|
-
* Sends audio to Deepgram API for transcription. Deepgram processes
|
|
6570
|
-
* synchronously and returns results immediately
|
|
6568
|
+
* Sends audio to Deepgram API for transcription. Deepgram normally processes
|
|
6569
|
+
* synchronously and returns results immediately.
|
|
6570
|
+
*
|
|
6571
|
+
* **Callback mode:** When `webhookUrl` is set, Deepgram returns immediately
|
|
6572
|
+
* with a `request_id` (status `"queued"`). The full transcript is POSTed to
|
|
6573
|
+
* the webhook URL — this is the primary delivery mechanism. `getTranscript()`
|
|
6574
|
+
* can attempt to retrieve the result later via request history, but that
|
|
6575
|
+
* endpoint is best-effort and not a guaranteed durable store.
|
|
6571
6576
|
*
|
|
6572
6577
|
* @param audio - Audio input (URL or file buffer)
|
|
6573
6578
|
* @param options - Transcription options
|
|
@@ -6618,47 +6623,81 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
6618
6623
|
{ params }
|
|
6619
6624
|
).then((res) => res.data);
|
|
6620
6625
|
} else if (audio.type === "file") {
|
|
6621
|
-
response = await this.client.post(
|
|
6622
|
-
|
|
6623
|
-
|
|
6624
|
-
|
|
6626
|
+
response = await this.client.post(
|
|
6627
|
+
"/listen",
|
|
6628
|
+
audio.file,
|
|
6629
|
+
{
|
|
6630
|
+
params,
|
|
6631
|
+
headers: {
|
|
6632
|
+
"Content-Type": "audio/*"
|
|
6633
|
+
}
|
|
6625
6634
|
}
|
|
6626
|
-
|
|
6635
|
+
).then((res) => res.data);
|
|
6627
6636
|
} else {
|
|
6628
6637
|
throw new Error(
|
|
6629
6638
|
"Deepgram adapter does not support stream type for pre-recorded transcription. Use transcribeStream() for real-time streaming."
|
|
6630
6639
|
);
|
|
6631
6640
|
}
|
|
6641
|
+
if (options?.webhookUrl) {
|
|
6642
|
+
const requestId = ("request_id" in response ? response.request_id : void 0) || ("metadata" in response ? response.metadata?.request_id : void 0);
|
|
6643
|
+
if (!requestId) {
|
|
6644
|
+
return {
|
|
6645
|
+
success: false,
|
|
6646
|
+
provider: this.name,
|
|
6647
|
+
error: {
|
|
6648
|
+
code: "MISSING_REQUEST_ID",
|
|
6649
|
+
message: "Deepgram callback mode did not return a request ID"
|
|
6650
|
+
},
|
|
6651
|
+
raw: response
|
|
6652
|
+
};
|
|
6653
|
+
}
|
|
6654
|
+
return {
|
|
6655
|
+
success: true,
|
|
6656
|
+
provider: this.name,
|
|
6657
|
+
data: {
|
|
6658
|
+
id: requestId,
|
|
6659
|
+
text: "",
|
|
6660
|
+
status: "queued"
|
|
6661
|
+
},
|
|
6662
|
+
tracking: {
|
|
6663
|
+
requestId
|
|
6664
|
+
},
|
|
6665
|
+
raw: response
|
|
6666
|
+
};
|
|
6667
|
+
}
|
|
6668
|
+
if (!("results" in response) || !("metadata" in response)) {
|
|
6669
|
+
return {
|
|
6670
|
+
success: false,
|
|
6671
|
+
provider: this.name,
|
|
6672
|
+
error: {
|
|
6673
|
+
code: "INVALID_RESPONSE",
|
|
6674
|
+
message: "Deepgram did not return a synchronous transcription payload"
|
|
6675
|
+
},
|
|
6676
|
+
raw: response
|
|
6677
|
+
};
|
|
6678
|
+
}
|
|
6632
6679
|
return this.normalizeResponse(response);
|
|
6633
6680
|
} catch (error) {
|
|
6634
6681
|
return this.createErrorResponse(error);
|
|
6635
6682
|
}
|
|
6636
6683
|
}
|
|
6637
6684
|
/**
|
|
6638
|
-
* Get transcription result by ID
|
|
6685
|
+
* Get transcription result by ID (best-effort)
|
|
6639
6686
|
*
|
|
6640
|
-
* Retrieves a previous transcription from Deepgram's request history.
|
|
6641
|
-
*
|
|
6642
|
-
* Unlike the list endpoint, getting a single request DOES include the full
|
|
6643
|
-
* transcript response. Requires `projectId` to be set during initialization.
|
|
6687
|
+
* Retrieves a previous transcription from Deepgram's request history API.
|
|
6688
|
+
* Requires `projectId` to be set during initialization.
|
|
6644
6689
|
*
|
|
6645
|
-
*
|
|
6646
|
-
*
|
|
6690
|
+
* **Important:** Deepgram's request history is best-effort. Requests may
|
|
6691
|
+
* expire or be unavailable depending on your plan and retention settings.
|
|
6692
|
+
* This is NOT a durable transcript store — for reliable retrieval, use
|
|
6693
|
+
* callback mode (`webhookUrl`) and persist the webhook payload yourself.
|
|
6647
6694
|
*
|
|
6648
|
-
*
|
|
6649
|
-
*
|
|
6650
|
-
*
|
|
6651
|
-
* adapter.initialize({
|
|
6652
|
-
* apiKey: process.env.DEEPGRAM_API_KEY,
|
|
6653
|
-
* projectId: process.env.DEEPGRAM_PROJECT_ID
|
|
6654
|
-
* })
|
|
6695
|
+
* The response field on the request history entry is cast to
|
|
6696
|
+
* `ListenV1Response` — this appears to work in practice but is not
|
|
6697
|
+
* explicitly documented by Deepgram as a guaranteed contract.
|
|
6655
6698
|
*
|
|
6656
|
-
*
|
|
6657
|
-
* if
|
|
6658
|
-
* console.log(result.data?.text)
|
|
6659
|
-
* console.log(result.data?.words)
|
|
6660
|
-
* }
|
|
6661
|
-
* ```
|
|
6699
|
+
* @param transcriptId - Request ID from a previous transcription
|
|
6700
|
+
* @returns Transcript response if still available in request history
|
|
6662
6701
|
*
|
|
6663
6702
|
* @see https://developers.deepgram.com/reference/get-request
|
|
6664
6703
|
*/
|
|
@@ -7289,7 +7328,8 @@ var DeepgramAdapter = class extends BaseAdapter {
|
|
|
7289
7328
|
break;
|
|
7290
7329
|
}
|
|
7291
7330
|
case "Metadata": {
|
|
7292
|
-
|
|
7331
|
+
const { type: _, ...metadata } = message;
|
|
7332
|
+
callbacks?.onMetadata?.(metadata);
|
|
7293
7333
|
break;
|
|
7294
7334
|
}
|
|
7295
7335
|
case "Error": {
|
|
@@ -7725,10 +7765,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
|
|
|
7725
7765
|
contentUrls: [audio.url],
|
|
7726
7766
|
properties: this.buildTranscriptionProperties(options)
|
|
7727
7767
|
};
|
|
7728
|
-
const response = await transcriptionsCreate(
|
|
7729
|
-
transcriptionRequest,
|
|
7730
|
-
this.getAxiosConfig()
|
|
7731
|
-
);
|
|
7768
|
+
const response = await transcriptionsCreate(transcriptionRequest, this.getAxiosConfig());
|
|
7732
7769
|
const transcription = response.data;
|
|
7733
7770
|
const transcriptId = transcription.self?.split("/").pop() || "";
|
|
7734
7771
|
return await this.pollForCompletion(transcriptId);
|
|
@@ -8268,7 +8305,6 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
|
|
|
8268
8305
|
const request = {
|
|
8269
8306
|
...options?.openai,
|
|
8270
8307
|
file: audioData,
|
|
8271
|
-
// Buffer/Blob both accepted at runtime; generated type expects Blob
|
|
8272
8308
|
model
|
|
8273
8309
|
};
|
|
8274
8310
|
if (options?.language) {
|
|
@@ -8288,11 +8324,7 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
|
|
|
8288
8324
|
request.response_format = OpenAIResponseFormat.json;
|
|
8289
8325
|
}
|
|
8290
8326
|
const response = await createTranscription(request, this.getAxiosConfig());
|
|
8291
|
-
return this.normalizeResponse(
|
|
8292
|
-
response.data,
|
|
8293
|
-
model,
|
|
8294
|
-
isDiarization
|
|
8295
|
-
);
|
|
8327
|
+
return this.normalizeResponse(response.data, model, isDiarization);
|
|
8296
8328
|
} catch (error) {
|
|
8297
8329
|
return this.createErrorResponse(error);
|
|
8298
8330
|
}
|
|
@@ -8699,7 +8731,6 @@ function createOpenAIWhisperAdapter(config) {
|
|
|
8699
8731
|
|
|
8700
8732
|
// src/adapters/speechmatics-adapter.ts
|
|
8701
8733
|
import axios8 from "axios";
|
|
8702
|
-
import WebSocket6 from "ws";
|
|
8703
8734
|
|
|
8704
8735
|
// src/generated/speechmatics/schema/notificationConfigContentsItem.ts
|
|
8705
8736
|
var NotificationConfigContentsItem = {
|
|
@@ -8884,16 +8915,13 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8884
8915
|
jobConfig.fetch_data = {
|
|
8885
8916
|
url: audio.url
|
|
8886
8917
|
};
|
|
8887
|
-
|
|
8888
|
-
|
|
8889
|
-
requestBody = formData;
|
|
8890
|
-
headers = { "Content-Type": "multipart/form-data" };
|
|
8918
|
+
requestBody = { config: JSON.stringify(jobConfig) };
|
|
8919
|
+
headers = { "Content-Type": "application/json" };
|
|
8891
8920
|
} else if (audio.type === "file") {
|
|
8892
|
-
|
|
8893
|
-
|
|
8894
|
-
|
|
8895
|
-
|
|
8896
|
-
requestBody = formData;
|
|
8921
|
+
requestBody = {
|
|
8922
|
+
config: JSON.stringify(jobConfig),
|
|
8923
|
+
data_file: audio.file
|
|
8924
|
+
};
|
|
8897
8925
|
headers = { "Content-Type": "multipart/form-data" };
|
|
8898
8926
|
} else {
|
|
8899
8927
|
return {
|
|
@@ -8999,216 +9027,224 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8999
9027
|
}
|
|
9000
9028
|
}
|
|
9001
9029
|
/**
|
|
9002
|
-
*
|
|
9030
|
+
* Get the regional WebSocket host for real-time streaming
|
|
9003
9031
|
*
|
|
9004
|
-
*
|
|
9005
|
-
* - Batch: {region}.asr.api.speechmatics.com
|
|
9006
|
-
* - Real-time: {region}.rt.speechmatics.com
|
|
9007
|
-
*
|
|
9008
|
-
* @param region - Regional endpoint identifier
|
|
9009
|
-
* @returns WebSocket URL for real-time API
|
|
9032
|
+
* Speechmatics RT uses a different host pattern: {region}.rt.speechmatics.com
|
|
9010
9033
|
*/
|
|
9011
|
-
|
|
9012
|
-
|
|
9013
|
-
|
|
9014
|
-
}
|
|
9015
|
-
const rtRegionMap = {
|
|
9016
|
-
eu1: "eu",
|
|
9017
|
-
eu2: "eu",
|
|
9018
|
-
us1: "us",
|
|
9019
|
-
us2: "us",
|
|
9020
|
-
au1: "eu"
|
|
9021
|
-
// No AU RT endpoint — fall back to EU
|
|
9022
|
-
};
|
|
9023
|
-
const rtPrefix = rtRegionMap[region || ""] || "eu";
|
|
9024
|
-
return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
|
|
9034
|
+
getRegionalWsHost(region) {
|
|
9035
|
+
const regionPrefix = region || "eu1";
|
|
9036
|
+
return `${regionPrefix}.rt.speechmatics.com`;
|
|
9025
9037
|
}
|
|
9026
9038
|
/**
|
|
9027
|
-
* Stream audio for real-time transcription
|
|
9028
|
-
*
|
|
9029
|
-
* Connects to Speechmatics' real-time API and sends audio chunks
|
|
9030
|
-
* for transcription with results returned via callbacks.
|
|
9039
|
+
* Stream audio for real-time transcription
|
|
9031
9040
|
*
|
|
9032
|
-
*
|
|
9033
|
-
*
|
|
9034
|
-
*
|
|
9041
|
+
* Creates a WebSocket connection to the Speechmatics Real-Time API.
|
|
9042
|
+
* Protocol: send StartRecognition config, then AddAudio binary frames,
|
|
9043
|
+
* receive AddPartialTranscript/AddTranscript/EndOfUtterance messages.
|
|
9035
9044
|
*
|
|
9036
|
-
* @
|
|
9037
|
-
*
|
|
9038
|
-
*
|
|
9039
|
-
* language: 'en',
|
|
9040
|
-
* speechmaticsStreaming: {
|
|
9041
|
-
* enablePartials: true,
|
|
9042
|
-
* operatingPoint: 'enhanced'
|
|
9043
|
-
* }
|
|
9044
|
-
* }, {
|
|
9045
|
-
* onTranscript: (event) => console.log(event.text),
|
|
9046
|
-
* onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
|
|
9047
|
-
* onError: (error) => console.error(error)
|
|
9048
|
-
* });
|
|
9045
|
+
* @param options - Streaming configuration
|
|
9046
|
+
* @param callbacks - Event callbacks
|
|
9047
|
+
* @returns StreamingSession for sending audio and closing
|
|
9049
9048
|
*
|
|
9050
|
-
*
|
|
9051
|
-
* await session.close();
|
|
9052
|
-
* ```
|
|
9049
|
+
* @see https://docs.speechmatics.com/rt-api-ref
|
|
9053
9050
|
*/
|
|
9054
9051
|
async transcribeStream(options, callbacks) {
|
|
9055
9052
|
this.validateConfig();
|
|
9056
|
-
const
|
|
9057
|
-
const
|
|
9058
|
-
const
|
|
9059
|
-
const
|
|
9060
|
-
|
|
9061
|
-
|
|
9062
|
-
|
|
9063
|
-
|
|
9064
|
-
|
|
9065
|
-
const
|
|
9066
|
-
|
|
9067
|
-
|
|
9068
|
-
|
|
9069
|
-
|
|
9070
|
-
|
|
9071
|
-
|
|
9072
|
-
|
|
9073
|
-
|
|
9074
|
-
|
|
9075
|
-
|
|
9076
|
-
}
|
|
9077
|
-
|
|
9078
|
-
|
|
9079
|
-
|
|
9080
|
-
|
|
9081
|
-
const startMsg = {
|
|
9082
|
-
message: "StartRecognition",
|
|
9083
|
-
audio_format: {
|
|
9084
|
-
type: "raw",
|
|
9085
|
-
encoding,
|
|
9086
|
-
sample_rate: sampleRate
|
|
9087
|
-
},
|
|
9088
|
-
transcription_config: {
|
|
9089
|
-
language: smOpts.language || options?.language || "en",
|
|
9090
|
-
enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
|
|
9091
|
-
}
|
|
9092
|
-
};
|
|
9093
|
-
const txConfig = startMsg.transcription_config;
|
|
9094
|
-
if (smOpts.domain) txConfig.domain = smOpts.domain;
|
|
9095
|
-
if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
|
|
9096
|
-
if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
|
|
9097
|
-
if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
|
|
9098
|
-
if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
|
|
9099
|
-
if (smOpts.diarization === "speaker" || options?.diarization) {
|
|
9100
|
-
txConfig.diarization = "speaker";
|
|
9101
|
-
if (smOpts.maxSpeakers) {
|
|
9102
|
-
txConfig.speaker_diarization_config = {
|
|
9103
|
-
max_speakers: smOpts.maxSpeakers
|
|
9104
|
-
};
|
|
9105
|
-
} else if (options?.speakersExpected) {
|
|
9106
|
-
txConfig.speaker_diarization_config = {
|
|
9107
|
-
max_speakers: options.speakersExpected
|
|
9108
|
-
};
|
|
9109
|
-
}
|
|
9110
|
-
}
|
|
9111
|
-
if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
|
|
9112
|
-
txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
|
|
9113
|
-
content: word
|
|
9114
|
-
}));
|
|
9115
|
-
} else if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9116
|
-
txConfig.additional_vocab = options.customVocabulary.map((word) => ({
|
|
9117
|
-
content: word
|
|
9118
|
-
}));
|
|
9119
|
-
}
|
|
9120
|
-
if (smOpts.conversationConfig) {
|
|
9121
|
-
txConfig.conversation_config = {
|
|
9122
|
-
end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
|
|
9123
|
-
};
|
|
9124
|
-
}
|
|
9125
|
-
const startPayload = JSON.stringify(startMsg);
|
|
9126
|
-
if (callbacks?.onRawMessage) {
|
|
9127
|
-
callbacks.onRawMessage({
|
|
9128
|
-
provider: "speechmatics",
|
|
9129
|
-
direction: "outgoing",
|
|
9130
|
-
timestamp: Date.now(),
|
|
9131
|
-
payload: startPayload,
|
|
9132
|
-
messageType: "StartRecognition"
|
|
9133
|
-
});
|
|
9053
|
+
const sessionId = `speechmatics_${Date.now()}_${Math.random().toString(36).substring(7)}`;
|
|
9054
|
+
const createdAt = /* @__PURE__ */ new Date();
|
|
9055
|
+
const smOpts = options?.speechmaticsStreaming;
|
|
9056
|
+
const region = smOpts?.region || this.config?.region;
|
|
9057
|
+
const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost(region)}`);
|
|
9058
|
+
const wsUrl = `${wsBase}/v2`;
|
|
9059
|
+
let status = "connecting";
|
|
9060
|
+
let recognitionStarted = false;
|
|
9061
|
+
const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : __require("ws");
|
|
9062
|
+
const ws = new WebSocketImpl(wsUrl);
|
|
9063
|
+
const language = smOpts?.language || options?.language || "en";
|
|
9064
|
+
const transcriptionConfig = {
|
|
9065
|
+
language,
|
|
9066
|
+
enable_entities: smOpts?.enableEntities ?? options?.entityDetection ?? false,
|
|
9067
|
+
enable_partials: smOpts?.enablePartials ?? options?.interimResults !== false,
|
|
9068
|
+
operating_point: smOpts?.operatingPoint || OperatingPoint.enhanced,
|
|
9069
|
+
...smOpts?.maxDelay !== void 0 && { max_delay: smOpts.maxDelay },
|
|
9070
|
+
...smOpts?.maxDelayMode && {
|
|
9071
|
+
max_delay_mode: smOpts.maxDelayMode
|
|
9072
|
+
},
|
|
9073
|
+
...smOpts?.domain && { domain: smOpts.domain },
|
|
9074
|
+
...(options?.diarization || smOpts?.diarization === TranscriptionConfigDiarization.speaker) && {
|
|
9075
|
+
diarization: TranscriptionConfigDiarization.speaker,
|
|
9076
|
+
...smOpts?.maxSpeakers !== void 0 && {
|
|
9077
|
+
speaker_diarization_config: { max_speakers: smOpts.maxSpeakers }
|
|
9134
9078
|
}
|
|
9135
|
-
|
|
9136
|
-
|
|
9137
|
-
|
|
9138
|
-
|
|
9139
|
-
|
|
9140
|
-
|
|
9141
|
-
|
|
9142
|
-
|
|
9143
|
-
|
|
9144
|
-
|
|
9145
|
-
|
|
9146
|
-
|
|
9147
|
-
|
|
9148
|
-
|
|
9149
|
-
|
|
9150
|
-
|
|
9151
|
-
|
|
9079
|
+
},
|
|
9080
|
+
...(options?.customVocabulary?.length || smOpts?.additionalVocab?.length) && {
|
|
9081
|
+
additional_vocab: (smOpts?.additionalVocab || options?.customVocabulary || []).map(
|
|
9082
|
+
(term) => ({ content: term })
|
|
9083
|
+
)
|
|
9084
|
+
}
|
|
9085
|
+
};
|
|
9086
|
+
const startRecognition = {
|
|
9087
|
+
message: "StartRecognition",
|
|
9088
|
+
audio_format: {
|
|
9089
|
+
type: "raw",
|
|
9090
|
+
encoding: smOpts?.encoding || "pcm_s16le",
|
|
9091
|
+
sample_rate: smOpts?.sampleRate || options?.sampleRate || 16e3
|
|
9092
|
+
},
|
|
9093
|
+
transcription_config: transcriptionConfig,
|
|
9094
|
+
...smOpts?.conversationConfig && {
|
|
9095
|
+
conversation_config: {
|
|
9096
|
+
end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
|
|
9152
9097
|
}
|
|
9153
|
-
}
|
|
9154
|
-
|
|
9155
|
-
|
|
9156
|
-
|
|
9157
|
-
const
|
|
9098
|
+
}
|
|
9099
|
+
};
|
|
9100
|
+
ws.onopen = () => {
|
|
9101
|
+
status = "open";
|
|
9102
|
+
const msg = JSON.stringify(startRecognition);
|
|
9103
|
+
if (callbacks?.onRawMessage) {
|
|
9104
|
+
callbacks.onRawMessage({
|
|
9105
|
+
provider: this.name,
|
|
9106
|
+
direction: "outgoing",
|
|
9107
|
+
timestamp: Date.now(),
|
|
9108
|
+
payload: msg,
|
|
9109
|
+
messageType: "StartRecognition"
|
|
9110
|
+
});
|
|
9111
|
+
}
|
|
9112
|
+
ws.send(msg);
|
|
9113
|
+
};
|
|
9114
|
+
ws.onmessage = (event) => {
|
|
9115
|
+
const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
|
|
9158
9116
|
try {
|
|
9159
|
-
const
|
|
9117
|
+
const data = JSON.parse(rawPayload);
|
|
9118
|
+
const messageType = data.message;
|
|
9160
9119
|
if (callbacks?.onRawMessage) {
|
|
9161
9120
|
callbacks.onRawMessage({
|
|
9162
|
-
provider:
|
|
9121
|
+
provider: this.name,
|
|
9163
9122
|
direction: "incoming",
|
|
9164
9123
|
timestamp: Date.now(),
|
|
9165
9124
|
payload: rawPayload,
|
|
9166
|
-
messageType
|
|
9125
|
+
messageType
|
|
9167
9126
|
});
|
|
9168
9127
|
}
|
|
9169
|
-
|
|
9170
|
-
|
|
9171
|
-
|
|
9172
|
-
|
|
9173
|
-
|
|
9174
|
-
|
|
9175
|
-
|
|
9176
|
-
|
|
9177
|
-
|
|
9178
|
-
}
|
|
9128
|
+
switch (messageType) {
|
|
9129
|
+
case "RecognitionStarted": {
|
|
9130
|
+
recognitionStarted = true;
|
|
9131
|
+
callbacks?.onOpen?.();
|
|
9132
|
+
callbacks?.onMetadata?.({
|
|
9133
|
+
id: data.id,
|
|
9134
|
+
languagePackInfo: data.language_pack_info
|
|
9135
|
+
});
|
|
9136
|
+
break;
|
|
9137
|
+
}
|
|
9138
|
+
case "AddPartialTranscript": {
|
|
9139
|
+
const partial = data;
|
|
9140
|
+
const words = this.resultsToWords(partial.results);
|
|
9141
|
+
callbacks?.onTranscript?.({
|
|
9142
|
+
type: "transcript",
|
|
9143
|
+
text: partial.metadata.transcript,
|
|
9144
|
+
isFinal: false,
|
|
9145
|
+
words,
|
|
9146
|
+
speaker: words[0]?.speaker,
|
|
9147
|
+
confidence: partial.results[0]?.alternatives?.[0]?.confidence,
|
|
9148
|
+
channel: partial.channel ? parseInt(partial.channel) : void 0
|
|
9149
|
+
});
|
|
9150
|
+
break;
|
|
9151
|
+
}
|
|
9152
|
+
case "AddTranscript": {
|
|
9153
|
+
const final = data;
|
|
9154
|
+
const words = this.resultsToWords(final.results);
|
|
9155
|
+
callbacks?.onTranscript?.({
|
|
9156
|
+
type: "transcript",
|
|
9157
|
+
text: final.metadata.transcript,
|
|
9158
|
+
isFinal: true,
|
|
9159
|
+
words,
|
|
9160
|
+
speaker: words[0]?.speaker,
|
|
9161
|
+
confidence: final.results[0]?.alternatives?.[0]?.confidence,
|
|
9162
|
+
channel: final.channel ? parseInt(final.channel) : void 0
|
|
9163
|
+
});
|
|
9164
|
+
if (options?.diarization || smOpts?.diarization === "speaker") {
|
|
9165
|
+
const utterances = buildUtterancesFromWords(words);
|
|
9166
|
+
for (const utterance of utterances) {
|
|
9167
|
+
callbacks?.onUtterance?.(utterance);
|
|
9168
|
+
}
|
|
9169
|
+
}
|
|
9170
|
+
break;
|
|
9171
|
+
}
|
|
9172
|
+
case "EndOfUtterance": {
|
|
9173
|
+
break;
|
|
9174
|
+
}
|
|
9175
|
+
case "EndOfTranscript": {
|
|
9176
|
+
callbacks?.onClose?.(1e3, "Transcription complete");
|
|
9177
|
+
break;
|
|
9178
|
+
}
|
|
9179
|
+
case "Error": {
|
|
9180
|
+
const err = data;
|
|
9181
|
+
callbacks?.onError?.({
|
|
9182
|
+
code: err.type || "SPEECHMATICS_ERROR",
|
|
9183
|
+
message: err.reason || "Unknown error"
|
|
9184
|
+
});
|
|
9185
|
+
break;
|
|
9186
|
+
}
|
|
9187
|
+
case "Warning": {
|
|
9188
|
+
const warn = data;
|
|
9189
|
+
callbacks?.onMetadata?.({
|
|
9190
|
+
warning: warn.type,
|
|
9191
|
+
reason: warn.reason
|
|
9192
|
+
});
|
|
9193
|
+
break;
|
|
9194
|
+
}
|
|
9195
|
+
case "Info": {
|
|
9196
|
+
callbacks?.onMetadata?.(data);
|
|
9197
|
+
break;
|
|
9198
|
+
}
|
|
9199
|
+
case "AudioAdded":
|
|
9200
|
+
case "ChannelAudioAdded":
|
|
9201
|
+
break;
|
|
9202
|
+
default:
|
|
9203
|
+
callbacks?.onMetadata?.(data);
|
|
9204
|
+
break;
|
|
9179
9205
|
}
|
|
9206
|
+
} catch (error) {
|
|
9180
9207
|
callbacks?.onError?.({
|
|
9181
9208
|
code: "PARSE_ERROR",
|
|
9182
|
-
message:
|
|
9183
|
-
details: error
|
|
9209
|
+
message: `Failed to parse message: ${error}`
|
|
9184
9210
|
});
|
|
9185
9211
|
}
|
|
9186
|
-
}
|
|
9187
|
-
ws.
|
|
9212
|
+
};
|
|
9213
|
+
ws.onerror = () => {
|
|
9188
9214
|
callbacks?.onError?.({
|
|
9189
9215
|
code: "WEBSOCKET_ERROR",
|
|
9190
|
-
message: error
|
|
9191
|
-
details: error
|
|
9216
|
+
message: "WebSocket error occurred"
|
|
9192
9217
|
});
|
|
9218
|
+
};
|
|
9219
|
+
ws.onclose = (event) => {
|
|
9220
|
+
status = "closed";
|
|
9221
|
+
callbacks?.onClose?.(event.code, event.reason);
|
|
9222
|
+
};
|
|
9223
|
+
await new Promise((resolve, reject) => {
|
|
9224
|
+
const timeout = setTimeout(() => {
|
|
9225
|
+
reject(new Error("WebSocket connection timeout"));
|
|
9226
|
+
}, 1e4);
|
|
9227
|
+
const checkReady = () => {
|
|
9228
|
+
if (recognitionStarted) {
|
|
9229
|
+
clearTimeout(timeout);
|
|
9230
|
+
resolve();
|
|
9231
|
+
} else if (status === "closed") {
|
|
9232
|
+
clearTimeout(timeout);
|
|
9233
|
+
reject(new Error("WebSocket connection failed"));
|
|
9234
|
+
} else {
|
|
9235
|
+
setTimeout(checkReady, 100);
|
|
9236
|
+
}
|
|
9237
|
+
};
|
|
9238
|
+
checkReady();
|
|
9193
9239
|
});
|
|
9194
|
-
ws.on("close", (code, reason) => {
|
|
9195
|
-
sessionStatus = "closed";
|
|
9196
|
-
callbacks?.onClose?.(code, reason.toString());
|
|
9197
|
-
});
|
|
9198
|
-
await sessionReady;
|
|
9199
|
-
sessionStatus = "open";
|
|
9200
|
-
callbacks?.onOpen?.();
|
|
9201
9240
|
return {
|
|
9202
9241
|
id: sessionId,
|
|
9203
9242
|
provider: this.name,
|
|
9204
|
-
createdAt
|
|
9205
|
-
getStatus: () =>
|
|
9243
|
+
createdAt,
|
|
9244
|
+
getStatus: () => status,
|
|
9206
9245
|
sendAudio: async (chunk) => {
|
|
9207
|
-
if (
|
|
9208
|
-
throw new Error(
|
|
9209
|
-
}
|
|
9210
|
-
if (ws.readyState !== WebSocket6.OPEN) {
|
|
9211
|
-
throw new Error("WebSocket is not open");
|
|
9246
|
+
if (status !== "open") {
|
|
9247
|
+
throw new Error("Session is not open");
|
|
9212
9248
|
}
|
|
9213
9249
|
if (callbacks?.onRawMessage) {
|
|
9214
9250
|
const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
|
|
@@ -9224,12 +9260,11 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
9224
9260
|
});
|
|
9225
9261
|
}
|
|
9226
9262
|
ws.send(chunk.data);
|
|
9227
|
-
|
|
9228
|
-
|
|
9229
|
-
|
|
9230
|
-
|
|
9231
|
-
|
|
9232
|
-
});
|
|
9263
|
+
},
|
|
9264
|
+
close: async () => {
|
|
9265
|
+
if (status === "open") {
|
|
9266
|
+
status = "closing";
|
|
9267
|
+
const endMsg = JSON.stringify({ message: "EndOfStream", last_seq_no: 0 });
|
|
9233
9268
|
if (callbacks?.onRawMessage) {
|
|
9234
9269
|
callbacks.onRawMessage({
|
|
9235
9270
|
provider: this.name,
|
|
@@ -9241,144 +9276,19 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
9241
9276
|
}
|
|
9242
9277
|
ws.send(endMsg);
|
|
9243
9278
|
}
|
|
9244
|
-
},
|
|
9245
|
-
close: async () => {
|
|
9246
|
-
if (sessionStatus === "closed" || sessionStatus === "closing") {
|
|
9247
|
-
return;
|
|
9248
|
-
}
|
|
9249
|
-
sessionStatus = "closing";
|
|
9250
|
-
if (ws.readyState === WebSocket6.OPEN) {
|
|
9251
|
-
seqNo++;
|
|
9252
|
-
ws.send(
|
|
9253
|
-
JSON.stringify({
|
|
9254
|
-
message: "EndOfStream",
|
|
9255
|
-
last_seq_no: seqNo
|
|
9256
|
-
})
|
|
9257
|
-
);
|
|
9258
|
-
}
|
|
9259
|
-
return new Promise((resolve) => {
|
|
9260
|
-
const timeout = setTimeout(() => {
|
|
9261
|
-
ws.terminate();
|
|
9262
|
-
sessionStatus = "closed";
|
|
9263
|
-
resolve();
|
|
9264
|
-
}, 5e3);
|
|
9265
|
-
const onMsg = (data) => {
|
|
9266
|
-
try {
|
|
9267
|
-
const msg = JSON.parse(data.toString());
|
|
9268
|
-
if (msg.message === "EndOfTranscript") {
|
|
9269
|
-
ws.removeListener("message", onMsg);
|
|
9270
|
-
clearTimeout(timeout);
|
|
9271
|
-
ws.close();
|
|
9272
|
-
}
|
|
9273
|
-
} catch {
|
|
9274
|
-
}
|
|
9275
|
-
};
|
|
9276
|
-
ws.on("message", onMsg);
|
|
9277
|
-
ws.once("close", () => {
|
|
9278
|
-
clearTimeout(timeout);
|
|
9279
|
-
sessionStatus = "closed";
|
|
9280
|
-
resolve();
|
|
9281
|
-
});
|
|
9282
|
-
});
|
|
9283
9279
|
}
|
|
9284
9280
|
};
|
|
9285
9281
|
}
|
|
9286
9282
|
/**
|
|
9287
|
-
*
|
|
9288
|
-
*/
|
|
9289
|
-
handleStreamingMessage(message, callbacks, utteranceResults) {
|
|
9290
|
-
switch (message.message) {
|
|
9291
|
-
case "RecognitionStarted": {
|
|
9292
|
-
break;
|
|
9293
|
-
}
|
|
9294
|
-
case "AddPartialTranscript": {
|
|
9295
|
-
const results = message.results || [];
|
|
9296
|
-
const text = buildTextFromSpeechmaticsResults(results);
|
|
9297
|
-
if (text) {
|
|
9298
|
-
callbacks?.onTranscript?.({
|
|
9299
|
-
type: "transcript",
|
|
9300
|
-
text,
|
|
9301
|
-
isFinal: false,
|
|
9302
|
-
words: this.extractWordsFromResults(results),
|
|
9303
|
-
data: message
|
|
9304
|
-
});
|
|
9305
|
-
}
|
|
9306
|
-
break;
|
|
9307
|
-
}
|
|
9308
|
-
case "AddTranscript": {
|
|
9309
|
-
const results = message.results || [];
|
|
9310
|
-
const text = buildTextFromSpeechmaticsResults(results);
|
|
9311
|
-
if (utteranceResults) {
|
|
9312
|
-
utteranceResults.push(...results);
|
|
9313
|
-
}
|
|
9314
|
-
if (text) {
|
|
9315
|
-
callbacks?.onTranscript?.({
|
|
9316
|
-
type: "transcript",
|
|
9317
|
-
text,
|
|
9318
|
-
isFinal: true,
|
|
9319
|
-
words: this.extractWordsFromResults(results),
|
|
9320
|
-
data: message
|
|
9321
|
-
});
|
|
9322
|
-
}
|
|
9323
|
-
break;
|
|
9324
|
-
}
|
|
9325
|
-
case "EndOfUtterance": {
|
|
9326
|
-
if (utteranceResults && utteranceResults.length > 0) {
|
|
9327
|
-
const text = buildTextFromSpeechmaticsResults(utteranceResults);
|
|
9328
|
-
const words = this.extractWordsFromResults(utteranceResults);
|
|
9329
|
-
const utterances = buildUtterancesFromWords(words);
|
|
9330
|
-
if (utterances.length > 0) {
|
|
9331
|
-
for (const utt of utterances) {
|
|
9332
|
-
callbacks?.onUtterance?.(utt);
|
|
9333
|
-
}
|
|
9334
|
-
} else if (text) {
|
|
9335
|
-
callbacks?.onUtterance?.({
|
|
9336
|
-
text,
|
|
9337
|
-
start: words.length > 0 ? words[0].start : 0,
|
|
9338
|
-
end: words.length > 0 ? words[words.length - 1].end : 0,
|
|
9339
|
-
words
|
|
9340
|
-
});
|
|
9341
|
-
}
|
|
9342
|
-
utteranceResults.length = 0;
|
|
9343
|
-
}
|
|
9344
|
-
break;
|
|
9345
|
-
}
|
|
9346
|
-
case "AudioAdded": {
|
|
9347
|
-
break;
|
|
9348
|
-
}
|
|
9349
|
-
case "EndOfTranscript": {
|
|
9350
|
-
break;
|
|
9351
|
-
}
|
|
9352
|
-
case "Info":
|
|
9353
|
-
case "Warning": {
|
|
9354
|
-
callbacks?.onMetadata?.(message);
|
|
9355
|
-
break;
|
|
9356
|
-
}
|
|
9357
|
-
case "Error": {
|
|
9358
|
-
const errMsg = message;
|
|
9359
|
-
callbacks?.onError?.({
|
|
9360
|
-
code: errMsg.type || "SPEECHMATICS_ERROR",
|
|
9361
|
-
message: errMsg.reason || "Unknown error",
|
|
9362
|
-
details: message
|
|
9363
|
-
});
|
|
9364
|
-
break;
|
|
9365
|
-
}
|
|
9366
|
-
default: {
|
|
9367
|
-
callbacks?.onMetadata?.(message);
|
|
9368
|
-
break;
|
|
9369
|
-
}
|
|
9370
|
-
}
|
|
9371
|
-
}
|
|
9372
|
-
/**
|
|
9373
|
-
* Extract unified Word[] from Speechmatics recognition results
|
|
9283
|
+
* Convert Speechmatics RecognitionResult[] to unified Word[]
|
|
9374
9284
|
*/
|
|
9375
|
-
|
|
9376
|
-
return results.filter((r) => r.type === "word"
|
|
9377
|
-
word:
|
|
9378
|
-
start:
|
|
9379
|
-
end:
|
|
9380
|
-
confidence:
|
|
9381
|
-
speaker:
|
|
9285
|
+
resultsToWords(results) {
|
|
9286
|
+
return results.filter((r) => r.type === "word").map((r) => ({
|
|
9287
|
+
word: r.alternatives?.[0]?.content || "",
|
|
9288
|
+
start: r.start_time,
|
|
9289
|
+
end: r.end_time,
|
|
9290
|
+
confidence: r.alternatives?.[0]?.confidence,
|
|
9291
|
+
speaker: r.alternatives?.[0]?.speaker
|
|
9382
9292
|
}));
|
|
9383
9293
|
}
|
|
9384
9294
|
/**
|
|
@@ -9449,9 +9359,6 @@ function createSpeechmaticsAdapter(config) {
|
|
|
9449
9359
|
return adapter;
|
|
9450
9360
|
}
|
|
9451
9361
|
|
|
9452
|
-
// src/adapters/soniox-adapter.ts
|
|
9453
|
-
import axios9 from "axios";
|
|
9454
|
-
|
|
9455
9362
|
// src/generated/soniox/schema/transcriptionStatus.ts
|
|
9456
9363
|
var TranscriptionStatus = {
|
|
9457
9364
|
queued: "queued",
|
|
@@ -9460,6 +9367,57 @@ var TranscriptionStatus = {
|
|
|
9460
9367
|
error: "error"
|
|
9461
9368
|
};
|
|
9462
9369
|
|
|
9370
|
+
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9371
|
+
import axios9 from "axios";
|
|
9372
|
+
|
|
9373
|
+
// src/generated/soniox/schema/index.ts
|
|
9374
|
+
var schema_exports4 = {};
|
|
9375
|
+
__export(schema_exports4, {
|
|
9376
|
+
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
9377
|
+
TranscriptionMode: () => TranscriptionMode,
|
|
9378
|
+
TranscriptionStatus: () => TranscriptionStatus,
|
|
9379
|
+
TranslationConfigType: () => TranslationConfigType
|
|
9380
|
+
});
|
|
9381
|
+
|
|
9382
|
+
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
9383
|
+
var TemporaryApiKeyUsageType = {
|
|
9384
|
+
transcribe_websocket: "transcribe_websocket"
|
|
9385
|
+
};
|
|
9386
|
+
|
|
9387
|
+
// src/generated/soniox/schema/transcriptionMode.ts
|
|
9388
|
+
var TranscriptionMode = {
|
|
9389
|
+
real_time: "real_time",
|
|
9390
|
+
async: "async"
|
|
9391
|
+
};
|
|
9392
|
+
|
|
9393
|
+
// src/generated/soniox/schema/translationConfigType.ts
|
|
9394
|
+
var TranslationConfigType = {
|
|
9395
|
+
one_way: "one_way",
|
|
9396
|
+
two_way: "two_way"
|
|
9397
|
+
};
|
|
9398
|
+
|
|
9399
|
+
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9400
|
+
var uploadFile = (uploadFileBody2, options) => {
|
|
9401
|
+
const formData = new FormData();
|
|
9402
|
+
if (uploadFileBody2.client_reference_id !== void 0 && uploadFileBody2.client_reference_id !== null) {
|
|
9403
|
+
formData.append("client_reference_id", uploadFileBody2.client_reference_id);
|
|
9404
|
+
}
|
|
9405
|
+
formData.append("file", uploadFileBody2.file);
|
|
9406
|
+
return axios9.post("/v1/files", formData, options);
|
|
9407
|
+
};
|
|
9408
|
+
var createTranscription2 = (createTranscriptionPayload, options) => {
|
|
9409
|
+
return axios9.post("/v1/transcriptions", createTranscriptionPayload, options);
|
|
9410
|
+
};
|
|
9411
|
+
var getTranscription = (transcriptionId, options) => {
|
|
9412
|
+
return axios9.get(`/v1/transcriptions/${transcriptionId}`, options);
|
|
9413
|
+
};
|
|
9414
|
+
var getTranscriptionTranscript = (transcriptionId, options) => {
|
|
9415
|
+
return axios9.get(`/v1/transcriptions/${transcriptionId}/transcript`, options);
|
|
9416
|
+
};
|
|
9417
|
+
var getModels = (options) => {
|
|
9418
|
+
return axios9.get("/v1/models", options);
|
|
9419
|
+
};
|
|
9420
|
+
|
|
9463
9421
|
// src/adapters/soniox-adapter.ts
|
|
9464
9422
|
var SonioxAdapter = class extends BaseAdapter {
|
|
9465
9423
|
constructor() {
|
|
@@ -9514,11 +9472,17 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9514
9472
|
}
|
|
9515
9473
|
}
|
|
9516
9474
|
/**
|
|
9517
|
-
* Get the base URL for API requests
|
|
9475
|
+
* Get the base URL for API requests (no /v1 suffix — generated functions include /v1 in paths)
|
|
9518
9476
|
*/
|
|
9519
9477
|
get baseUrl() {
|
|
9520
9478
|
if (this.config?.baseUrl) return this.config.baseUrl;
|
|
9521
|
-
return `https://${this.getRegionalHost()}
|
|
9479
|
+
return `https://${this.getRegionalHost()}`;
|
|
9480
|
+
}
|
|
9481
|
+
/**
|
|
9482
|
+
* Build axios config with Soniox Bearer auth
|
|
9483
|
+
*/
|
|
9484
|
+
getAxiosConfig() {
|
|
9485
|
+
return super.getAxiosConfig("Authorization", (key) => `Bearer ${key}`);
|
|
9522
9486
|
}
|
|
9523
9487
|
initialize(config) {
|
|
9524
9488
|
super.initialize(config);
|
|
@@ -9528,15 +9492,6 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9528
9492
|
if (config.model) {
|
|
9529
9493
|
this.defaultModel = config.model;
|
|
9530
9494
|
}
|
|
9531
|
-
this.client = axios9.create({
|
|
9532
|
-
baseURL: this.baseUrl,
|
|
9533
|
-
timeout: config.timeout || 12e4,
|
|
9534
|
-
headers: {
|
|
9535
|
-
Authorization: `Bearer ${config.apiKey}`,
|
|
9536
|
-
"Content-Type": "application/json",
|
|
9537
|
-
...config.headers
|
|
9538
|
-
}
|
|
9539
|
-
});
|
|
9540
9495
|
}
|
|
9541
9496
|
/**
|
|
9542
9497
|
* Get current region
|
|
@@ -9566,23 +9521,12 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9566
9521
|
*/
|
|
9567
9522
|
setRegion(region) {
|
|
9568
9523
|
this.region = region;
|
|
9569
|
-
if (this.config?.apiKey) {
|
|
9570
|
-
this.client = axios9.create({
|
|
9571
|
-
baseURL: this.baseUrl,
|
|
9572
|
-
timeout: this.config.timeout || 12e4,
|
|
9573
|
-
headers: {
|
|
9574
|
-
Authorization: `Bearer ${this.config.apiKey}`,
|
|
9575
|
-
"Content-Type": "application/json",
|
|
9576
|
-
...this.config.headers
|
|
9577
|
-
}
|
|
9578
|
-
});
|
|
9579
|
-
}
|
|
9580
9524
|
}
|
|
9581
9525
|
/**
|
|
9582
9526
|
* Submit audio for transcription
|
|
9583
9527
|
*
|
|
9584
|
-
*
|
|
9585
|
-
*
|
|
9528
|
+
* Uses the async v1 API: createTranscription returns status `queued`,
|
|
9529
|
+
* then polls until completed (or returns immediately if webhook is set).
|
|
9586
9530
|
*
|
|
9587
9531
|
* @param audio - Audio input (URL or file)
|
|
9588
9532
|
* @param options - Transcription options
|
|
@@ -9591,21 +9535,44 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9591
9535
|
async transcribe(audio, options) {
|
|
9592
9536
|
this.validateConfig();
|
|
9593
9537
|
try {
|
|
9594
|
-
const
|
|
9595
|
-
|
|
9596
|
-
};
|
|
9597
|
-
if (audio.type === "url") {
|
|
9598
|
-
requestBody.audio_url = audio.url;
|
|
9599
|
-
} else if (audio.type === "file") {
|
|
9600
|
-
const formData = new FormData();
|
|
9538
|
+
const sonioxOpts = options?.soniox;
|
|
9539
|
+
if (audio.type === "file") {
|
|
9601
9540
|
const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
|
|
9602
|
-
|
|
9603
|
-
const
|
|
9604
|
-
|
|
9605
|
-
|
|
9606
|
-
|
|
9607
|
-
|
|
9608
|
-
|
|
9541
|
+
const uploadBody = { file: audioBlob };
|
|
9542
|
+
const fileResp = await uploadFile(uploadBody, this.getAxiosConfig());
|
|
9543
|
+
const payload = {
|
|
9544
|
+
...sonioxOpts,
|
|
9545
|
+
model: options?.model || this.defaultModel,
|
|
9546
|
+
file_id: fileResp.data.id,
|
|
9547
|
+
language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
|
|
9548
|
+
enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
|
|
9549
|
+
enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
|
|
9550
|
+
context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
|
|
9551
|
+
webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
|
|
9552
|
+
};
|
|
9553
|
+
const createResp = await createTranscription2(payload, this.getAxiosConfig());
|
|
9554
|
+
const meta = createResp.data;
|
|
9555
|
+
if (options?.webhookUrl || sonioxOpts?.webhook_url) {
|
|
9556
|
+
return this.normalizeTranscription(meta);
|
|
9557
|
+
}
|
|
9558
|
+
return this.pollForCompletion(meta.id);
|
|
9559
|
+
} else if (audio.type === "url") {
|
|
9560
|
+
const payload = {
|
|
9561
|
+
...sonioxOpts,
|
|
9562
|
+
model: options?.model || this.defaultModel,
|
|
9563
|
+
audio_url: audio.url,
|
|
9564
|
+
language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
|
|
9565
|
+
enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
|
|
9566
|
+
enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
|
|
9567
|
+
context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
|
|
9568
|
+
webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
|
|
9569
|
+
};
|
|
9570
|
+
const createResp = await createTranscription2(payload, this.getAxiosConfig());
|
|
9571
|
+
const meta = createResp.data;
|
|
9572
|
+
if (options?.webhookUrl || sonioxOpts?.webhook_url) {
|
|
9573
|
+
return this.normalizeTranscription(meta);
|
|
9574
|
+
}
|
|
9575
|
+
return this.pollForCompletion(meta.id);
|
|
9609
9576
|
} else {
|
|
9610
9577
|
return {
|
|
9611
9578
|
success: false,
|
|
@@ -9616,38 +9583,6 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9616
9583
|
}
|
|
9617
9584
|
};
|
|
9618
9585
|
}
|
|
9619
|
-
if (options?.language) {
|
|
9620
|
-
requestBody.language_hints = [options.language];
|
|
9621
|
-
}
|
|
9622
|
-
if (options?.diarization) {
|
|
9623
|
-
requestBody.enable_speaker_diarization = true;
|
|
9624
|
-
}
|
|
9625
|
-
if (options?.languageDetection) {
|
|
9626
|
-
requestBody.enable_language_identification = true;
|
|
9627
|
-
}
|
|
9628
|
-
if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9629
|
-
requestBody.context = {
|
|
9630
|
-
terms: options.customVocabulary
|
|
9631
|
-
};
|
|
9632
|
-
}
|
|
9633
|
-
if (options?.webhookUrl) {
|
|
9634
|
-
requestBody.webhook_url = options.webhookUrl;
|
|
9635
|
-
}
|
|
9636
|
-
const response = await this.client.post("/transcriptions", requestBody);
|
|
9637
|
-
const transcriptionId = response.data.id;
|
|
9638
|
-
if (options?.webhookUrl) {
|
|
9639
|
-
return {
|
|
9640
|
-
success: true,
|
|
9641
|
-
provider: this.name,
|
|
9642
|
-
data: {
|
|
9643
|
-
id: transcriptionId,
|
|
9644
|
-
text: "",
|
|
9645
|
-
status: "queued"
|
|
9646
|
-
},
|
|
9647
|
-
raw: response.data
|
|
9648
|
-
};
|
|
9649
|
-
}
|
|
9650
|
-
return await this.pollForCompletion(transcriptionId);
|
|
9651
9586
|
} catch (error) {
|
|
9652
9587
|
return this.createErrorResponse(error);
|
|
9653
9588
|
}
|
|
@@ -9655,9 +9590,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9655
9590
|
/**
|
|
9656
9591
|
* Get transcription result by ID
|
|
9657
9592
|
*
|
|
9658
|
-
*
|
|
9659
|
-
*
|
|
9660
|
-
* when completed.
|
|
9593
|
+
* Fetches transcription metadata and, if completed, the transcript text/tokens.
|
|
9594
|
+
* Used by pollForCompletion() for async polling.
|
|
9661
9595
|
*
|
|
9662
9596
|
* @param transcriptId - Transcript ID
|
|
9663
9597
|
* @returns Transcription response
|
|
@@ -9665,39 +9599,20 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9665
9599
|
async getTranscript(transcriptId) {
|
|
9666
9600
|
this.validateConfig();
|
|
9667
9601
|
try {
|
|
9668
|
-
const
|
|
9669
|
-
const
|
|
9670
|
-
if (
|
|
9671
|
-
|
|
9672
|
-
|
|
9673
|
-
|
|
9674
|
-
|
|
9675
|
-
|
|
9676
|
-
|
|
9677
|
-
|
|
9678
|
-
|
|
9679
|
-
|
|
9680
|
-
if (job.status !== "completed") {
|
|
9681
|
-
return {
|
|
9682
|
-
success: true,
|
|
9683
|
-
provider: this.name,
|
|
9684
|
-
data: {
|
|
9685
|
-
id: job.id,
|
|
9686
|
-
text: "",
|
|
9687
|
-
status: job.status
|
|
9688
|
-
},
|
|
9689
|
-
raw: job
|
|
9690
|
-
};
|
|
9602
|
+
const metaResp = await getTranscription(transcriptId, this.getAxiosConfig());
|
|
9603
|
+
const meta = metaResp.data;
|
|
9604
|
+
if (meta.status === TranscriptionStatus.completed) {
|
|
9605
|
+
try {
|
|
9606
|
+
const transcriptResp = await getTranscriptionTranscript(
|
|
9607
|
+
transcriptId,
|
|
9608
|
+
this.getAxiosConfig()
|
|
9609
|
+
);
|
|
9610
|
+
return this.normalizeTranscription(meta, transcriptResp.data);
|
|
9611
|
+
} catch (transcriptError) {
|
|
9612
|
+
return this.createErrorResponse(transcriptError);
|
|
9613
|
+
}
|
|
9691
9614
|
}
|
|
9692
|
-
|
|
9693
|
-
`/transcriptions/${transcriptId}/transcript`
|
|
9694
|
-
);
|
|
9695
|
-
return this.normalizeResponse({
|
|
9696
|
-
...transcriptResponse.data,
|
|
9697
|
-
// Carry over job metadata
|
|
9698
|
-
id: job.id,
|
|
9699
|
-
audio_duration_ms: job.audio_duration_ms
|
|
9700
|
-
});
|
|
9615
|
+
return this.normalizeTranscription(meta);
|
|
9701
9616
|
} catch (error) {
|
|
9702
9617
|
return this.createErrorResponse(error);
|
|
9703
9618
|
}
|
|
@@ -9717,51 +9632,50 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9717
9632
|
const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
|
|
9718
9633
|
const createdAt = /* @__PURE__ */ new Date();
|
|
9719
9634
|
const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
|
|
9720
|
-
const wsUrl = `${wsBase}/transcribe-websocket
|
|
9721
|
-
|
|
9722
|
-
const
|
|
9723
|
-
|
|
9724
|
-
|
|
9725
|
-
model: modelId
|
|
9726
|
-
};
|
|
9727
|
-
if (sonioxOpts?.audioFormat) {
|
|
9728
|
-
initMessage.audio_format = sonioxOpts.audioFormat;
|
|
9729
|
-
} else if (options?.encoding) {
|
|
9635
|
+
const wsUrl = new URL(`${wsBase}/transcribe-websocket`);
|
|
9636
|
+
wsUrl.searchParams.set("api_key", this.config.apiKey);
|
|
9637
|
+
const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-preview";
|
|
9638
|
+
wsUrl.searchParams.set("model", modelId);
|
|
9639
|
+
if (options?.encoding) {
|
|
9730
9640
|
const encodingMap = {
|
|
9731
9641
|
linear16: "pcm_s16le",
|
|
9732
9642
|
pcm: "pcm_s16le",
|
|
9733
9643
|
mulaw: "mulaw",
|
|
9734
9644
|
alaw: "alaw"
|
|
9735
9645
|
};
|
|
9736
|
-
|
|
9646
|
+
wsUrl.searchParams.set("audio_format", encodingMap[options.encoding] || options.encoding);
|
|
9737
9647
|
}
|
|
9738
|
-
if (
|
|
9739
|
-
|
|
9648
|
+
if (options?.sampleRate) {
|
|
9649
|
+
wsUrl.searchParams.set("sample_rate", options.sampleRate.toString());
|
|
9740
9650
|
}
|
|
9741
|
-
if (
|
|
9742
|
-
|
|
9651
|
+
if (options?.channels) {
|
|
9652
|
+
wsUrl.searchParams.set("num_channels", options.channels.toString());
|
|
9743
9653
|
}
|
|
9654
|
+
const sonioxOpts = options?.sonioxStreaming;
|
|
9744
9655
|
if (sonioxOpts) {
|
|
9745
9656
|
if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
|
|
9746
|
-
|
|
9657
|
+
wsUrl.searchParams.set("language_hints", JSON.stringify(sonioxOpts.languageHints));
|
|
9747
9658
|
}
|
|
9748
9659
|
if (sonioxOpts.enableLanguageIdentification) {
|
|
9749
|
-
|
|
9660
|
+
wsUrl.searchParams.set("enable_language_identification", "true");
|
|
9750
9661
|
}
|
|
9751
9662
|
if (sonioxOpts.enableEndpointDetection) {
|
|
9752
|
-
|
|
9663
|
+
wsUrl.searchParams.set("enable_endpoint_detection", "true");
|
|
9753
9664
|
}
|
|
9754
9665
|
if (sonioxOpts.enableSpeakerDiarization) {
|
|
9755
|
-
|
|
9666
|
+
wsUrl.searchParams.set("enable_speaker_diarization", "true");
|
|
9756
9667
|
}
|
|
9757
9668
|
if (sonioxOpts.context) {
|
|
9758
|
-
|
|
9669
|
+
wsUrl.searchParams.set(
|
|
9670
|
+
"context",
|
|
9671
|
+
typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
|
|
9672
|
+
);
|
|
9759
9673
|
}
|
|
9760
9674
|
if (sonioxOpts.translation) {
|
|
9761
|
-
|
|
9675
|
+
wsUrl.searchParams.set("translation", JSON.stringify(sonioxOpts.translation));
|
|
9762
9676
|
}
|
|
9763
9677
|
if (sonioxOpts.clientReferenceId) {
|
|
9764
|
-
|
|
9678
|
+
wsUrl.searchParams.set("client_reference_id", sonioxOpts.clientReferenceId);
|
|
9765
9679
|
}
|
|
9766
9680
|
}
|
|
9767
9681
|
if (!sonioxOpts?.languageHints && options?.language) {
|
|
@@ -9770,33 +9684,24 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9770
9684
|
`[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
|
|
9771
9685
|
);
|
|
9772
9686
|
}
|
|
9773
|
-
|
|
9687
|
+
wsUrl.searchParams.set("language_hints", JSON.stringify([options.language]));
|
|
9774
9688
|
}
|
|
9775
9689
|
if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
|
|
9776
|
-
|
|
9690
|
+
wsUrl.searchParams.set("enable_speaker_diarization", "true");
|
|
9777
9691
|
}
|
|
9778
9692
|
if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
|
|
9779
|
-
|
|
9693
|
+
wsUrl.searchParams.set("enable_language_identification", "true");
|
|
9694
|
+
}
|
|
9695
|
+
if (options?.interimResults !== false) {
|
|
9780
9696
|
}
|
|
9781
9697
|
let status = "connecting";
|
|
9782
9698
|
let openedAt = null;
|
|
9783
9699
|
let receivedData = false;
|
|
9784
9700
|
const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : __require("ws");
|
|
9785
|
-
const ws = new WebSocketImpl(wsUrl);
|
|
9701
|
+
const ws = new WebSocketImpl(wsUrl.toString());
|
|
9786
9702
|
ws.onopen = () => {
|
|
9787
|
-
openedAt = Date.now();
|
|
9788
|
-
const initPayload = JSON.stringify(initMessage);
|
|
9789
|
-
if (callbacks?.onRawMessage) {
|
|
9790
|
-
callbacks.onRawMessage({
|
|
9791
|
-
provider: this.name,
|
|
9792
|
-
direction: "outgoing",
|
|
9793
|
-
timestamp: Date.now(),
|
|
9794
|
-
payload: initPayload,
|
|
9795
|
-
messageType: "init"
|
|
9796
|
-
});
|
|
9797
|
-
}
|
|
9798
|
-
ws.send(initPayload);
|
|
9799
9703
|
status = "open";
|
|
9704
|
+
openedAt = Date.now();
|
|
9800
9705
|
callbacks?.onOpen?.();
|
|
9801
9706
|
};
|
|
9802
9707
|
ws.onmessage = (event) => {
|
|
@@ -9805,7 +9710,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9805
9710
|
let messageType;
|
|
9806
9711
|
try {
|
|
9807
9712
|
const data = JSON.parse(rawPayload);
|
|
9808
|
-
|
|
9713
|
+
const errorMessage = data.error_message;
|
|
9714
|
+
if (errorMessage) {
|
|
9809
9715
|
messageType = "error";
|
|
9810
9716
|
} else if (data.finished) {
|
|
9811
9717
|
messageType = "finished";
|
|
@@ -9821,10 +9727,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9821
9727
|
messageType
|
|
9822
9728
|
});
|
|
9823
9729
|
}
|
|
9824
|
-
if (
|
|
9730
|
+
if (errorMessage) {
|
|
9825
9731
|
callbacks?.onError?.({
|
|
9826
9732
|
code: data.error_code?.toString() || "STREAM_ERROR",
|
|
9827
|
-
message:
|
|
9733
|
+
message: errorMessage
|
|
9828
9734
|
});
|
|
9829
9735
|
return;
|
|
9830
9736
|
}
|
|
@@ -9838,7 +9744,7 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9838
9744
|
start: token.start_ms ? token.start_ms / 1e3 : 0,
|
|
9839
9745
|
end: token.end_ms ? token.end_ms / 1e3 : 0,
|
|
9840
9746
|
confidence: token.confidence,
|
|
9841
|
-
speaker: token.speaker
|
|
9747
|
+
speaker: token.speaker ?? void 0
|
|
9842
9748
|
}));
|
|
9843
9749
|
const text = data.text || data.tokens.map((t) => t.text).join("");
|
|
9844
9750
|
const isFinal = data.tokens.every((t) => t.is_final);
|
|
@@ -9847,8 +9753,8 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9847
9753
|
text,
|
|
9848
9754
|
isFinal,
|
|
9849
9755
|
words,
|
|
9850
|
-
speaker: data.tokens[0]?.speaker,
|
|
9851
|
-
language: data.tokens[0]?.language,
|
|
9756
|
+
speaker: data.tokens[0]?.speaker ?? void 0,
|
|
9757
|
+
language: data.tokens[0]?.language ?? void 0,
|
|
9852
9758
|
confidence: data.tokens[0]?.confidence
|
|
9853
9759
|
};
|
|
9854
9760
|
callbacks?.onTranscript?.(event2);
|
|
@@ -9875,10 +9781,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9875
9781
|
ws.onclose = (event) => {
|
|
9876
9782
|
status = "closed";
|
|
9877
9783
|
const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
|
|
9878
|
-
const
|
|
9879
|
-
if (
|
|
9784
|
+
const isImmediateClose = timeSinceOpen !== null && timeSinceOpen < 1e3 && !receivedData;
|
|
9785
|
+
if (isImmediateClose && event.code === 1e3) {
|
|
9880
9786
|
const errorMessage = [
|
|
9881
|
-
"Soniox closed connection
|
|
9787
|
+
"Soniox closed connection immediately after opening.",
|
|
9882
9788
|
`Current config: region=${this.region}, model=${modelId}`,
|
|
9883
9789
|
"Likely causes:",
|
|
9884
9790
|
" - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
|
|
@@ -9964,7 +9870,7 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9964
9870
|
async getModels() {
|
|
9965
9871
|
this.validateConfig();
|
|
9966
9872
|
try {
|
|
9967
|
-
const response = await this.
|
|
9873
|
+
const response = await getModels(this.getAxiosConfig());
|
|
9968
9874
|
return response.data.models || [];
|
|
9969
9875
|
} catch (error) {
|
|
9970
9876
|
console.error("Failed to fetch Soniox models:", error);
|
|
@@ -9996,11 +9902,44 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9996
9902
|
return buildUtterancesFromWords(words);
|
|
9997
9903
|
}
|
|
9998
9904
|
/**
|
|
9999
|
-
* Normalize
|
|
9905
|
+
* Normalize v1 API response to unified format
|
|
9906
|
+
*
|
|
9907
|
+
* @param meta - Transcription metadata from getTranscription/createTranscription
|
|
9908
|
+
* @param transcript - Transcript data (text/tokens), only present when status is completed
|
|
10000
9909
|
*/
|
|
10001
|
-
|
|
10002
|
-
|
|
10003
|
-
|
|
9910
|
+
normalizeTranscription(meta, transcript) {
|
|
9911
|
+
if (meta.status === TranscriptionStatus.error) {
|
|
9912
|
+
return {
|
|
9913
|
+
success: false,
|
|
9914
|
+
provider: this.name,
|
|
9915
|
+
data: {
|
|
9916
|
+
id: meta.id,
|
|
9917
|
+
text: "",
|
|
9918
|
+
status: "error"
|
|
9919
|
+
},
|
|
9920
|
+
error: {
|
|
9921
|
+
code: meta.error_type || "TRANSCRIPTION_ERROR",
|
|
9922
|
+
message: meta.error_message || "Transcription failed"
|
|
9923
|
+
},
|
|
9924
|
+
raw: { meta, transcript }
|
|
9925
|
+
};
|
|
9926
|
+
}
|
|
9927
|
+
if (!transcript) {
|
|
9928
|
+
return {
|
|
9929
|
+
success: true,
|
|
9930
|
+
provider: this.name,
|
|
9931
|
+
data: {
|
|
9932
|
+
id: meta.id,
|
|
9933
|
+
text: "",
|
|
9934
|
+
status: meta.status,
|
|
9935
|
+
duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0
|
|
9936
|
+
},
|
|
9937
|
+
raw: { meta }
|
|
9938
|
+
};
|
|
9939
|
+
}
|
|
9940
|
+
const tokens = transcript.tokens || [];
|
|
9941
|
+
const text = transcript.text || tokens.map((t) => t.text).join("");
|
|
9942
|
+
const words = tokens.filter((t) => t.start_ms !== void 0 && t.end_ms !== void 0).map((token) => ({
|
|
10004
9943
|
word: token.text,
|
|
10005
9944
|
start: token.start_ms / 1e3,
|
|
10006
9945
|
end: token.end_ms / 1e3,
|
|
@@ -10008,33 +9947,32 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
10008
9947
|
speaker: token.speaker ?? void 0
|
|
10009
9948
|
}));
|
|
10010
9949
|
const speakerSet = /* @__PURE__ */ new Set();
|
|
10011
|
-
|
|
10012
|
-
if (
|
|
10013
|
-
}
|
|
9950
|
+
tokens.forEach((t) => {
|
|
9951
|
+
if (t.speaker) speakerSet.add(String(t.speaker));
|
|
9952
|
+
});
|
|
10014
9953
|
const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
|
|
10015
9954
|
id,
|
|
10016
9955
|
label: `Speaker ${id}`
|
|
10017
9956
|
})) : void 0;
|
|
10018
|
-
const utterances =
|
|
9957
|
+
const utterances = this.buildUtterancesFromTokens(tokens);
|
|
10019
9958
|
const language = tokens.find((t) => t.language)?.language ?? void 0;
|
|
10020
9959
|
return {
|
|
10021
9960
|
success: true,
|
|
10022
9961
|
provider: this.name,
|
|
10023
9962
|
data: {
|
|
10024
|
-
id:
|
|
9963
|
+
id: meta.id,
|
|
10025
9964
|
text,
|
|
10026
9965
|
status: TranscriptionStatus.completed,
|
|
10027
9966
|
language,
|
|
10028
|
-
duration:
|
|
9967
|
+
duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0,
|
|
10029
9968
|
speakers,
|
|
10030
9969
|
words: words.length > 0 ? words : void 0,
|
|
10031
9970
|
utterances: utterances.length > 0 ? utterances : void 0
|
|
10032
9971
|
},
|
|
10033
9972
|
tracking: {
|
|
10034
|
-
requestId:
|
|
10035
|
-
processingTimeMs: response.total_audio_proc_ms
|
|
9973
|
+
requestId: meta.id
|
|
10036
9974
|
},
|
|
10037
|
-
raw:
|
|
9975
|
+
raw: { meta, transcript }
|
|
10038
9976
|
};
|
|
10039
9977
|
}
|
|
10040
9978
|
};
|
|
@@ -10131,7 +10069,15 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10131
10069
|
/**
|
|
10132
10070
|
* Submit audio for transcription
|
|
10133
10071
|
*
|
|
10134
|
-
* ElevenLabs batch is synchronous
|
|
10072
|
+
* ElevenLabs batch is normally synchronous — the API returns results directly.
|
|
10073
|
+
*
|
|
10074
|
+
* **Webhook mode:** When `webhookUrl` is set (or `elevenlabs.webhook` is true),
|
|
10075
|
+
* the request is processed asynchronously. ElevenLabs returns a 202 with a
|
|
10076
|
+
* `request_id` and delivers results to a webhook configured in the ElevenLabs
|
|
10077
|
+
* dashboard. The unified `webhookUrl` acts as an intent flag to enable async
|
|
10078
|
+
* mode — the actual delivery destination must be pre-configured in your
|
|
10079
|
+
* ElevenLabs dashboard. Use `elevenlabs.webhook_id` to target a specific
|
|
10080
|
+
* webhook endpoint.
|
|
10135
10081
|
*/
|
|
10136
10082
|
async transcribe(audio, options) {
|
|
10137
10083
|
this.validateConfig();
|
|
@@ -10154,6 +10100,11 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10154
10100
|
}
|
|
10155
10101
|
};
|
|
10156
10102
|
}
|
|
10103
|
+
const elevenlabsOpts = options?.elevenlabs;
|
|
10104
|
+
const useWebhook = options?.webhookUrl || elevenlabsOpts?.webhook;
|
|
10105
|
+
if (useWebhook) {
|
|
10106
|
+
formData.append("webhook", "true");
|
|
10107
|
+
}
|
|
10157
10108
|
if (options?.language) {
|
|
10158
10109
|
formData.append("language_code", options.language);
|
|
10159
10110
|
}
|
|
@@ -10172,7 +10123,6 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10172
10123
|
if (options?.entityDetection) {
|
|
10173
10124
|
formData.append("entity_detection", "all");
|
|
10174
10125
|
}
|
|
10175
|
-
const elevenlabsOpts = options?.elevenlabs;
|
|
10176
10126
|
if (elevenlabsOpts) {
|
|
10177
10127
|
for (const [key, value] of Object.entries(elevenlabsOpts)) {
|
|
10178
10128
|
if (value === void 0 || value === null) continue;
|
|
@@ -10190,26 +10140,24 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10190
10140
|
}
|
|
10191
10141
|
}
|
|
10192
10142
|
}
|
|
10193
|
-
if (options?.webhookUrl) {
|
|
10194
|
-
if (!formData.has("webhook")) {
|
|
10195
|
-
formData.append("webhook", "true");
|
|
10196
|
-
}
|
|
10197
|
-
}
|
|
10198
10143
|
const response = await this.client.post("/v1/speech-to-text", formData, {
|
|
10199
10144
|
headers: {
|
|
10200
10145
|
"Content-Type": "multipart/form-data"
|
|
10201
10146
|
}
|
|
10202
10147
|
});
|
|
10203
|
-
if (
|
|
10204
|
-
const
|
|
10148
|
+
if (useWebhook) {
|
|
10149
|
+
const ack = response.data;
|
|
10205
10150
|
return {
|
|
10206
10151
|
success: true,
|
|
10207
10152
|
provider: this.name,
|
|
10208
10153
|
data: {
|
|
10209
|
-
id:
|
|
10154
|
+
id: ack.request_id || ack.transcription_id || `elevenlabs_${Date.now()}`,
|
|
10210
10155
|
text: "",
|
|
10211
10156
|
status: "queued"
|
|
10212
10157
|
},
|
|
10158
|
+
tracking: {
|
|
10159
|
+
requestId: ack.request_id
|
|
10160
|
+
},
|
|
10213
10161
|
raw: response.data
|
|
10214
10162
|
};
|
|
10215
10163
|
}
|
|
@@ -10305,20 +10253,9 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10305
10253
|
ws.onmessage = (event) => {
|
|
10306
10254
|
receivedData = true;
|
|
10307
10255
|
const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
|
|
10308
|
-
let messageType;
|
|
10309
10256
|
try {
|
|
10310
10257
|
const data = JSON.parse(rawPayload);
|
|
10311
|
-
|
|
10312
|
-
messageType = "error";
|
|
10313
|
-
} else if (data.message_type === "session_started") {
|
|
10314
|
-
messageType = "session_started";
|
|
10315
|
-
} else if (data.message_type === "partial_transcript") {
|
|
10316
|
-
messageType = "partial_transcript";
|
|
10317
|
-
} else if (data.message_type === "committed_transcript") {
|
|
10318
|
-
messageType = "committed_transcript";
|
|
10319
|
-
} else if (data.message_type === "committed_transcript_with_timestamps") {
|
|
10320
|
-
messageType = "committed_transcript_with_timestamps";
|
|
10321
|
-
}
|
|
10258
|
+
const messageType = "error" in data ? "error" : data.message_type;
|
|
10322
10259
|
if (callbacks?.onRawMessage) {
|
|
10323
10260
|
callbacks.onRawMessage({
|
|
10324
10261
|
provider: this.name,
|
|
@@ -10328,50 +10265,62 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10328
10265
|
messageType
|
|
10329
10266
|
});
|
|
10330
10267
|
}
|
|
10331
|
-
if (data
|
|
10268
|
+
if ("error" in data) {
|
|
10332
10269
|
callbacks?.onError?.({
|
|
10333
|
-
code: data.
|
|
10270
|
+
code: data.message_type || "STREAM_ERROR",
|
|
10334
10271
|
message: data.error
|
|
10335
10272
|
});
|
|
10336
10273
|
return;
|
|
10337
10274
|
}
|
|
10338
|
-
|
|
10339
|
-
|
|
10340
|
-
|
|
10341
|
-
|
|
10342
|
-
|
|
10343
|
-
|
|
10344
|
-
|
|
10345
|
-
|
|
10346
|
-
|
|
10347
|
-
|
|
10348
|
-
|
|
10349
|
-
|
|
10350
|
-
|
|
10351
|
-
|
|
10352
|
-
|
|
10353
|
-
|
|
10354
|
-
|
|
10355
|
-
|
|
10356
|
-
|
|
10357
|
-
|
|
10358
|
-
|
|
10359
|
-
|
|
10360
|
-
|
|
10361
|
-
|
|
10362
|
-
|
|
10363
|
-
|
|
10364
|
-
|
|
10365
|
-
|
|
10366
|
-
|
|
10367
|
-
|
|
10368
|
-
|
|
10369
|
-
|
|
10370
|
-
|
|
10371
|
-
|
|
10372
|
-
|
|
10373
|
-
|
|
10275
|
+
switch (data.message_type) {
|
|
10276
|
+
case "session_started":
|
|
10277
|
+
break;
|
|
10278
|
+
case "partial_transcript": {
|
|
10279
|
+
const streamEvent = {
|
|
10280
|
+
type: "transcript",
|
|
10281
|
+
text: data.text || "",
|
|
10282
|
+
isFinal: false,
|
|
10283
|
+
confidence: void 0
|
|
10284
|
+
};
|
|
10285
|
+
callbacks?.onTranscript?.(streamEvent);
|
|
10286
|
+
break;
|
|
10287
|
+
}
|
|
10288
|
+
case "committed_transcript": {
|
|
10289
|
+
const streamEvent = {
|
|
10290
|
+
type: "transcript",
|
|
10291
|
+
text: data.text || "",
|
|
10292
|
+
isFinal: true,
|
|
10293
|
+
confidence: void 0
|
|
10294
|
+
};
|
|
10295
|
+
callbacks?.onTranscript?.(streamEvent);
|
|
10296
|
+
break;
|
|
10297
|
+
}
|
|
10298
|
+
case "committed_transcript_with_timestamps": {
|
|
10299
|
+
const tsData = data;
|
|
10300
|
+
const words = tsData.words ? tsData.words.map((w) => ({
|
|
10301
|
+
word: w.text || "",
|
|
10302
|
+
start: w.start || 0,
|
|
10303
|
+
end: w.end || 0,
|
|
10304
|
+
confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
|
|
10305
|
+
speaker: w.speaker_id
|
|
10306
|
+
})) : [];
|
|
10307
|
+
const streamEvent = {
|
|
10308
|
+
type: "transcript",
|
|
10309
|
+
text: tsData.text || "",
|
|
10310
|
+
isFinal: true,
|
|
10311
|
+
words: words.length > 0 ? words : void 0,
|
|
10312
|
+
speaker: words[0]?.speaker,
|
|
10313
|
+
language: tsData.language_code,
|
|
10314
|
+
confidence: void 0
|
|
10315
|
+
};
|
|
10316
|
+
callbacks?.onTranscript?.(streamEvent);
|
|
10317
|
+
if (options?.diarization && words.length > 0) {
|
|
10318
|
+
const utterances = buildUtterancesFromWords(words);
|
|
10319
|
+
for (const utterance of utterances) {
|
|
10320
|
+
callbacks?.onUtterance?.(utterance);
|
|
10321
|
+
}
|
|
10374
10322
|
}
|
|
10323
|
+
break;
|
|
10375
10324
|
}
|
|
10376
10325
|
}
|
|
10377
10326
|
} catch (error) {
|
|
@@ -10526,7 +10475,7 @@ var ElevenLabsAdapter = class extends BaseAdapter {
|
|
|
10526
10475
|
}
|
|
10527
10476
|
}
|
|
10528
10477
|
}
|
|
10529
|
-
const transcriptionId =
|
|
10478
|
+
const transcriptionId = response.transcription_id || chunks[0]?.transcription_id || `elevenlabs_${Date.now()}`;
|
|
10530
10479
|
return {
|
|
10531
10480
|
success: true,
|
|
10532
10481
|
provider: this.name,
|
|
@@ -36444,12 +36393,10 @@ var createTemporaryApiKeyBody = zod10.object({
|
|
|
36444
36393
|
var streaming_types_zod_exports = {};
|
|
36445
36394
|
__export(streaming_types_zod_exports, {
|
|
36446
36395
|
sonioxAudioFormatSchema: () => sonioxAudioFormatSchema,
|
|
36447
|
-
sonioxAutoDetectedAudioFormatSchema: () => sonioxAutoDetectedAudioFormatSchema,
|
|
36448
36396
|
sonioxContextGeneralItemSchema: () => sonioxContextGeneralItemSchema,
|
|
36449
36397
|
sonioxContextSchema: () => sonioxContextSchema,
|
|
36450
36398
|
sonioxErrorStatusSchema: () => sonioxErrorStatusSchema,
|
|
36451
36399
|
sonioxOneWayTranslationSchema: () => sonioxOneWayTranslationSchema,
|
|
36452
|
-
sonioxPcmAudioEncodingSchema: () => sonioxPcmAudioEncodingSchema,
|
|
36453
36400
|
sonioxRealtimeModelSchema: () => sonioxRealtimeModelSchema,
|
|
36454
36401
|
sonioxRecorderStateSchema: () => sonioxRecorderStateSchema,
|
|
36455
36402
|
sonioxStreamingResponseSchema: () => sonioxStreamingResponseSchema,
|
|
@@ -36463,7 +36410,7 @@ __export(streaming_types_zod_exports, {
|
|
|
36463
36410
|
streamingUpdateConfigParams: () => streamingUpdateConfigParams3
|
|
36464
36411
|
});
|
|
36465
36412
|
import { z as zod11 } from "zod";
|
|
36466
|
-
var
|
|
36413
|
+
var sonioxAudioFormatSchema = zod11.enum([
|
|
36467
36414
|
"auto",
|
|
36468
36415
|
"aac",
|
|
36469
36416
|
"aiff",
|
|
@@ -36473,10 +36420,7 @@ var sonioxAutoDetectedAudioFormatSchema = zod11.enum([
|
|
|
36473
36420
|
"mp3",
|
|
36474
36421
|
"ogg",
|
|
36475
36422
|
"wav",
|
|
36476
|
-
"webm"
|
|
36477
|
-
]);
|
|
36478
|
-
var sonioxPcmAudioEncodingSchema = zod11.enum([
|
|
36479
|
-
// Signed PCM
|
|
36423
|
+
"webm",
|
|
36480
36424
|
"pcm_s8",
|
|
36481
36425
|
"pcm_s16le",
|
|
36482
36426
|
"pcm_s16be",
|
|
@@ -36484,7 +36428,6 @@ var sonioxPcmAudioEncodingSchema = zod11.enum([
|
|
|
36484
36428
|
"pcm_s24be",
|
|
36485
36429
|
"pcm_s32le",
|
|
36486
36430
|
"pcm_s32be",
|
|
36487
|
-
// Unsigned PCM
|
|
36488
36431
|
"pcm_u8",
|
|
36489
36432
|
"pcm_u16le",
|
|
36490
36433
|
"pcm_u16be",
|
|
@@ -36492,86 +36435,81 @@ var sonioxPcmAudioEncodingSchema = zod11.enum([
|
|
|
36492
36435
|
"pcm_u24be",
|
|
36493
36436
|
"pcm_u32le",
|
|
36494
36437
|
"pcm_u32be",
|
|
36495
|
-
// Float PCM
|
|
36496
36438
|
"pcm_f32le",
|
|
36497
36439
|
"pcm_f32be",
|
|
36498
36440
|
"pcm_f64le",
|
|
36499
36441
|
"pcm_f64be",
|
|
36500
|
-
// Companded
|
|
36501
36442
|
"mulaw",
|
|
36502
36443
|
"alaw"
|
|
36503
36444
|
]);
|
|
36504
|
-
var sonioxAudioFormatSchema = zod11.union([
|
|
36505
|
-
sonioxAutoDetectedAudioFormatSchema,
|
|
36506
|
-
sonioxPcmAudioEncodingSchema
|
|
36507
|
-
]);
|
|
36508
36445
|
var sonioxOneWayTranslationSchema = zod11.object({
|
|
36509
36446
|
type: zod11.literal("one_way"),
|
|
36510
|
-
target_language: zod11.string()
|
|
36447
|
+
target_language: zod11.string()
|
|
36511
36448
|
});
|
|
36512
36449
|
var sonioxTwoWayTranslationSchema = zod11.object({
|
|
36513
36450
|
type: zod11.literal("two_way"),
|
|
36514
|
-
language_a: zod11.string()
|
|
36515
|
-
language_b: zod11.string()
|
|
36451
|
+
language_a: zod11.string(),
|
|
36452
|
+
language_b: zod11.string()
|
|
36516
36453
|
});
|
|
36517
36454
|
var sonioxTranslationConfigSchema = zod11.union([
|
|
36518
36455
|
sonioxOneWayTranslationSchema,
|
|
36519
36456
|
sonioxTwoWayTranslationSchema
|
|
36520
36457
|
]);
|
|
36521
36458
|
var sonioxContextGeneralItemSchema = zod11.object({
|
|
36522
|
-
key: zod11.string()
|
|
36523
|
-
value: zod11.string()
|
|
36459
|
+
key: zod11.string(),
|
|
36460
|
+
value: zod11.string()
|
|
36524
36461
|
});
|
|
36525
36462
|
var sonioxTranslationTermSchema = zod11.object({
|
|
36526
|
-
source: zod11.string()
|
|
36527
|
-
target: zod11.string()
|
|
36463
|
+
source: zod11.string(),
|
|
36464
|
+
target: zod11.string()
|
|
36528
36465
|
});
|
|
36529
36466
|
var sonioxStructuredContextSchema = zod11.object({
|
|
36530
|
-
general: zod11.array(sonioxContextGeneralItemSchema).optional()
|
|
36531
|
-
text: zod11.string().optional()
|
|
36532
|
-
terms: zod11.array(zod11.string()).optional()
|
|
36533
|
-
translation_terms: zod11.array(sonioxTranslationTermSchema).optional()
|
|
36467
|
+
general: zod11.array(sonioxContextGeneralItemSchema).optional(),
|
|
36468
|
+
text: zod11.string().optional(),
|
|
36469
|
+
terms: zod11.array(zod11.string()).optional(),
|
|
36470
|
+
translation_terms: zod11.array(sonioxTranslationTermSchema).optional()
|
|
36534
36471
|
});
|
|
36535
36472
|
var sonioxContextSchema = zod11.union([sonioxStructuredContextSchema, zod11.string()]);
|
|
36536
36473
|
var sonioxRealtimeModelSchema = zod11.enum([
|
|
36474
|
+
"stt-rt-v4",
|
|
36537
36475
|
"stt-rt-v3",
|
|
36538
36476
|
"stt-rt-preview",
|
|
36539
36477
|
"stt-rt-v3-preview",
|
|
36540
36478
|
"stt-rt-preview-v2"
|
|
36541
36479
|
]);
|
|
36542
36480
|
var streamingTranscriberParams3 = zod11.object({
|
|
36543
|
-
model: sonioxRealtimeModelSchema
|
|
36544
|
-
audioFormat: sonioxAudioFormatSchema.optional()
|
|
36545
|
-
sampleRate: zod11.number().optional()
|
|
36546
|
-
numChannels: zod11.number().
|
|
36547
|
-
languageHints: zod11.array(zod11.string()).optional()
|
|
36548
|
-
context: sonioxContextSchema.optional()
|
|
36549
|
-
enableSpeakerDiarization: zod11.boolean().optional()
|
|
36550
|
-
enableLanguageIdentification: zod11.boolean().optional()
|
|
36551
|
-
enableEndpointDetection: zod11.boolean().optional()
|
|
36552
|
-
translation: sonioxTranslationConfigSchema.optional()
|
|
36553
|
-
clientReferenceId: zod11.string().optional()
|
|
36554
|
-
});
|
|
36555
|
-
var sonioxTranslationStatusSchema = zod11.enum(["
|
|
36481
|
+
model: sonioxRealtimeModelSchema,
|
|
36482
|
+
audioFormat: sonioxAudioFormatSchema.optional(),
|
|
36483
|
+
sampleRate: zod11.number().optional(),
|
|
36484
|
+
numChannels: zod11.number().optional(),
|
|
36485
|
+
languageHints: zod11.array(zod11.string()).optional(),
|
|
36486
|
+
context: sonioxContextSchema.optional(),
|
|
36487
|
+
enableSpeakerDiarization: zod11.boolean().optional(),
|
|
36488
|
+
enableLanguageIdentification: zod11.boolean().optional(),
|
|
36489
|
+
enableEndpointDetection: zod11.boolean().optional(),
|
|
36490
|
+
translation: sonioxTranslationConfigSchema.optional(),
|
|
36491
|
+
clientReferenceId: zod11.string().optional()
|
|
36492
|
+
});
|
|
36493
|
+
var sonioxTranslationStatusSchema = zod11.enum(["original", "translation", "none"]);
|
|
36556
36494
|
var sonioxTokenSchema = zod11.object({
|
|
36557
|
-
text: zod11.string()
|
|
36558
|
-
start_ms: zod11.number().optional()
|
|
36559
|
-
end_ms: zod11.number().optional()
|
|
36560
|
-
confidence: zod11.number()
|
|
36561
|
-
is_final: zod11.boolean()
|
|
36562
|
-
speaker: zod11.string().optional()
|
|
36563
|
-
|
|
36564
|
-
|
|
36565
|
-
|
|
36495
|
+
text: zod11.string(),
|
|
36496
|
+
start_ms: zod11.number().optional(),
|
|
36497
|
+
end_ms: zod11.number().optional(),
|
|
36498
|
+
confidence: zod11.number(),
|
|
36499
|
+
is_final: zod11.boolean(),
|
|
36500
|
+
speaker: zod11.string().optional(),
|
|
36501
|
+
translation_status: sonioxTranslationStatusSchema.optional(),
|
|
36502
|
+
language: zod11.string().optional(),
|
|
36503
|
+
source_language: zod11.string().optional()
|
|
36566
36504
|
});
|
|
36567
36505
|
var sonioxStreamingResponseSchema = zod11.object({
|
|
36568
|
-
text: zod11.string()
|
|
36569
|
-
tokens: zod11.array(sonioxTokenSchema)
|
|
36570
|
-
final_audio_proc_ms: zod11.number()
|
|
36571
|
-
total_audio_proc_ms: zod11.number()
|
|
36572
|
-
finished: zod11.boolean().optional()
|
|
36573
|
-
|
|
36574
|
-
|
|
36506
|
+
text: zod11.string(),
|
|
36507
|
+
tokens: zod11.array(sonioxTokenSchema),
|
|
36508
|
+
final_audio_proc_ms: zod11.number(),
|
|
36509
|
+
total_audio_proc_ms: zod11.number(),
|
|
36510
|
+
finished: zod11.boolean().optional(),
|
|
36511
|
+
error_code: zod11.number().optional(),
|
|
36512
|
+
error_message: zod11.string().optional()
|
|
36575
36513
|
});
|
|
36576
36514
|
var sonioxRecorderStateSchema = zod11.enum([
|
|
36577
36515
|
"Init",
|
|
@@ -37137,8 +37075,8 @@ var BatchOnlyProviders = AllProviders.filter(
|
|
|
37137
37075
|
);
|
|
37138
37076
|
|
|
37139
37077
|
// src/generated/deepgram/schema/index.ts
|
|
37140
|
-
var
|
|
37141
|
-
__export(
|
|
37078
|
+
var schema_exports5 = {};
|
|
37079
|
+
__export(schema_exports5, {
|
|
37142
37080
|
V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
|
|
37143
37081
|
V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
|
|
37144
37082
|
V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
|
|
@@ -37393,8 +37331,8 @@ var V1SpeakPostParametersSampleRate = {
|
|
|
37393
37331
|
};
|
|
37394
37332
|
|
|
37395
37333
|
// src/generated/openai/schema/index.ts
|
|
37396
|
-
var
|
|
37397
|
-
__export(
|
|
37334
|
+
var schema_exports6 = {};
|
|
37335
|
+
__export(schema_exports6, {
|
|
37398
37336
|
AudioResponseFormat: () => AudioResponseFormat,
|
|
37399
37337
|
CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
|
|
37400
37338
|
CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
|
|
@@ -37734,8 +37672,8 @@ var VoiceResourceObject = {
|
|
|
37734
37672
|
};
|
|
37735
37673
|
|
|
37736
37674
|
// src/generated/speechmatics/schema/index.ts
|
|
37737
|
-
var
|
|
37738
|
-
__export(
|
|
37675
|
+
var schema_exports7 = {};
|
|
37676
|
+
__export(schema_exports7, {
|
|
37739
37677
|
AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
|
|
37740
37678
|
ErrorResponseError: () => ErrorResponseError,
|
|
37741
37679
|
GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
|
|
@@ -37924,32 +37862,6 @@ var WrittenFormRecognitionResultType = {
|
|
|
37924
37862
|
word: "word"
|
|
37925
37863
|
};
|
|
37926
37864
|
|
|
37927
|
-
// src/generated/soniox/schema/index.ts
|
|
37928
|
-
var schema_exports7 = {};
|
|
37929
|
-
__export(schema_exports7, {
|
|
37930
|
-
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
37931
|
-
TranscriptionMode: () => TranscriptionMode,
|
|
37932
|
-
TranscriptionStatus: () => TranscriptionStatus,
|
|
37933
|
-
TranslationConfigType: () => TranslationConfigType
|
|
37934
|
-
});
|
|
37935
|
-
|
|
37936
|
-
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
37937
|
-
var TemporaryApiKeyUsageType = {
|
|
37938
|
-
transcribe_websocket: "transcribe_websocket"
|
|
37939
|
-
};
|
|
37940
|
-
|
|
37941
|
-
// src/generated/soniox/schema/transcriptionMode.ts
|
|
37942
|
-
var TranscriptionMode = {
|
|
37943
|
-
real_time: "real_time",
|
|
37944
|
-
async: "async"
|
|
37945
|
-
};
|
|
37946
|
-
|
|
37947
|
-
// src/generated/soniox/schema/translationConfigType.ts
|
|
37948
|
-
var TranslationConfigType = {
|
|
37949
|
-
one_way: "one_way",
|
|
37950
|
-
two_way: "two_way"
|
|
37951
|
-
};
|
|
37952
|
-
|
|
37953
37865
|
// src/generated/elevenlabs/schema/index.ts
|
|
37954
37866
|
var schema_exports8 = {};
|
|
37955
37867
|
__export(schema_exports8, {
|
|
@@ -39653,7 +39565,7 @@ export {
|
|
|
39653
39565
|
DeepgramTTSSampleRate,
|
|
39654
39566
|
DeepgramTopicMode,
|
|
39655
39567
|
DeepgramTranscriptionSchema,
|
|
39656
|
-
|
|
39568
|
+
schema_exports5 as DeepgramTypes,
|
|
39657
39569
|
deepgramAPI_zod_exports as DeepgramZodSchemas,
|
|
39658
39570
|
ElevenLabsAdapter,
|
|
39659
39571
|
ElevenLabsCapabilities,
|
|
@@ -39690,7 +39602,7 @@ export {
|
|
|
39690
39602
|
OpenAIResponseFormat,
|
|
39691
39603
|
streaming_types_exports as OpenAIStreamingTypes,
|
|
39692
39604
|
OpenAITranscriptionSchema,
|
|
39693
|
-
|
|
39605
|
+
schema_exports6 as OpenAITypes,
|
|
39694
39606
|
OpenAIWhisperAdapter,
|
|
39695
39607
|
openAIAudioRealtimeAPI_zod_exports as OpenAIZodSchemas,
|
|
39696
39608
|
ProfanityFilterMode,
|
|
@@ -39719,7 +39631,7 @@ export {
|
|
|
39719
39631
|
SonioxStreamingUpdateSchema,
|
|
39720
39632
|
streaming_types_zod_exports as SonioxStreamingZodSchemas,
|
|
39721
39633
|
SonioxTranscriptionSchema,
|
|
39722
|
-
|
|
39634
|
+
schema_exports4 as SonioxTypes,
|
|
39723
39635
|
SpeakV1ContainerParameter,
|
|
39724
39636
|
SpeakV1EncodingParameter,
|
|
39725
39637
|
SpeakV1SampleRateParameter,
|
|
@@ -39734,7 +39646,7 @@ export {
|
|
|
39734
39646
|
SpeechmaticsStreamingSchema,
|
|
39735
39647
|
SpeechmaticsStreamingUpdateSchema,
|
|
39736
39648
|
SpeechmaticsTranscriptionSchema,
|
|
39737
|
-
|
|
39649
|
+
schema_exports7 as SpeechmaticsTypes,
|
|
39738
39650
|
speechmaticsASRRESTAPI_zod_exports as SpeechmaticsZodSchemas,
|
|
39739
39651
|
StreamingProviders,
|
|
39740
39652
|
StreamingSupportedBitDepthEnum,
|