voice-router-dev 0.8.7 → 0.8.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/dist/constants.d.mts +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/{field-configs-2c1-pid1.d.mts → field-configs-CSOt3yc9.d.mts} +6194 -6194
- package/dist/{field-configs-2c1-pid1.d.ts → field-configs-CSOt3yc9.d.ts} +6194 -6194
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/index.d.mts +763 -716
- package/dist/index.d.ts +763 -716
- package/dist/index.js +427 -34
- package/dist/index.mjs +427 -34
- package/dist/{provider-metadata-MDUUEuqF.d.mts → provider-metadata-BJ29OPW1.d.mts} +6 -6
- package/dist/{provider-metadata-_gUWlRXS.d.ts → provider-metadata-D1d-9cng.d.ts} +6 -6
- package/dist/provider-metadata.d.mts +1 -1
- package/dist/provider-metadata.d.ts +1 -1
- package/dist/provider-metadata.js +1 -1
- package/dist/provider-metadata.mjs +1 -1
- package/dist/{speechToTextChunkResponseModel-o8_dfC4c.d.ts → speechToTextChunkResponseModel-B4kVoFc3.d.ts} +97 -6
- package/dist/{speechToTextChunkResponseModel-BYhlHNqP.d.mts → speechToTextChunkResponseModel-DmajV4F-.d.mts} +97 -6
- package/dist/webhooks.d.mts +2 -2
- package/dist/webhooks.d.ts +2 -2
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -8691,6 +8691,7 @@ function createOpenAIWhisperAdapter(config) {
|
|
|
8691
8691
|
|
|
8692
8692
|
// src/adapters/speechmatics-adapter.ts
|
|
8693
8693
|
import axios8 from "axios";
|
|
8694
|
+
import WebSocket6 from "ws";
|
|
8694
8695
|
|
|
8695
8696
|
// src/generated/speechmatics/schema/notificationConfigContentsItem.ts
|
|
8696
8697
|
var NotificationConfigContentsItem = {
|
|
@@ -8740,8 +8741,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8740
8741
|
super(...arguments);
|
|
8741
8742
|
this.name = "speechmatics";
|
|
8742
8743
|
this.capabilities = {
|
|
8743
|
-
streaming:
|
|
8744
|
-
// Batch only (streaming available via separate WebSocket API)
|
|
8744
|
+
streaming: true,
|
|
8745
8745
|
diarization: true,
|
|
8746
8746
|
wordTimestamps: true,
|
|
8747
8747
|
languageDetection: false,
|
|
@@ -8990,6 +8990,389 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8990
8990
|
throw error;
|
|
8991
8991
|
}
|
|
8992
8992
|
}
|
|
8993
|
+
/**
|
|
8994
|
+
* Build WebSocket URL for real-time streaming
|
|
8995
|
+
*
|
|
8996
|
+
* Note: Real-time API uses a different host from the batch API:
|
|
8997
|
+
* - Batch: {region}.asr.api.speechmatics.com
|
|
8998
|
+
* - Real-time: {region}.rt.speechmatics.com
|
|
8999
|
+
*
|
|
9000
|
+
* @param region - Regional endpoint identifier
|
|
9001
|
+
* @returns WebSocket URL for real-time API
|
|
9002
|
+
*/
|
|
9003
|
+
getRegionalWsUrl(region) {
|
|
9004
|
+
if (this.config?.wsBaseUrl) {
|
|
9005
|
+
return this.config.wsBaseUrl;
|
|
9006
|
+
}
|
|
9007
|
+
const rtRegionMap = {
|
|
9008
|
+
eu1: "eu",
|
|
9009
|
+
eu2: "eu",
|
|
9010
|
+
us1: "us",
|
|
9011
|
+
us2: "us",
|
|
9012
|
+
au1: "eu"
|
|
9013
|
+
// No AU RT endpoint — fall back to EU
|
|
9014
|
+
};
|
|
9015
|
+
const rtPrefix = rtRegionMap[region || ""] || "eu";
|
|
9016
|
+
return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
|
|
9017
|
+
}
|
|
9018
|
+
/**
|
|
9019
|
+
* Stream audio for real-time transcription via WebSocket
|
|
9020
|
+
*
|
|
9021
|
+
* Connects to Speechmatics' real-time API and sends audio chunks
|
|
9022
|
+
* for transcription with results returned via callbacks.
|
|
9023
|
+
*
|
|
9024
|
+
* @param options - Streaming configuration options
|
|
9025
|
+
* @param callbacks - Event callbacks for transcription results
|
|
9026
|
+
* @returns Promise that resolves with a StreamingSession
|
|
9027
|
+
*
|
|
9028
|
+
* @example Basic streaming
|
|
9029
|
+
* ```typescript
|
|
9030
|
+
* const session = await adapter.transcribeStream({
|
|
9031
|
+
* language: 'en',
|
|
9032
|
+
* speechmaticsStreaming: {
|
|
9033
|
+
* enablePartials: true,
|
|
9034
|
+
* operatingPoint: 'enhanced'
|
|
9035
|
+
* }
|
|
9036
|
+
* }, {
|
|
9037
|
+
* onTranscript: (event) => console.log(event.text),
|
|
9038
|
+
* onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
|
|
9039
|
+
* onError: (error) => console.error(error)
|
|
9040
|
+
* });
|
|
9041
|
+
*
|
|
9042
|
+
* await session.sendAudio({ data: audioBuffer });
|
|
9043
|
+
* await session.close();
|
|
9044
|
+
* ```
|
|
9045
|
+
*/
|
|
9046
|
+
async transcribeStream(options, callbacks) {
|
|
9047
|
+
this.validateConfig();
|
|
9048
|
+
const smOpts = options?.speechmaticsStreaming || {};
|
|
9049
|
+
const region = smOpts.region || this.config?.region;
|
|
9050
|
+
const wsUrl = this.getRegionalWsUrl(region);
|
|
9051
|
+
const ws = new WebSocket6(wsUrl, {
|
|
9052
|
+
headers: {
|
|
9053
|
+
Authorization: `Bearer ${this.config.apiKey}`
|
|
9054
|
+
}
|
|
9055
|
+
});
|
|
9056
|
+
let sessionStatus = "connecting";
|
|
9057
|
+
const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
|
|
9058
|
+
let seqNo = 0;
|
|
9059
|
+
let utteranceResults = [];
|
|
9060
|
+
const sessionReady = new Promise((resolve, reject) => {
|
|
9061
|
+
const timeout = setTimeout(() => {
|
|
9062
|
+
reject(new Error("WebSocket connection timeout"));
|
|
9063
|
+
}, 1e4);
|
|
9064
|
+
let wsOpen = false;
|
|
9065
|
+
ws.once("error", (error) => {
|
|
9066
|
+
clearTimeout(timeout);
|
|
9067
|
+
reject(error);
|
|
9068
|
+
});
|
|
9069
|
+
ws.once("open", () => {
|
|
9070
|
+
wsOpen = true;
|
|
9071
|
+
const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
|
|
9072
|
+
const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
|
|
9073
|
+
const startMsg = {
|
|
9074
|
+
message: "StartRecognition",
|
|
9075
|
+
audio_format: {
|
|
9076
|
+
type: "raw",
|
|
9077
|
+
encoding,
|
|
9078
|
+
sample_rate: sampleRate
|
|
9079
|
+
},
|
|
9080
|
+
transcription_config: {
|
|
9081
|
+
language: smOpts.language || options?.language || "en",
|
|
9082
|
+
enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
|
|
9083
|
+
}
|
|
9084
|
+
};
|
|
9085
|
+
const txConfig = startMsg.transcription_config;
|
|
9086
|
+
if (smOpts.domain) txConfig.domain = smOpts.domain;
|
|
9087
|
+
if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
|
|
9088
|
+
if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
|
|
9089
|
+
if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
|
|
9090
|
+
if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
|
|
9091
|
+
if (smOpts.diarization === "speaker" || options?.diarization) {
|
|
9092
|
+
txConfig.diarization = "speaker";
|
|
9093
|
+
if (smOpts.maxSpeakers) {
|
|
9094
|
+
txConfig.speaker_diarization_config = {
|
|
9095
|
+
max_speakers: smOpts.maxSpeakers
|
|
9096
|
+
};
|
|
9097
|
+
} else if (options?.speakersExpected) {
|
|
9098
|
+
txConfig.speaker_diarization_config = {
|
|
9099
|
+
max_speakers: options.speakersExpected
|
|
9100
|
+
};
|
|
9101
|
+
}
|
|
9102
|
+
}
|
|
9103
|
+
if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
|
|
9104
|
+
txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
|
|
9105
|
+
content: word
|
|
9106
|
+
}));
|
|
9107
|
+
} else if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9108
|
+
txConfig.additional_vocab = options.customVocabulary.map((word) => ({
|
|
9109
|
+
content: word
|
|
9110
|
+
}));
|
|
9111
|
+
}
|
|
9112
|
+
if (smOpts.conversationConfig) {
|
|
9113
|
+
txConfig.conversation_config = {
|
|
9114
|
+
end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
|
|
9115
|
+
};
|
|
9116
|
+
}
|
|
9117
|
+
const startPayload = JSON.stringify(startMsg);
|
|
9118
|
+
if (callbacks?.onRawMessage) {
|
|
9119
|
+
callbacks.onRawMessage({
|
|
9120
|
+
provider: "speechmatics",
|
|
9121
|
+
direction: "outgoing",
|
|
9122
|
+
timestamp: Date.now(),
|
|
9123
|
+
payload: startPayload,
|
|
9124
|
+
messageType: "StartRecognition"
|
|
9125
|
+
});
|
|
9126
|
+
}
|
|
9127
|
+
ws.send(startPayload);
|
|
9128
|
+
});
|
|
9129
|
+
const onMessage = (data) => {
|
|
9130
|
+
const rawPayload = data.toString();
|
|
9131
|
+
try {
|
|
9132
|
+
const msg = JSON.parse(rawPayload);
|
|
9133
|
+
if (msg.message === "RecognitionStarted") {
|
|
9134
|
+
clearTimeout(timeout);
|
|
9135
|
+
ws.removeListener("message", onMessage);
|
|
9136
|
+
ws.emit("message", data);
|
|
9137
|
+
resolve();
|
|
9138
|
+
} else if (msg.message === "Error") {
|
|
9139
|
+
clearTimeout(timeout);
|
|
9140
|
+
ws.removeListener("message", onMessage);
|
|
9141
|
+
reject(new Error(msg.reason || "Recognition failed to start"));
|
|
9142
|
+
}
|
|
9143
|
+
} catch {
|
|
9144
|
+
}
|
|
9145
|
+
};
|
|
9146
|
+
ws.on("message", onMessage);
|
|
9147
|
+
});
|
|
9148
|
+
ws.on("message", (data) => {
|
|
9149
|
+
const rawPayload = data.toString();
|
|
9150
|
+
try {
|
|
9151
|
+
const message = JSON.parse(rawPayload);
|
|
9152
|
+
if (callbacks?.onRawMessage) {
|
|
9153
|
+
callbacks.onRawMessage({
|
|
9154
|
+
provider: "speechmatics",
|
|
9155
|
+
direction: "incoming",
|
|
9156
|
+
timestamp: Date.now(),
|
|
9157
|
+
payload: rawPayload,
|
|
9158
|
+
messageType: message.message
|
|
9159
|
+
});
|
|
9160
|
+
}
|
|
9161
|
+
this.handleStreamingMessage(message, callbacks, utteranceResults);
|
|
9162
|
+
} catch (error) {
|
|
9163
|
+
if (callbacks?.onRawMessage) {
|
|
9164
|
+
callbacks.onRawMessage({
|
|
9165
|
+
provider: "speechmatics",
|
|
9166
|
+
direction: "incoming",
|
|
9167
|
+
timestamp: Date.now(),
|
|
9168
|
+
payload: rawPayload,
|
|
9169
|
+
messageType: "parse_error"
|
|
9170
|
+
});
|
|
9171
|
+
}
|
|
9172
|
+
callbacks?.onError?.({
|
|
9173
|
+
code: "PARSE_ERROR",
|
|
9174
|
+
message: "Failed to parse WebSocket message",
|
|
9175
|
+
details: error
|
|
9176
|
+
});
|
|
9177
|
+
}
|
|
9178
|
+
});
|
|
9179
|
+
ws.on("error", (error) => {
|
|
9180
|
+
callbacks?.onError?.({
|
|
9181
|
+
code: "WEBSOCKET_ERROR",
|
|
9182
|
+
message: error.message,
|
|
9183
|
+
details: error
|
|
9184
|
+
});
|
|
9185
|
+
});
|
|
9186
|
+
ws.on("close", (code, reason) => {
|
|
9187
|
+
sessionStatus = "closed";
|
|
9188
|
+
callbacks?.onClose?.(code, reason.toString());
|
|
9189
|
+
});
|
|
9190
|
+
await sessionReady;
|
|
9191
|
+
sessionStatus = "open";
|
|
9192
|
+
callbacks?.onOpen?.();
|
|
9193
|
+
return {
|
|
9194
|
+
id: sessionId,
|
|
9195
|
+
provider: this.name,
|
|
9196
|
+
createdAt: /* @__PURE__ */ new Date(),
|
|
9197
|
+
getStatus: () => sessionStatus,
|
|
9198
|
+
sendAudio: async (chunk) => {
|
|
9199
|
+
if (sessionStatus !== "open") {
|
|
9200
|
+
throw new Error(`Cannot send audio: session is ${sessionStatus}`);
|
|
9201
|
+
}
|
|
9202
|
+
if (ws.readyState !== WebSocket6.OPEN) {
|
|
9203
|
+
throw new Error("WebSocket is not open");
|
|
9204
|
+
}
|
|
9205
|
+
if (callbacks?.onRawMessage) {
|
|
9206
|
+
const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
|
|
9207
|
+
chunk.data.byteOffset,
|
|
9208
|
+
chunk.data.byteOffset + chunk.data.byteLength
|
|
9209
|
+
);
|
|
9210
|
+
callbacks.onRawMessage({
|
|
9211
|
+
provider: this.name,
|
|
9212
|
+
direction: "outgoing",
|
|
9213
|
+
timestamp: Date.now(),
|
|
9214
|
+
payload: audioPayload,
|
|
9215
|
+
messageType: "audio"
|
|
9216
|
+
});
|
|
9217
|
+
}
|
|
9218
|
+
ws.send(chunk.data);
|
|
9219
|
+
seqNo++;
|
|
9220
|
+
if (chunk.isLast) {
|
|
9221
|
+
const endMsg = JSON.stringify({
|
|
9222
|
+
message: "EndOfStream",
|
|
9223
|
+
last_seq_no: seqNo
|
|
9224
|
+
});
|
|
9225
|
+
if (callbacks?.onRawMessage) {
|
|
9226
|
+
callbacks.onRawMessage({
|
|
9227
|
+
provider: this.name,
|
|
9228
|
+
direction: "outgoing",
|
|
9229
|
+
timestamp: Date.now(),
|
|
9230
|
+
payload: endMsg,
|
|
9231
|
+
messageType: "EndOfStream"
|
|
9232
|
+
});
|
|
9233
|
+
}
|
|
9234
|
+
ws.send(endMsg);
|
|
9235
|
+
}
|
|
9236
|
+
},
|
|
9237
|
+
close: async () => {
|
|
9238
|
+
if (sessionStatus === "closed" || sessionStatus === "closing") {
|
|
9239
|
+
return;
|
|
9240
|
+
}
|
|
9241
|
+
sessionStatus = "closing";
|
|
9242
|
+
if (ws.readyState === WebSocket6.OPEN) {
|
|
9243
|
+
seqNo++;
|
|
9244
|
+
ws.send(
|
|
9245
|
+
JSON.stringify({
|
|
9246
|
+
message: "EndOfStream",
|
|
9247
|
+
last_seq_no: seqNo
|
|
9248
|
+
})
|
|
9249
|
+
);
|
|
9250
|
+
}
|
|
9251
|
+
return new Promise((resolve) => {
|
|
9252
|
+
const timeout = setTimeout(() => {
|
|
9253
|
+
ws.terminate();
|
|
9254
|
+
sessionStatus = "closed";
|
|
9255
|
+
resolve();
|
|
9256
|
+
}, 5e3);
|
|
9257
|
+
const onMsg = (data) => {
|
|
9258
|
+
try {
|
|
9259
|
+
const msg = JSON.parse(data.toString());
|
|
9260
|
+
if (msg.message === "EndOfTranscript") {
|
|
9261
|
+
ws.removeListener("message", onMsg);
|
|
9262
|
+
clearTimeout(timeout);
|
|
9263
|
+
ws.close();
|
|
9264
|
+
}
|
|
9265
|
+
} catch {
|
|
9266
|
+
}
|
|
9267
|
+
};
|
|
9268
|
+
ws.on("message", onMsg);
|
|
9269
|
+
ws.once("close", () => {
|
|
9270
|
+
clearTimeout(timeout);
|
|
9271
|
+
sessionStatus = "closed";
|
|
9272
|
+
resolve();
|
|
9273
|
+
});
|
|
9274
|
+
});
|
|
9275
|
+
}
|
|
9276
|
+
};
|
|
9277
|
+
}
|
|
9278
|
+
/**
|
|
9279
|
+
* Handle incoming Speechmatics real-time WebSocket messages
|
|
9280
|
+
*/
|
|
9281
|
+
handleStreamingMessage(message, callbacks, utteranceResults) {
|
|
9282
|
+
switch (message.message) {
|
|
9283
|
+
case "RecognitionStarted": {
|
|
9284
|
+
break;
|
|
9285
|
+
}
|
|
9286
|
+
case "AddPartialTranscript": {
|
|
9287
|
+
const results = message.results || [];
|
|
9288
|
+
const text = buildTextFromSpeechmaticsResults(results);
|
|
9289
|
+
if (text) {
|
|
9290
|
+
callbacks?.onTranscript?.({
|
|
9291
|
+
type: "transcript",
|
|
9292
|
+
text,
|
|
9293
|
+
isFinal: false,
|
|
9294
|
+
words: this.extractWordsFromResults(results),
|
|
9295
|
+
data: message
|
|
9296
|
+
});
|
|
9297
|
+
}
|
|
9298
|
+
break;
|
|
9299
|
+
}
|
|
9300
|
+
case "AddTranscript": {
|
|
9301
|
+
const results = message.results || [];
|
|
9302
|
+
const text = buildTextFromSpeechmaticsResults(results);
|
|
9303
|
+
if (utteranceResults) {
|
|
9304
|
+
utteranceResults.push(...results);
|
|
9305
|
+
}
|
|
9306
|
+
if (text) {
|
|
9307
|
+
callbacks?.onTranscript?.({
|
|
9308
|
+
type: "transcript",
|
|
9309
|
+
text,
|
|
9310
|
+
isFinal: true,
|
|
9311
|
+
words: this.extractWordsFromResults(results),
|
|
9312
|
+
data: message
|
|
9313
|
+
});
|
|
9314
|
+
}
|
|
9315
|
+
break;
|
|
9316
|
+
}
|
|
9317
|
+
case "EndOfUtterance": {
|
|
9318
|
+
if (utteranceResults && utteranceResults.length > 0) {
|
|
9319
|
+
const text = buildTextFromSpeechmaticsResults(utteranceResults);
|
|
9320
|
+
const words = this.extractWordsFromResults(utteranceResults);
|
|
9321
|
+
const utterances = buildUtterancesFromWords(words);
|
|
9322
|
+
if (utterances.length > 0) {
|
|
9323
|
+
for (const utt of utterances) {
|
|
9324
|
+
callbacks?.onUtterance?.(utt);
|
|
9325
|
+
}
|
|
9326
|
+
} else if (text) {
|
|
9327
|
+
callbacks?.onUtterance?.({
|
|
9328
|
+
text,
|
|
9329
|
+
start: words.length > 0 ? words[0].start : 0,
|
|
9330
|
+
end: words.length > 0 ? words[words.length - 1].end : 0,
|
|
9331
|
+
words
|
|
9332
|
+
});
|
|
9333
|
+
}
|
|
9334
|
+
utteranceResults.length = 0;
|
|
9335
|
+
}
|
|
9336
|
+
break;
|
|
9337
|
+
}
|
|
9338
|
+
case "AudioAdded": {
|
|
9339
|
+
break;
|
|
9340
|
+
}
|
|
9341
|
+
case "EndOfTranscript": {
|
|
9342
|
+
break;
|
|
9343
|
+
}
|
|
9344
|
+
case "Info":
|
|
9345
|
+
case "Warning": {
|
|
9346
|
+
callbacks?.onMetadata?.(message);
|
|
9347
|
+
break;
|
|
9348
|
+
}
|
|
9349
|
+
case "Error": {
|
|
9350
|
+
const errMsg = message;
|
|
9351
|
+
callbacks?.onError?.({
|
|
9352
|
+
code: errMsg.type || "SPEECHMATICS_ERROR",
|
|
9353
|
+
message: errMsg.reason || "Unknown error",
|
|
9354
|
+
details: message
|
|
9355
|
+
});
|
|
9356
|
+
break;
|
|
9357
|
+
}
|
|
9358
|
+
default: {
|
|
9359
|
+
callbacks?.onMetadata?.(message);
|
|
9360
|
+
break;
|
|
9361
|
+
}
|
|
9362
|
+
}
|
|
9363
|
+
}
|
|
9364
|
+
/**
|
|
9365
|
+
* Extract unified Word[] from Speechmatics recognition results
|
|
9366
|
+
*/
|
|
9367
|
+
extractWordsFromResults(results) {
|
|
9368
|
+
return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
|
|
9369
|
+
word: result.alternatives?.[0]?.content || "",
|
|
9370
|
+
start: result.start_time,
|
|
9371
|
+
end: result.end_time,
|
|
9372
|
+
confidence: result.alternatives?.[0]?.confidence,
|
|
9373
|
+
speaker: result.alternatives?.[0]?.speaker
|
|
9374
|
+
}));
|
|
9375
|
+
}
|
|
8993
9376
|
/**
|
|
8994
9377
|
* Normalize Speechmatics status to unified status
|
|
8995
9378
|
* Uses generated JobDetailsStatus enum values
|
|
@@ -9311,50 +9694,51 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9311
9694
|
const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
|
|
9312
9695
|
const createdAt = /* @__PURE__ */ new Date();
|
|
9313
9696
|
const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
|
|
9314
|
-
const wsUrl =
|
|
9315
|
-
|
|
9316
|
-
const
|
|
9317
|
-
|
|
9318
|
-
|
|
9697
|
+
const wsUrl = `${wsBase}/transcribe-websocket`;
|
|
9698
|
+
const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-v4";
|
|
9699
|
+
const sonioxOpts = options?.sonioxStreaming;
|
|
9700
|
+
const initMessage = {
|
|
9701
|
+
api_key: this.config.apiKey,
|
|
9702
|
+
model: modelId
|
|
9703
|
+
};
|
|
9704
|
+
if (sonioxOpts?.audioFormat) {
|
|
9705
|
+
initMessage.audio_format = sonioxOpts.audioFormat;
|
|
9706
|
+
} else if (options?.encoding) {
|
|
9319
9707
|
const encodingMap = {
|
|
9320
9708
|
linear16: "pcm_s16le",
|
|
9321
9709
|
pcm: "pcm_s16le",
|
|
9322
9710
|
mulaw: "mulaw",
|
|
9323
9711
|
alaw: "alaw"
|
|
9324
9712
|
};
|
|
9325
|
-
|
|
9713
|
+
initMessage.audio_format = encodingMap[options.encoding] || options.encoding;
|
|
9326
9714
|
}
|
|
9327
|
-
if (options?.sampleRate) {
|
|
9328
|
-
|
|
9715
|
+
if (sonioxOpts?.sampleRate || options?.sampleRate) {
|
|
9716
|
+
initMessage.sample_rate = sonioxOpts?.sampleRate || options?.sampleRate;
|
|
9329
9717
|
}
|
|
9330
|
-
if (options?.channels) {
|
|
9331
|
-
|
|
9718
|
+
if (sonioxOpts?.numChannels || options?.channels) {
|
|
9719
|
+
initMessage.num_channels = sonioxOpts?.numChannels || options?.channels;
|
|
9332
9720
|
}
|
|
9333
|
-
const sonioxOpts = options?.sonioxStreaming;
|
|
9334
9721
|
if (sonioxOpts) {
|
|
9335
9722
|
if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
|
|
9336
|
-
|
|
9723
|
+
initMessage.language_hints = sonioxOpts.languageHints;
|
|
9337
9724
|
}
|
|
9338
9725
|
if (sonioxOpts.enableLanguageIdentification) {
|
|
9339
|
-
|
|
9726
|
+
initMessage.enable_language_identification = true;
|
|
9340
9727
|
}
|
|
9341
9728
|
if (sonioxOpts.enableEndpointDetection) {
|
|
9342
|
-
|
|
9729
|
+
initMessage.enable_endpoint_detection = true;
|
|
9343
9730
|
}
|
|
9344
9731
|
if (sonioxOpts.enableSpeakerDiarization) {
|
|
9345
|
-
|
|
9732
|
+
initMessage.enable_speaker_diarization = true;
|
|
9346
9733
|
}
|
|
9347
9734
|
if (sonioxOpts.context) {
|
|
9348
|
-
|
|
9349
|
-
"context",
|
|
9350
|
-
typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
|
|
9351
|
-
);
|
|
9735
|
+
initMessage.context = typeof sonioxOpts.context === "string" ? sonioxOpts.context : sonioxOpts.context;
|
|
9352
9736
|
}
|
|
9353
9737
|
if (sonioxOpts.translation) {
|
|
9354
|
-
|
|
9738
|
+
initMessage.translation = sonioxOpts.translation;
|
|
9355
9739
|
}
|
|
9356
9740
|
if (sonioxOpts.clientReferenceId) {
|
|
9357
|
-
|
|
9741
|
+
initMessage.client_reference_id = sonioxOpts.clientReferenceId;
|
|
9358
9742
|
}
|
|
9359
9743
|
}
|
|
9360
9744
|
if (!sonioxOpts?.languageHints && options?.language) {
|
|
@@ -9363,24 +9747,33 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9363
9747
|
`[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
|
|
9364
9748
|
);
|
|
9365
9749
|
}
|
|
9366
|
-
|
|
9750
|
+
initMessage.language_hints = [options.language];
|
|
9367
9751
|
}
|
|
9368
9752
|
if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
|
|
9369
|
-
|
|
9753
|
+
initMessage.enable_speaker_diarization = true;
|
|
9370
9754
|
}
|
|
9371
9755
|
if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
|
|
9372
|
-
|
|
9373
|
-
}
|
|
9374
|
-
if (options?.interimResults !== false) {
|
|
9756
|
+
initMessage.enable_language_identification = true;
|
|
9375
9757
|
}
|
|
9376
9758
|
let status = "connecting";
|
|
9377
9759
|
let openedAt = null;
|
|
9378
9760
|
let receivedData = false;
|
|
9379
9761
|
const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : __require("ws");
|
|
9380
|
-
const ws = new WebSocketImpl(wsUrl
|
|
9762
|
+
const ws = new WebSocketImpl(wsUrl);
|
|
9381
9763
|
ws.onopen = () => {
|
|
9382
|
-
status = "open";
|
|
9383
9764
|
openedAt = Date.now();
|
|
9765
|
+
const initPayload = JSON.stringify(initMessage);
|
|
9766
|
+
if (callbacks?.onRawMessage) {
|
|
9767
|
+
callbacks.onRawMessage({
|
|
9768
|
+
provider: this.name,
|
|
9769
|
+
direction: "outgoing",
|
|
9770
|
+
timestamp: Date.now(),
|
|
9771
|
+
payload: initPayload,
|
|
9772
|
+
messageType: "init"
|
|
9773
|
+
});
|
|
9774
|
+
}
|
|
9775
|
+
ws.send(initPayload);
|
|
9776
|
+
status = "open";
|
|
9384
9777
|
callbacks?.onOpen?.();
|
|
9385
9778
|
};
|
|
9386
9779
|
ws.onmessage = (event) => {
|
|
@@ -9459,10 +9852,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9459
9852
|
ws.onclose = (event) => {
|
|
9460
9853
|
status = "closed";
|
|
9461
9854
|
const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
|
|
9462
|
-
const
|
|
9463
|
-
if (
|
|
9855
|
+
const isEarlyClose = timeSinceOpen !== null && timeSinceOpen < 5e3 && !receivedData;
|
|
9856
|
+
if (isEarlyClose && event.code === 1e3) {
|
|
9464
9857
|
const errorMessage = [
|
|
9465
|
-
"Soniox closed connection
|
|
9858
|
+
"Soniox closed connection shortly after opening.",
|
|
9466
9859
|
`Current config: region=${this.region}, model=${modelId}`,
|
|
9467
9860
|
"Likely causes:",
|
|
9468
9861
|
" - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
|
|
@@ -36481,7 +36874,7 @@ var AzureCapabilities = {
|
|
|
36481
36874
|
deleteTranscript: true
|
|
36482
36875
|
};
|
|
36483
36876
|
var SpeechmaticsCapabilities = {
|
|
36484
|
-
streaming:
|
|
36877
|
+
streaming: true,
|
|
36485
36878
|
diarization: true,
|
|
36486
36879
|
wordTimestamps: true,
|
|
36487
36880
|
languageDetection: false,
|
|
@@ -187,7 +187,7 @@ declare const AzureCapabilities: {
|
|
|
187
187
|
* Speechmatics provider capabilities
|
|
188
188
|
*/
|
|
189
189
|
declare const SpeechmaticsCapabilities: {
|
|
190
|
-
readonly streaming:
|
|
190
|
+
readonly streaming: true;
|
|
191
191
|
readonly diarization: true;
|
|
192
192
|
readonly wordTimestamps: true;
|
|
193
193
|
readonly languageDetection: false;
|
|
@@ -322,7 +322,7 @@ declare const ProviderCapabilitiesMap: {
|
|
|
322
322
|
readonly deleteTranscript: true;
|
|
323
323
|
};
|
|
324
324
|
readonly speechmatics: {
|
|
325
|
-
readonly streaming:
|
|
325
|
+
readonly streaming: true;
|
|
326
326
|
readonly diarization: true;
|
|
327
327
|
readonly wordTimestamps: true;
|
|
328
328
|
readonly languageDetection: false;
|
|
@@ -519,11 +519,11 @@ type LanguageCode = keyof typeof LanguageLabels;
|
|
|
519
519
|
/**
|
|
520
520
|
* Gladia supported language codes (from OpenAPI spec)
|
|
521
521
|
*/
|
|
522
|
-
declare const GladiaLanguageCodes: ("
|
|
522
|
+
declare const GladiaLanguageCodes: ("af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh")[];
|
|
523
523
|
/**
|
|
524
524
|
* AssemblyAI supported language codes (from OpenAPI spec)
|
|
525
525
|
*/
|
|
526
|
-
declare const AssemblyAILanguageCodes: ("
|
|
526
|
+
declare const AssemblyAILanguageCodes: ("af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "en_au" | "en_uk" | "en_us")[];
|
|
527
527
|
/**
|
|
528
528
|
* Deepgram supported language codes
|
|
529
529
|
* Note: Deepgram accepts BCP-47 tags, these are the most common
|
|
@@ -551,8 +551,8 @@ declare const DeepgramLanguageCodes: readonly ["en", "en-US", "en-GB", "en-AU",
|
|
|
551
551
|
* ```
|
|
552
552
|
*/
|
|
553
553
|
declare const AllLanguageCodes: {
|
|
554
|
-
readonly gladia: ("
|
|
555
|
-
readonly assemblyai: ("
|
|
554
|
+
readonly gladia: ("af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh")[];
|
|
555
|
+
readonly assemblyai: ("af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "en_au" | "en_uk" | "en_us")[];
|
|
556
556
|
readonly deepgram: readonly ["en", "en-US", "en-GB", "en-AU", "en-IN", "es", "es-419", "fr", "fr-CA", "de", "it", "pt", "pt-BR", "nl", "ru", "uk", "pl", "cs", "sk", "hu", "ro", "bg", "hr", "sl", "el", "tr", "fi", "sv", "da", "no", "et", "lv", "lt", "zh", "zh-CN", "zh-TW", "ja", "ko", "th", "vi", "id", "ms", "tl", "hi", "ta", "te", "bn", "ar"];
|
|
557
557
|
readonly "openai-whisper": readonly ["en", "es", "fr", "de", "it", "pt", "nl", "ru", "zh", "ja", "ko", "ar", "hi", "pl", "uk", "cs", "ro", "hu", "el", "tr", "fi", "sv", "da", "no", "th", "vi", "id", "ms", "he", "fa"];
|
|
558
558
|
readonly "azure-stt": readonly ["af-ZA", "am-ET", "ar-AE", "ar-BH", "ar-DZ", "ar-EG", "ar-IL", "ar-IQ", "ar-JO", "ar-KW", "ar-LB", "ar-LY", "ar-MA", "ar-OM", "ar-PS", "ar-QA", "ar-SA", "ar-SY", "ar-TN", "ar-YE", "as-IN", "az-AZ", "be-BY", "bg-BG", "bn-BD", "bn-IN", "bs-BA", "ca-ES", "cs-CZ", "cy-GB", "da-DK", "de-AT", "de-CH", "de-DE", "el-GR", "en-AU", "en-CA", "en-GB", "en-GH", "en-HK", "en-IE", "en-IN", "en-KE", "en-NG", "en-NZ", "en-PH", "en-SG", "en-TZ", "en-US", "en-ZA", "es-AR", "es-BO", "es-CL", "es-CO", "es-CR", "es-CU", "es-DO", "es-EC", "es-ES", "es-GQ", "es-GT", "es-HN", "es-MX", "es-NI", "es-PA", "es-PE", "es-PR", "es-PY", "es-SV", "es-US", "es-UY", "es-VE", "et-EE", "eu-ES", "fa-IR", "fi-FI", "fil-PH", "fr-BE", "fr-CA", "fr-CH", "fr-FR", "ga-IE", "gl-ES", "gu-IN", "he-IL", "hi-IN", "hr-HR", "hu-HU", "hy-AM", "id-ID", "is-IS", "it-CH", "it-IT", "ja-JP", "jv-ID", "ka-GE", "kk-KZ", "km-KH", "kn-IN", "ko-KR", "lo-LA", "lt-LT", "lv-LV", "mi-NZ", "mk-MK", "ml-IN", "mn-MN", "mr-IN", "ms-MY", "mt-MT", "my-MM", "nan-CN", "nb-NO", "ne-NP", "nl-BE", "nl-NL", "or-IN", "pa-IN", "pl-PL", "ps-AF", "pt-BR", "pt-PT", "ro-RO", "ru-RU", "si-LK", "sk-SK", "sl-SI", "so-SO", "sq-AL", "sr-ME", "sr-RS", "sr-XK", "su-ID", "sv-SE", "sw-KE", "sw-TZ", "ta-IN", "ta-LK", "ta-MY", "ta-SG", "te-IN", "th-TH", "tr-TR", "uk-UA", "ur-IN", "ur-PK", "uz-UZ", "vi-VN", "wuu-CN", "yue-CN", "zh-CN", "zh-HK", "zh-SG", "zh-TW", "zu-ZA"];
|