voice-router-dev 0.8.7 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/dist/constants.d.mts +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/{field-configs-2c1-pid1.d.mts → field-configs-BtR4uR2N.d.mts} +1168 -1168
- package/dist/{field-configs-2c1-pid1.d.ts → field-configs-BtR4uR2N.d.ts} +1168 -1168
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/index.d.mts +557 -510
- package/dist/index.d.ts +557 -510
- package/dist/index.js +419 -34
- package/dist/index.mjs +419 -34
- package/dist/{provider-metadata-MDUUEuqF.d.mts → provider-metadata-BJ29OPW1.d.mts} +6 -6
- package/dist/{provider-metadata-_gUWlRXS.d.ts → provider-metadata-D1d-9cng.d.ts} +6 -6
- package/dist/provider-metadata.d.mts +1 -1
- package/dist/provider-metadata.d.ts +1 -1
- package/dist/provider-metadata.js +1 -1
- package/dist/provider-metadata.mjs +1 -1
- package/dist/{speechToTextChunkResponseModel-o8_dfC4c.d.ts → speechToTextChunkResponseModel-B4kVoFc3.d.ts} +97 -6
- package/dist/{speechToTextChunkResponseModel-BYhlHNqP.d.mts → speechToTextChunkResponseModel-DmajV4F-.d.mts} +97 -6
- package/dist/webhooks.d.mts +2 -2
- package/dist/webhooks.d.ts +2 -2
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -8691,6 +8691,7 @@ function createOpenAIWhisperAdapter(config) {
|
|
|
8691
8691
|
|
|
8692
8692
|
// src/adapters/speechmatics-adapter.ts
|
|
8693
8693
|
import axios8 from "axios";
|
|
8694
|
+
import WebSocket6 from "ws";
|
|
8694
8695
|
|
|
8695
8696
|
// src/generated/speechmatics/schema/notificationConfigContentsItem.ts
|
|
8696
8697
|
var NotificationConfigContentsItem = {
|
|
@@ -8740,8 +8741,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8740
8741
|
super(...arguments);
|
|
8741
8742
|
this.name = "speechmatics";
|
|
8742
8743
|
this.capabilities = {
|
|
8743
|
-
streaming:
|
|
8744
|
-
// Batch only (streaming available via separate WebSocket API)
|
|
8744
|
+
streaming: true,
|
|
8745
8745
|
diarization: true,
|
|
8746
8746
|
wordTimestamps: true,
|
|
8747
8747
|
languageDetection: false,
|
|
@@ -8990,6 +8990,381 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8990
8990
|
throw error;
|
|
8991
8991
|
}
|
|
8992
8992
|
}
|
|
8993
|
+
/**
|
|
8994
|
+
* Build WebSocket URL for real-time streaming
|
|
8995
|
+
*
|
|
8996
|
+
* Note: Real-time API uses a different host from the batch API:
|
|
8997
|
+
* - Batch: {region}.asr.api.speechmatics.com
|
|
8998
|
+
* - Real-time: {region}.rt.speechmatics.com
|
|
8999
|
+
*
|
|
9000
|
+
* @param region - Regional endpoint identifier
|
|
9001
|
+
* @returns WebSocket URL for real-time API
|
|
9002
|
+
*/
|
|
9003
|
+
getRegionalWsUrl(region) {
|
|
9004
|
+
if (this.config?.wsBaseUrl) {
|
|
9005
|
+
return this.config.wsBaseUrl;
|
|
9006
|
+
}
|
|
9007
|
+
const regionPrefix = region || "eu1";
|
|
9008
|
+
return `wss://${regionPrefix}.rt.speechmatics.com/v2`;
|
|
9009
|
+
}
|
|
9010
|
+
/**
|
|
9011
|
+
* Stream audio for real-time transcription via WebSocket
|
|
9012
|
+
*
|
|
9013
|
+
* Connects to Speechmatics' real-time API and sends audio chunks
|
|
9014
|
+
* for transcription with results returned via callbacks.
|
|
9015
|
+
*
|
|
9016
|
+
* @param options - Streaming configuration options
|
|
9017
|
+
* @param callbacks - Event callbacks for transcription results
|
|
9018
|
+
* @returns Promise that resolves with a StreamingSession
|
|
9019
|
+
*
|
|
9020
|
+
* @example Basic streaming
|
|
9021
|
+
* ```typescript
|
|
9022
|
+
* const session = await adapter.transcribeStream({
|
|
9023
|
+
* language: 'en',
|
|
9024
|
+
* speechmaticsStreaming: {
|
|
9025
|
+
* enablePartials: true,
|
|
9026
|
+
* operatingPoint: 'enhanced'
|
|
9027
|
+
* }
|
|
9028
|
+
* }, {
|
|
9029
|
+
* onTranscript: (event) => console.log(event.text),
|
|
9030
|
+
* onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
|
|
9031
|
+
* onError: (error) => console.error(error)
|
|
9032
|
+
* });
|
|
9033
|
+
*
|
|
9034
|
+
* await session.sendAudio({ data: audioBuffer });
|
|
9035
|
+
* await session.close();
|
|
9036
|
+
* ```
|
|
9037
|
+
*/
|
|
9038
|
+
async transcribeStream(options, callbacks) {
|
|
9039
|
+
this.validateConfig();
|
|
9040
|
+
const smOpts = options?.speechmaticsStreaming || {};
|
|
9041
|
+
const region = smOpts.region || this.config?.region;
|
|
9042
|
+
const wsUrl = this.getRegionalWsUrl(region);
|
|
9043
|
+
const ws = new WebSocket6(wsUrl, {
|
|
9044
|
+
headers: {
|
|
9045
|
+
Authorization: `Bearer ${this.config.apiKey}`
|
|
9046
|
+
}
|
|
9047
|
+
});
|
|
9048
|
+
let sessionStatus = "connecting";
|
|
9049
|
+
const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
|
|
9050
|
+
let seqNo = 0;
|
|
9051
|
+
let utteranceResults = [];
|
|
9052
|
+
const sessionReady = new Promise((resolve, reject) => {
|
|
9053
|
+
const timeout = setTimeout(() => {
|
|
9054
|
+
reject(new Error("WebSocket connection timeout"));
|
|
9055
|
+
}, 1e4);
|
|
9056
|
+
let wsOpen = false;
|
|
9057
|
+
ws.once("error", (error) => {
|
|
9058
|
+
clearTimeout(timeout);
|
|
9059
|
+
reject(error);
|
|
9060
|
+
});
|
|
9061
|
+
ws.once("open", () => {
|
|
9062
|
+
wsOpen = true;
|
|
9063
|
+
const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
|
|
9064
|
+
const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
|
|
9065
|
+
const startMsg = {
|
|
9066
|
+
message: "StartRecognition",
|
|
9067
|
+
audio_format: {
|
|
9068
|
+
type: "raw",
|
|
9069
|
+
encoding,
|
|
9070
|
+
sample_rate: sampleRate
|
|
9071
|
+
},
|
|
9072
|
+
transcription_config: {
|
|
9073
|
+
language: smOpts.language || options?.language || "en",
|
|
9074
|
+
enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
|
|
9075
|
+
}
|
|
9076
|
+
};
|
|
9077
|
+
const txConfig = startMsg.transcription_config;
|
|
9078
|
+
if (smOpts.domain) txConfig.domain = smOpts.domain;
|
|
9079
|
+
if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
|
|
9080
|
+
if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
|
|
9081
|
+
if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
|
|
9082
|
+
if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
|
|
9083
|
+
if (smOpts.diarization === "speaker" || options?.diarization) {
|
|
9084
|
+
txConfig.diarization = "speaker";
|
|
9085
|
+
if (smOpts.maxSpeakers) {
|
|
9086
|
+
txConfig.speaker_diarization_config = {
|
|
9087
|
+
max_speakers: smOpts.maxSpeakers
|
|
9088
|
+
};
|
|
9089
|
+
} else if (options?.speakersExpected) {
|
|
9090
|
+
txConfig.speaker_diarization_config = {
|
|
9091
|
+
max_speakers: options.speakersExpected
|
|
9092
|
+
};
|
|
9093
|
+
}
|
|
9094
|
+
}
|
|
9095
|
+
if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
|
|
9096
|
+
txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
|
|
9097
|
+
content: word
|
|
9098
|
+
}));
|
|
9099
|
+
} else if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9100
|
+
txConfig.additional_vocab = options.customVocabulary.map((word) => ({
|
|
9101
|
+
content: word
|
|
9102
|
+
}));
|
|
9103
|
+
}
|
|
9104
|
+
if (smOpts.conversationConfig) {
|
|
9105
|
+
txConfig.conversation_config = {
|
|
9106
|
+
end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
|
|
9107
|
+
};
|
|
9108
|
+
}
|
|
9109
|
+
const startPayload = JSON.stringify(startMsg);
|
|
9110
|
+
if (callbacks?.onRawMessage) {
|
|
9111
|
+
callbacks.onRawMessage({
|
|
9112
|
+
provider: "speechmatics",
|
|
9113
|
+
direction: "outgoing",
|
|
9114
|
+
timestamp: Date.now(),
|
|
9115
|
+
payload: startPayload,
|
|
9116
|
+
messageType: "StartRecognition"
|
|
9117
|
+
});
|
|
9118
|
+
}
|
|
9119
|
+
ws.send(startPayload);
|
|
9120
|
+
});
|
|
9121
|
+
const onMessage = (data) => {
|
|
9122
|
+
const rawPayload = data.toString();
|
|
9123
|
+
try {
|
|
9124
|
+
const msg = JSON.parse(rawPayload);
|
|
9125
|
+
if (msg.message === "RecognitionStarted") {
|
|
9126
|
+
clearTimeout(timeout);
|
|
9127
|
+
ws.removeListener("message", onMessage);
|
|
9128
|
+
ws.emit("message", data);
|
|
9129
|
+
resolve();
|
|
9130
|
+
} else if (msg.message === "Error") {
|
|
9131
|
+
clearTimeout(timeout);
|
|
9132
|
+
ws.removeListener("message", onMessage);
|
|
9133
|
+
reject(new Error(msg.reason || "Recognition failed to start"));
|
|
9134
|
+
}
|
|
9135
|
+
} catch {
|
|
9136
|
+
}
|
|
9137
|
+
};
|
|
9138
|
+
ws.on("message", onMessage);
|
|
9139
|
+
});
|
|
9140
|
+
ws.on("message", (data) => {
|
|
9141
|
+
const rawPayload = data.toString();
|
|
9142
|
+
try {
|
|
9143
|
+
const message = JSON.parse(rawPayload);
|
|
9144
|
+
if (callbacks?.onRawMessage) {
|
|
9145
|
+
callbacks.onRawMessage({
|
|
9146
|
+
provider: "speechmatics",
|
|
9147
|
+
direction: "incoming",
|
|
9148
|
+
timestamp: Date.now(),
|
|
9149
|
+
payload: rawPayload,
|
|
9150
|
+
messageType: message.message
|
|
9151
|
+
});
|
|
9152
|
+
}
|
|
9153
|
+
this.handleStreamingMessage(message, callbacks, utteranceResults);
|
|
9154
|
+
} catch (error) {
|
|
9155
|
+
if (callbacks?.onRawMessage) {
|
|
9156
|
+
callbacks.onRawMessage({
|
|
9157
|
+
provider: "speechmatics",
|
|
9158
|
+
direction: "incoming",
|
|
9159
|
+
timestamp: Date.now(),
|
|
9160
|
+
payload: rawPayload,
|
|
9161
|
+
messageType: "parse_error"
|
|
9162
|
+
});
|
|
9163
|
+
}
|
|
9164
|
+
callbacks?.onError?.({
|
|
9165
|
+
code: "PARSE_ERROR",
|
|
9166
|
+
message: "Failed to parse WebSocket message",
|
|
9167
|
+
details: error
|
|
9168
|
+
});
|
|
9169
|
+
}
|
|
9170
|
+
});
|
|
9171
|
+
ws.on("error", (error) => {
|
|
9172
|
+
callbacks?.onError?.({
|
|
9173
|
+
code: "WEBSOCKET_ERROR",
|
|
9174
|
+
message: error.message,
|
|
9175
|
+
details: error
|
|
9176
|
+
});
|
|
9177
|
+
});
|
|
9178
|
+
ws.on("close", (code, reason) => {
|
|
9179
|
+
sessionStatus = "closed";
|
|
9180
|
+
callbacks?.onClose?.(code, reason.toString());
|
|
9181
|
+
});
|
|
9182
|
+
await sessionReady;
|
|
9183
|
+
sessionStatus = "open";
|
|
9184
|
+
callbacks?.onOpen?.();
|
|
9185
|
+
return {
|
|
9186
|
+
id: sessionId,
|
|
9187
|
+
provider: this.name,
|
|
9188
|
+
createdAt: /* @__PURE__ */ new Date(),
|
|
9189
|
+
getStatus: () => sessionStatus,
|
|
9190
|
+
sendAudio: async (chunk) => {
|
|
9191
|
+
if (sessionStatus !== "open") {
|
|
9192
|
+
throw new Error(`Cannot send audio: session is ${sessionStatus}`);
|
|
9193
|
+
}
|
|
9194
|
+
if (ws.readyState !== WebSocket6.OPEN) {
|
|
9195
|
+
throw new Error("WebSocket is not open");
|
|
9196
|
+
}
|
|
9197
|
+
if (callbacks?.onRawMessage) {
|
|
9198
|
+
const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
|
|
9199
|
+
chunk.data.byteOffset,
|
|
9200
|
+
chunk.data.byteOffset + chunk.data.byteLength
|
|
9201
|
+
);
|
|
9202
|
+
callbacks.onRawMessage({
|
|
9203
|
+
provider: this.name,
|
|
9204
|
+
direction: "outgoing",
|
|
9205
|
+
timestamp: Date.now(),
|
|
9206
|
+
payload: audioPayload,
|
|
9207
|
+
messageType: "audio"
|
|
9208
|
+
});
|
|
9209
|
+
}
|
|
9210
|
+
ws.send(chunk.data);
|
|
9211
|
+
seqNo++;
|
|
9212
|
+
if (chunk.isLast) {
|
|
9213
|
+
const endMsg = JSON.stringify({
|
|
9214
|
+
message: "EndOfStream",
|
|
9215
|
+
last_seq_no: seqNo
|
|
9216
|
+
});
|
|
9217
|
+
if (callbacks?.onRawMessage) {
|
|
9218
|
+
callbacks.onRawMessage({
|
|
9219
|
+
provider: this.name,
|
|
9220
|
+
direction: "outgoing",
|
|
9221
|
+
timestamp: Date.now(),
|
|
9222
|
+
payload: endMsg,
|
|
9223
|
+
messageType: "EndOfStream"
|
|
9224
|
+
});
|
|
9225
|
+
}
|
|
9226
|
+
ws.send(endMsg);
|
|
9227
|
+
}
|
|
9228
|
+
},
|
|
9229
|
+
close: async () => {
|
|
9230
|
+
if (sessionStatus === "closed" || sessionStatus === "closing") {
|
|
9231
|
+
return;
|
|
9232
|
+
}
|
|
9233
|
+
sessionStatus = "closing";
|
|
9234
|
+
if (ws.readyState === WebSocket6.OPEN) {
|
|
9235
|
+
seqNo++;
|
|
9236
|
+
ws.send(
|
|
9237
|
+
JSON.stringify({
|
|
9238
|
+
message: "EndOfStream",
|
|
9239
|
+
last_seq_no: seqNo
|
|
9240
|
+
})
|
|
9241
|
+
);
|
|
9242
|
+
}
|
|
9243
|
+
return new Promise((resolve) => {
|
|
9244
|
+
const timeout = setTimeout(() => {
|
|
9245
|
+
ws.terminate();
|
|
9246
|
+
sessionStatus = "closed";
|
|
9247
|
+
resolve();
|
|
9248
|
+
}, 5e3);
|
|
9249
|
+
const onMsg = (data) => {
|
|
9250
|
+
try {
|
|
9251
|
+
const msg = JSON.parse(data.toString());
|
|
9252
|
+
if (msg.message === "EndOfTranscript") {
|
|
9253
|
+
ws.removeListener("message", onMsg);
|
|
9254
|
+
clearTimeout(timeout);
|
|
9255
|
+
ws.close();
|
|
9256
|
+
}
|
|
9257
|
+
} catch {
|
|
9258
|
+
}
|
|
9259
|
+
};
|
|
9260
|
+
ws.on("message", onMsg);
|
|
9261
|
+
ws.once("close", () => {
|
|
9262
|
+
clearTimeout(timeout);
|
|
9263
|
+
sessionStatus = "closed";
|
|
9264
|
+
resolve();
|
|
9265
|
+
});
|
|
9266
|
+
});
|
|
9267
|
+
}
|
|
9268
|
+
};
|
|
9269
|
+
}
|
|
9270
|
+
/**
|
|
9271
|
+
* Handle incoming Speechmatics real-time WebSocket messages
|
|
9272
|
+
*/
|
|
9273
|
+
handleStreamingMessage(message, callbacks, utteranceResults) {
|
|
9274
|
+
switch (message.message) {
|
|
9275
|
+
case "RecognitionStarted": {
|
|
9276
|
+
break;
|
|
9277
|
+
}
|
|
9278
|
+
case "AddPartialTranscript": {
|
|
9279
|
+
const results = message.results || [];
|
|
9280
|
+
const text = buildTextFromSpeechmaticsResults(results);
|
|
9281
|
+
if (text) {
|
|
9282
|
+
callbacks?.onTranscript?.({
|
|
9283
|
+
type: "transcript",
|
|
9284
|
+
text,
|
|
9285
|
+
isFinal: false,
|
|
9286
|
+
words: this.extractWordsFromResults(results),
|
|
9287
|
+
data: message
|
|
9288
|
+
});
|
|
9289
|
+
}
|
|
9290
|
+
break;
|
|
9291
|
+
}
|
|
9292
|
+
case "AddTranscript": {
|
|
9293
|
+
const results = message.results || [];
|
|
9294
|
+
const text = buildTextFromSpeechmaticsResults(results);
|
|
9295
|
+
if (utteranceResults) {
|
|
9296
|
+
utteranceResults.push(...results);
|
|
9297
|
+
}
|
|
9298
|
+
if (text) {
|
|
9299
|
+
callbacks?.onTranscript?.({
|
|
9300
|
+
type: "transcript",
|
|
9301
|
+
text,
|
|
9302
|
+
isFinal: true,
|
|
9303
|
+
words: this.extractWordsFromResults(results),
|
|
9304
|
+
data: message
|
|
9305
|
+
});
|
|
9306
|
+
}
|
|
9307
|
+
break;
|
|
9308
|
+
}
|
|
9309
|
+
case "EndOfUtterance": {
|
|
9310
|
+
if (utteranceResults && utteranceResults.length > 0) {
|
|
9311
|
+
const text = buildTextFromSpeechmaticsResults(utteranceResults);
|
|
9312
|
+
const words = this.extractWordsFromResults(utteranceResults);
|
|
9313
|
+
const utterances = buildUtterancesFromWords(words);
|
|
9314
|
+
if (utterances.length > 0) {
|
|
9315
|
+
for (const utt of utterances) {
|
|
9316
|
+
callbacks?.onUtterance?.(utt);
|
|
9317
|
+
}
|
|
9318
|
+
} else if (text) {
|
|
9319
|
+
callbacks?.onUtterance?.({
|
|
9320
|
+
text,
|
|
9321
|
+
start: words.length > 0 ? words[0].start : 0,
|
|
9322
|
+
end: words.length > 0 ? words[words.length - 1].end : 0,
|
|
9323
|
+
words
|
|
9324
|
+
});
|
|
9325
|
+
}
|
|
9326
|
+
utteranceResults.length = 0;
|
|
9327
|
+
}
|
|
9328
|
+
break;
|
|
9329
|
+
}
|
|
9330
|
+
case "AudioAdded": {
|
|
9331
|
+
break;
|
|
9332
|
+
}
|
|
9333
|
+
case "EndOfTranscript": {
|
|
9334
|
+
break;
|
|
9335
|
+
}
|
|
9336
|
+
case "Info":
|
|
9337
|
+
case "Warning": {
|
|
9338
|
+
callbacks?.onMetadata?.(message);
|
|
9339
|
+
break;
|
|
9340
|
+
}
|
|
9341
|
+
case "Error": {
|
|
9342
|
+
const errMsg = message;
|
|
9343
|
+
callbacks?.onError?.({
|
|
9344
|
+
code: errMsg.type || "SPEECHMATICS_ERROR",
|
|
9345
|
+
message: errMsg.reason || "Unknown error",
|
|
9346
|
+
details: message
|
|
9347
|
+
});
|
|
9348
|
+
break;
|
|
9349
|
+
}
|
|
9350
|
+
default: {
|
|
9351
|
+
callbacks?.onMetadata?.(message);
|
|
9352
|
+
break;
|
|
9353
|
+
}
|
|
9354
|
+
}
|
|
9355
|
+
}
|
|
9356
|
+
/**
|
|
9357
|
+
* Extract unified Word[] from Speechmatics recognition results
|
|
9358
|
+
*/
|
|
9359
|
+
extractWordsFromResults(results) {
|
|
9360
|
+
return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
|
|
9361
|
+
word: result.alternatives?.[0]?.content || "",
|
|
9362
|
+
start: result.start_time,
|
|
9363
|
+
end: result.end_time,
|
|
9364
|
+
confidence: result.alternatives?.[0]?.confidence,
|
|
9365
|
+
speaker: result.alternatives?.[0]?.speaker
|
|
9366
|
+
}));
|
|
9367
|
+
}
|
|
8993
9368
|
/**
|
|
8994
9369
|
* Normalize Speechmatics status to unified status
|
|
8995
9370
|
* Uses generated JobDetailsStatus enum values
|
|
@@ -9311,50 +9686,51 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9311
9686
|
const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
|
|
9312
9687
|
const createdAt = /* @__PURE__ */ new Date();
|
|
9313
9688
|
const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
|
|
9314
|
-
const wsUrl =
|
|
9315
|
-
|
|
9316
|
-
const
|
|
9317
|
-
|
|
9318
|
-
|
|
9689
|
+
const wsUrl = `${wsBase}/transcribe-websocket`;
|
|
9690
|
+
const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-v4";
|
|
9691
|
+
const sonioxOpts = options?.sonioxStreaming;
|
|
9692
|
+
const initMessage = {
|
|
9693
|
+
api_key: this.config.apiKey,
|
|
9694
|
+
model: modelId
|
|
9695
|
+
};
|
|
9696
|
+
if (sonioxOpts?.audioFormat) {
|
|
9697
|
+
initMessage.audio_format = sonioxOpts.audioFormat;
|
|
9698
|
+
} else if (options?.encoding) {
|
|
9319
9699
|
const encodingMap = {
|
|
9320
9700
|
linear16: "pcm_s16le",
|
|
9321
9701
|
pcm: "pcm_s16le",
|
|
9322
9702
|
mulaw: "mulaw",
|
|
9323
9703
|
alaw: "alaw"
|
|
9324
9704
|
};
|
|
9325
|
-
|
|
9705
|
+
initMessage.audio_format = encodingMap[options.encoding] || options.encoding;
|
|
9326
9706
|
}
|
|
9327
|
-
if (options?.sampleRate) {
|
|
9328
|
-
|
|
9707
|
+
if (sonioxOpts?.sampleRate || options?.sampleRate) {
|
|
9708
|
+
initMessage.sample_rate = sonioxOpts?.sampleRate || options?.sampleRate;
|
|
9329
9709
|
}
|
|
9330
|
-
if (options?.channels) {
|
|
9331
|
-
|
|
9710
|
+
if (sonioxOpts?.numChannels || options?.channels) {
|
|
9711
|
+
initMessage.num_channels = sonioxOpts?.numChannels || options?.channels;
|
|
9332
9712
|
}
|
|
9333
|
-
const sonioxOpts = options?.sonioxStreaming;
|
|
9334
9713
|
if (sonioxOpts) {
|
|
9335
9714
|
if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
|
|
9336
|
-
|
|
9715
|
+
initMessage.language_hints = sonioxOpts.languageHints;
|
|
9337
9716
|
}
|
|
9338
9717
|
if (sonioxOpts.enableLanguageIdentification) {
|
|
9339
|
-
|
|
9718
|
+
initMessage.enable_language_identification = true;
|
|
9340
9719
|
}
|
|
9341
9720
|
if (sonioxOpts.enableEndpointDetection) {
|
|
9342
|
-
|
|
9721
|
+
initMessage.enable_endpoint_detection = true;
|
|
9343
9722
|
}
|
|
9344
9723
|
if (sonioxOpts.enableSpeakerDiarization) {
|
|
9345
|
-
|
|
9724
|
+
initMessage.enable_speaker_diarization = true;
|
|
9346
9725
|
}
|
|
9347
9726
|
if (sonioxOpts.context) {
|
|
9348
|
-
|
|
9349
|
-
"context",
|
|
9350
|
-
typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
|
|
9351
|
-
);
|
|
9727
|
+
initMessage.context = typeof sonioxOpts.context === "string" ? sonioxOpts.context : sonioxOpts.context;
|
|
9352
9728
|
}
|
|
9353
9729
|
if (sonioxOpts.translation) {
|
|
9354
|
-
|
|
9730
|
+
initMessage.translation = sonioxOpts.translation;
|
|
9355
9731
|
}
|
|
9356
9732
|
if (sonioxOpts.clientReferenceId) {
|
|
9357
|
-
|
|
9733
|
+
initMessage.client_reference_id = sonioxOpts.clientReferenceId;
|
|
9358
9734
|
}
|
|
9359
9735
|
}
|
|
9360
9736
|
if (!sonioxOpts?.languageHints && options?.language) {
|
|
@@ -9363,24 +9739,33 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9363
9739
|
`[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
|
|
9364
9740
|
);
|
|
9365
9741
|
}
|
|
9366
|
-
|
|
9742
|
+
initMessage.language_hints = [options.language];
|
|
9367
9743
|
}
|
|
9368
9744
|
if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
|
|
9369
|
-
|
|
9745
|
+
initMessage.enable_speaker_diarization = true;
|
|
9370
9746
|
}
|
|
9371
9747
|
if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
|
|
9372
|
-
|
|
9373
|
-
}
|
|
9374
|
-
if (options?.interimResults !== false) {
|
|
9748
|
+
initMessage.enable_language_identification = true;
|
|
9375
9749
|
}
|
|
9376
9750
|
let status = "connecting";
|
|
9377
9751
|
let openedAt = null;
|
|
9378
9752
|
let receivedData = false;
|
|
9379
9753
|
const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : __require("ws");
|
|
9380
|
-
const ws = new WebSocketImpl(wsUrl
|
|
9754
|
+
const ws = new WebSocketImpl(wsUrl);
|
|
9381
9755
|
ws.onopen = () => {
|
|
9382
|
-
status = "open";
|
|
9383
9756
|
openedAt = Date.now();
|
|
9757
|
+
const initPayload = JSON.stringify(initMessage);
|
|
9758
|
+
if (callbacks?.onRawMessage) {
|
|
9759
|
+
callbacks.onRawMessage({
|
|
9760
|
+
provider: this.name,
|
|
9761
|
+
direction: "outgoing",
|
|
9762
|
+
timestamp: Date.now(),
|
|
9763
|
+
payload: initPayload,
|
|
9764
|
+
messageType: "init"
|
|
9765
|
+
});
|
|
9766
|
+
}
|
|
9767
|
+
ws.send(initPayload);
|
|
9768
|
+
status = "open";
|
|
9384
9769
|
callbacks?.onOpen?.();
|
|
9385
9770
|
};
|
|
9386
9771
|
ws.onmessage = (event) => {
|
|
@@ -9459,10 +9844,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9459
9844
|
ws.onclose = (event) => {
|
|
9460
9845
|
status = "closed";
|
|
9461
9846
|
const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
|
|
9462
|
-
const
|
|
9463
|
-
if (
|
|
9847
|
+
const isEarlyClose = timeSinceOpen !== null && timeSinceOpen < 5e3 && !receivedData;
|
|
9848
|
+
if (isEarlyClose && event.code === 1e3) {
|
|
9464
9849
|
const errorMessage = [
|
|
9465
|
-
"Soniox closed connection
|
|
9850
|
+
"Soniox closed connection shortly after opening.",
|
|
9466
9851
|
`Current config: region=${this.region}, model=${modelId}`,
|
|
9467
9852
|
"Likely causes:",
|
|
9468
9853
|
" - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
|
|
@@ -36481,7 +36866,7 @@ var AzureCapabilities = {
|
|
|
36481
36866
|
deleteTranscript: true
|
|
36482
36867
|
};
|
|
36483
36868
|
var SpeechmaticsCapabilities = {
|
|
36484
|
-
streaming:
|
|
36869
|
+
streaming: true,
|
|
36485
36870
|
diarization: true,
|
|
36486
36871
|
wordTimestamps: true,
|
|
36487
36872
|
languageDetection: false,
|
|
@@ -187,7 +187,7 @@ declare const AzureCapabilities: {
|
|
|
187
187
|
* Speechmatics provider capabilities
|
|
188
188
|
*/
|
|
189
189
|
declare const SpeechmaticsCapabilities: {
|
|
190
|
-
readonly streaming:
|
|
190
|
+
readonly streaming: true;
|
|
191
191
|
readonly diarization: true;
|
|
192
192
|
readonly wordTimestamps: true;
|
|
193
193
|
readonly languageDetection: false;
|
|
@@ -322,7 +322,7 @@ declare const ProviderCapabilitiesMap: {
|
|
|
322
322
|
readonly deleteTranscript: true;
|
|
323
323
|
};
|
|
324
324
|
readonly speechmatics: {
|
|
325
|
-
readonly streaming:
|
|
325
|
+
readonly streaming: true;
|
|
326
326
|
readonly diarization: true;
|
|
327
327
|
readonly wordTimestamps: true;
|
|
328
328
|
readonly languageDetection: false;
|
|
@@ -519,11 +519,11 @@ type LanguageCode = keyof typeof LanguageLabels;
|
|
|
519
519
|
/**
|
|
520
520
|
* Gladia supported language codes (from OpenAPI spec)
|
|
521
521
|
*/
|
|
522
|
-
declare const GladiaLanguageCodes: ("
|
|
522
|
+
declare const GladiaLanguageCodes: ("af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh")[];
|
|
523
523
|
/**
|
|
524
524
|
* AssemblyAI supported language codes (from OpenAPI spec)
|
|
525
525
|
*/
|
|
526
|
-
declare const AssemblyAILanguageCodes: ("
|
|
526
|
+
declare const AssemblyAILanguageCodes: ("af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "en_au" | "en_uk" | "en_us")[];
|
|
527
527
|
/**
|
|
528
528
|
* Deepgram supported language codes
|
|
529
529
|
* Note: Deepgram accepts BCP-47 tags, these are the most common
|
|
@@ -551,8 +551,8 @@ declare const DeepgramLanguageCodes: readonly ["en", "en-US", "en-GB", "en-AU",
|
|
|
551
551
|
* ```
|
|
552
552
|
*/
|
|
553
553
|
declare const AllLanguageCodes: {
|
|
554
|
-
readonly gladia: ("
|
|
555
|
-
readonly assemblyai: ("
|
|
554
|
+
readonly gladia: ("af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh")[];
|
|
555
|
+
readonly assemblyai: ("af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "en_au" | "en_uk" | "en_us")[];
|
|
556
556
|
readonly deepgram: readonly ["en", "en-US", "en-GB", "en-AU", "en-IN", "es", "es-419", "fr", "fr-CA", "de", "it", "pt", "pt-BR", "nl", "ru", "uk", "pl", "cs", "sk", "hu", "ro", "bg", "hr", "sl", "el", "tr", "fi", "sv", "da", "no", "et", "lv", "lt", "zh", "zh-CN", "zh-TW", "ja", "ko", "th", "vi", "id", "ms", "tl", "hi", "ta", "te", "bn", "ar"];
|
|
557
557
|
readonly "openai-whisper": readonly ["en", "es", "fr", "de", "it", "pt", "nl", "ru", "zh", "ja", "ko", "ar", "hi", "pl", "uk", "cs", "ro", "hu", "el", "tr", "fi", "sv", "da", "no", "th", "vi", "id", "ms", "he", "fa"];
|
|
558
558
|
readonly "azure-stt": readonly ["af-ZA", "am-ET", "ar-AE", "ar-BH", "ar-DZ", "ar-EG", "ar-IL", "ar-IQ", "ar-JO", "ar-KW", "ar-LB", "ar-LY", "ar-MA", "ar-OM", "ar-PS", "ar-QA", "ar-SA", "ar-SY", "ar-TN", "ar-YE", "as-IN", "az-AZ", "be-BY", "bg-BG", "bn-BD", "bn-IN", "bs-BA", "ca-ES", "cs-CZ", "cy-GB", "da-DK", "de-AT", "de-CH", "de-DE", "el-GR", "en-AU", "en-CA", "en-GB", "en-GH", "en-HK", "en-IE", "en-IN", "en-KE", "en-NG", "en-NZ", "en-PH", "en-SG", "en-TZ", "en-US", "en-ZA", "es-AR", "es-BO", "es-CL", "es-CO", "es-CR", "es-CU", "es-DO", "es-EC", "es-ES", "es-GQ", "es-GT", "es-HN", "es-MX", "es-NI", "es-PA", "es-PE", "es-PR", "es-PY", "es-SV", "es-US", "es-UY", "es-VE", "et-EE", "eu-ES", "fa-IR", "fi-FI", "fil-PH", "fr-BE", "fr-CA", "fr-CH", "fr-FR", "ga-IE", "gl-ES", "gu-IN", "he-IL", "hi-IN", "hr-HR", "hu-HU", "hy-AM", "id-ID", "is-IS", "it-CH", "it-IT", "ja-JP", "jv-ID", "ka-GE", "kk-KZ", "km-KH", "kn-IN", "ko-KR", "lo-LA", "lt-LT", "lv-LV", "mi-NZ", "mk-MK", "ml-IN", "mn-MN", "mr-IN", "ms-MY", "mt-MT", "my-MM", "nan-CN", "nb-NO", "ne-NP", "nl-BE", "nl-NL", "or-IN", "pa-IN", "pl-PL", "ps-AF", "pt-BR", "pt-PT", "ro-RO", "ru-RU", "si-LK", "sk-SK", "sl-SI", "so-SO", "sq-AL", "sr-ME", "sr-RS", "sr-XK", "su-ID", "sv-SE", "sw-KE", "sw-TZ", "ta-IN", "ta-LK", "ta-MY", "ta-SG", "te-IN", "th-TH", "tr-TR", "uk-UA", "ur-IN", "ur-PK", "uz-UZ", "vi-VN", "wuu-CN", "yue-CN", "zh-CN", "zh-HK", "zh-SG", "zh-TW", "zu-ZA"];
|