voice-router-dev 0.8.7 → 0.8.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/dist/constants.d.mts +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/{field-configs-2c1-pid1.d.mts → field-configs-CSOt3yc9.d.mts} +6194 -6194
- package/dist/{field-configs-2c1-pid1.d.ts → field-configs-CSOt3yc9.d.ts} +6194 -6194
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/index.d.mts +763 -716
- package/dist/index.d.ts +763 -716
- package/dist/index.js +427 -34
- package/dist/index.mjs +427 -34
- package/dist/{provider-metadata-MDUUEuqF.d.mts → provider-metadata-BJ29OPW1.d.mts} +6 -6
- package/dist/{provider-metadata-_gUWlRXS.d.ts → provider-metadata-D1d-9cng.d.ts} +6 -6
- package/dist/provider-metadata.d.mts +1 -1
- package/dist/provider-metadata.d.ts +1 -1
- package/dist/provider-metadata.js +1 -1
- package/dist/provider-metadata.mjs +1 -1
- package/dist/{speechToTextChunkResponseModel-o8_dfC4c.d.ts → speechToTextChunkResponseModel-B4kVoFc3.d.ts} +97 -6
- package/dist/{speechToTextChunkResponseModel-BYhlHNqP.d.mts → speechToTextChunkResponseModel-DmajV4F-.d.mts} +97 -6
- package/dist/webhooks.d.mts +2 -2
- package/dist/webhooks.d.ts +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -8920,6 +8920,7 @@ function createOpenAIWhisperAdapter(config) {
|
|
|
8920
8920
|
|
|
8921
8921
|
// src/adapters/speechmatics-adapter.ts
|
|
8922
8922
|
var import_axios8 = __toESM(require("axios"));
|
|
8923
|
+
var import_ws5 = __toESM(require("ws"));
|
|
8923
8924
|
|
|
8924
8925
|
// src/generated/speechmatics/schema/notificationConfigContentsItem.ts
|
|
8925
8926
|
var NotificationConfigContentsItem = {
|
|
@@ -8969,8 +8970,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8969
8970
|
super(...arguments);
|
|
8970
8971
|
this.name = "speechmatics";
|
|
8971
8972
|
this.capabilities = {
|
|
8972
|
-
streaming:
|
|
8973
|
-
// Batch only (streaming available via separate WebSocket API)
|
|
8973
|
+
streaming: true,
|
|
8974
8974
|
diarization: true,
|
|
8975
8975
|
wordTimestamps: true,
|
|
8976
8976
|
languageDetection: false,
|
|
@@ -9219,6 +9219,389 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
9219
9219
|
throw error;
|
|
9220
9220
|
}
|
|
9221
9221
|
}
|
|
9222
|
+
/**
|
|
9223
|
+
* Build WebSocket URL for real-time streaming
|
|
9224
|
+
*
|
|
9225
|
+
* Note: Real-time API uses a different host from the batch API:
|
|
9226
|
+
* - Batch: {region}.asr.api.speechmatics.com
|
|
9227
|
+
* - Real-time: {region}.rt.speechmatics.com
|
|
9228
|
+
*
|
|
9229
|
+
* @param region - Regional endpoint identifier
|
|
9230
|
+
* @returns WebSocket URL for real-time API
|
|
9231
|
+
*/
|
|
9232
|
+
getRegionalWsUrl(region) {
|
|
9233
|
+
if (this.config?.wsBaseUrl) {
|
|
9234
|
+
return this.config.wsBaseUrl;
|
|
9235
|
+
}
|
|
9236
|
+
const rtRegionMap = {
|
|
9237
|
+
eu1: "eu",
|
|
9238
|
+
eu2: "eu",
|
|
9239
|
+
us1: "us",
|
|
9240
|
+
us2: "us",
|
|
9241
|
+
au1: "eu"
|
|
9242
|
+
// No AU RT endpoint — fall back to EU
|
|
9243
|
+
};
|
|
9244
|
+
const rtPrefix = rtRegionMap[region || ""] || "eu";
|
|
9245
|
+
return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
|
|
9246
|
+
}
|
|
9247
|
+
/**
|
|
9248
|
+
* Stream audio for real-time transcription via WebSocket
|
|
9249
|
+
*
|
|
9250
|
+
* Connects to Speechmatics' real-time API and sends audio chunks
|
|
9251
|
+
* for transcription with results returned via callbacks.
|
|
9252
|
+
*
|
|
9253
|
+
* @param options - Streaming configuration options
|
|
9254
|
+
* @param callbacks - Event callbacks for transcription results
|
|
9255
|
+
* @returns Promise that resolves with a StreamingSession
|
|
9256
|
+
*
|
|
9257
|
+
* @example Basic streaming
|
|
9258
|
+
* ```typescript
|
|
9259
|
+
* const session = await adapter.transcribeStream({
|
|
9260
|
+
* language: 'en',
|
|
9261
|
+
* speechmaticsStreaming: {
|
|
9262
|
+
* enablePartials: true,
|
|
9263
|
+
* operatingPoint: 'enhanced'
|
|
9264
|
+
* }
|
|
9265
|
+
* }, {
|
|
9266
|
+
* onTranscript: (event) => console.log(event.text),
|
|
9267
|
+
* onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
|
|
9268
|
+
* onError: (error) => console.error(error)
|
|
9269
|
+
* });
|
|
9270
|
+
*
|
|
9271
|
+
* await session.sendAudio({ data: audioBuffer });
|
|
9272
|
+
* await session.close();
|
|
9273
|
+
* ```
|
|
9274
|
+
*/
|
|
9275
|
+
async transcribeStream(options, callbacks) {
|
|
9276
|
+
this.validateConfig();
|
|
9277
|
+
const smOpts = options?.speechmaticsStreaming || {};
|
|
9278
|
+
const region = smOpts.region || this.config?.region;
|
|
9279
|
+
const wsUrl = this.getRegionalWsUrl(region);
|
|
9280
|
+
const ws = new import_ws5.default(wsUrl, {
|
|
9281
|
+
headers: {
|
|
9282
|
+
Authorization: `Bearer ${this.config.apiKey}`
|
|
9283
|
+
}
|
|
9284
|
+
});
|
|
9285
|
+
let sessionStatus = "connecting";
|
|
9286
|
+
const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
|
|
9287
|
+
let seqNo = 0;
|
|
9288
|
+
let utteranceResults = [];
|
|
9289
|
+
const sessionReady = new Promise((resolve, reject) => {
|
|
9290
|
+
const timeout = setTimeout(() => {
|
|
9291
|
+
reject(new Error("WebSocket connection timeout"));
|
|
9292
|
+
}, 1e4);
|
|
9293
|
+
let wsOpen = false;
|
|
9294
|
+
ws.once("error", (error) => {
|
|
9295
|
+
clearTimeout(timeout);
|
|
9296
|
+
reject(error);
|
|
9297
|
+
});
|
|
9298
|
+
ws.once("open", () => {
|
|
9299
|
+
wsOpen = true;
|
|
9300
|
+
const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
|
|
9301
|
+
const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
|
|
9302
|
+
const startMsg = {
|
|
9303
|
+
message: "StartRecognition",
|
|
9304
|
+
audio_format: {
|
|
9305
|
+
type: "raw",
|
|
9306
|
+
encoding,
|
|
9307
|
+
sample_rate: sampleRate
|
|
9308
|
+
},
|
|
9309
|
+
transcription_config: {
|
|
9310
|
+
language: smOpts.language || options?.language || "en",
|
|
9311
|
+
enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
|
|
9312
|
+
}
|
|
9313
|
+
};
|
|
9314
|
+
const txConfig = startMsg.transcription_config;
|
|
9315
|
+
if (smOpts.domain) txConfig.domain = smOpts.domain;
|
|
9316
|
+
if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
|
|
9317
|
+
if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
|
|
9318
|
+
if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
|
|
9319
|
+
if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
|
|
9320
|
+
if (smOpts.diarization === "speaker" || options?.diarization) {
|
|
9321
|
+
txConfig.diarization = "speaker";
|
|
9322
|
+
if (smOpts.maxSpeakers) {
|
|
9323
|
+
txConfig.speaker_diarization_config = {
|
|
9324
|
+
max_speakers: smOpts.maxSpeakers
|
|
9325
|
+
};
|
|
9326
|
+
} else if (options?.speakersExpected) {
|
|
9327
|
+
txConfig.speaker_diarization_config = {
|
|
9328
|
+
max_speakers: options.speakersExpected
|
|
9329
|
+
};
|
|
9330
|
+
}
|
|
9331
|
+
}
|
|
9332
|
+
if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
|
|
9333
|
+
txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
|
|
9334
|
+
content: word
|
|
9335
|
+
}));
|
|
9336
|
+
} else if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9337
|
+
txConfig.additional_vocab = options.customVocabulary.map((word) => ({
|
|
9338
|
+
content: word
|
|
9339
|
+
}));
|
|
9340
|
+
}
|
|
9341
|
+
if (smOpts.conversationConfig) {
|
|
9342
|
+
txConfig.conversation_config = {
|
|
9343
|
+
end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
|
|
9344
|
+
};
|
|
9345
|
+
}
|
|
9346
|
+
const startPayload = JSON.stringify(startMsg);
|
|
9347
|
+
if (callbacks?.onRawMessage) {
|
|
9348
|
+
callbacks.onRawMessage({
|
|
9349
|
+
provider: "speechmatics",
|
|
9350
|
+
direction: "outgoing",
|
|
9351
|
+
timestamp: Date.now(),
|
|
9352
|
+
payload: startPayload,
|
|
9353
|
+
messageType: "StartRecognition"
|
|
9354
|
+
});
|
|
9355
|
+
}
|
|
9356
|
+
ws.send(startPayload);
|
|
9357
|
+
});
|
|
9358
|
+
const onMessage = (data) => {
|
|
9359
|
+
const rawPayload = data.toString();
|
|
9360
|
+
try {
|
|
9361
|
+
const msg = JSON.parse(rawPayload);
|
|
9362
|
+
if (msg.message === "RecognitionStarted") {
|
|
9363
|
+
clearTimeout(timeout);
|
|
9364
|
+
ws.removeListener("message", onMessage);
|
|
9365
|
+
ws.emit("message", data);
|
|
9366
|
+
resolve();
|
|
9367
|
+
} else if (msg.message === "Error") {
|
|
9368
|
+
clearTimeout(timeout);
|
|
9369
|
+
ws.removeListener("message", onMessage);
|
|
9370
|
+
reject(new Error(msg.reason || "Recognition failed to start"));
|
|
9371
|
+
}
|
|
9372
|
+
} catch {
|
|
9373
|
+
}
|
|
9374
|
+
};
|
|
9375
|
+
ws.on("message", onMessage);
|
|
9376
|
+
});
|
|
9377
|
+
ws.on("message", (data) => {
|
|
9378
|
+
const rawPayload = data.toString();
|
|
9379
|
+
try {
|
|
9380
|
+
const message = JSON.parse(rawPayload);
|
|
9381
|
+
if (callbacks?.onRawMessage) {
|
|
9382
|
+
callbacks.onRawMessage({
|
|
9383
|
+
provider: "speechmatics",
|
|
9384
|
+
direction: "incoming",
|
|
9385
|
+
timestamp: Date.now(),
|
|
9386
|
+
payload: rawPayload,
|
|
9387
|
+
messageType: message.message
|
|
9388
|
+
});
|
|
9389
|
+
}
|
|
9390
|
+
this.handleStreamingMessage(message, callbacks, utteranceResults);
|
|
9391
|
+
} catch (error) {
|
|
9392
|
+
if (callbacks?.onRawMessage) {
|
|
9393
|
+
callbacks.onRawMessage({
|
|
9394
|
+
provider: "speechmatics",
|
|
9395
|
+
direction: "incoming",
|
|
9396
|
+
timestamp: Date.now(),
|
|
9397
|
+
payload: rawPayload,
|
|
9398
|
+
messageType: "parse_error"
|
|
9399
|
+
});
|
|
9400
|
+
}
|
|
9401
|
+
callbacks?.onError?.({
|
|
9402
|
+
code: "PARSE_ERROR",
|
|
9403
|
+
message: "Failed to parse WebSocket message",
|
|
9404
|
+
details: error
|
|
9405
|
+
});
|
|
9406
|
+
}
|
|
9407
|
+
});
|
|
9408
|
+
ws.on("error", (error) => {
|
|
9409
|
+
callbacks?.onError?.({
|
|
9410
|
+
code: "WEBSOCKET_ERROR",
|
|
9411
|
+
message: error.message,
|
|
9412
|
+
details: error
|
|
9413
|
+
});
|
|
9414
|
+
});
|
|
9415
|
+
ws.on("close", (code, reason) => {
|
|
9416
|
+
sessionStatus = "closed";
|
|
9417
|
+
callbacks?.onClose?.(code, reason.toString());
|
|
9418
|
+
});
|
|
9419
|
+
await sessionReady;
|
|
9420
|
+
sessionStatus = "open";
|
|
9421
|
+
callbacks?.onOpen?.();
|
|
9422
|
+
return {
|
|
9423
|
+
id: sessionId,
|
|
9424
|
+
provider: this.name,
|
|
9425
|
+
createdAt: /* @__PURE__ */ new Date(),
|
|
9426
|
+
getStatus: () => sessionStatus,
|
|
9427
|
+
sendAudio: async (chunk) => {
|
|
9428
|
+
if (sessionStatus !== "open") {
|
|
9429
|
+
throw new Error(`Cannot send audio: session is ${sessionStatus}`);
|
|
9430
|
+
}
|
|
9431
|
+
if (ws.readyState !== import_ws5.default.OPEN) {
|
|
9432
|
+
throw new Error("WebSocket is not open");
|
|
9433
|
+
}
|
|
9434
|
+
if (callbacks?.onRawMessage) {
|
|
9435
|
+
const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
|
|
9436
|
+
chunk.data.byteOffset,
|
|
9437
|
+
chunk.data.byteOffset + chunk.data.byteLength
|
|
9438
|
+
);
|
|
9439
|
+
callbacks.onRawMessage({
|
|
9440
|
+
provider: this.name,
|
|
9441
|
+
direction: "outgoing",
|
|
9442
|
+
timestamp: Date.now(),
|
|
9443
|
+
payload: audioPayload,
|
|
9444
|
+
messageType: "audio"
|
|
9445
|
+
});
|
|
9446
|
+
}
|
|
9447
|
+
ws.send(chunk.data);
|
|
9448
|
+
seqNo++;
|
|
9449
|
+
if (chunk.isLast) {
|
|
9450
|
+
const endMsg = JSON.stringify({
|
|
9451
|
+
message: "EndOfStream",
|
|
9452
|
+
last_seq_no: seqNo
|
|
9453
|
+
});
|
|
9454
|
+
if (callbacks?.onRawMessage) {
|
|
9455
|
+
callbacks.onRawMessage({
|
|
9456
|
+
provider: this.name,
|
|
9457
|
+
direction: "outgoing",
|
|
9458
|
+
timestamp: Date.now(),
|
|
9459
|
+
payload: endMsg,
|
|
9460
|
+
messageType: "EndOfStream"
|
|
9461
|
+
});
|
|
9462
|
+
}
|
|
9463
|
+
ws.send(endMsg);
|
|
9464
|
+
}
|
|
9465
|
+
},
|
|
9466
|
+
close: async () => {
|
|
9467
|
+
if (sessionStatus === "closed" || sessionStatus === "closing") {
|
|
9468
|
+
return;
|
|
9469
|
+
}
|
|
9470
|
+
sessionStatus = "closing";
|
|
9471
|
+
if (ws.readyState === import_ws5.default.OPEN) {
|
|
9472
|
+
seqNo++;
|
|
9473
|
+
ws.send(
|
|
9474
|
+
JSON.stringify({
|
|
9475
|
+
message: "EndOfStream",
|
|
9476
|
+
last_seq_no: seqNo
|
|
9477
|
+
})
|
|
9478
|
+
);
|
|
9479
|
+
}
|
|
9480
|
+
return new Promise((resolve) => {
|
|
9481
|
+
const timeout = setTimeout(() => {
|
|
9482
|
+
ws.terminate();
|
|
9483
|
+
sessionStatus = "closed";
|
|
9484
|
+
resolve();
|
|
9485
|
+
}, 5e3);
|
|
9486
|
+
const onMsg = (data) => {
|
|
9487
|
+
try {
|
|
9488
|
+
const msg = JSON.parse(data.toString());
|
|
9489
|
+
if (msg.message === "EndOfTranscript") {
|
|
9490
|
+
ws.removeListener("message", onMsg);
|
|
9491
|
+
clearTimeout(timeout);
|
|
9492
|
+
ws.close();
|
|
9493
|
+
}
|
|
9494
|
+
} catch {
|
|
9495
|
+
}
|
|
9496
|
+
};
|
|
9497
|
+
ws.on("message", onMsg);
|
|
9498
|
+
ws.once("close", () => {
|
|
9499
|
+
clearTimeout(timeout);
|
|
9500
|
+
sessionStatus = "closed";
|
|
9501
|
+
resolve();
|
|
9502
|
+
});
|
|
9503
|
+
});
|
|
9504
|
+
}
|
|
9505
|
+
};
|
|
9506
|
+
}
|
|
9507
|
+
/**
|
|
9508
|
+
* Handle incoming Speechmatics real-time WebSocket messages
|
|
9509
|
+
*/
|
|
9510
|
+
handleStreamingMessage(message, callbacks, utteranceResults) {
|
|
9511
|
+
switch (message.message) {
|
|
9512
|
+
case "RecognitionStarted": {
|
|
9513
|
+
break;
|
|
9514
|
+
}
|
|
9515
|
+
case "AddPartialTranscript": {
|
|
9516
|
+
const results = message.results || [];
|
|
9517
|
+
const text = buildTextFromSpeechmaticsResults(results);
|
|
9518
|
+
if (text) {
|
|
9519
|
+
callbacks?.onTranscript?.({
|
|
9520
|
+
type: "transcript",
|
|
9521
|
+
text,
|
|
9522
|
+
isFinal: false,
|
|
9523
|
+
words: this.extractWordsFromResults(results),
|
|
9524
|
+
data: message
|
|
9525
|
+
});
|
|
9526
|
+
}
|
|
9527
|
+
break;
|
|
9528
|
+
}
|
|
9529
|
+
case "AddTranscript": {
|
|
9530
|
+
const results = message.results || [];
|
|
9531
|
+
const text = buildTextFromSpeechmaticsResults(results);
|
|
9532
|
+
if (utteranceResults) {
|
|
9533
|
+
utteranceResults.push(...results);
|
|
9534
|
+
}
|
|
9535
|
+
if (text) {
|
|
9536
|
+
callbacks?.onTranscript?.({
|
|
9537
|
+
type: "transcript",
|
|
9538
|
+
text,
|
|
9539
|
+
isFinal: true,
|
|
9540
|
+
words: this.extractWordsFromResults(results),
|
|
9541
|
+
data: message
|
|
9542
|
+
});
|
|
9543
|
+
}
|
|
9544
|
+
break;
|
|
9545
|
+
}
|
|
9546
|
+
case "EndOfUtterance": {
|
|
9547
|
+
if (utteranceResults && utteranceResults.length > 0) {
|
|
9548
|
+
const text = buildTextFromSpeechmaticsResults(utteranceResults);
|
|
9549
|
+
const words = this.extractWordsFromResults(utteranceResults);
|
|
9550
|
+
const utterances = buildUtterancesFromWords(words);
|
|
9551
|
+
if (utterances.length > 0) {
|
|
9552
|
+
for (const utt of utterances) {
|
|
9553
|
+
callbacks?.onUtterance?.(utt);
|
|
9554
|
+
}
|
|
9555
|
+
} else if (text) {
|
|
9556
|
+
callbacks?.onUtterance?.({
|
|
9557
|
+
text,
|
|
9558
|
+
start: words.length > 0 ? words[0].start : 0,
|
|
9559
|
+
end: words.length > 0 ? words[words.length - 1].end : 0,
|
|
9560
|
+
words
|
|
9561
|
+
});
|
|
9562
|
+
}
|
|
9563
|
+
utteranceResults.length = 0;
|
|
9564
|
+
}
|
|
9565
|
+
break;
|
|
9566
|
+
}
|
|
9567
|
+
case "AudioAdded": {
|
|
9568
|
+
break;
|
|
9569
|
+
}
|
|
9570
|
+
case "EndOfTranscript": {
|
|
9571
|
+
break;
|
|
9572
|
+
}
|
|
9573
|
+
case "Info":
|
|
9574
|
+
case "Warning": {
|
|
9575
|
+
callbacks?.onMetadata?.(message);
|
|
9576
|
+
break;
|
|
9577
|
+
}
|
|
9578
|
+
case "Error": {
|
|
9579
|
+
const errMsg = message;
|
|
9580
|
+
callbacks?.onError?.({
|
|
9581
|
+
code: errMsg.type || "SPEECHMATICS_ERROR",
|
|
9582
|
+
message: errMsg.reason || "Unknown error",
|
|
9583
|
+
details: message
|
|
9584
|
+
});
|
|
9585
|
+
break;
|
|
9586
|
+
}
|
|
9587
|
+
default: {
|
|
9588
|
+
callbacks?.onMetadata?.(message);
|
|
9589
|
+
break;
|
|
9590
|
+
}
|
|
9591
|
+
}
|
|
9592
|
+
}
|
|
9593
|
+
/**
|
|
9594
|
+
* Extract unified Word[] from Speechmatics recognition results
|
|
9595
|
+
*/
|
|
9596
|
+
extractWordsFromResults(results) {
|
|
9597
|
+
return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
|
|
9598
|
+
word: result.alternatives?.[0]?.content || "",
|
|
9599
|
+
start: result.start_time,
|
|
9600
|
+
end: result.end_time,
|
|
9601
|
+
confidence: result.alternatives?.[0]?.confidence,
|
|
9602
|
+
speaker: result.alternatives?.[0]?.speaker
|
|
9603
|
+
}));
|
|
9604
|
+
}
|
|
9222
9605
|
/**
|
|
9223
9606
|
* Normalize Speechmatics status to unified status
|
|
9224
9607
|
* Uses generated JobDetailsStatus enum values
|
|
@@ -9540,50 +9923,51 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9540
9923
|
const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
|
|
9541
9924
|
const createdAt = /* @__PURE__ */ new Date();
|
|
9542
9925
|
const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
|
|
9543
|
-
const wsUrl =
|
|
9544
|
-
|
|
9545
|
-
const
|
|
9546
|
-
|
|
9547
|
-
|
|
9926
|
+
const wsUrl = `${wsBase}/transcribe-websocket`;
|
|
9927
|
+
const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-v4";
|
|
9928
|
+
const sonioxOpts = options?.sonioxStreaming;
|
|
9929
|
+
const initMessage = {
|
|
9930
|
+
api_key: this.config.apiKey,
|
|
9931
|
+
model: modelId
|
|
9932
|
+
};
|
|
9933
|
+
if (sonioxOpts?.audioFormat) {
|
|
9934
|
+
initMessage.audio_format = sonioxOpts.audioFormat;
|
|
9935
|
+
} else if (options?.encoding) {
|
|
9548
9936
|
const encodingMap = {
|
|
9549
9937
|
linear16: "pcm_s16le",
|
|
9550
9938
|
pcm: "pcm_s16le",
|
|
9551
9939
|
mulaw: "mulaw",
|
|
9552
9940
|
alaw: "alaw"
|
|
9553
9941
|
};
|
|
9554
|
-
|
|
9942
|
+
initMessage.audio_format = encodingMap[options.encoding] || options.encoding;
|
|
9555
9943
|
}
|
|
9556
|
-
if (options?.sampleRate) {
|
|
9557
|
-
|
|
9944
|
+
if (sonioxOpts?.sampleRate || options?.sampleRate) {
|
|
9945
|
+
initMessage.sample_rate = sonioxOpts?.sampleRate || options?.sampleRate;
|
|
9558
9946
|
}
|
|
9559
|
-
if (options?.channels) {
|
|
9560
|
-
|
|
9947
|
+
if (sonioxOpts?.numChannels || options?.channels) {
|
|
9948
|
+
initMessage.num_channels = sonioxOpts?.numChannels || options?.channels;
|
|
9561
9949
|
}
|
|
9562
|
-
const sonioxOpts = options?.sonioxStreaming;
|
|
9563
9950
|
if (sonioxOpts) {
|
|
9564
9951
|
if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
|
|
9565
|
-
|
|
9952
|
+
initMessage.language_hints = sonioxOpts.languageHints;
|
|
9566
9953
|
}
|
|
9567
9954
|
if (sonioxOpts.enableLanguageIdentification) {
|
|
9568
|
-
|
|
9955
|
+
initMessage.enable_language_identification = true;
|
|
9569
9956
|
}
|
|
9570
9957
|
if (sonioxOpts.enableEndpointDetection) {
|
|
9571
|
-
|
|
9958
|
+
initMessage.enable_endpoint_detection = true;
|
|
9572
9959
|
}
|
|
9573
9960
|
if (sonioxOpts.enableSpeakerDiarization) {
|
|
9574
|
-
|
|
9961
|
+
initMessage.enable_speaker_diarization = true;
|
|
9575
9962
|
}
|
|
9576
9963
|
if (sonioxOpts.context) {
|
|
9577
|
-
|
|
9578
|
-
"context",
|
|
9579
|
-
typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
|
|
9580
|
-
);
|
|
9964
|
+
initMessage.context = typeof sonioxOpts.context === "string" ? sonioxOpts.context : sonioxOpts.context;
|
|
9581
9965
|
}
|
|
9582
9966
|
if (sonioxOpts.translation) {
|
|
9583
|
-
|
|
9967
|
+
initMessage.translation = sonioxOpts.translation;
|
|
9584
9968
|
}
|
|
9585
9969
|
if (sonioxOpts.clientReferenceId) {
|
|
9586
|
-
|
|
9970
|
+
initMessage.client_reference_id = sonioxOpts.clientReferenceId;
|
|
9587
9971
|
}
|
|
9588
9972
|
}
|
|
9589
9973
|
if (!sonioxOpts?.languageHints && options?.language) {
|
|
@@ -9592,24 +9976,33 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9592
9976
|
`[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
|
|
9593
9977
|
);
|
|
9594
9978
|
}
|
|
9595
|
-
|
|
9979
|
+
initMessage.language_hints = [options.language];
|
|
9596
9980
|
}
|
|
9597
9981
|
if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
|
|
9598
|
-
|
|
9982
|
+
initMessage.enable_speaker_diarization = true;
|
|
9599
9983
|
}
|
|
9600
9984
|
if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
|
|
9601
|
-
|
|
9602
|
-
}
|
|
9603
|
-
if (options?.interimResults !== false) {
|
|
9985
|
+
initMessage.enable_language_identification = true;
|
|
9604
9986
|
}
|
|
9605
9987
|
let status = "connecting";
|
|
9606
9988
|
let openedAt = null;
|
|
9607
9989
|
let receivedData = false;
|
|
9608
9990
|
const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : require("ws");
|
|
9609
|
-
const ws = new WebSocketImpl(wsUrl
|
|
9991
|
+
const ws = new WebSocketImpl(wsUrl);
|
|
9610
9992
|
ws.onopen = () => {
|
|
9611
|
-
status = "open";
|
|
9612
9993
|
openedAt = Date.now();
|
|
9994
|
+
const initPayload = JSON.stringify(initMessage);
|
|
9995
|
+
if (callbacks?.onRawMessage) {
|
|
9996
|
+
callbacks.onRawMessage({
|
|
9997
|
+
provider: this.name,
|
|
9998
|
+
direction: "outgoing",
|
|
9999
|
+
timestamp: Date.now(),
|
|
10000
|
+
payload: initPayload,
|
|
10001
|
+
messageType: "init"
|
|
10002
|
+
});
|
|
10003
|
+
}
|
|
10004
|
+
ws.send(initPayload);
|
|
10005
|
+
status = "open";
|
|
9613
10006
|
callbacks?.onOpen?.();
|
|
9614
10007
|
};
|
|
9615
10008
|
ws.onmessage = (event) => {
|
|
@@ -9688,10 +10081,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9688
10081
|
ws.onclose = (event) => {
|
|
9689
10082
|
status = "closed";
|
|
9690
10083
|
const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
|
|
9691
|
-
const
|
|
9692
|
-
if (
|
|
10084
|
+
const isEarlyClose = timeSinceOpen !== null && timeSinceOpen < 5e3 && !receivedData;
|
|
10085
|
+
if (isEarlyClose && event.code === 1e3) {
|
|
9693
10086
|
const errorMessage = [
|
|
9694
|
-
"Soniox closed connection
|
|
10087
|
+
"Soniox closed connection shortly after opening.",
|
|
9695
10088
|
`Current config: region=${this.region}, model=${modelId}`,
|
|
9696
10089
|
"Likely causes:",
|
|
9697
10090
|
" - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
|
|
@@ -36710,7 +37103,7 @@ var AzureCapabilities = {
|
|
|
36710
37103
|
deleteTranscript: true
|
|
36711
37104
|
};
|
|
36712
37105
|
var SpeechmaticsCapabilities = {
|
|
36713
|
-
streaming:
|
|
37106
|
+
streaming: true,
|
|
36714
37107
|
diarization: true,
|
|
36715
37108
|
wordTimestamps: true,
|
|
36716
37109
|
languageDetection: false,
|