voice-router-dev 0.8.7 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/dist/constants.d.mts +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/{field-configs-2c1-pid1.d.mts → field-configs-BtR4uR2N.d.mts} +1168 -1168
- package/dist/{field-configs-2c1-pid1.d.ts → field-configs-BtR4uR2N.d.ts} +1168 -1168
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/index.d.mts +557 -510
- package/dist/index.d.ts +557 -510
- package/dist/index.js +419 -34
- package/dist/index.mjs +419 -34
- package/dist/{provider-metadata-MDUUEuqF.d.mts → provider-metadata-BJ29OPW1.d.mts} +6 -6
- package/dist/{provider-metadata-_gUWlRXS.d.ts → provider-metadata-D1d-9cng.d.ts} +6 -6
- package/dist/provider-metadata.d.mts +1 -1
- package/dist/provider-metadata.d.ts +1 -1
- package/dist/provider-metadata.js +1 -1
- package/dist/provider-metadata.mjs +1 -1
- package/dist/{speechToTextChunkResponseModel-o8_dfC4c.d.ts → speechToTextChunkResponseModel-B4kVoFc3.d.ts} +97 -6
- package/dist/{speechToTextChunkResponseModel-BYhlHNqP.d.mts → speechToTextChunkResponseModel-DmajV4F-.d.mts} +97 -6
- package/dist/webhooks.d.mts +2 -2
- package/dist/webhooks.d.ts +2 -2
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -8920,6 +8920,7 @@ function createOpenAIWhisperAdapter(config) {
|
|
|
8920
8920
|
|
|
8921
8921
|
// src/adapters/speechmatics-adapter.ts
|
|
8922
8922
|
var import_axios8 = __toESM(require("axios"));
|
|
8923
|
+
var import_ws5 = __toESM(require("ws"));
|
|
8923
8924
|
|
|
8924
8925
|
// src/generated/speechmatics/schema/notificationConfigContentsItem.ts
|
|
8925
8926
|
var NotificationConfigContentsItem = {
|
|
@@ -8969,8 +8970,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8969
8970
|
super(...arguments);
|
|
8970
8971
|
this.name = "speechmatics";
|
|
8971
8972
|
this.capabilities = {
|
|
8972
|
-
streaming:
|
|
8973
|
-
// Batch only (streaming available via separate WebSocket API)
|
|
8973
|
+
streaming: true,
|
|
8974
8974
|
diarization: true,
|
|
8975
8975
|
wordTimestamps: true,
|
|
8976
8976
|
languageDetection: false,
|
|
@@ -9219,6 +9219,381 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
9219
9219
|
throw error;
|
|
9220
9220
|
}
|
|
9221
9221
|
}
|
|
9222
|
+
/**
|
|
9223
|
+
* Build WebSocket URL for real-time streaming
|
|
9224
|
+
*
|
|
9225
|
+
* Note: Real-time API uses a different host from the batch API:
|
|
9226
|
+
* - Batch: {region}.asr.api.speechmatics.com
|
|
9227
|
+
* - Real-time: {region}.rt.speechmatics.com
|
|
9228
|
+
*
|
|
9229
|
+
* @param region - Regional endpoint identifier
|
|
9230
|
+
* @returns WebSocket URL for real-time API
|
|
9231
|
+
*/
|
|
9232
|
+
getRegionalWsUrl(region) {
|
|
9233
|
+
if (this.config?.wsBaseUrl) {
|
|
9234
|
+
return this.config.wsBaseUrl;
|
|
9235
|
+
}
|
|
9236
|
+
const regionPrefix = region || "eu1";
|
|
9237
|
+
return `wss://${regionPrefix}.rt.speechmatics.com/v2`;
|
|
9238
|
+
}
|
|
9239
|
+
/**
|
|
9240
|
+
* Stream audio for real-time transcription via WebSocket
|
|
9241
|
+
*
|
|
9242
|
+
* Connects to Speechmatics' real-time API and sends audio chunks
|
|
9243
|
+
* for transcription with results returned via callbacks.
|
|
9244
|
+
*
|
|
9245
|
+
* @param options - Streaming configuration options
|
|
9246
|
+
* @param callbacks - Event callbacks for transcription results
|
|
9247
|
+
* @returns Promise that resolves with a StreamingSession
|
|
9248
|
+
*
|
|
9249
|
+
* @example Basic streaming
|
|
9250
|
+
* ```typescript
|
|
9251
|
+
* const session = await adapter.transcribeStream({
|
|
9252
|
+
* language: 'en',
|
|
9253
|
+
* speechmaticsStreaming: {
|
|
9254
|
+
* enablePartials: true,
|
|
9255
|
+
* operatingPoint: 'enhanced'
|
|
9256
|
+
* }
|
|
9257
|
+
* }, {
|
|
9258
|
+
* onTranscript: (event) => console.log(event.text),
|
|
9259
|
+
* onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
|
|
9260
|
+
* onError: (error) => console.error(error)
|
|
9261
|
+
* });
|
|
9262
|
+
*
|
|
9263
|
+
* await session.sendAudio({ data: audioBuffer });
|
|
9264
|
+
* await session.close();
|
|
9265
|
+
* ```
|
|
9266
|
+
*/
|
|
9267
|
+
async transcribeStream(options, callbacks) {
|
|
9268
|
+
this.validateConfig();
|
|
9269
|
+
const smOpts = options?.speechmaticsStreaming || {};
|
|
9270
|
+
const region = smOpts.region || this.config?.region;
|
|
9271
|
+
const wsUrl = this.getRegionalWsUrl(region);
|
|
9272
|
+
const ws = new import_ws5.default(wsUrl, {
|
|
9273
|
+
headers: {
|
|
9274
|
+
Authorization: `Bearer ${this.config.apiKey}`
|
|
9275
|
+
}
|
|
9276
|
+
});
|
|
9277
|
+
let sessionStatus = "connecting";
|
|
9278
|
+
const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
|
|
9279
|
+
let seqNo = 0;
|
|
9280
|
+
let utteranceResults = [];
|
|
9281
|
+
const sessionReady = new Promise((resolve, reject) => {
|
|
9282
|
+
const timeout = setTimeout(() => {
|
|
9283
|
+
reject(new Error("WebSocket connection timeout"));
|
|
9284
|
+
}, 1e4);
|
|
9285
|
+
let wsOpen = false;
|
|
9286
|
+
ws.once("error", (error) => {
|
|
9287
|
+
clearTimeout(timeout);
|
|
9288
|
+
reject(error);
|
|
9289
|
+
});
|
|
9290
|
+
ws.once("open", () => {
|
|
9291
|
+
wsOpen = true;
|
|
9292
|
+
const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
|
|
9293
|
+
const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
|
|
9294
|
+
const startMsg = {
|
|
9295
|
+
message: "StartRecognition",
|
|
9296
|
+
audio_format: {
|
|
9297
|
+
type: "raw",
|
|
9298
|
+
encoding,
|
|
9299
|
+
sample_rate: sampleRate
|
|
9300
|
+
},
|
|
9301
|
+
transcription_config: {
|
|
9302
|
+
language: smOpts.language || options?.language || "en",
|
|
9303
|
+
enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
|
|
9304
|
+
}
|
|
9305
|
+
};
|
|
9306
|
+
const txConfig = startMsg.transcription_config;
|
|
9307
|
+
if (smOpts.domain) txConfig.domain = smOpts.domain;
|
|
9308
|
+
if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
|
|
9309
|
+
if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
|
|
9310
|
+
if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
|
|
9311
|
+
if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
|
|
9312
|
+
if (smOpts.diarization === "speaker" || options?.diarization) {
|
|
9313
|
+
txConfig.diarization = "speaker";
|
|
9314
|
+
if (smOpts.maxSpeakers) {
|
|
9315
|
+
txConfig.speaker_diarization_config = {
|
|
9316
|
+
max_speakers: smOpts.maxSpeakers
|
|
9317
|
+
};
|
|
9318
|
+
} else if (options?.speakersExpected) {
|
|
9319
|
+
txConfig.speaker_diarization_config = {
|
|
9320
|
+
max_speakers: options.speakersExpected
|
|
9321
|
+
};
|
|
9322
|
+
}
|
|
9323
|
+
}
|
|
9324
|
+
if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
|
|
9325
|
+
txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
|
|
9326
|
+
content: word
|
|
9327
|
+
}));
|
|
9328
|
+
} else if (options?.customVocabulary && options.customVocabulary.length > 0) {
|
|
9329
|
+
txConfig.additional_vocab = options.customVocabulary.map((word) => ({
|
|
9330
|
+
content: word
|
|
9331
|
+
}));
|
|
9332
|
+
}
|
|
9333
|
+
if (smOpts.conversationConfig) {
|
|
9334
|
+
txConfig.conversation_config = {
|
|
9335
|
+
end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
|
|
9336
|
+
};
|
|
9337
|
+
}
|
|
9338
|
+
const startPayload = JSON.stringify(startMsg);
|
|
9339
|
+
if (callbacks?.onRawMessage) {
|
|
9340
|
+
callbacks.onRawMessage({
|
|
9341
|
+
provider: "speechmatics",
|
|
9342
|
+
direction: "outgoing",
|
|
9343
|
+
timestamp: Date.now(),
|
|
9344
|
+
payload: startPayload,
|
|
9345
|
+
messageType: "StartRecognition"
|
|
9346
|
+
});
|
|
9347
|
+
}
|
|
9348
|
+
ws.send(startPayload);
|
|
9349
|
+
});
|
|
9350
|
+
const onMessage = (data) => {
|
|
9351
|
+
const rawPayload = data.toString();
|
|
9352
|
+
try {
|
|
9353
|
+
const msg = JSON.parse(rawPayload);
|
|
9354
|
+
if (msg.message === "RecognitionStarted") {
|
|
9355
|
+
clearTimeout(timeout);
|
|
9356
|
+
ws.removeListener("message", onMessage);
|
|
9357
|
+
ws.emit("message", data);
|
|
9358
|
+
resolve();
|
|
9359
|
+
} else if (msg.message === "Error") {
|
|
9360
|
+
clearTimeout(timeout);
|
|
9361
|
+
ws.removeListener("message", onMessage);
|
|
9362
|
+
reject(new Error(msg.reason || "Recognition failed to start"));
|
|
9363
|
+
}
|
|
9364
|
+
} catch {
|
|
9365
|
+
}
|
|
9366
|
+
};
|
|
9367
|
+
ws.on("message", onMessage);
|
|
9368
|
+
});
|
|
9369
|
+
ws.on("message", (data) => {
|
|
9370
|
+
const rawPayload = data.toString();
|
|
9371
|
+
try {
|
|
9372
|
+
const message = JSON.parse(rawPayload);
|
|
9373
|
+
if (callbacks?.onRawMessage) {
|
|
9374
|
+
callbacks.onRawMessage({
|
|
9375
|
+
provider: "speechmatics",
|
|
9376
|
+
direction: "incoming",
|
|
9377
|
+
timestamp: Date.now(),
|
|
9378
|
+
payload: rawPayload,
|
|
9379
|
+
messageType: message.message
|
|
9380
|
+
});
|
|
9381
|
+
}
|
|
9382
|
+
this.handleStreamingMessage(message, callbacks, utteranceResults);
|
|
9383
|
+
} catch (error) {
|
|
9384
|
+
if (callbacks?.onRawMessage) {
|
|
9385
|
+
callbacks.onRawMessage({
|
|
9386
|
+
provider: "speechmatics",
|
|
9387
|
+
direction: "incoming",
|
|
9388
|
+
timestamp: Date.now(),
|
|
9389
|
+
payload: rawPayload,
|
|
9390
|
+
messageType: "parse_error"
|
|
9391
|
+
});
|
|
9392
|
+
}
|
|
9393
|
+
callbacks?.onError?.({
|
|
9394
|
+
code: "PARSE_ERROR",
|
|
9395
|
+
message: "Failed to parse WebSocket message",
|
|
9396
|
+
details: error
|
|
9397
|
+
});
|
|
9398
|
+
}
|
|
9399
|
+
});
|
|
9400
|
+
ws.on("error", (error) => {
|
|
9401
|
+
callbacks?.onError?.({
|
|
9402
|
+
code: "WEBSOCKET_ERROR",
|
|
9403
|
+
message: error.message,
|
|
9404
|
+
details: error
|
|
9405
|
+
});
|
|
9406
|
+
});
|
|
9407
|
+
ws.on("close", (code, reason) => {
|
|
9408
|
+
sessionStatus = "closed";
|
|
9409
|
+
callbacks?.onClose?.(code, reason.toString());
|
|
9410
|
+
});
|
|
9411
|
+
await sessionReady;
|
|
9412
|
+
sessionStatus = "open";
|
|
9413
|
+
callbacks?.onOpen?.();
|
|
9414
|
+
return {
|
|
9415
|
+
id: sessionId,
|
|
9416
|
+
provider: this.name,
|
|
9417
|
+
createdAt: /* @__PURE__ */ new Date(),
|
|
9418
|
+
getStatus: () => sessionStatus,
|
|
9419
|
+
sendAudio: async (chunk) => {
|
|
9420
|
+
if (sessionStatus !== "open") {
|
|
9421
|
+
throw new Error(`Cannot send audio: session is ${sessionStatus}`);
|
|
9422
|
+
}
|
|
9423
|
+
if (ws.readyState !== import_ws5.default.OPEN) {
|
|
9424
|
+
throw new Error("WebSocket is not open");
|
|
9425
|
+
}
|
|
9426
|
+
if (callbacks?.onRawMessage) {
|
|
9427
|
+
const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
|
|
9428
|
+
chunk.data.byteOffset,
|
|
9429
|
+
chunk.data.byteOffset + chunk.data.byteLength
|
|
9430
|
+
);
|
|
9431
|
+
callbacks.onRawMessage({
|
|
9432
|
+
provider: this.name,
|
|
9433
|
+
direction: "outgoing",
|
|
9434
|
+
timestamp: Date.now(),
|
|
9435
|
+
payload: audioPayload,
|
|
9436
|
+
messageType: "audio"
|
|
9437
|
+
});
|
|
9438
|
+
}
|
|
9439
|
+
ws.send(chunk.data);
|
|
9440
|
+
seqNo++;
|
|
9441
|
+
if (chunk.isLast) {
|
|
9442
|
+
const endMsg = JSON.stringify({
|
|
9443
|
+
message: "EndOfStream",
|
|
9444
|
+
last_seq_no: seqNo
|
|
9445
|
+
});
|
|
9446
|
+
if (callbacks?.onRawMessage) {
|
|
9447
|
+
callbacks.onRawMessage({
|
|
9448
|
+
provider: this.name,
|
|
9449
|
+
direction: "outgoing",
|
|
9450
|
+
timestamp: Date.now(),
|
|
9451
|
+
payload: endMsg,
|
|
9452
|
+
messageType: "EndOfStream"
|
|
9453
|
+
});
|
|
9454
|
+
}
|
|
9455
|
+
ws.send(endMsg);
|
|
9456
|
+
}
|
|
9457
|
+
},
|
|
9458
|
+
close: async () => {
|
|
9459
|
+
if (sessionStatus === "closed" || sessionStatus === "closing") {
|
|
9460
|
+
return;
|
|
9461
|
+
}
|
|
9462
|
+
sessionStatus = "closing";
|
|
9463
|
+
if (ws.readyState === import_ws5.default.OPEN) {
|
|
9464
|
+
seqNo++;
|
|
9465
|
+
ws.send(
|
|
9466
|
+
JSON.stringify({
|
|
9467
|
+
message: "EndOfStream",
|
|
9468
|
+
last_seq_no: seqNo
|
|
9469
|
+
})
|
|
9470
|
+
);
|
|
9471
|
+
}
|
|
9472
|
+
return new Promise((resolve) => {
|
|
9473
|
+
const timeout = setTimeout(() => {
|
|
9474
|
+
ws.terminate();
|
|
9475
|
+
sessionStatus = "closed";
|
|
9476
|
+
resolve();
|
|
9477
|
+
}, 5e3);
|
|
9478
|
+
const onMsg = (data) => {
|
|
9479
|
+
try {
|
|
9480
|
+
const msg = JSON.parse(data.toString());
|
|
9481
|
+
if (msg.message === "EndOfTranscript") {
|
|
9482
|
+
ws.removeListener("message", onMsg);
|
|
9483
|
+
clearTimeout(timeout);
|
|
9484
|
+
ws.close();
|
|
9485
|
+
}
|
|
9486
|
+
} catch {
|
|
9487
|
+
}
|
|
9488
|
+
};
|
|
9489
|
+
ws.on("message", onMsg);
|
|
9490
|
+
ws.once("close", () => {
|
|
9491
|
+
clearTimeout(timeout);
|
|
9492
|
+
sessionStatus = "closed";
|
|
9493
|
+
resolve();
|
|
9494
|
+
});
|
|
9495
|
+
});
|
|
9496
|
+
}
|
|
9497
|
+
};
|
|
9498
|
+
}
|
|
9499
|
+
/**
|
|
9500
|
+
* Handle incoming Speechmatics real-time WebSocket messages
|
|
9501
|
+
*/
|
|
9502
|
+
handleStreamingMessage(message, callbacks, utteranceResults) {
|
|
9503
|
+
switch (message.message) {
|
|
9504
|
+
case "RecognitionStarted": {
|
|
9505
|
+
break;
|
|
9506
|
+
}
|
|
9507
|
+
case "AddPartialTranscript": {
|
|
9508
|
+
const results = message.results || [];
|
|
9509
|
+
const text = buildTextFromSpeechmaticsResults(results);
|
|
9510
|
+
if (text) {
|
|
9511
|
+
callbacks?.onTranscript?.({
|
|
9512
|
+
type: "transcript",
|
|
9513
|
+
text,
|
|
9514
|
+
isFinal: false,
|
|
9515
|
+
words: this.extractWordsFromResults(results),
|
|
9516
|
+
data: message
|
|
9517
|
+
});
|
|
9518
|
+
}
|
|
9519
|
+
break;
|
|
9520
|
+
}
|
|
9521
|
+
case "AddTranscript": {
|
|
9522
|
+
const results = message.results || [];
|
|
9523
|
+
const text = buildTextFromSpeechmaticsResults(results);
|
|
9524
|
+
if (utteranceResults) {
|
|
9525
|
+
utteranceResults.push(...results);
|
|
9526
|
+
}
|
|
9527
|
+
if (text) {
|
|
9528
|
+
callbacks?.onTranscript?.({
|
|
9529
|
+
type: "transcript",
|
|
9530
|
+
text,
|
|
9531
|
+
isFinal: true,
|
|
9532
|
+
words: this.extractWordsFromResults(results),
|
|
9533
|
+
data: message
|
|
9534
|
+
});
|
|
9535
|
+
}
|
|
9536
|
+
break;
|
|
9537
|
+
}
|
|
9538
|
+
case "EndOfUtterance": {
|
|
9539
|
+
if (utteranceResults && utteranceResults.length > 0) {
|
|
9540
|
+
const text = buildTextFromSpeechmaticsResults(utteranceResults);
|
|
9541
|
+
const words = this.extractWordsFromResults(utteranceResults);
|
|
9542
|
+
const utterances = buildUtterancesFromWords(words);
|
|
9543
|
+
if (utterances.length > 0) {
|
|
9544
|
+
for (const utt of utterances) {
|
|
9545
|
+
callbacks?.onUtterance?.(utt);
|
|
9546
|
+
}
|
|
9547
|
+
} else if (text) {
|
|
9548
|
+
callbacks?.onUtterance?.({
|
|
9549
|
+
text,
|
|
9550
|
+
start: words.length > 0 ? words[0].start : 0,
|
|
9551
|
+
end: words.length > 0 ? words[words.length - 1].end : 0,
|
|
9552
|
+
words
|
|
9553
|
+
});
|
|
9554
|
+
}
|
|
9555
|
+
utteranceResults.length = 0;
|
|
9556
|
+
}
|
|
9557
|
+
break;
|
|
9558
|
+
}
|
|
9559
|
+
case "AudioAdded": {
|
|
9560
|
+
break;
|
|
9561
|
+
}
|
|
9562
|
+
case "EndOfTranscript": {
|
|
9563
|
+
break;
|
|
9564
|
+
}
|
|
9565
|
+
case "Info":
|
|
9566
|
+
case "Warning": {
|
|
9567
|
+
callbacks?.onMetadata?.(message);
|
|
9568
|
+
break;
|
|
9569
|
+
}
|
|
9570
|
+
case "Error": {
|
|
9571
|
+
const errMsg = message;
|
|
9572
|
+
callbacks?.onError?.({
|
|
9573
|
+
code: errMsg.type || "SPEECHMATICS_ERROR",
|
|
9574
|
+
message: errMsg.reason || "Unknown error",
|
|
9575
|
+
details: message
|
|
9576
|
+
});
|
|
9577
|
+
break;
|
|
9578
|
+
}
|
|
9579
|
+
default: {
|
|
9580
|
+
callbacks?.onMetadata?.(message);
|
|
9581
|
+
break;
|
|
9582
|
+
}
|
|
9583
|
+
}
|
|
9584
|
+
}
|
|
9585
|
+
/**
|
|
9586
|
+
* Extract unified Word[] from Speechmatics recognition results
|
|
9587
|
+
*/
|
|
9588
|
+
extractWordsFromResults(results) {
|
|
9589
|
+
return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
|
|
9590
|
+
word: result.alternatives?.[0]?.content || "",
|
|
9591
|
+
start: result.start_time,
|
|
9592
|
+
end: result.end_time,
|
|
9593
|
+
confidence: result.alternatives?.[0]?.confidence,
|
|
9594
|
+
speaker: result.alternatives?.[0]?.speaker
|
|
9595
|
+
}));
|
|
9596
|
+
}
|
|
9222
9597
|
/**
|
|
9223
9598
|
* Normalize Speechmatics status to unified status
|
|
9224
9599
|
* Uses generated JobDetailsStatus enum values
|
|
@@ -9540,50 +9915,51 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9540
9915
|
const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
|
|
9541
9916
|
const createdAt = /* @__PURE__ */ new Date();
|
|
9542
9917
|
const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
|
|
9543
|
-
const wsUrl =
|
|
9544
|
-
|
|
9545
|
-
const
|
|
9546
|
-
|
|
9547
|
-
|
|
9918
|
+
const wsUrl = `${wsBase}/transcribe-websocket`;
|
|
9919
|
+
const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-v4";
|
|
9920
|
+
const sonioxOpts = options?.sonioxStreaming;
|
|
9921
|
+
const initMessage = {
|
|
9922
|
+
api_key: this.config.apiKey,
|
|
9923
|
+
model: modelId
|
|
9924
|
+
};
|
|
9925
|
+
if (sonioxOpts?.audioFormat) {
|
|
9926
|
+
initMessage.audio_format = sonioxOpts.audioFormat;
|
|
9927
|
+
} else if (options?.encoding) {
|
|
9548
9928
|
const encodingMap = {
|
|
9549
9929
|
linear16: "pcm_s16le",
|
|
9550
9930
|
pcm: "pcm_s16le",
|
|
9551
9931
|
mulaw: "mulaw",
|
|
9552
9932
|
alaw: "alaw"
|
|
9553
9933
|
};
|
|
9554
|
-
|
|
9934
|
+
initMessage.audio_format = encodingMap[options.encoding] || options.encoding;
|
|
9555
9935
|
}
|
|
9556
|
-
if (options?.sampleRate) {
|
|
9557
|
-
|
|
9936
|
+
if (sonioxOpts?.sampleRate || options?.sampleRate) {
|
|
9937
|
+
initMessage.sample_rate = sonioxOpts?.sampleRate || options?.sampleRate;
|
|
9558
9938
|
}
|
|
9559
|
-
if (options?.channels) {
|
|
9560
|
-
|
|
9939
|
+
if (sonioxOpts?.numChannels || options?.channels) {
|
|
9940
|
+
initMessage.num_channels = sonioxOpts?.numChannels || options?.channels;
|
|
9561
9941
|
}
|
|
9562
|
-
const sonioxOpts = options?.sonioxStreaming;
|
|
9563
9942
|
if (sonioxOpts) {
|
|
9564
9943
|
if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
|
|
9565
|
-
|
|
9944
|
+
initMessage.language_hints = sonioxOpts.languageHints;
|
|
9566
9945
|
}
|
|
9567
9946
|
if (sonioxOpts.enableLanguageIdentification) {
|
|
9568
|
-
|
|
9947
|
+
initMessage.enable_language_identification = true;
|
|
9569
9948
|
}
|
|
9570
9949
|
if (sonioxOpts.enableEndpointDetection) {
|
|
9571
|
-
|
|
9950
|
+
initMessage.enable_endpoint_detection = true;
|
|
9572
9951
|
}
|
|
9573
9952
|
if (sonioxOpts.enableSpeakerDiarization) {
|
|
9574
|
-
|
|
9953
|
+
initMessage.enable_speaker_diarization = true;
|
|
9575
9954
|
}
|
|
9576
9955
|
if (sonioxOpts.context) {
|
|
9577
|
-
|
|
9578
|
-
"context",
|
|
9579
|
-
typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
|
|
9580
|
-
);
|
|
9956
|
+
initMessage.context = typeof sonioxOpts.context === "string" ? sonioxOpts.context : sonioxOpts.context;
|
|
9581
9957
|
}
|
|
9582
9958
|
if (sonioxOpts.translation) {
|
|
9583
|
-
|
|
9959
|
+
initMessage.translation = sonioxOpts.translation;
|
|
9584
9960
|
}
|
|
9585
9961
|
if (sonioxOpts.clientReferenceId) {
|
|
9586
|
-
|
|
9962
|
+
initMessage.client_reference_id = sonioxOpts.clientReferenceId;
|
|
9587
9963
|
}
|
|
9588
9964
|
}
|
|
9589
9965
|
if (!sonioxOpts?.languageHints && options?.language) {
|
|
@@ -9592,24 +9968,33 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9592
9968
|
`[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
|
|
9593
9969
|
);
|
|
9594
9970
|
}
|
|
9595
|
-
|
|
9971
|
+
initMessage.language_hints = [options.language];
|
|
9596
9972
|
}
|
|
9597
9973
|
if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
|
|
9598
|
-
|
|
9974
|
+
initMessage.enable_speaker_diarization = true;
|
|
9599
9975
|
}
|
|
9600
9976
|
if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
|
|
9601
|
-
|
|
9602
|
-
}
|
|
9603
|
-
if (options?.interimResults !== false) {
|
|
9977
|
+
initMessage.enable_language_identification = true;
|
|
9604
9978
|
}
|
|
9605
9979
|
let status = "connecting";
|
|
9606
9980
|
let openedAt = null;
|
|
9607
9981
|
let receivedData = false;
|
|
9608
9982
|
const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : require("ws");
|
|
9609
|
-
const ws = new WebSocketImpl(wsUrl
|
|
9983
|
+
const ws = new WebSocketImpl(wsUrl);
|
|
9610
9984
|
ws.onopen = () => {
|
|
9611
|
-
status = "open";
|
|
9612
9985
|
openedAt = Date.now();
|
|
9986
|
+
const initPayload = JSON.stringify(initMessage);
|
|
9987
|
+
if (callbacks?.onRawMessage) {
|
|
9988
|
+
callbacks.onRawMessage({
|
|
9989
|
+
provider: this.name,
|
|
9990
|
+
direction: "outgoing",
|
|
9991
|
+
timestamp: Date.now(),
|
|
9992
|
+
payload: initPayload,
|
|
9993
|
+
messageType: "init"
|
|
9994
|
+
});
|
|
9995
|
+
}
|
|
9996
|
+
ws.send(initPayload);
|
|
9997
|
+
status = "open";
|
|
9613
9998
|
callbacks?.onOpen?.();
|
|
9614
9999
|
};
|
|
9615
10000
|
ws.onmessage = (event) => {
|
|
@@ -9688,10 +10073,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9688
10073
|
ws.onclose = (event) => {
|
|
9689
10074
|
status = "closed";
|
|
9690
10075
|
const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
|
|
9691
|
-
const
|
|
9692
|
-
if (
|
|
10076
|
+
const isEarlyClose = timeSinceOpen !== null && timeSinceOpen < 5e3 && !receivedData;
|
|
10077
|
+
if (isEarlyClose && event.code === 1e3) {
|
|
9693
10078
|
const errorMessage = [
|
|
9694
|
-
"Soniox closed connection
|
|
10079
|
+
"Soniox closed connection shortly after opening.",
|
|
9695
10080
|
`Current config: region=${this.region}, model=${modelId}`,
|
|
9696
10081
|
"Likely causes:",
|
|
9697
10082
|
" - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
|
|
@@ -36710,7 +37095,7 @@ var AzureCapabilities = {
|
|
|
36710
37095
|
deleteTranscript: true
|
|
36711
37096
|
};
|
|
36712
37097
|
var SpeechmaticsCapabilities = {
|
|
36713
|
-
streaming:
|
|
37098
|
+
streaming: true,
|
|
36714
37099
|
diarization: true,
|
|
36715
37100
|
wordTimestamps: true,
|
|
36716
37101
|
languageDetection: false,
|