voice-router-dev 0.9.1 → 0.9.2
- package/CHANGELOG.md +24 -0
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/index.d.mts +179 -151
- package/dist/index.d.ts +179 -151
- package/dist/index.js +367 -78
- package/dist/index.mjs +367 -78
- package/package.json +1 -1
- package/dist/{field-configs-CH0lgAe8.d.mts → field-configs-FbtCPxzs.d.mts} +60 -60
- package/dist/{field-configs-CH0lgAe8.d.ts → field-configs-FbtCPxzs.d.ts} +60 -60
package/dist/index.js
CHANGED
@@ -6795,9 +6795,13 @@ var DeepgramAdapter = class extends BaseAdapter {
   * Submit audio for transcription
   *
   * Sends audio to Deepgram API for transcription. Deepgram normally processes
-  * synchronously and returns results immediately.
-  *
-  *
+  * synchronously and returns results immediately.
+  *
+  * **Callback mode:** When `webhookUrl` is set, Deepgram returns immediately
+  * with a `request_id` (status `"queued"`). The full transcript is POSTed to
+  * the webhook URL — this is the primary delivery mechanism. `getTranscript()`
+  * can attempt to retrieve the result later via request history, but that
+  * endpoint is best-effort and not a guaranteed durable store.
   *
   * @param audio - Audio input (URL or file buffer)
   * @param options - Transcription options
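
The callback flow documented in that JSDoc looks roughly like this from the caller's side. A minimal sketch, assuming the adapter class is exported from the package root, that `initialize()` is used as in the JSDoc example removed further down in this diff, and that a URL-shaped audio input is accepted; none of those details are confirmed here:

```js
// Sketch only: import path, construction, and the { url } audio shape are assumptions.
import { DeepgramAdapter } from "voice-router-dev";

const adapter = new DeepgramAdapter();
adapter.initialize({
  apiKey: process.env.DEEPGRAM_API_KEY,
  projectId: process.env.DEEPGRAM_PROJECT_ID
});

// With webhookUrl set, Deepgram acks immediately; the transcript arrives at the webhook.
const submitted = await adapter.transcribe(
  { url: "https://example.com/call-recording.wav" },
  { webhookUrl: "https://api.example.com/hooks/deepgram" }
);
console.log(submitted.data?.status); // expected: "queued"
console.log(submitted.data?.id);     // request ID to correlate with the webhook payload
```
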
@@ -6907,30 +6911,22 @@ var DeepgramAdapter = class extends BaseAdapter {
    }
  }
  /**
-   * Get transcription result by ID
-   *
-   * Retrieves a previous transcription from Deepgram's request history.
+   * Get transcription result by ID (best-effort)
   *
-   *
-   *
+   * Retrieves a previous transcription from Deepgram's request history API.
+   * Requires `projectId` to be set during initialization.
   *
-   *
-   *
+   * **Important:** Deepgram's request history is best-effort. Requests may
+   * expire or be unavailable depending on your plan and retention settings.
+   * This is NOT a durable transcript store — for reliable retrieval, use
+   * callback mode (`webhookUrl`) and persist the webhook payload yourself.
   *
-   *
-   *
-   *
-   * adapter.initialize({
-   *   apiKey: process.env.DEEPGRAM_API_KEY,
-   *   projectId: process.env.DEEPGRAM_PROJECT_ID
-   * })
+   * The response field on the request history entry is cast to
+   * `ListenV1Response` — this appears to work in practice but is not
+   * explicitly documented by Deepgram as a guaranteed contract.
   *
-   *
-   * if
-   * console.log(result.data?.text)
-   * console.log(result.data?.words)
-   * }
-   * ```
+   * @param transcriptId - Request ID from a previous transcription
+   * @returns Transcript response if still available in request history
   *
   * @see https://developers.deepgram.com/reference/get-request
   */
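
Because retrieval is best-effort, callers should treat a miss as a normal outcome and fall back to whatever they persisted from the webhook. A small sketch, assuming the unified `success`/`data` response shape used elsewhere in this file:

```js
// Hedged sketch: `adapter` is an initialized DeepgramAdapter; `requestId` came from an
// earlier transcribe() call. The success/data fields follow the package's unified shape.
async function fetchIfStillAvailable(adapter, requestId) {
  const result = await adapter.getTranscript(requestId);
  if (result.success && result.data) {
    console.log(result.data.text);
    console.log(result.data.words);
    return result.data;
  }
  // History entry expired or unavailable: rely on the persisted webhook payload instead.
  return null;
}
```
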
@@ -9013,8 +9009,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
    super(...arguments);
    this.name = "speechmatics";
    this.capabilities = {
-      streaming: false,
-      // Batch only (streaming available via separate WebSocket API)
+      streaming: true,
      diarization: true,
      wordTimestamps: true,
      languageDetection: false,
@@ -9260,6 +9255,271 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
      throw error;
    }
  }
+  /**
+   * Get the regional WebSocket host for real-time streaming
+   *
+   * Speechmatics RT uses a different host pattern: {region}.rt.speechmatics.com
+   */
+  getRegionalWsHost(region) {
+    const regionPrefix = region || "eu1";
+    return `${regionPrefix}.rt.speechmatics.com`;
+  }
+  /**
+   * Stream audio for real-time transcription
+   *
+   * Creates a WebSocket connection to the Speechmatics Real-Time API.
+   * Protocol: send StartRecognition config, then AddAudio binary frames,
+   * receive AddPartialTranscript/AddTranscript/EndOfUtterance messages.
+   *
+   * @param options - Streaming configuration
+   * @param callbacks - Event callbacks
+   * @returns StreamingSession for sending audio and closing
+   *
+   * @see https://docs.speechmatics.com/rt-api-ref
+   */
+  async transcribeStream(options, callbacks) {
+    this.validateConfig();
+    const sessionId = `speechmatics_${Date.now()}_${Math.random().toString(36).substring(7)}`;
+    const createdAt = /* @__PURE__ */ new Date();
+    const smOpts = options?.speechmaticsStreaming;
+    const region = smOpts?.region || this.config?.region;
+    const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost(region)}`);
+    const wsUrl = `${wsBase}/v2`;
+    let status = "connecting";
+    let recognitionStarted = false;
+    const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : require("ws");
+    const ws = new WebSocketImpl(wsUrl);
+    const language = smOpts?.language || options?.language || "en";
+    const transcriptionConfig = {
+      language,
+      enable_entities: smOpts?.enableEntities ?? options?.entityDetection ?? false,
+      enable_partials: smOpts?.enablePartials ?? options?.interimResults !== false,
+      operating_point: smOpts?.operatingPoint || OperatingPoint.enhanced,
+      ...smOpts?.maxDelay !== void 0 && { max_delay: smOpts.maxDelay },
+      ...smOpts?.maxDelayMode && {
+        max_delay_mode: smOpts.maxDelayMode
+      },
+      ...smOpts?.domain && { domain: smOpts.domain },
+      ...(options?.diarization || smOpts?.diarization === TranscriptionConfigDiarization.speaker) && {
+        diarization: TranscriptionConfigDiarization.speaker,
+        ...smOpts?.maxSpeakers !== void 0 && {
+          speaker_diarization_config: { max_speakers: smOpts.maxSpeakers }
+        }
+      },
+      ...(options?.customVocabulary?.length || smOpts?.additionalVocab?.length) && {
+        additional_vocab: (smOpts?.additionalVocab || options?.customVocabulary || []).map(
+          (term) => ({ content: term })
+        )
+      }
+    };
+    const startRecognition = {
+      message: "StartRecognition",
+      audio_format: {
+        type: "raw",
+        encoding: smOpts?.encoding || "pcm_s16le",
+        sample_rate: smOpts?.sampleRate || options?.sampleRate || 16e3
+      },
+      transcription_config: transcriptionConfig,
+      ...smOpts?.conversationConfig && {
+        conversation_config: {
+          end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
+        }
+      }
+    };
+    ws.onopen = () => {
+      status = "open";
+      const msg = JSON.stringify(startRecognition);
+      if (callbacks?.onRawMessage) {
+        callbacks.onRawMessage({
+          provider: this.name,
+          direction: "outgoing",
+          timestamp: Date.now(),
+          payload: msg,
+          messageType: "StartRecognition"
+        });
+      }
+      ws.send(msg);
+    };
+    ws.onmessage = (event) => {
+      const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
+      try {
+        const data = JSON.parse(rawPayload);
+        const messageType = data.message;
+        if (callbacks?.onRawMessage) {
+          callbacks.onRawMessage({
+            provider: this.name,
+            direction: "incoming",
+            timestamp: Date.now(),
+            payload: rawPayload,
+            messageType
+          });
+        }
+        switch (messageType) {
+          case "RecognitionStarted": {
+            recognitionStarted = true;
+            callbacks?.onOpen?.();
+            callbacks?.onMetadata?.({
+              id: data.id,
+              languagePackInfo: data.language_pack_info
+            });
+            break;
+          }
+          case "AddPartialTranscript": {
+            const partial = data;
+            const words = this.resultsToWords(partial.results);
+            callbacks?.onTranscript?.({
+              type: "transcript",
+              text: partial.metadata.transcript,
+              isFinal: false,
+              words,
+              speaker: words[0]?.speaker,
+              confidence: partial.results[0]?.alternatives?.[0]?.confidence,
+              channel: partial.channel ? parseInt(partial.channel) : void 0
+            });
+            break;
+          }
+          case "AddTranscript": {
+            const final = data;
+            const words = this.resultsToWords(final.results);
+            callbacks?.onTranscript?.({
+              type: "transcript",
+              text: final.metadata.transcript,
+              isFinal: true,
+              words,
+              speaker: words[0]?.speaker,
+              confidence: final.results[0]?.alternatives?.[0]?.confidence,
+              channel: final.channel ? parseInt(final.channel) : void 0
+            });
+            if (options?.diarization || smOpts?.diarization === "speaker") {
+              const utterances = buildUtterancesFromWords(words);
+              for (const utterance of utterances) {
+                callbacks?.onUtterance?.(utterance);
+              }
+            }
+            break;
+          }
+          case "EndOfUtterance": {
+            break;
+          }
+          case "EndOfTranscript": {
+            callbacks?.onClose?.(1e3, "Transcription complete");
+            break;
+          }
+          case "Error": {
+            const err = data;
+            callbacks?.onError?.({
+              code: err.type || "SPEECHMATICS_ERROR",
+              message: err.reason || "Unknown error"
+            });
+            break;
+          }
+          case "Warning": {
+            const warn = data;
+            callbacks?.onMetadata?.({
+              warning: warn.type,
+              reason: warn.reason
+            });
+            break;
+          }
+          case "Info": {
+            callbacks?.onMetadata?.(data);
+            break;
+          }
+          case "AudioAdded":
+          case "ChannelAudioAdded":
+            break;
+          default:
+            callbacks?.onMetadata?.(data);
+            break;
+        }
+      } catch (error) {
+        callbacks?.onError?.({
+          code: "PARSE_ERROR",
+          message: `Failed to parse message: ${error}`
+        });
+      }
+    };
+    ws.onerror = () => {
+      callbacks?.onError?.({
+        code: "WEBSOCKET_ERROR",
+        message: "WebSocket error occurred"
+      });
+    };
+    ws.onclose = (event) => {
+      status = "closed";
+      callbacks?.onClose?.(event.code, event.reason);
+    };
+    await new Promise((resolve, reject) => {
+      const timeout = setTimeout(() => {
+        reject(new Error("WebSocket connection timeout"));
+      }, 1e4);
+      const checkReady = () => {
+        if (recognitionStarted) {
+          clearTimeout(timeout);
+          resolve();
+        } else if (status === "closed") {
+          clearTimeout(timeout);
+          reject(new Error("WebSocket connection failed"));
+        } else {
+          setTimeout(checkReady, 100);
+        }
+      };
+      checkReady();
+    });
+    return {
+      id: sessionId,
+      provider: this.name,
+      createdAt,
+      getStatus: () => status,
+      sendAudio: async (chunk) => {
+        if (status !== "open") {
+          throw new Error("Session is not open");
+        }
+        if (callbacks?.onRawMessage) {
+          const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
+            chunk.data.byteOffset,
+            chunk.data.byteOffset + chunk.data.byteLength
+          );
+          callbacks.onRawMessage({
+            provider: this.name,
+            direction: "outgoing",
+            timestamp: Date.now(),
+            payload: audioPayload,
+            messageType: "audio"
+          });
+        }
+        ws.send(chunk.data);
+      },
+      close: async () => {
+        if (status === "open") {
+          status = "closing";
+          const endMsg = JSON.stringify({ message: "EndOfStream", last_seq_no: 0 });
+          if (callbacks?.onRawMessage) {
+            callbacks.onRawMessage({
+              provider: this.name,
+              direction: "outgoing",
+              timestamp: Date.now(),
+              payload: endMsg,
+              messageType: "EndOfStream"
+            });
+          }
+          ws.send(endMsg);
+        }
+      }
+    };
+  }
+  /**
+   * Convert Speechmatics RecognitionResult[] to unified Word[]
+   */
+  resultsToWords(results) {
+    return results.filter((r) => r.type === "word").map((r) => ({
+      word: r.alternatives?.[0]?.content || "",
+      start: r.start_time,
+      end: r.end_time,
+      confidence: r.alternatives?.[0]?.confidence,
+      speaker: r.alternatives?.[0]?.speaker
+    }));
+  }
  /**
   * Normalize Speechmatics status to unified status
   * Uses generated JobDetailsStatus enum values
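
From the caller's side, the returned session object exposes `sendAudio()`, `close()`, and `getStatus()`. A rough usage sketch, assuming an initialized SpeechmaticsAdapter instance and a hypothetical source of raw `pcm_s16le` chunks; option values are illustrative only:

```js
// Hedged sketch: `adapter` and `pcmChunks` (an async iterable of Buffer/ArrayBuffer
// chunks of raw pcm_s16le audio) are supplied by the caller and are hypothetical here.
async function streamAudio(adapter, pcmChunks) {
  const session = await adapter.transcribeStream(
    {
      language: "en",
      diarization: true,
      sampleRate: 16000,
      speechmaticsStreaming: { region: "eu1", enablePartials: true }
    },
    {
      onTranscript: (e) => console.log(e.isFinal ? "final:" : "partial:", e.text),
      onUtterance: (u) => console.log("utterance:", u),
      onError: (err) => console.error(err.code, err.message),
      onClose: (code, reason) => console.log("closed", code, reason)
    }
  );
  for await (const chunk of pcmChunks) {
    await session.sendAudio({ data: chunk }); // sent as an AddAudio binary frame
  }
  await session.close(); // sends EndOfStream
}
```
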
@@ -9679,7 +9939,7 @@ var SonioxAdapter = class extends BaseAdapter {
      let messageType;
      try {
        const data = JSON.parse(rawPayload);
-        const errorMessage = data.error_message
+        const errorMessage = data.error_message;
        if (errorMessage) {
          messageType = "error";
        } else if (data.finished) {
@@ -10038,7 +10298,15 @@ var ElevenLabsAdapter = class extends BaseAdapter {
  /**
   * Submit audio for transcription
   *
-   * ElevenLabs batch is synchronous
+   * ElevenLabs batch is normally synchronous — the API returns results directly.
+   *
+   * **Webhook mode:** When `webhookUrl` is set (or `elevenlabs.webhook` is true),
+   * the request is processed asynchronously. ElevenLabs returns a 202 with a
+   * `request_id` and delivers results to a webhook configured in the ElevenLabs
+   * dashboard. The unified `webhookUrl` acts as an intent flag to enable async
+   * mode — the actual delivery destination must be pre-configured in your
+   * ElevenLabs dashboard. Use `elevenlabs.webhook_id` to target a specific
+   * webhook endpoint.
   */
  async transcribe(audio, options) {
    this.validateConfig();
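
Submitting in webhook mode then looks roughly like this. A sketch, assuming an initialized `ElevenLabsAdapter` and a hypothetical audio buffer; the option names mirror the code added later in this diff:

```js
// Hedged sketch: `adapter` and `fileBuffer` are hypothetical; the webhook destination
// itself must already be configured in the ElevenLabs dashboard.
async function submitAsync(adapter, fileBuffer) {
  const ack = await adapter.transcribe(fileBuffer, {
    webhookUrl: "https://api.example.com/hooks/elevenlabs", // intent flag: enables async mode
    elevenlabs: { webhook_id: "wh_123" }                    // optional: target a specific dashboard webhook
  });
  console.log(ack.data?.status);        // "queued"
  console.log(ack.tracking?.requestId); // correlate with the webhook payload when it arrives
  return ack;
}
```
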
@@ -10061,6 +10329,11 @@ var ElevenLabsAdapter = class extends BaseAdapter {
        }
      };
    }
+    const elevenlabsOpts = options?.elevenlabs;
+    const useWebhook = options?.webhookUrl || elevenlabsOpts?.webhook;
+    if (useWebhook) {
+      formData.append("webhook", "true");
+    }
    if (options?.language) {
      formData.append("language_code", options.language);
    }
@@ -10079,7 +10352,6 @@ var ElevenLabsAdapter = class extends BaseAdapter {
    if (options?.entityDetection) {
      formData.append("entity_detection", "all");
    }
-    const elevenlabsOpts = options?.elevenlabs;
    if (elevenlabsOpts) {
      for (const [key, value] of Object.entries(elevenlabsOpts)) {
        if (value === void 0 || value === null) continue;
@@ -10102,6 +10374,22 @@ var ElevenLabsAdapter = class extends BaseAdapter {
          "Content-Type": "multipart/form-data"
        }
      });
+      if (useWebhook) {
+        const ack = response.data;
+        return {
+          success: true,
+          provider: this.name,
+          data: {
+            id: ack.request_id || ack.transcription_id || `elevenlabs_${Date.now()}`,
+            text: "",
+            status: "queued"
+          },
+          tracking: {
+            requestId: ack.request_id
+          },
+          raw: response.data
+        };
+      }
      return this.normalizeResponse(response.data);
    } catch (error) {
      return this.createErrorResponse(error);
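
Since the same `transcribe()` call can now resolve to either a full transcript (synchronous) or this queued acknowledgement, callers can branch on `data.status`. A hedged sketch with hypothetical helpers:

```js
// Hedged sketch: `adapter`, `fileBuffer`, `options`, and `pendingJobs` are all hypothetical.
async function transcribeOrQueue(adapter, fileBuffer, options, pendingJobs) {
  const result = await adapter.transcribe(fileBuffer, options);
  if (result.success && result.data?.status === "queued") {
    await pendingJobs.save(result.tracking?.requestId); // wait for the webhook delivery
    return null;
  }
  return result.success ? result.data?.text : null; // synchronous mode: text is available now
}
```
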
@@ -10194,20 +10482,9 @@ var ElevenLabsAdapter = class extends BaseAdapter {
    ws.onmessage = (event) => {
      receivedData = true;
      const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
-      let messageType;
      try {
        const data = JSON.parse(rawPayload);
-        …
-          messageType = "error";
-        } else if (data.message_type === "session_started") {
-          messageType = "session_started";
-        } else if (data.message_type === "partial_transcript") {
-          messageType = "partial_transcript";
-        } else if (data.message_type === "committed_transcript") {
-          messageType = "committed_transcript";
-        } else if (data.message_type === "committed_transcript_with_timestamps") {
-          messageType = "committed_transcript_with_timestamps";
-        }
+        const messageType = "error" in data ? "error" : data.message_type;
        if (callbacks?.onRawMessage) {
          callbacks.onRawMessage({
            provider: this.name,
@@ -10217,50 +10494,62 @@ var ElevenLabsAdapter = class extends BaseAdapter {
            messageType
          });
        }
-        if (data
+        if ("error" in data) {
          callbacks?.onError?.({
-            code: data.
+            code: data.message_type || "STREAM_ERROR",
            message: data.error
          });
          return;
        }
-        … (36 removed lines of the old message handling; content not rendered in the diff view)
+        switch (data.message_type) {
+          case "session_started":
+            break;
+          case "partial_transcript": {
+            const streamEvent = {
+              type: "transcript",
+              text: data.text || "",
+              isFinal: false,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            break;
+          }
+          case "committed_transcript": {
+            const streamEvent = {
+              type: "transcript",
+              text: data.text || "",
+              isFinal: true,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            break;
+          }
+          case "committed_transcript_with_timestamps": {
+            const tsData = data;
+            const words = tsData.words ? tsData.words.map((w) => ({
+              word: w.text || "",
+              start: w.start || 0,
+              end: w.end || 0,
+              confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
+              speaker: w.speaker_id
+            })) : [];
+            const streamEvent = {
+              type: "transcript",
+              text: tsData.text || "",
+              isFinal: true,
+              words: words.length > 0 ? words : void 0,
+              speaker: words[0]?.speaker,
+              language: tsData.language_code,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            if (options?.diarization && words.length > 0) {
+              const utterances = buildUtterancesFromWords(words);
+              for (const utterance of utterances) {
+                callbacks?.onUtterance?.(utterance);
+              }
            }
+            break;
          }
        }
      } catch (error) {