voice-router-dev 0.9.1 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -0
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/index.d.mts +181 -153
- package/dist/index.d.ts +181 -153
- package/dist/index.js +377 -78
- package/dist/index.mjs +375 -78
- package/package.json +1 -1
- package/dist/{field-configs-CH0lgAe8.d.mts → field-configs-FbtCPxzs.d.mts} +60 -60
- package/dist/{field-configs-CH0lgAe8.d.ts → field-configs-FbtCPxzs.d.ts} +60 -60
package/dist/index.js CHANGED
```diff
@@ -39,6 +39,7 @@ __export(src_exports, {
   AssemblyAILanguage: () => AssemblyAILanguage,
   AssemblyAILanguageCodes: () => AssemblyAILanguageCodes,
   AssemblyAIListFilterSchema: () => AssemblyAIListFilterSchema,
+  AssemblyAIRegion: () => AssemblyAIRegion,
   AssemblyAISampleRate: () => AssemblyAISampleRate,
   AssemblyAISpeechModel: () => AssemblyAISpeechModel,
   AssemblyAIStatus: () => AssemblyAIStatus,
```
```diff
@@ -89,6 +90,7 @@ __export(src_exports, {
   ElevenLabsLanguageCodes: () => ElevenLabsLanguageCodes,
   ElevenLabsLanguageLabels: () => ElevenLabsLanguageLabels,
   ElevenLabsLanguages: () => ElevenLabsLanguages,
+  ElevenLabsRegion: () => ElevenLabsRegion,
   ElevenLabsTypes: () => schema_exports8,
   ElevenLabsZodSchemas: () => elevenLabsSpeechToTextAPI_zod_exports,
   GladiaAdapter: () => GladiaAdapter,
```
```diff
@@ -2820,6 +2822,12 @@ var AssemblyAISampleRate = {
   rate48000: 48e3
 };
 var AssemblyAIStatus = TranscriptStatus;
+var AssemblyAIRegion = {
+  /** United States (default) */
+  us: "us",
+  /** European Union — data never leaves the EU */
+  eu: "eu"
+};
 var GladiaStatus = TranscriptionControllerListV2StatusItem;
 var DeepgramStatus = V1ProjectsProjectIdRequestsGetParametersStatus;
 var SpeechmaticsRegion = {
```
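The new `AssemblyAIRegion` map follows the same shape as the existing `SpeechmaticsRegion` constant beside it. A minimal sketch of reading it; note the diff only shows the constant itself, so passing a `region` to the AssemblyAI adapter config is an unverified assumption here:

```ts
import { AssemblyAIRegion } from "voice-router-dev";

// AssemblyAIRegion.us === "us" (default); AssemblyAIRegion.eu === "eu"
// keeps data in the EU per the comment in the hunk above.
const region = AssemblyAIRegion.eu;
// Hypothetical usage: adapter.initialize({ apiKey: "...", region });
```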
```diff
@@ -6795,9 +6803,13 @@ var DeepgramAdapter = class extends BaseAdapter {
    * Submit audio for transcription
    *
    * Sends audio to Deepgram API for transcription. Deepgram normally processes
-   * synchronously and returns results immediately.
-   *
-   *
+   * synchronously and returns results immediately.
+   *
+   * **Callback mode:** When `webhookUrl` is set, Deepgram returns immediately
+   * with a `request_id` (status `"queued"`). The full transcript is POSTed to
+   * the webhook URL — this is the primary delivery mechanism. `getTranscript()`
+   * can attempt to retrieve the result later via request history, but that
+   * endpoint is best-effort and not a guaranteed durable store.
    *
    * @param audio - Audio input (URL or file buffer)
    * @param options - Transcription options
```
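A sketch of the callback mode described in this doc comment. The `initialize` call and the `transcribe(audio, options)` signature appear elsewhere in this diff; the URL-input shape is an assumption:

```ts
import { DeepgramAdapter } from "voice-router-dev";

const adapter = new DeepgramAdapter();
adapter.initialize({ apiKey: process.env.DEEPGRAM_API_KEY! });

// With webhookUrl set, Deepgram acks at once and POSTs the full transcript
// to the webhook later; persist that payload, since the webhook is the
// primary delivery mechanism.
const result = await adapter.transcribe(
  { url: "https://example.com/call.wav" }, // assumed audio-input shape
  { webhookUrl: "https://api.example.com/hooks/deepgram" }
);
console.log(result.data?.status); // "queued"
```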
```diff
@@ -6907,30 +6919,22 @@ var DeepgramAdapter = class extends BaseAdapter {
     }
   }
   /**
-   * Get transcription result by ID
+   * Get transcription result by ID (best-effort)
    *
-   * Retrieves a previous transcription from Deepgram's request history.
+   * Retrieves a previous transcription from Deepgram's request history API.
+   * Requires `projectId` to be set during initialization.
    *
-   *
-   *
+   * **Important:** Deepgram's request history is best-effort. Requests may
+   * expire or be unavailable depending on your plan and retention settings.
+   * This is NOT a durable transcript store — for reliable retrieval, use
+   * callback mode (`webhookUrl`) and persist the webhook payload yourself.
    *
-   *
-   *
+   * The response field on the request history entry is cast to
+   * `ListenV1Response` — this appears to work in practice but is not
+   * explicitly documented by Deepgram as a guaranteed contract.
    *
-   * @
-   *
-   * const adapter = new DeepgramAdapter()
-   * adapter.initialize({
-   *   apiKey: process.env.DEEPGRAM_API_KEY,
-   *   projectId: process.env.DEEPGRAM_PROJECT_ID
-   * })
-   *
-   * const result = await adapter.getTranscript('abc123-request-id')
-   * if (result.success) {
-   *   console.log(result.data?.text)
-   *   console.log(result.data?.words)
-   * }
-   * ```
+   * @param transcriptId - Request ID from a previous transcription
+   * @returns Transcript response if still available in request history
    *
    * @see https://developers.deepgram.com/reference/get-request
    */
```
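The JSDoc example removed above still illustrates the call shape; here it is as a standalone sketch, reworked to treat a miss as history expiry per the new guidance:

```ts
const adapter = new DeepgramAdapter();
adapter.initialize({
  apiKey: process.env.DEEPGRAM_API_KEY!,
  projectId: process.env.DEEPGRAM_PROJECT_ID! // required for request history
});

const result = await adapter.getTranscript("abc123-request-id");
if (result.success) {
  console.log(result.data?.text);
  console.log(result.data?.words);
} else {
  // The request may have aged out of Deepgram's history; fall back to the
  // webhook payload you persisted in callback mode.
}
```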
```diff
@@ -9013,8 +9017,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
     super(...arguments);
     this.name = "speechmatics";
     this.capabilities = {
-      streaming: false,
-      // Batch only (streaming available via separate WebSocket API)
+      streaming: true,
       diarization: true,
       wordTimestamps: true,
       languageDetection: false,
```
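With the capability flag now truthful, callers can gate on it before attempting to stream. A trivial guard, not something the package prescribes:

```ts
import { SpeechmaticsAdapter } from "voice-router-dev";

const adapter = new SpeechmaticsAdapter();
if (adapter.capabilities.streaming) {
  // transcribeStream is available; see the usage sketch after the next hunk.
}
```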
```diff
@@ -9260,6 +9263,271 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
       throw error;
     }
   }
+  /**
+   * Get the regional WebSocket host for real-time streaming
+   *
+   * Speechmatics RT uses a different host pattern: {region}.rt.speechmatics.com
+   */
+  getRegionalWsHost(region) {
+    const regionPrefix = region || "eu1";
+    return `${regionPrefix}.rt.speechmatics.com`;
+  }
+  /**
+   * Stream audio for real-time transcription
+   *
+   * Creates a WebSocket connection to the Speechmatics Real-Time API.
+   * Protocol: send StartRecognition config, then AddAudio binary frames,
+   * receive AddPartialTranscript/AddTranscript/EndOfUtterance messages.
+   *
+   * @param options - Streaming configuration
+   * @param callbacks - Event callbacks
+   * @returns StreamingSession for sending audio and closing
+   *
+   * @see https://docs.speechmatics.com/rt-api-ref
+   */
+  async transcribeStream(options, callbacks) {
+    this.validateConfig();
+    const sessionId = `speechmatics_${Date.now()}_${Math.random().toString(36).substring(7)}`;
+    const createdAt = /* @__PURE__ */ new Date();
+    const smOpts = options?.speechmaticsStreaming;
+    const region = smOpts?.region || this.config?.region;
+    const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost(region)}`);
+    const wsUrl = `${wsBase}/v2`;
+    let status = "connecting";
+    let recognitionStarted = false;
+    const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : require("ws");
+    const ws = new WebSocketImpl(wsUrl);
+    const language = smOpts?.language || options?.language || "en";
+    const transcriptionConfig = {
+      language,
+      enable_entities: smOpts?.enableEntities ?? options?.entityDetection ?? false,
+      enable_partials: smOpts?.enablePartials ?? options?.interimResults !== false,
+      operating_point: smOpts?.operatingPoint || OperatingPoint.enhanced,
+      ...smOpts?.maxDelay !== void 0 && { max_delay: smOpts.maxDelay },
+      ...smOpts?.maxDelayMode && {
+        max_delay_mode: smOpts.maxDelayMode
+      },
+      ...smOpts?.domain && { domain: smOpts.domain },
+      ...(options?.diarization || smOpts?.diarization === TranscriptionConfigDiarization.speaker) && {
+        diarization: TranscriptionConfigDiarization.speaker,
+        ...smOpts?.maxSpeakers !== void 0 && {
+          speaker_diarization_config: { max_speakers: smOpts.maxSpeakers }
+        }
+      },
+      ...(options?.customVocabulary?.length || smOpts?.additionalVocab?.length) && {
+        additional_vocab: (smOpts?.additionalVocab || options?.customVocabulary || []).map(
+          (term) => ({ content: term })
+        )
+      }
+    };
+    const startRecognition = {
+      message: "StartRecognition",
+      audio_format: {
+        type: "raw",
+        encoding: smOpts?.encoding || "pcm_s16le",
+        sample_rate: smOpts?.sampleRate || options?.sampleRate || 16e3
+      },
+      transcription_config: transcriptionConfig,
+      ...smOpts?.conversationConfig && {
+        conversation_config: {
+          end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
+        }
+      }
+    };
+    ws.onopen = () => {
+      status = "open";
+      const msg = JSON.stringify(startRecognition);
+      if (callbacks?.onRawMessage) {
+        callbacks.onRawMessage({
+          provider: this.name,
+          direction: "outgoing",
+          timestamp: Date.now(),
+          payload: msg,
+          messageType: "StartRecognition"
+        });
+      }
+      ws.send(msg);
+    };
+    ws.onmessage = (event) => {
+      const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
+      try {
+        const data = JSON.parse(rawPayload);
+        const messageType = data.message;
+        if (callbacks?.onRawMessage) {
+          callbacks.onRawMessage({
+            provider: this.name,
+            direction: "incoming",
+            timestamp: Date.now(),
+            payload: rawPayload,
+            messageType
+          });
+        }
+        switch (messageType) {
+          case "RecognitionStarted": {
+            recognitionStarted = true;
+            callbacks?.onOpen?.();
+            callbacks?.onMetadata?.({
+              id: data.id,
+              languagePackInfo: data.language_pack_info
+            });
+            break;
+          }
+          case "AddPartialTranscript": {
+            const partial = data;
+            const words = this.resultsToWords(partial.results);
+            callbacks?.onTranscript?.({
+              type: "transcript",
+              text: partial.metadata.transcript,
+              isFinal: false,
+              words,
+              speaker: words[0]?.speaker,
+              confidence: partial.results[0]?.alternatives?.[0]?.confidence,
+              channel: partial.channel ? parseInt(partial.channel) : void 0
+            });
+            break;
+          }
+          case "AddTranscript": {
+            const final = data;
+            const words = this.resultsToWords(final.results);
+            callbacks?.onTranscript?.({
+              type: "transcript",
+              text: final.metadata.transcript,
+              isFinal: true,
+              words,
+              speaker: words[0]?.speaker,
+              confidence: final.results[0]?.alternatives?.[0]?.confidence,
+              channel: final.channel ? parseInt(final.channel) : void 0
+            });
+            if (options?.diarization || smOpts?.diarization === "speaker") {
+              const utterances = buildUtterancesFromWords(words);
+              for (const utterance of utterances) {
+                callbacks?.onUtterance?.(utterance);
+              }
+            }
+            break;
+          }
+          case "EndOfUtterance": {
+            break;
+          }
+          case "EndOfTranscript": {
+            callbacks?.onClose?.(1e3, "Transcription complete");
+            break;
+          }
+          case "Error": {
+            const err = data;
+            callbacks?.onError?.({
+              code: err.type || "SPEECHMATICS_ERROR",
+              message: err.reason || "Unknown error"
+            });
+            break;
+          }
+          case "Warning": {
+            const warn = data;
+            callbacks?.onMetadata?.({
+              warning: warn.type,
+              reason: warn.reason
+            });
+            break;
+          }
+          case "Info": {
+            callbacks?.onMetadata?.(data);
+            break;
+          }
+          case "AudioAdded":
+          case "ChannelAudioAdded":
+            break;
+          default:
+            callbacks?.onMetadata?.(data);
+            break;
+        }
+      } catch (error) {
+        callbacks?.onError?.({
+          code: "PARSE_ERROR",
+          message: `Failed to parse message: ${error}`
+        });
+      }
+    };
+    ws.onerror = () => {
+      callbacks?.onError?.({
+        code: "WEBSOCKET_ERROR",
+        message: "WebSocket error occurred"
+      });
+    };
+    ws.onclose = (event) => {
+      status = "closed";
+      callbacks?.onClose?.(event.code, event.reason);
+    };
+    await new Promise((resolve, reject) => {
+      const timeout = setTimeout(() => {
+        reject(new Error("WebSocket connection timeout"));
+      }, 1e4);
+      const checkReady = () => {
+        if (recognitionStarted) {
+          clearTimeout(timeout);
+          resolve();
+        } else if (status === "closed") {
+          clearTimeout(timeout);
+          reject(new Error("WebSocket connection failed"));
+        } else {
+          setTimeout(checkReady, 100);
+        }
+      };
+      checkReady();
+    });
+    return {
+      id: sessionId,
+      provider: this.name,
+      createdAt,
+      getStatus: () => status,
+      sendAudio: async (chunk) => {
+        if (status !== "open") {
+          throw new Error("Session is not open");
+        }
+        if (callbacks?.onRawMessage) {
+          const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
+            chunk.data.byteOffset,
+            chunk.data.byteOffset + chunk.data.byteLength
+          );
+          callbacks.onRawMessage({
+            provider: this.name,
+            direction: "outgoing",
+            timestamp: Date.now(),
+            payload: audioPayload,
+            messageType: "audio"
+          });
+        }
+        ws.send(chunk.data);
+      },
+      close: async () => {
+        if (status === "open") {
+          status = "closing";
+          const endMsg = JSON.stringify({ message: "EndOfStream", last_seq_no: 0 });
+          if (callbacks?.onRawMessage) {
+            callbacks.onRawMessage({
+              provider: this.name,
+              direction: "outgoing",
+              timestamp: Date.now(),
+              payload: endMsg,
+              messageType: "EndOfStream"
+            });
+          }
+          ws.send(endMsg);
+        }
+      }
+    };
+  }
+  /**
+   * Convert Speechmatics RecognitionResult[] to unified Word[]
+   */
+  resultsToWords(results) {
+    return results.filter((r) => r.type === "word").map((r) => ({
+      word: r.alternatives?.[0]?.content || "",
+      start: r.start_time,
+      end: r.end_time,
+      confidence: r.alternatives?.[0]?.confidence,
+      speaker: r.alternatives?.[0]?.speaker
+    }));
+  }
   /**
    * Normalize Speechmatics status to unified status
    * Uses generated JobDetailsStatus enum values
```
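A usage sketch for the new `transcribeStream`, assembled from the option and callback names visible in the hunk above. The `initialize` config shape and the audio source are assumptions (the source is declared below as hypothetical):

```ts
import { SpeechmaticsAdapter } from "voice-router-dev";

declare function getAudioChunks(): AsyncIterable<Uint8Array>; // hypothetical audio source

const adapter = new SpeechmaticsAdapter();
adapter.initialize({ apiKey: process.env.SPEECHMATICS_API_KEY! });

const session = await adapter.transcribeStream(
  {
    language: "en",
    interimResults: true, // maps to enable_partials
    diarization: true,
    speechmaticsStreaming: {
      region: "eu1", // resolves to wss://eu1.rt.speechmatics.com/v2
      sampleRate: 16000, // raw pcm_s16le is the default audio_format
      maxDelay: 2
    }
  },
  {
    onTranscript: (e) => {
      if (e.isFinal) console.log("final:", e.text);
      else console.log("partial:", e.text);
    },
    onUtterance: (u) => console.log("utterance:", u),
    onError: (err) => console.error(err.code, err.message)
  }
);

for await (const chunk of getAudioChunks()) {
  await session.sendAudio({ data: chunk }); // sent as AddAudio binary frames
}
await session.close(); // sends EndOfStream
```

Note that `transcribeStream` resolves only after `RecognitionStarted` arrives (or rejects after the 10-second connection timeout), so `sendAudio` is safe immediately after the `await`.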
```diff
@@ -9679,7 +9947,7 @@ var SonioxAdapter = class extends BaseAdapter {
       let messageType;
       try {
         const data = JSON.parse(rawPayload);
-        const errorMessage = data.error_message
+        const errorMessage = data.error_message;
         if (errorMessage) {
           messageType = "error";
         } else if (data.finished) {
```
```diff
@@ -10038,7 +10306,15 @@ var ElevenLabsAdapter = class extends BaseAdapter {
   /**
    * Submit audio for transcription
    *
-   * ElevenLabs batch is synchronous
+   * ElevenLabs batch is normally synchronous — the API returns results directly.
+   *
+   * **Webhook mode:** When `webhookUrl` is set (or `elevenlabs.webhook` is true),
+   * the request is processed asynchronously. ElevenLabs returns a 202 with a
+   * `request_id` and delivers results to a webhook configured in the ElevenLabs
+   * dashboard. The unified `webhookUrl` acts as an intent flag to enable async
+   * mode — the actual delivery destination must be pre-configured in your
+   * ElevenLabs dashboard. Use `elevenlabs.webhook_id` to target a specific
+   * webhook endpoint.
    */
   async transcribe(audio, options) {
     this.validateConfig();
```
```diff
@@ -10061,6 +10337,11 @@ var ElevenLabsAdapter = class extends BaseAdapter {
        }
      };
    }
+    const elevenlabsOpts = options?.elevenlabs;
+    const useWebhook = options?.webhookUrl || elevenlabsOpts?.webhook;
+    if (useWebhook) {
+      formData.append("webhook", "true");
+    }
    if (options?.language) {
      formData.append("language_code", options.language);
    }
```
```diff
@@ -10079,7 +10360,6 @@ var ElevenLabsAdapter = class extends BaseAdapter {
    if (options?.entityDetection) {
      formData.append("entity_detection", "all");
    }
-    const elevenlabsOpts = options?.elevenlabs;
    if (elevenlabsOpts) {
      for (const [key, value] of Object.entries(elevenlabsOpts)) {
        if (value === void 0 || value === null) continue;
```
```diff
@@ -10102,6 +10382,22 @@ var ElevenLabsAdapter = class extends BaseAdapter {
          "Content-Type": "multipart/form-data"
        }
      });
+      if (useWebhook) {
+        const ack = response.data;
+        return {
+          success: true,
+          provider: this.name,
+          data: {
+            id: ack.request_id || ack.transcription_id || `elevenlabs_${Date.now()}`,
+            text: "",
+            status: "queued"
+          },
+          tracking: {
+            requestId: ack.request_id
+          },
+          raw: response.data
+        };
+      }
      return this.normalizeResponse(response.data);
    } catch (error) {
      return this.createErrorResponse(error);
```
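A webhook-mode sketch matching the doc comment and the ack handling above. The delivery endpoint itself must already be configured in the ElevenLabs dashboard; `audioBuffer` and the env var are placeholders:

```ts
import { ElevenLabsAdapter } from "voice-router-dev";

declare const audioBuffer: Buffer; // hypothetical audio input

const adapter = new ElevenLabsAdapter();
adapter.initialize({ apiKey: process.env.ELEVENLABS_API_KEY! });

const ack = await adapter.transcribe(audioBuffer, {
  webhookUrl: "https://api.example.com/hooks/elevenlabs", // intent flag only
  elevenlabs: { webhook_id: "wh_123" } // optional: target a specific endpoint
});
if (ack.success) {
  console.log(ack.data?.status); // "queued"
  console.log(ack.tracking?.requestId); // correlate with the webhook payload
}
```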
```diff
@@ -10194,20 +10490,9 @@ var ElevenLabsAdapter = class extends BaseAdapter {
     ws.onmessage = (event) => {
       receivedData = true;
       const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
-      let messageType;
       try {
         const data = JSON.parse(rawPayload);
-        [old line 10200 not rendered in the diff source]
-          messageType = "error";
-        } else if (data.message_type === "session_started") {
-          messageType = "session_started";
-        } else if (data.message_type === "partial_transcript") {
-          messageType = "partial_transcript";
-        } else if (data.message_type === "committed_transcript") {
-          messageType = "committed_transcript";
-        } else if (data.message_type === "committed_transcript_with_timestamps") {
-          messageType = "committed_transcript_with_timestamps";
-        }
+        const messageType = "error" in data ? "error" : data.message_type;
         if (callbacks?.onRawMessage) {
           callbacks.onRawMessage({
             provider: this.name,
```
```diff
@@ -10217,50 +10502,62 @@ var ElevenLabsAdapter = class extends BaseAdapter {
             messageType
           });
         }
-        if (data
+        if ("error" in data) {
           callbacks?.onError?.({
-            code: data.
+            code: data.message_type || "STREAM_ERROR",
             message: data.error
           });
           return;
         }
-        [old lines 10227-10262 (36 lines) not rendered in the diff source]
+        switch (data.message_type) {
+          case "session_started":
+            break;
+          case "partial_transcript": {
+            const streamEvent = {
+              type: "transcript",
+              text: data.text || "",
+              isFinal: false,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            break;
+          }
+          case "committed_transcript": {
+            const streamEvent = {
+              type: "transcript",
+              text: data.text || "",
+              isFinal: true,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            break;
+          }
+          case "committed_transcript_with_timestamps": {
+            const tsData = data;
+            const words = tsData.words ? tsData.words.map((w) => ({
+              word: w.text || "",
+              start: w.start || 0,
+              end: w.end || 0,
+              confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
+              speaker: w.speaker_id
+            })) : [];
+            const streamEvent = {
+              type: "transcript",
+              text: tsData.text || "",
+              isFinal: true,
+              words: words.length > 0 ? words : void 0,
+              speaker: words[0]?.speaker,
+              language: tsData.language_code,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            if (options?.diarization && words.length > 0) {
+              const utterances = buildUtterancesFromWords(words);
+              for (const utterance of utterances) {
+                callbacks?.onUtterance?.(utterance);
+              }
            }
+            break;
          }
        }
      } catch (error) {
```
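One detail worth calling out in the timestamps branch above: per-word confidence is recovered from the model log-probability as `Math.exp(logprob)`, which maps a natural-log probability in (-inf, 0] back to (0, 1]:

```ts
// Same derivation the adapter applies to w.logprob above.
const toConfidence = (logprob: number): number => Math.exp(logprob);

toConfidence(0);      // 1.0, fully confident
toConfidence(-0.105); // ≈ 0.90
toConfidence(-2.3);   // ≈ 0.10
```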
```diff
@@ -39463,6 +39760,7 @@ var deleteTranscriptByIdResponse = import_zod13.z.any();
   AssemblyAILanguage,
   AssemblyAILanguageCodes,
   AssemblyAIListFilterSchema,
+  AssemblyAIRegion,
   AssemblyAISampleRate,
   AssemblyAISpeechModel,
   AssemblyAIStatus,
```
```diff
@@ -39513,6 +39811,7 @@ var deleteTranscriptByIdResponse = import_zod13.z.any();
   ElevenLabsLanguageCodes,
   ElevenLabsLanguageLabels,
   ElevenLabsLanguages,
+  ElevenLabsRegion,
   ElevenLabsTypes,
   ElevenLabsZodSchemas,
   GladiaAdapter,
```