voice-router-dev 0.9.1 → 0.9.2
This diff shows the changes between publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
- package/CHANGELOG.md +24 -0
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/index.d.mts +179 -151
- package/dist/index.d.ts +179 -151
- package/dist/index.js +367 -78
- package/dist/index.mjs +367 -78
- package/package.json +1 -1
- package/dist/{field-configs-CH0lgAe8.d.mts → field-configs-FbtCPxzs.d.mts} +60 -60
- package/dist/{field-configs-CH0lgAe8.d.ts → field-configs-FbtCPxzs.d.ts} +60 -60
package/dist/index.mjs
CHANGED
```diff
@@ -6566,9 +6566,13 @@ var DeepgramAdapter = class extends BaseAdapter {
    * Submit audio for transcription
    *
    * Sends audio to Deepgram API for transcription. Deepgram normally processes
-   * synchronously and returns results immediately.
-   *
-   *
+   * synchronously and returns results immediately.
+   *
+   * **Callback mode:** When `webhookUrl` is set, Deepgram returns immediately
+   * with a `request_id` (status `"queued"`). The full transcript is POSTed to
+   * the webhook URL — this is the primary delivery mechanism. `getTranscript()`
+   * can attempt to retrieve the result later via request history, but that
+   * endpoint is best-effort and not a guaranteed durable store.
    *
    * @param audio - Audio input (URL or file buffer)
    * @param options - Transcription options
```
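For orientation, a minimal caller-side sketch of the callback mode described in this doc comment. The `initialize({ apiKey, projectId })` shape comes from the JSDoc example removed in the next hunk, and `status: "queued"` from the new text above; the root import and the `{ url }` audio-input shape are assumptions, not confirmed by this diff.

```ts
// Sketch only: submit audio in callback mode and rely on the webhook POST
// (not request history) for the actual transcript.
import { DeepgramAdapter } from "voice-router-dev"; // assumed export

const adapter = new DeepgramAdapter();
adapter.initialize({
  apiKey: process.env.DEEPGRAM_API_KEY,
  projectId: process.env.DEEPGRAM_PROJECT_ID, // needed later for getTranscript()
});

const result = await adapter.transcribe(
  { url: "https://example.com/call.wav" },                // assumed URL-input shape
  { webhookUrl: "https://api.example.com/hooks/deepgram" }
);

if (result.success && result.data?.status === "queued") {
  console.log("queued, request id:", result.data.id);     // transcript arrives via webhook
}
```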
```diff
@@ -6678,30 +6682,22 @@ var DeepgramAdapter = class extends BaseAdapter {
     }
   }
   /**
-   * Get transcription result by ID
-   *
-   * Retrieves a previous transcription from Deepgram's request history.
+   * Get transcription result by ID (best-effort)
    *
-   *
-   *
+   * Retrieves a previous transcription from Deepgram's request history API.
+   * Requires `projectId` to be set during initialization.
    *
-   *
-   *
+   * **Important:** Deepgram's request history is best-effort. Requests may
+   * expire or be unavailable depending on your plan and retention settings.
+   * This is NOT a durable transcript store — for reliable retrieval, use
+   * callback mode (`webhookUrl`) and persist the webhook payload yourself.
    *
-   *
-   *
-   *
-   * adapter.initialize({
-   *   apiKey: process.env.DEEPGRAM_API_KEY,
-   *   projectId: process.env.DEEPGRAM_PROJECT_ID
-   * })
+   * The response field on the request history entry is cast to
+   * `ListenV1Response` — this appears to work in practice but is not
+   * explicitly documented by Deepgram as a guaranteed contract.
    *
-   *
-   * if
-   * console.log(result.data?.text)
-   * console.log(result.data?.words)
-   * }
-   * ```
+   * @param transcriptId - Request ID from a previous transcription
+   * @returns Transcript response if still available in request history
    *
    * @see https://developers.deepgram.com/reference/get-request
    */
```
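The corresponding best-effort retrieval path, continuing the sketch above; the `success` guard and the `data?.text` / `data?.words` fields come from the example removed in this hunk.

```ts
// Sketch: best-effort lookup in Deepgram request history. Entries can expire,
// so prefer callback mode plus your own storage for anything durable.
const history = await adapter.getTranscript(requestId); // id returned by transcribe()
if (history.success) {
  console.log(history.data?.text);
  console.log(history.data?.words);
} else {
  // The entry may have aged out of request history; fall back to the
  // transcript you persisted from the webhook payload.
}
```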
```diff
@@ -8784,8 +8780,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
     super(...arguments);
     this.name = "speechmatics";
     this.capabilities = {
-      streaming: false,
-      // Batch only (streaming available via separate WebSocket API)
+      streaming: true,
       diarization: true,
       wordTimestamps: true,
       languageDetection: false,
```
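With `streaming` flipped to `true`, capability-gated routing can now send real-time audio to Speechmatics. A small illustrative guard (the surrounding wiring is hypothetical):

```ts
// Illustrative: route streaming work only to adapters that declare support.
if (adapter.capabilities.streaming) {
  const session = await adapter.transcribeStream(options, callbacks);
  // ... stream audio, then await session.close()
}
```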
```diff
@@ -9031,6 +9026,271 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
       throw error;
     }
   }
+  /**
+   * Get the regional WebSocket host for real-time streaming
+   *
+   * Speechmatics RT uses a different host pattern: {region}.rt.speechmatics.com
+   */
+  getRegionalWsHost(region) {
+    const regionPrefix = region || "eu1";
+    return `${regionPrefix}.rt.speechmatics.com`;
+  }
+  /**
+   * Stream audio for real-time transcription
+   *
+   * Creates a WebSocket connection to the Speechmatics Real-Time API.
+   * Protocol: send StartRecognition config, then AddAudio binary frames,
+   * receive AddPartialTranscript/AddTranscript/EndOfUtterance messages.
+   *
+   * @param options - Streaming configuration
+   * @param callbacks - Event callbacks
+   * @returns StreamingSession for sending audio and closing
+   *
+   * @see https://docs.speechmatics.com/rt-api-ref
+   */
+  async transcribeStream(options, callbacks) {
+    this.validateConfig();
+    const sessionId = `speechmatics_${Date.now()}_${Math.random().toString(36).substring(7)}`;
+    const createdAt = /* @__PURE__ */ new Date();
+    const smOpts = options?.speechmaticsStreaming;
+    const region = smOpts?.region || this.config?.region;
+    const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost(region)}`);
+    const wsUrl = `${wsBase}/v2`;
+    let status = "connecting";
+    let recognitionStarted = false;
+    const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : __require("ws");
+    const ws = new WebSocketImpl(wsUrl);
+    const language = smOpts?.language || options?.language || "en";
+    const transcriptionConfig = {
+      language,
+      enable_entities: smOpts?.enableEntities ?? options?.entityDetection ?? false,
+      enable_partials: smOpts?.enablePartials ?? options?.interimResults !== false,
+      operating_point: smOpts?.operatingPoint || OperatingPoint.enhanced,
+      ...smOpts?.maxDelay !== void 0 && { max_delay: smOpts.maxDelay },
+      ...smOpts?.maxDelayMode && {
+        max_delay_mode: smOpts.maxDelayMode
+      },
+      ...smOpts?.domain && { domain: smOpts.domain },
+      ...(options?.diarization || smOpts?.diarization === TranscriptionConfigDiarization.speaker) && {
+        diarization: TranscriptionConfigDiarization.speaker,
+        ...smOpts?.maxSpeakers !== void 0 && {
+          speaker_diarization_config: { max_speakers: smOpts.maxSpeakers }
+        }
+      },
+      ...(options?.customVocabulary?.length || smOpts?.additionalVocab?.length) && {
+        additional_vocab: (smOpts?.additionalVocab || options?.customVocabulary || []).map(
+          (term) => ({ content: term })
+        )
+      }
+    };
+    const startRecognition = {
+      message: "StartRecognition",
+      audio_format: {
+        type: "raw",
+        encoding: smOpts?.encoding || "pcm_s16le",
+        sample_rate: smOpts?.sampleRate || options?.sampleRate || 16e3
+      },
+      transcription_config: transcriptionConfig,
+      ...smOpts?.conversationConfig && {
+        conversation_config: {
+          end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
+        }
+      }
+    };
+    ws.onopen = () => {
+      status = "open";
+      const msg = JSON.stringify(startRecognition);
+      if (callbacks?.onRawMessage) {
+        callbacks.onRawMessage({
+          provider: this.name,
+          direction: "outgoing",
+          timestamp: Date.now(),
+          payload: msg,
+          messageType: "StartRecognition"
+        });
+      }
+      ws.send(msg);
+    };
+    ws.onmessage = (event) => {
+      const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
+      try {
+        const data = JSON.parse(rawPayload);
+        const messageType = data.message;
+        if (callbacks?.onRawMessage) {
+          callbacks.onRawMessage({
+            provider: this.name,
+            direction: "incoming",
+            timestamp: Date.now(),
+            payload: rawPayload,
+            messageType
+          });
+        }
+        switch (messageType) {
+          case "RecognitionStarted": {
+            recognitionStarted = true;
+            callbacks?.onOpen?.();
+            callbacks?.onMetadata?.({
+              id: data.id,
+              languagePackInfo: data.language_pack_info
+            });
+            break;
+          }
+          case "AddPartialTranscript": {
+            const partial = data;
+            const words = this.resultsToWords(partial.results);
+            callbacks?.onTranscript?.({
+              type: "transcript",
+              text: partial.metadata.transcript,
+              isFinal: false,
+              words,
+              speaker: words[0]?.speaker,
+              confidence: partial.results[0]?.alternatives?.[0]?.confidence,
+              channel: partial.channel ? parseInt(partial.channel) : void 0
+            });
+            break;
+          }
+          case "AddTranscript": {
+            const final = data;
+            const words = this.resultsToWords(final.results);
+            callbacks?.onTranscript?.({
+              type: "transcript",
+              text: final.metadata.transcript,
+              isFinal: true,
+              words,
+              speaker: words[0]?.speaker,
+              confidence: final.results[0]?.alternatives?.[0]?.confidence,
+              channel: final.channel ? parseInt(final.channel) : void 0
+            });
+            if (options?.diarization || smOpts?.diarization === "speaker") {
+              const utterances = buildUtterancesFromWords(words);
+              for (const utterance of utterances) {
+                callbacks?.onUtterance?.(utterance);
+              }
+            }
+            break;
+          }
+          case "EndOfUtterance": {
+            break;
+          }
+          case "EndOfTranscript": {
+            callbacks?.onClose?.(1e3, "Transcription complete");
+            break;
+          }
+          case "Error": {
+            const err = data;
+            callbacks?.onError?.({
+              code: err.type || "SPEECHMATICS_ERROR",
+              message: err.reason || "Unknown error"
+            });
+            break;
+          }
+          case "Warning": {
+            const warn = data;
+            callbacks?.onMetadata?.({
+              warning: warn.type,
+              reason: warn.reason
+            });
+            break;
+          }
+          case "Info": {
+            callbacks?.onMetadata?.(data);
+            break;
+          }
+          case "AudioAdded":
+          case "ChannelAudioAdded":
+            break;
+          default:
+            callbacks?.onMetadata?.(data);
+            break;
+        }
+      } catch (error) {
+        callbacks?.onError?.({
+          code: "PARSE_ERROR",
+          message: `Failed to parse message: ${error}`
+        });
+      }
+    };
+    ws.onerror = () => {
+      callbacks?.onError?.({
+        code: "WEBSOCKET_ERROR",
+        message: "WebSocket error occurred"
+      });
+    };
+    ws.onclose = (event) => {
+      status = "closed";
+      callbacks?.onClose?.(event.code, event.reason);
+    };
+    await new Promise((resolve, reject) => {
+      const timeout = setTimeout(() => {
+        reject(new Error("WebSocket connection timeout"));
+      }, 1e4);
+      const checkReady = () => {
+        if (recognitionStarted) {
+          clearTimeout(timeout);
+          resolve();
+        } else if (status === "closed") {
+          clearTimeout(timeout);
+          reject(new Error("WebSocket connection failed"));
+        } else {
+          setTimeout(checkReady, 100);
+        }
+      };
+      checkReady();
+    });
+    return {
+      id: sessionId,
+      provider: this.name,
+      createdAt,
+      getStatus: () => status,
+      sendAudio: async (chunk) => {
+        if (status !== "open") {
+          throw new Error("Session is not open");
+        }
+        if (callbacks?.onRawMessage) {
+          const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
+            chunk.data.byteOffset,
+            chunk.data.byteOffset + chunk.data.byteLength
+          );
+          callbacks.onRawMessage({
+            provider: this.name,
+            direction: "outgoing",
+            timestamp: Date.now(),
+            payload: audioPayload,
+            messageType: "audio"
+          });
+        }
+        ws.send(chunk.data);
+      },
+      close: async () => {
+        if (status === "open") {
+          status = "closing";
+          const endMsg = JSON.stringify({ message: "EndOfStream", last_seq_no: 0 });
+          if (callbacks?.onRawMessage) {
+            callbacks.onRawMessage({
+              provider: this.name,
+              direction: "outgoing",
+              timestamp: Date.now(),
+              payload: endMsg,
+              messageType: "EndOfStream"
+            });
+          }
+          ws.send(endMsg);
+        }
+      }
+    };
+  }
+  /**
+   * Convert Speechmatics RecognitionResult[] to unified Word[]
+   */
+  resultsToWords(results) {
+    return results.filter((r) => r.type === "word").map((r) => ({
+      word: r.alternatives?.[0]?.content || "",
+      start: r.start_time,
+      end: r.end_time,
+      confidence: r.alternatives?.[0]?.confidence,
+      speaker: r.alternatives?.[0]?.speaker
+    }));
+  }
   /**
    * Normalize Speechmatics status to unified status
    * Uses generated JobDetailsStatus enum values
```
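An end-to-end usage sketch of the new streaming method, using only names visible in this hunk (`transcribeStream`, `speechmaticsStreaming`, `sendAudio`, `close`); `pcmChunks` is a placeholder for your own pcm_s16le audio source.

```ts
// Sketch: stream 16 kHz PCM to Speechmatics RT and log partial/final text.
const session = await adapter.transcribeStream(
  {
    language: "en",
    diarization: true,
    speechmaticsStreaming: { region: "eu1", sampleRate: 16000, enablePartials: true },
  },
  {
    onOpen: () => console.log("RecognitionStarted"),
    onTranscript: (e) => console.log(e.isFinal ? "final:" : "partial:", e.text),
    onUtterance: (u) => console.log("utterance from speaker", u.speaker),
    onError: (err) => console.error(err.code, err.message),
    onClose: (code, reason) => console.log("closed:", code, reason),
  }
);

for await (const chunk of pcmChunks) {      // placeholder audio source
  await session.sendAudio({ data: chunk }); // Buffer or ArrayBuffer, per chunk.data above
}
await session.close();                      // sends EndOfStream
```

Note that `transcribeStream` resolves only after `RecognitionStarted` arrives (polled every 100 ms with a 10 s timeout), so `sendAudio` is safe to call immediately after the `await`.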
```diff
@@ -9450,7 +9710,7 @@ var SonioxAdapter = class extends BaseAdapter {
         let messageType;
         try {
           const data = JSON.parse(rawPayload);
-          const errorMessage = data.error_message
+          const errorMessage = data.error_message;
           if (errorMessage) {
             messageType = "error";
           } else if (data.finished) {
```
```diff
@@ -9809,7 +10069,15 @@ var ElevenLabsAdapter = class extends BaseAdapter {
   /**
    * Submit audio for transcription
    *
-   * ElevenLabs batch is synchronous
+   * ElevenLabs batch is normally synchronous — the API returns results directly.
+   *
+   * **Webhook mode:** When `webhookUrl` is set (or `elevenlabs.webhook` is true),
+   * the request is processed asynchronously. ElevenLabs returns a 202 with a
+   * `request_id` and delivers results to a webhook configured in the ElevenLabs
+   * dashboard. The unified `webhookUrl` acts as an intent flag to enable async
+   * mode — the actual delivery destination must be pre-configured in your
+   * ElevenLabs dashboard. Use `elevenlabs.webhook_id` to target a specific
+   * webhook endpoint.
    */
   async transcribe(audio, options) {
     this.validateConfig();
```
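A sketch of opting into webhook mode via the unified option; `webhook_id` rides in the provider-specific `elevenlabs` bag as described above, and the id value here is a placeholder.

```ts
// Sketch: async ElevenLabs transcription. The transcript is delivered to the
// webhook configured in the ElevenLabs dashboard, not returned from this call.
const result = await adapter.transcribe(audioBuffer, {
  webhookUrl: "https://api.example.com/stt-hook", // intent flag only (see doc comment)
  elevenlabs: { webhook_id: "wh_123" },           // placeholder, targets one endpoint
});
```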
```diff
@@ -9832,6 +10100,11 @@ var ElevenLabsAdapter = class extends BaseAdapter {
         }
       };
     }
+    const elevenlabsOpts = options?.elevenlabs;
+    const useWebhook = options?.webhookUrl || elevenlabsOpts?.webhook;
+    if (useWebhook) {
+      formData.append("webhook", "true");
+    }
     if (options?.language) {
       formData.append("language_code", options.language);
     }
```
```diff
@@ -9850,7 +10123,6 @@ var ElevenLabsAdapter = class extends BaseAdapter {
     if (options?.entityDetection) {
       formData.append("entity_detection", "all");
     }
-    const elevenlabsOpts = options?.elevenlabs;
     if (elevenlabsOpts) {
       for (const [key, value] of Object.entries(elevenlabsOpts)) {
         if (value === void 0 || value === null) continue;
```
```diff
@@ -9873,6 +10145,22 @@ var ElevenLabsAdapter = class extends BaseAdapter {
           "Content-Type": "multipart/form-data"
         }
       });
+      if (useWebhook) {
+        const ack = response.data;
+        return {
+          success: true,
+          provider: this.name,
+          data: {
+            id: ack.request_id || ack.transcription_id || `elevenlabs_${Date.now()}`,
+            text: "",
+            status: "queued"
+          },
+          tracking: {
+            requestId: ack.request_id
+          },
+          raw: response.data
+        };
+      }
       return this.normalizeResponse(response.data);
     } catch (error) {
       return this.createErrorResponse(error);
```
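Caller-side handling of that queued acknowledgment could look like the following; the webhook payload shape (`request_id`) is an assumption, since the diff only shows the acknowledgment side.

```ts
// Sketch: correlate the queued ack with the later webhook delivery.
const pending = new Map<string, (payload: unknown) => void>();

if (result.data?.status === "queued" && result.tracking?.requestId) {
  pending.set(result.tracking.requestId, persistTranscript); // persistTranscript: your handler
}

// In your webhook endpoint (field name assumed):
function onWebhook(payload: { request_id: string }) {
  pending.get(payload.request_id)?.(payload);
  pending.delete(payload.request_id);
}
```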
```diff
@@ -9965,20 +10253,9 @@ var ElevenLabsAdapter = class extends BaseAdapter {
     ws.onmessage = (event) => {
       receivedData = true;
       const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
-      let messageType;
       try {
         const data = JSON.parse(rawPayload);
-        if (data.error) {
-          messageType = "error";
-        } else if (data.message_type === "session_started") {
-          messageType = "session_started";
-        } else if (data.message_type === "partial_transcript") {
-          messageType = "partial_transcript";
-        } else if (data.message_type === "committed_transcript") {
-          messageType = "committed_transcript";
-        } else if (data.message_type === "committed_transcript_with_timestamps") {
-          messageType = "committed_transcript_with_timestamps";
-        }
+        const messageType = "error" in data ? "error" : data.message_type;
         if (callbacks?.onRawMessage) {
           callbacks.onRawMessage({
             provider: this.name,
```
```diff
@@ -9988,50 +10265,62 @@ var ElevenLabsAdapter = class extends BaseAdapter {
             messageType
           });
         }
-        if (data.error) {
+        if ("error" in data) {
           callbacks?.onError?.({
-            code: data.
+            code: data.message_type || "STREAM_ERROR",
             message: data.error
           });
           return;
         }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        switch (data.message_type) {
+          case "session_started":
+            break;
+          case "partial_transcript": {
+            const streamEvent = {
+              type: "transcript",
+              text: data.text || "",
+              isFinal: false,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            break;
+          }
+          case "committed_transcript": {
+            const streamEvent = {
+              type: "transcript",
+              text: data.text || "",
+              isFinal: true,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            break;
+          }
+          case "committed_transcript_with_timestamps": {
+            const tsData = data;
+            const words = tsData.words ? tsData.words.map((w) => ({
+              word: w.text || "",
+              start: w.start || 0,
+              end: w.end || 0,
+              confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
+              speaker: w.speaker_id
+            })) : [];
+            const streamEvent = {
+              type: "transcript",
+              text: tsData.text || "",
+              isFinal: true,
+              words: words.length > 0 ? words : void 0,
+              speaker: words[0]?.speaker,
+              language: tsData.language_code,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            if (options?.diarization && words.length > 0) {
+              const utterances = buildUtterancesFromWords(words);
+              for (const utterance of utterances) {
+                callbacks?.onUtterance?.(utterance);
+              }
+            }
            }
+            break;
          }
        }
      } catch (error) {
```
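Note the confidence mapping in the timestamped branch: ElevenLabs reports per-word `logprob`, which the adapter converts to a 0 to 1 score via `Math.exp(logprob)`. A consumer-side sketch, assuming the same `transcribeStream(options, callbacks)` entry point shown for the Speechmatics adapter:

```ts
// Sketch: consume the unified events emitted by the switch above.
await adapter.transcribeStream(options, {
  onTranscript: (e) => {
    if (!e.isFinal) return;              // skip partial_transcript events
    for (const w of e.words ?? []) {     // set on committed_transcript_with_timestamps
      console.log(w.word, w.start, w.end, w.confidence); // confidence = exp(logprob)
    }
  },
  onUtterance: (u) => console.log("speaker", u.speaker), // only with diarization enabled
});
```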
package/package.json
CHANGED
```diff
@@ -1,6 +1,6 @@
 {
   "name": "voice-router-dev",
-  "version": "0.9.1",
+  "version": "0.9.2",
   "description": "Universal speech-to-text router for Gladia, AssemblyAI, Deepgram, Azure, OpenAI Whisper, Speechmatics, Soniox, and ElevenLabs",
   "main": "dist/index.js",
   "module": "dist/index.mjs",
```