voice-router-dev 0.9.1 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -0
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/index.d.mts +181 -153
- package/dist/index.d.ts +181 -153
- package/dist/index.js +377 -78
- package/dist/index.mjs +375 -78
- package/package.json +1 -1
- package/dist/{field-configs-CH0lgAe8.d.mts → field-configs-FbtCPxzs.d.mts} +60 -60
- package/dist/{field-configs-CH0lgAe8.d.ts → field-configs-FbtCPxzs.d.ts} +60 -60
package/dist/index.mjs
CHANGED
@@ -2591,6 +2591,12 @@ var AssemblyAISampleRate = {
   rate48000: 48e3
 };
 var AssemblyAIStatus = TranscriptStatus;
+var AssemblyAIRegion = {
+  /** United States (default) */
+  us: "us",
+  /** European Union — data never leaves the EU */
+  eu: "eu"
+};
 var GladiaStatus = TranscriptionControllerListV2StatusItem;
 var DeepgramStatus = V1ProjectsProjectIdRequestsGetParametersStatus;
 var SpeechmaticsRegion = {
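`AssemblyAIRegion` is also added to the public exports (see the export hunk further down). A minimal usage sketch, assuming the AssemblyAI adapter follows the same `initialize()` pattern as the other adapters and accepts a `region` field in its config (both are assumptions, not confirmed by this diff):

```ts
import { AssemblyAIAdapter, AssemblyAIRegion } from "voice-router-dev";

// Hypothetical EU pinning: the `region` config key and the AssemblyAIAdapter
// export name are assumed by analogy with the other adapters in this package.
const adapter = new AssemblyAIAdapter();
adapter.initialize({
  apiKey: process.env.ASSEMBLYAI_API_KEY!,
  region: AssemblyAIRegion.eu, // "eu": data never leaves the EU
});
```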
@@ -6566,9 +6572,13 @@ var DeepgramAdapter = class extends BaseAdapter {
    * Submit audio for transcription
    *
    * Sends audio to Deepgram API for transcription. Deepgram normally processes
-   * synchronously and returns results immediately.
-   *
-   *
+   * synchronously and returns results immediately.
+   *
+   * **Callback mode:** When `webhookUrl` is set, Deepgram returns immediately
+   * with a `request_id` (status `"queued"`). The full transcript is POSTed to
+   * the webhook URL — this is the primary delivery mechanism. `getTranscript()`
+   * can attempt to retrieve the result later via request history, but that
+   * endpoint is best-effort and not a guaranteed durable store.
    *
    * @param audio - Audio input (URL or file buffer)
    * @param options - Transcription options
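A sketch of the callback flow this doc block describes: submit with `webhookUrl`, receive an immediate `"queued"` acknowledgement carrying the `request_id`, and rely on the webhook POST for the transcript itself. The audio-input shape and the exact fields on the acknowledgement are assumptions here:

```ts
const adapter = new DeepgramAdapter();
adapter.initialize({ apiKey: process.env.DEEPGRAM_API_KEY! });

// Callback mode: Deepgram answers right away; the finished transcript is
// POSTed to webhookUrl (the primary delivery mechanism per the doc above).
const submitted = await adapter.transcribe(
  { url: "https://example.com/call-recording.wav" }, // assumed input shape
  { webhookUrl: "https://api.example.com/hooks/deepgram" }
);

if (submitted.success && submitted.data?.status === "queued") {
  // Persist this id so the webhook payload can be correlated later.
  console.log("queued request:", submitted.data.id);
}
```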
@@ -6678,30 +6688,22 @@ var DeepgramAdapter = class extends BaseAdapter {
     }
   }
   /**
-   * Get transcription result by ID
+   * Get transcription result by ID (best-effort)
    *
-   * Retrieves a previous transcription from Deepgram's request history.
+   * Retrieves a previous transcription from Deepgram's request history API.
+   * Requires `projectId` to be set during initialization.
    *
-   *
-   *
+   * **Important:** Deepgram's request history is best-effort. Requests may
+   * expire or be unavailable depending on your plan and retention settings.
+   * This is NOT a durable transcript store — for reliable retrieval, use
+   * callback mode (`webhookUrl`) and persist the webhook payload yourself.
    *
-   *
-   *
+   * The response field on the request history entry is cast to
+   * `ListenV1Response` — this appears to work in practice but is not
+   * explicitly documented by Deepgram as a guaranteed contract.
    *
-   * @
-   *
-   * const adapter = new DeepgramAdapter()
-   * adapter.initialize({
-   *   apiKey: process.env.DEEPGRAM_API_KEY,
-   *   projectId: process.env.DEEPGRAM_PROJECT_ID
-   * })
-   *
-   * const result = await adapter.getTranscript('abc123-request-id')
-   * if (result.success) {
-   *   console.log(result.data?.text)
-   *   console.log(result.data?.words)
-   * }
-   * ```
+   * @param transcriptId - Request ID from a previous transcription
+   * @returns Transcript response if still available in request history
    *
    * @see https://developers.deepgram.com/reference/get-request
    */
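The inline example was dropped from the JSDoc; the same flow still applies, now with the best-effort caveat in mind:

```ts
const adapter = new DeepgramAdapter();
adapter.initialize({
  apiKey: process.env.DEEPGRAM_API_KEY!,
  projectId: process.env.DEEPGRAM_PROJECT_ID!, // required for request-history lookups
});

// Best-effort: may fail if the request has aged out of Deepgram's history.
const result = await adapter.getTranscript("abc123-request-id");
if (result.success) {
  console.log(result.data?.text);
  console.log(result.data?.words);
}
```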
@@ -8784,8 +8786,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
     super(...arguments);
     this.name = "speechmatics";
     this.capabilities = {
-      streaming: false,
-      // Batch only (streaming available via separate WebSocket API)
+      streaming: true,
       diarization: true,
       wordTimestamps: true,
       languageDetection: false,
@@ -9031,6 +9032,271 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
       throw error;
     }
   }
+  /**
+   * Get the regional WebSocket host for real-time streaming
+   *
+   * Speechmatics RT uses a different host pattern: {region}.rt.speechmatics.com
+   */
+  getRegionalWsHost(region) {
+    const regionPrefix = region || "eu1";
+    return `${regionPrefix}.rt.speechmatics.com`;
+  }
+  /**
+   * Stream audio for real-time transcription
+   *
+   * Creates a WebSocket connection to the Speechmatics Real-Time API.
+   * Protocol: send StartRecognition config, then AddAudio binary frames,
+   * receive AddPartialTranscript/AddTranscript/EndOfUtterance messages.
+   *
+   * @param options - Streaming configuration
+   * @param callbacks - Event callbacks
+   * @returns StreamingSession for sending audio and closing
+   *
+   * @see https://docs.speechmatics.com/rt-api-ref
+   */
+  async transcribeStream(options, callbacks) {
+    this.validateConfig();
+    const sessionId = `speechmatics_${Date.now()}_${Math.random().toString(36).substring(7)}`;
+    const createdAt = /* @__PURE__ */ new Date();
+    const smOpts = options?.speechmaticsStreaming;
+    const region = smOpts?.region || this.config?.region;
+    const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost(region)}`);
+    const wsUrl = `${wsBase}/v2`;
+    let status = "connecting";
+    let recognitionStarted = false;
+    const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : __require("ws");
+    const ws = new WebSocketImpl(wsUrl);
+    const language = smOpts?.language || options?.language || "en";
+    const transcriptionConfig = {
+      language,
+      enable_entities: smOpts?.enableEntities ?? options?.entityDetection ?? false,
+      enable_partials: smOpts?.enablePartials ?? options?.interimResults !== false,
+      operating_point: smOpts?.operatingPoint || OperatingPoint.enhanced,
+      ...smOpts?.maxDelay !== void 0 && { max_delay: smOpts.maxDelay },
+      ...smOpts?.maxDelayMode && {
+        max_delay_mode: smOpts.maxDelayMode
+      },
+      ...smOpts?.domain && { domain: smOpts.domain },
+      ...(options?.diarization || smOpts?.diarization === TranscriptionConfigDiarization.speaker) && {
+        diarization: TranscriptionConfigDiarization.speaker,
+        ...smOpts?.maxSpeakers !== void 0 && {
+          speaker_diarization_config: { max_speakers: smOpts.maxSpeakers }
+        }
+      },
+      ...(options?.customVocabulary?.length || smOpts?.additionalVocab?.length) && {
+        additional_vocab: (smOpts?.additionalVocab || options?.customVocabulary || []).map(
+          (term) => ({ content: term })
+        )
+      }
+    };
+    const startRecognition = {
+      message: "StartRecognition",
+      audio_format: {
+        type: "raw",
+        encoding: smOpts?.encoding || "pcm_s16le",
+        sample_rate: smOpts?.sampleRate || options?.sampleRate || 16e3
+      },
+      transcription_config: transcriptionConfig,
+      ...smOpts?.conversationConfig && {
+        conversation_config: {
+          end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
+        }
+      }
+    };
+    ws.onopen = () => {
+      status = "open";
+      const msg = JSON.stringify(startRecognition);
+      if (callbacks?.onRawMessage) {
+        callbacks.onRawMessage({
+          provider: this.name,
+          direction: "outgoing",
+          timestamp: Date.now(),
+          payload: msg,
+          messageType: "StartRecognition"
+        });
+      }
+      ws.send(msg);
+    };
+    ws.onmessage = (event) => {
+      const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
+      try {
+        const data = JSON.parse(rawPayload);
+        const messageType = data.message;
+        if (callbacks?.onRawMessage) {
+          callbacks.onRawMessage({
+            provider: this.name,
+            direction: "incoming",
+            timestamp: Date.now(),
+            payload: rawPayload,
+            messageType
+          });
+        }
+        switch (messageType) {
+          case "RecognitionStarted": {
+            recognitionStarted = true;
+            callbacks?.onOpen?.();
+            callbacks?.onMetadata?.({
+              id: data.id,
+              languagePackInfo: data.language_pack_info
+            });
+            break;
+          }
+          case "AddPartialTranscript": {
+            const partial = data;
+            const words = this.resultsToWords(partial.results);
+            callbacks?.onTranscript?.({
+              type: "transcript",
+              text: partial.metadata.transcript,
+              isFinal: false,
+              words,
+              speaker: words[0]?.speaker,
+              confidence: partial.results[0]?.alternatives?.[0]?.confidence,
+              channel: partial.channel ? parseInt(partial.channel) : void 0
+            });
+            break;
+          }
+          case "AddTranscript": {
+            const final = data;
+            const words = this.resultsToWords(final.results);
+            callbacks?.onTranscript?.({
+              type: "transcript",
+              text: final.metadata.transcript,
+              isFinal: true,
+              words,
+              speaker: words[0]?.speaker,
+              confidence: final.results[0]?.alternatives?.[0]?.confidence,
+              channel: final.channel ? parseInt(final.channel) : void 0
+            });
+            if (options?.diarization || smOpts?.diarization === "speaker") {
+              const utterances = buildUtterancesFromWords(words);
+              for (const utterance of utterances) {
+                callbacks?.onUtterance?.(utterance);
+              }
+            }
+            break;
+          }
+          case "EndOfUtterance": {
+            break;
+          }
+          case "EndOfTranscript": {
+            callbacks?.onClose?.(1e3, "Transcription complete");
+            break;
+          }
+          case "Error": {
+            const err = data;
+            callbacks?.onError?.({
+              code: err.type || "SPEECHMATICS_ERROR",
+              message: err.reason || "Unknown error"
+            });
+            break;
+          }
+          case "Warning": {
+            const warn = data;
+            callbacks?.onMetadata?.({
+              warning: warn.type,
+              reason: warn.reason
+            });
+            break;
+          }
+          case "Info": {
+            callbacks?.onMetadata?.(data);
+            break;
+          }
+          case "AudioAdded":
+          case "ChannelAudioAdded":
+            break;
+          default:
+            callbacks?.onMetadata?.(data);
+            break;
+        }
+      } catch (error) {
+        callbacks?.onError?.({
+          code: "PARSE_ERROR",
+          message: `Failed to parse message: ${error}`
+        });
+      }
+    };
+    ws.onerror = () => {
+      callbacks?.onError?.({
+        code: "WEBSOCKET_ERROR",
+        message: "WebSocket error occurred"
+      });
+    };
+    ws.onclose = (event) => {
+      status = "closed";
+      callbacks?.onClose?.(event.code, event.reason);
+    };
+    await new Promise((resolve, reject) => {
+      const timeout = setTimeout(() => {
+        reject(new Error("WebSocket connection timeout"));
+      }, 1e4);
+      const checkReady = () => {
+        if (recognitionStarted) {
+          clearTimeout(timeout);
+          resolve();
+        } else if (status === "closed") {
+          clearTimeout(timeout);
+          reject(new Error("WebSocket connection failed"));
+        } else {
+          setTimeout(checkReady, 100);
+        }
+      };
+      checkReady();
+    });
+    return {
+      id: sessionId,
+      provider: this.name,
+      createdAt,
+      getStatus: () => status,
+      sendAudio: async (chunk) => {
+        if (status !== "open") {
+          throw new Error("Session is not open");
+        }
+        if (callbacks?.onRawMessage) {
+          const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
+            chunk.data.byteOffset,
+            chunk.data.byteOffset + chunk.data.byteLength
+          );
+          callbacks.onRawMessage({
+            provider: this.name,
+            direction: "outgoing",
+            timestamp: Date.now(),
+            payload: audioPayload,
+            messageType: "audio"
+          });
+        }
+        ws.send(chunk.data);
+      },
+      close: async () => {
+        if (status === "open") {
+          status = "closing";
+          const endMsg = JSON.stringify({ message: "EndOfStream", last_seq_no: 0 });
+          if (callbacks?.onRawMessage) {
+            callbacks.onRawMessage({
+              provider: this.name,
+              direction: "outgoing",
+              timestamp: Date.now(),
+              payload: endMsg,
+              messageType: "EndOfStream"
+            });
+          }
+          ws.send(endMsg);
+        }
+      }
+    };
+  }
+  /**
+   * Convert Speechmatics RecognitionResult[] to unified Word[]
+   */
+  resultsToWords(results) {
+    return results.filter((r) => r.type === "word").map((r) => ({
+      word: r.alternatives?.[0]?.content || "",
+      start: r.start_time,
+      end: r.end_time,
+      confidence: r.alternatives?.[0]?.confidence,
+      speaker: r.alternatives?.[0]?.speaker
+    }));
+  }
   /**
    * Normalize Speechmatics status to unified status
    * Uses generated JobDetailsStatus enum values
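A usage sketch for the new Speechmatics streaming path, using the session and callback surface visible in the added code (`transcribeStream`, `sendAudio`, `close`, `onTranscript`, `onUtterance`); the audio source, chunk shape, and utterance fields are assumptions:

```ts
const adapter = new SpeechmaticsAdapter();
adapter.initialize({ apiKey: process.env.SPEECHMATICS_API_KEY! });

const session = await adapter.transcribeStream(
  {
    language: "en",
    sampleRate: 16000,    // matches the pcm_s16le default above
    diarization: true,
    interimResults: true, // maps to enable_partials
    speechmaticsStreaming: { region: "eu1" },
  },
  {
    onTranscript: (event) => {
      if (event.isFinal) console.log("final:", event.text);
    },
    onUtterance: (u) => console.log("utterance:", u), // shape assumed
    onError: (err) => console.error(err.code, err.message),
    onClose: (code, reason) => console.log("closed", code, reason),
  }
);

// Send raw PCM chunks as they arrive, then signal EndOfStream via close().
for await (const chunk of pcmChunks()) { // pcmChunks() is a hypothetical audio source
  await session.sendAudio({ data: chunk });
}
await session.close();
```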
@@ -9450,7 +9716,7 @@ var SonioxAdapter = class extends BaseAdapter {
       let messageType;
       try {
         const data = JSON.parse(rawPayload);
-        const errorMessage = data.error_message
+        const errorMessage = data.error_message;
         if (errorMessage) {
           messageType = "error";
         } else if (data.finished) {
@@ -9809,7 +10075,15 @@ var ElevenLabsAdapter = class extends BaseAdapter {
   /**
    * Submit audio for transcription
    *
-   * ElevenLabs batch is synchronous
+   * ElevenLabs batch is normally synchronous — the API returns results directly.
+   *
+   * **Webhook mode:** When `webhookUrl` is set (or `elevenlabs.webhook` is true),
+   * the request is processed asynchronously. ElevenLabs returns a 202 with a
+   * `request_id` and delivers results to a webhook configured in the ElevenLabs
+   * dashboard. The unified `webhookUrl` acts as an intent flag to enable async
+   * mode — the actual delivery destination must be pre-configured in your
+   * ElevenLabs dashboard. Use `elevenlabs.webhook_id` to target a specific
+   * webhook endpoint.
    */
   async transcribe(audio, options) {
     this.validateConfig();
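A sketch of the async submission path described above. `webhookUrl` only flips the request into webhook mode; the delivery endpoint itself must already be configured in the ElevenLabs dashboard. The audio-input shape and the `webhook_id` value are placeholders:

```ts
const adapter = new ElevenLabsAdapter();
adapter.initialize({ apiKey: process.env.ELEVENLABS_API_KEY! });

const ack = await adapter.transcribe(
  { url: "https://example.com/podcast-episode.mp3" }, // assumed input shape
  {
    webhookUrl: "https://api.example.com/hooks/elevenlabs", // intent flag only
    elevenlabs: { webhook_id: "wh_123" },                   // placeholder id
  }
);
// `ack` is the immediate "queued" acknowledgement, not the transcript
// (see the response handling added further down).
console.log(ack.data?.id, ack.data?.status);
```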
@@ -9832,6 +10106,11 @@ var ElevenLabsAdapter = class extends BaseAdapter {
         }
       };
     }
+    const elevenlabsOpts = options?.elevenlabs;
+    const useWebhook = options?.webhookUrl || elevenlabsOpts?.webhook;
+    if (useWebhook) {
+      formData.append("webhook", "true");
+    }
     if (options?.language) {
       formData.append("language_code", options.language);
     }
@@ -9850,7 +10129,6 @@ var ElevenLabsAdapter = class extends BaseAdapter {
     if (options?.entityDetection) {
       formData.append("entity_detection", "all");
     }
-    const elevenlabsOpts = options?.elevenlabs;
     if (elevenlabsOpts) {
       for (const [key, value] of Object.entries(elevenlabsOpts)) {
         if (value === void 0 || value === null) continue;
@@ -9873,6 +10151,22 @@ var ElevenLabsAdapter = class extends BaseAdapter {
          "Content-Type": "multipart/form-data"
        }
      });
+      if (useWebhook) {
+        const ack = response.data;
+        return {
+          success: true,
+          provider: this.name,
+          data: {
+            id: ack.request_id || ack.transcription_id || `elevenlabs_${Date.now()}`,
+            text: "",
+            status: "queued"
+          },
+          tracking: {
+            requestId: ack.request_id
+          },
+          raw: response.data
+        };
+      }
      return this.normalizeResponse(response.data);
    } catch (error) {
      return this.createErrorResponse(error);
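For reference, the acknowledgement built in this branch has roughly the following shape (field names taken from the added code; the library's full result type carries more fields than shown):

```ts
// Rough shape of the webhook-mode acknowledgement returned above.
interface ElevenLabsQueuedAck {
  success: true;
  provider: "elevenlabs";
  data: {
    id: string;   // request_id, transcription_id, or a generated fallback
    text: "";     // empty until the webhook delivers the transcript
    status: "queued";
  };
  tracking: { requestId?: string };
  raw: unknown;   // the raw ElevenLabs response body
}
```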
@@ -9965,20 +10259,9 @@ var ElevenLabsAdapter = class extends BaseAdapter {
     ws.onmessage = (event) => {
       receivedData = true;
       const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
-      let messageType;
       try {
         const data = JSON.parse(rawPayload);
-        if (data.error) {
-          messageType = "error";
-        } else if (data.message_type === "session_started") {
-          messageType = "session_started";
-        } else if (data.message_type === "partial_transcript") {
-          messageType = "partial_transcript";
-        } else if (data.message_type === "committed_transcript") {
-          messageType = "committed_transcript";
-        } else if (data.message_type === "committed_transcript_with_timestamps") {
-          messageType = "committed_transcript_with_timestamps";
-        }
+        const messageType = "error" in data ? "error" : data.message_type;
         if (callbacks?.onRawMessage) {
           callbacks.onRawMessage({
             provider: this.name,
@@ -9988,50 +10271,62 @@ var ElevenLabsAdapter = class extends BaseAdapter {
             messageType
           });
         }
-        if (data.error) {
+        if ("error" in data) {
         callbacks?.onError?.({
-          code: data.
+          code: data.message_type || "STREAM_ERROR",
           message: data.error
         });
         return;
       }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        switch (data.message_type) {
+          case "session_started":
+            break;
+          case "partial_transcript": {
+            const streamEvent = {
+              type: "transcript",
+              text: data.text || "",
+              isFinal: false,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            break;
+          }
+          case "committed_transcript": {
+            const streamEvent = {
+              type: "transcript",
+              text: data.text || "",
+              isFinal: true,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            break;
+          }
+          case "committed_transcript_with_timestamps": {
+            const tsData = data;
+            const words = tsData.words ? tsData.words.map((w) => ({
+              word: w.text || "",
+              start: w.start || 0,
+              end: w.end || 0,
+              confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
+              speaker: w.speaker_id
+            })) : [];
+            const streamEvent = {
+              type: "transcript",
+              text: tsData.text || "",
+              isFinal: true,
+              words: words.length > 0 ? words : void 0,
+              speaker: words[0]?.speaker,
+              language: tsData.language_code,
+              confidence: void 0
+            };
+            callbacks?.onTranscript?.(streamEvent);
+            if (options?.diarization && words.length > 0) {
+              const utterances = buildUtterancesFromWords(words);
+              for (const utterance of utterances) {
+                callbacks?.onUtterance?.(utterance);
+              }
             }
+            break;
           }
         }
       } catch (error) {
@@ -39233,6 +39528,7 @@ export {
   AssemblyAILanguage,
   AssemblyAILanguageCodes,
   AssemblyAIListFilterSchema,
+  AssemblyAIRegion,
   AssemblyAISampleRate,
   AssemblyAISpeechModel,
   AssemblyAIStatus,
@@ -39283,6 +39579,7 @@ export {
   ElevenLabsLanguageCodes,
   ElevenLabsLanguageLabels,
   ElevenLabsLanguages,
+  ElevenLabsRegion,
   schema_exports8 as ElevenLabsTypes,
   elevenLabsSpeechToTextAPI_zod_exports as ElevenLabsZodSchemas,
   GladiaAdapter,
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "voice-router-dev",
-  "version": "0.9.1",
+  "version": "0.9.3",
   "description": "Universal speech-to-text router for Gladia, AssemblyAI, Deepgram, Azure, OpenAI Whisper, Speechmatics, Soniox, and ElevenLabs",
   "main": "dist/index.js",
   "module": "dist/index.mjs",