voice-router-dev 0.8.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +169 -0
- package/README.md +21 -2
- package/dist/constants.d.mts +577 -7
- package/dist/constants.d.ts +577 -7
- package/dist/constants.js +493 -1
- package/dist/constants.mjs +482 -1
- package/dist/{field-configs-CDeDcDz_.d.mts → field-configs-DN2_WrYr.d.mts} +568 -568
- package/dist/{field-configs-CDeDcDz_.d.ts → field-configs-DN2_WrYr.d.ts} +568 -568
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/index.d.mts +2784 -1246
- package/dist/index.d.ts +2784 -1246
- package/dist/index.js +1466 -91
- package/dist/index.mjs +1458 -91
- package/dist/{provider-metadata-BHbouRC9.d.mts → provider-metadata-BnkedpXm.d.mts} +34 -4
- package/dist/{provider-metadata-Dsk2PVud.d.ts → provider-metadata-DbsSGAO7.d.ts} +34 -4
- package/dist/provider-metadata.d.mts +2 -2
- package/dist/provider-metadata.d.ts +2 -2
- package/dist/provider-metadata.js +349 -6
- package/dist/provider-metadata.mjs +345 -6
- package/dist/{transcriptWebhookNotification-Cz9RsK5D.d.mts → speechToTextChunkResponseModel-3IUnJXKx.d.mts} +975 -9
- package/dist/{transcriptWebhookNotification-D1iE2_a4.d.ts → speechToTextChunkResponseModel-DExUFZT3.d.ts} +975 -9
- package/dist/webhooks.d.mts +103 -5
- package/dist/webhooks.d.ts +103 -5
- package/dist/webhooks.js +342 -39
- package/dist/webhooks.mjs +340 -39
- package/package.json +14 -6
package/dist/webhooks.mjs
CHANGED
|
@@ -88,7 +88,7 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
|
|
|
88
88
|
end: utterance.end,
|
|
89
89
|
confidence: utterance.confidence,
|
|
90
90
|
speaker: utterance.speaker !== void 0 ? String(utterance.speaker) : void 0,
|
|
91
|
-
words: utterance.words?.map((w) => this.mapWord(w))
|
|
91
|
+
words: utterance.words?.map((w) => this.mapWord(w)) ?? []
|
|
92
92
|
};
|
|
93
93
|
}
|
|
94
94
|
/**
|
|
@@ -150,7 +150,7 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
|
|
|
150
150
|
speakerIds.add(u.speaker);
|
|
151
151
|
}
|
|
152
152
|
});
|
|
153
|
-
const speakers = speakerIds.size > 0 ? Array.from(speakerIds).map((id) => ({ id: String(id) })) : void 0;
|
|
153
|
+
const speakers = speakerIds.size > 0 ? Array.from(speakerIds).map((id) => ({ id: String(id), label: `Speaker ${id}` })) : void 0;
|
|
154
154
|
const summary = result.summarization?.success && result.summarization.results ? result.summarization.results : void 0;
|
|
155
155
|
return {
|
|
156
156
|
success: true,
|
|
@@ -228,34 +228,68 @@ var AssemblyAIWebhookHandler = class extends BaseWebhookHandler {
|
|
|
228
228
|
}
|
|
229
229
|
/**
|
|
230
230
|
* Check if payload matches AssemblyAI webhook format
|
|
231
|
+
*
|
|
232
|
+
* Supports two formats:
|
|
233
|
+
* - Notification format: `{ transcript_id, status }` (lightweight callback)
|
|
234
|
+
* - Full transcript format: `{ id, status, audio_url, text, words, ... }` (complete response)
|
|
231
235
|
*/
|
|
232
236
|
matches(payload, _options) {
|
|
233
237
|
if (!payload || typeof payload !== "object") {
|
|
234
238
|
return false;
|
|
235
239
|
}
|
|
236
240
|
const obj = payload;
|
|
237
|
-
if (
|
|
238
|
-
return false;
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
return false;
|
|
241
|
+
if ("transcript_id" in obj && "status" in obj) {
|
|
242
|
+
if (typeof obj.transcript_id !== "string") return false;
|
|
243
|
+
if (obj.status !== "completed" && obj.status !== "error") return false;
|
|
244
|
+
return true;
|
|
242
245
|
}
|
|
243
|
-
if (obj
|
|
244
|
-
return false;
|
|
246
|
+
if ("id" in obj && "status" in obj && "audio_url" in obj) {
|
|
247
|
+
if (typeof obj.id !== "string") return false;
|
|
248
|
+
if (obj.status !== "completed" && obj.status !== "error") return false;
|
|
249
|
+
return true;
|
|
245
250
|
}
|
|
246
|
-
return
|
|
251
|
+
return false;
|
|
252
|
+
}
|
|
253
|
+
/**
|
|
254
|
+
* Determine if the payload is a full transcript (vs a lightweight notification)
|
|
255
|
+
*/
|
|
256
|
+
isFullTranscript(payload) {
|
|
257
|
+
return "audio_url" in payload && "id" in payload;
|
|
247
258
|
}
|
|
248
259
|
/**
|
|
249
260
|
* Parse AssemblyAI webhook payload to unified format
|
|
261
|
+
*
|
|
262
|
+
* Supports two payload formats:
|
|
263
|
+
* - Notification: `{ transcript_id, status }` — returns minimal event (ID + status only)
|
|
264
|
+
* - Full transcript: `{ id, status, text, words, utterances, ... }` — returns complete data
|
|
250
265
|
*/
|
|
251
266
|
parse(payload, _options) {
|
|
252
267
|
if (!this.matches(payload)) {
|
|
253
268
|
return this.createErrorEvent(payload, "Invalid AssemblyAI webhook payload");
|
|
254
269
|
}
|
|
255
|
-
const
|
|
256
|
-
const
|
|
257
|
-
const
|
|
270
|
+
const obj = payload;
|
|
271
|
+
const isFullFormat = this.isFullTranscript(obj);
|
|
272
|
+
const transcriptId = isFullFormat ? payload.id : payload.transcript_id;
|
|
273
|
+
const status = obj.status;
|
|
274
|
+
if (status === "error") {
|
|
275
|
+
const error = isFullFormat ? payload.error : void 0;
|
|
276
|
+
return {
|
|
277
|
+
success: false,
|
|
278
|
+
provider: this.provider,
|
|
279
|
+
eventType: "transcription.failed",
|
|
280
|
+
data: {
|
|
281
|
+
id: transcriptId,
|
|
282
|
+
status: "error",
|
|
283
|
+
error: error || "Transcription failed"
|
|
284
|
+
},
|
|
285
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
286
|
+
raw: payload
|
|
287
|
+
};
|
|
288
|
+
}
|
|
258
289
|
if (status === "completed") {
|
|
290
|
+
if (isFullFormat) {
|
|
291
|
+
return this.parseFullTranscript(payload, payload);
|
|
292
|
+
}
|
|
259
293
|
return {
|
|
260
294
|
success: true,
|
|
261
295
|
provider: this.provider,
|
|
@@ -263,28 +297,76 @@ var AssemblyAIWebhookHandler = class extends BaseWebhookHandler {
|
|
|
263
297
|
data: {
|
|
264
298
|
id: transcriptId,
|
|
265
299
|
status: "completed"
|
|
266
|
-
// Note: Full transcript data needs to be fetched via API
|
|
267
|
-
// using AssemblyAIAdapter.getTranscript(transcriptId)
|
|
268
300
|
},
|
|
269
301
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
270
302
|
raw: payload
|
|
271
303
|
};
|
|
272
304
|
}
|
|
273
|
-
|
|
305
|
+
return this.createErrorEvent(payload, `Unknown AssemblyAI status: ${status}`);
|
|
306
|
+
}
|
|
307
|
+
/**
|
|
308
|
+
* Parse a full AssemblyAI transcript response into unified format
|
|
309
|
+
*
|
|
310
|
+
* AssemblyAI times are in milliseconds — converted to seconds for unified format.
|
|
311
|
+
*/
|
|
312
|
+
parseFullTranscript(transcript, raw) {
|
|
313
|
+
try {
|
|
314
|
+
const words = transcript.words ? transcript.words.map((w) => ({
|
|
315
|
+
word: w.text,
|
|
316
|
+
start: w.start / 1e3,
|
|
317
|
+
end: w.end / 1e3,
|
|
318
|
+
confidence: w.confidence,
|
|
319
|
+
speaker: w.speaker ?? void 0
|
|
320
|
+
})) : void 0;
|
|
321
|
+
const utterances = transcript.utterances ? transcript.utterances.map((u) => ({
|
|
322
|
+
text: u.text,
|
|
323
|
+
start: u.start / 1e3,
|
|
324
|
+
end: u.end / 1e3,
|
|
325
|
+
speaker: u.speaker,
|
|
326
|
+
confidence: u.confidence,
|
|
327
|
+
words: u.words.map((w) => ({
|
|
328
|
+
word: w.text,
|
|
329
|
+
start: w.start / 1e3,
|
|
330
|
+
end: w.end / 1e3,
|
|
331
|
+
confidence: w.confidence,
|
|
332
|
+
speaker: w.speaker ?? void 0
|
|
333
|
+
}))
|
|
334
|
+
})) : void 0;
|
|
335
|
+
const speakerIds = /* @__PURE__ */ new Set();
|
|
336
|
+
transcript.utterances?.forEach((u) => {
|
|
337
|
+
if (u.speaker) speakerIds.add(u.speaker);
|
|
338
|
+
});
|
|
339
|
+
const speakers = speakerIds.size > 0 ? Array.from(speakerIds).map((id) => ({ id, label: `Speaker ${id}` })) : void 0;
|
|
274
340
|
return {
|
|
275
|
-
success:
|
|
341
|
+
success: true,
|
|
276
342
|
provider: this.provider,
|
|
277
|
-
eventType: "transcription.
|
|
343
|
+
eventType: "transcription.completed",
|
|
278
344
|
data: {
|
|
279
|
-
id:
|
|
280
|
-
status: "
|
|
281
|
-
|
|
345
|
+
id: transcript.id,
|
|
346
|
+
status: "completed",
|
|
347
|
+
text: transcript.text ?? void 0,
|
|
348
|
+
confidence: transcript.confidence ?? void 0,
|
|
349
|
+
duration: transcript.audio_duration ?? void 0,
|
|
350
|
+
language: transcript.language_code ?? void 0,
|
|
351
|
+
speakers,
|
|
352
|
+
words,
|
|
353
|
+
utterances,
|
|
354
|
+
summary: transcript.summary ?? void 0,
|
|
355
|
+
metadata: {
|
|
356
|
+
speech_model: transcript.speech_model,
|
|
357
|
+
audio_channels: transcript.audio_channels,
|
|
358
|
+
webhook_status_code: transcript.webhook_status_code
|
|
359
|
+
}
|
|
282
360
|
},
|
|
283
361
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
284
|
-
raw
|
|
362
|
+
raw
|
|
285
363
|
};
|
|
364
|
+
} catch (error) {
|
|
365
|
+
return this.createErrorEvent(
|
|
366
|
+
raw,
|
|
367
|
+
`Failed to parse AssemblyAI transcript: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
368
|
+
);
|
|
286
369
|
}
|
|
287
|
-
return this.createErrorEvent(payload, `Unknown AssemblyAI status: ${status}`);
|
|
288
370
|
}
|
|
289
371
|
/**
|
|
290
372
|
* Verify AssemblyAI webhook signature
|
|
@@ -419,24 +501,33 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
|
|
|
419
501
|
end: w.end || 0,
|
|
420
502
|
confidence: w.confidence
|
|
421
503
|
})) : void 0;
|
|
422
|
-
const
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
504
|
+
const speakerIds = /* @__PURE__ */ new Set();
|
|
505
|
+
if (response.results.utterances) {
|
|
506
|
+
for (const utterance of response.results.utterances) {
|
|
507
|
+
if (utterance.speaker !== void 0) {
|
|
508
|
+
speakerIds.add(utterance.speaker.toString());
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
const speakers = speakerIds.size > 0 ? Array.from(speakerIds).map((id) => ({
|
|
513
|
+
id,
|
|
514
|
+
label: `Speaker ${id}`
|
|
427
515
|
})) : void 0;
|
|
428
516
|
const utterances = response.results.utterances && response.results.utterances.length > 0 ? response.results.utterances.map((utterance) => ({
|
|
517
|
+
id: utterance.id,
|
|
429
518
|
text: utterance.transcript || "",
|
|
430
519
|
start: utterance.start || 0,
|
|
431
520
|
end: utterance.end || 0,
|
|
432
521
|
speaker: utterance.speaker?.toString(),
|
|
522
|
+
channel: utterance.channel,
|
|
433
523
|
confidence: utterance.confidence,
|
|
434
|
-
words: utterance.words
|
|
524
|
+
words: utterance.words?.map((w) => ({
|
|
435
525
|
word: w.word || "",
|
|
436
526
|
start: w.start || 0,
|
|
437
527
|
end: w.end || 0,
|
|
438
|
-
confidence: w.confidence
|
|
439
|
-
|
|
528
|
+
confidence: w.confidence,
|
|
529
|
+
speaker: w.speaker?.toString()
|
|
530
|
+
})) ?? []
|
|
440
531
|
})) : void 0;
|
|
441
532
|
const summary = alternative.summaries?.[0]?.summary;
|
|
442
533
|
return {
|
|
@@ -449,7 +540,7 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
|
|
|
449
540
|
text: transcript,
|
|
450
541
|
confidence: alternative.confidence,
|
|
451
542
|
duration,
|
|
452
|
-
language:
|
|
543
|
+
language: channel.detected_language,
|
|
453
544
|
speakers: speakers && speakers.length > 0 ? speakers : void 0,
|
|
454
545
|
words: words && words.length > 0 ? words : void 0,
|
|
455
546
|
utterances: utterances && utterances.length > 0 ? utterances : void 0,
|
|
@@ -636,6 +727,72 @@ function createAzureWebhookHandler() {
|
|
|
636
727
|
return new AzureWebhookHandler();
|
|
637
728
|
}
|
|
638
729
|
|
|
730
|
+
// src/utils/transcription-helpers.ts
|
|
731
|
+
function buildUtterancesFromWords(words) {
|
|
732
|
+
const utterances = [];
|
|
733
|
+
let currentSpeaker;
|
|
734
|
+
let currentWords = [];
|
|
735
|
+
let utteranceStart = 0;
|
|
736
|
+
for (const word of words) {
|
|
737
|
+
if (!word.speaker) continue;
|
|
738
|
+
if (word.speaker !== currentSpeaker) {
|
|
739
|
+
if (currentSpeaker && currentWords.length > 0) {
|
|
740
|
+
utterances.push({
|
|
741
|
+
text: currentWords.map((w) => w.word).join(" "),
|
|
742
|
+
start: utteranceStart,
|
|
743
|
+
end: currentWords[currentWords.length - 1].end,
|
|
744
|
+
speaker: currentSpeaker,
|
|
745
|
+
words: currentWords
|
|
746
|
+
});
|
|
747
|
+
}
|
|
748
|
+
currentSpeaker = word.speaker;
|
|
749
|
+
currentWords = [word];
|
|
750
|
+
utteranceStart = word.start;
|
|
751
|
+
} else {
|
|
752
|
+
currentWords.push(word);
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
if (currentSpeaker && currentWords.length > 0) {
|
|
756
|
+
utterances.push({
|
|
757
|
+
text: currentWords.map((w) => w.word).join(" "),
|
|
758
|
+
start: utteranceStart,
|
|
759
|
+
end: currentWords[currentWords.length - 1].end,
|
|
760
|
+
speaker: currentSpeaker,
|
|
761
|
+
words: currentWords
|
|
762
|
+
});
|
|
763
|
+
}
|
|
764
|
+
return utterances;
|
|
765
|
+
}
|
|
766
|
+
function buildTextFromSpeechmaticsResults(results) {
|
|
767
|
+
const parts = [];
|
|
768
|
+
let attachNext = false;
|
|
769
|
+
for (const result of results) {
|
|
770
|
+
if (result.type !== "word" && result.type !== "punctuation") continue;
|
|
771
|
+
const content = result.alternatives?.[0]?.content;
|
|
772
|
+
if (!content) continue;
|
|
773
|
+
if (result.type === "punctuation") {
|
|
774
|
+
const attaches = result.attaches_to;
|
|
775
|
+
if (attaches === "previous" || attaches === "both") {
|
|
776
|
+
parts.push(content);
|
|
777
|
+
attachNext = attaches === "both";
|
|
778
|
+
} else if (attaches === "next") {
|
|
779
|
+
if (parts.length > 0) parts.push(" ");
|
|
780
|
+
parts.push(content);
|
|
781
|
+
attachNext = true;
|
|
782
|
+
} else {
|
|
783
|
+
if (parts.length > 0 && !attachNext) parts.push(" ");
|
|
784
|
+
parts.push(content);
|
|
785
|
+
attachNext = false;
|
|
786
|
+
}
|
|
787
|
+
} else {
|
|
788
|
+
if (parts.length > 0 && !attachNext) parts.push(" ");
|
|
789
|
+
parts.push(content);
|
|
790
|
+
attachNext = false;
|
|
791
|
+
}
|
|
792
|
+
}
|
|
793
|
+
return parts.join("");
|
|
794
|
+
}
|
|
795
|
+
|
|
639
796
|
// src/webhooks/speechmatics-webhook.ts
|
|
640
797
|
var SpeechmaticsWebhookHandler = class extends BaseWebhookHandler {
|
|
641
798
|
constructor() {
|
|
@@ -717,18 +874,25 @@ var SpeechmaticsWebhookHandler = class extends BaseWebhookHandler {
|
|
|
717
874
|
if (status === "success" && payload && typeof payload === "object") {
|
|
718
875
|
const transcript = payload;
|
|
719
876
|
if (transcript.results && transcript.job) {
|
|
720
|
-
const text = transcript.results
|
|
877
|
+
const text = buildTextFromSpeechmaticsResults(transcript.results);
|
|
878
|
+
const wordResults = transcript.results.filter((r) => r.type === "word" && r.alternatives);
|
|
879
|
+
const words = wordResults.filter((r) => r.start_time !== void 0 && r.end_time !== void 0).map((r) => ({
|
|
880
|
+
word: r.alternatives[0]?.content || "",
|
|
881
|
+
start: r.start_time,
|
|
882
|
+
end: r.end_time,
|
|
883
|
+
confidence: r.alternatives[0]?.confidence,
|
|
884
|
+
speaker: r.alternatives[0]?.speaker
|
|
885
|
+
}));
|
|
721
886
|
const speakerSet = /* @__PURE__ */ new Set();
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
if (speaker) speakerSet.add(speaker);
|
|
726
|
-
}
|
|
887
|
+
wordResults.forEach((r) => {
|
|
888
|
+
const speaker = r.alternatives[0]?.speaker;
|
|
889
|
+
if (speaker) speakerSet.add(speaker);
|
|
727
890
|
});
|
|
728
891
|
const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
|
|
729
892
|
id,
|
|
730
893
|
label: `Speaker ${id}`
|
|
731
894
|
})) : void 0;
|
|
895
|
+
const utterances = buildUtterancesFromWords(words);
|
|
732
896
|
return {
|
|
733
897
|
success: true,
|
|
734
898
|
provider: this.provider,
|
|
@@ -741,6 +905,8 @@ var SpeechmaticsWebhookHandler = class extends BaseWebhookHandler {
|
|
|
741
905
|
language: transcript.metadata.transcription_config?.language,
|
|
742
906
|
duration: transcript.job.duration,
|
|
743
907
|
speakers,
|
|
908
|
+
words: words.length > 0 ? words : void 0,
|
|
909
|
+
utterances: utterances.length > 0 ? utterances : void 0,
|
|
744
910
|
createdAt: transcript.job.created_at
|
|
745
911
|
},
|
|
746
912
|
raw: payload
|
|
@@ -762,6 +928,138 @@ var SpeechmaticsWebhookHandler = class extends BaseWebhookHandler {
|
|
|
762
928
|
}
|
|
763
929
|
};
|
|
764
930
|
|
|
931
|
+
// src/webhooks/elevenlabs-webhook.ts
|
|
932
|
+
var ElevenLabsWebhookHandler = class extends BaseWebhookHandler {
|
|
933
|
+
constructor() {
|
|
934
|
+
super(...arguments);
|
|
935
|
+
this.provider = "elevenlabs";
|
|
936
|
+
}
|
|
937
|
+
/**
|
|
938
|
+
* Check if payload matches ElevenLabs webhook format
|
|
939
|
+
*
|
|
940
|
+
* ElevenLabs webhook payloads contain the full transcription result
|
|
941
|
+
* with `words` array and `language_code` / `language_probability` fields.
|
|
942
|
+
*/
|
|
943
|
+
matches(payload, _options) {
|
|
944
|
+
if (!payload || typeof payload !== "object") {
|
|
945
|
+
return false;
|
|
946
|
+
}
|
|
947
|
+
const obj = payload;
|
|
948
|
+
if (!("words" in obj) || !("language_code" in obj) || !("language_probability" in obj)) {
|
|
949
|
+
return false;
|
|
950
|
+
}
|
|
951
|
+
if (!Array.isArray(obj.words)) {
|
|
952
|
+
return false;
|
|
953
|
+
}
|
|
954
|
+
if (!("text" in obj)) {
|
|
955
|
+
return false;
|
|
956
|
+
}
|
|
957
|
+
if (obj.words.length > 0) {
|
|
958
|
+
const firstWord = obj.words[0];
|
|
959
|
+
if (!("logprob" in firstWord) || !("type" in firstWord)) {
|
|
960
|
+
return false;
|
|
961
|
+
}
|
|
962
|
+
}
|
|
963
|
+
return true;
|
|
964
|
+
}
|
|
965
|
+
/**
|
|
966
|
+
* Parse ElevenLabs webhook payload to unified format
|
|
967
|
+
*/
|
|
968
|
+
parse(payload, _options) {
|
|
969
|
+
if (!this.matches(payload)) {
|
|
970
|
+
return this.createErrorEvent(payload, "Invalid ElevenLabs webhook payload");
|
|
971
|
+
}
|
|
972
|
+
const response = payload;
|
|
973
|
+
try {
|
|
974
|
+
const transcriptionId = response.transcription_id?.toString() || "";
|
|
975
|
+
const transcript = response.text;
|
|
976
|
+
if (!transcript) {
|
|
977
|
+
return {
|
|
978
|
+
success: false,
|
|
979
|
+
provider: this.provider,
|
|
980
|
+
eventType: "transcription.failed",
|
|
981
|
+
data: {
|
|
982
|
+
id: transcriptionId,
|
|
983
|
+
status: "error",
|
|
984
|
+
error: "Empty transcript"
|
|
985
|
+
},
|
|
986
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
987
|
+
raw: payload
|
|
988
|
+
};
|
|
989
|
+
}
|
|
990
|
+
const words = response.words && response.words.length > 0 ? response.words.filter((w) => w.type === "word").map((w) => ({
|
|
991
|
+
word: w.text || "",
|
|
992
|
+
start: typeof w.start === "number" ? w.start : 0,
|
|
993
|
+
end: typeof w.end === "number" ? w.end : 0,
|
|
994
|
+
confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
|
|
995
|
+
speaker: w.speaker_id?.toString()
|
|
996
|
+
})) : void 0;
|
|
997
|
+
const speakerIds = /* @__PURE__ */ new Set();
|
|
998
|
+
if (response.words) {
|
|
999
|
+
for (const w of response.words) {
|
|
1000
|
+
if (w.speaker_id !== void 0 && w.speaker_id !== null) {
|
|
1001
|
+
speakerIds.add(w.speaker_id.toString());
|
|
1002
|
+
}
|
|
1003
|
+
}
|
|
1004
|
+
}
|
|
1005
|
+
const speakers = speakerIds.size > 0 ? Array.from(speakerIds).map((id) => ({
|
|
1006
|
+
id,
|
|
1007
|
+
label: `Speaker ${id}`
|
|
1008
|
+
})) : void 0;
|
|
1009
|
+
const utterances = words && words.length > 0 ? buildUtterancesFromWords(
|
|
1010
|
+
words.map((w) => ({
|
|
1011
|
+
word: w.word,
|
|
1012
|
+
start: w.start,
|
|
1013
|
+
end: w.end,
|
|
1014
|
+
confidence: w.confidence,
|
|
1015
|
+
speaker: w.speaker
|
|
1016
|
+
}))
|
|
1017
|
+
) : void 0;
|
|
1018
|
+
return {
|
|
1019
|
+
success: true,
|
|
1020
|
+
provider: this.provider,
|
|
1021
|
+
eventType: "transcription.completed",
|
|
1022
|
+
data: {
|
|
1023
|
+
id: transcriptionId,
|
|
1024
|
+
status: "completed",
|
|
1025
|
+
text: transcript,
|
|
1026
|
+
language: response.language_code,
|
|
1027
|
+
speakers: speakers && speakers.length > 0 ? speakers : void 0,
|
|
1028
|
+
words: words && words.length > 0 ? words : void 0,
|
|
1029
|
+
utterances: utterances && utterances.length > 0 ? utterances : void 0,
|
|
1030
|
+
metadata: {
|
|
1031
|
+
language_probability: response.language_probability,
|
|
1032
|
+
entities: response.entities,
|
|
1033
|
+
channel_index: response.channel_index
|
|
1034
|
+
}
|
|
1035
|
+
},
|
|
1036
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1037
|
+
raw: payload
|
|
1038
|
+
};
|
|
1039
|
+
} catch (error) {
|
|
1040
|
+
return this.createErrorEvent(
|
|
1041
|
+
payload,
|
|
1042
|
+
`Failed to parse ElevenLabs webhook: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1043
|
+
);
|
|
1044
|
+
}
|
|
1045
|
+
}
|
|
1046
|
+
/**
|
|
1047
|
+
* Verify ElevenLabs webhook signature
|
|
1048
|
+
*
|
|
1049
|
+
* Note: ElevenLabs does not currently provide a standard webhook signature
|
|
1050
|
+
* verification mechanism for STT webhooks. For security, use HTTPS and
|
|
1051
|
+
* validate the request source.
|
|
1052
|
+
*
|
|
1053
|
+
* @returns Always returns true (no verification available)
|
|
1054
|
+
*/
|
|
1055
|
+
verify() {
|
|
1056
|
+
return true;
|
|
1057
|
+
}
|
|
1058
|
+
};
|
|
1059
|
+
function createElevenLabsWebhookHandler() {
|
|
1060
|
+
return new ElevenLabsWebhookHandler();
|
|
1061
|
+
}
|
|
1062
|
+
|
|
765
1063
|
// src/webhooks/webhook-router.ts
|
|
766
1064
|
var WebhookRouter = class {
|
|
767
1065
|
constructor() {
|
|
@@ -770,7 +1068,8 @@ var WebhookRouter = class {
|
|
|
770
1068
|
["assemblyai", new AssemblyAIWebhookHandler()],
|
|
771
1069
|
["deepgram", new DeepgramWebhookHandler()],
|
|
772
1070
|
["azure-stt", new AzureWebhookHandler()],
|
|
773
|
-
["speechmatics", new SpeechmaticsWebhookHandler()]
|
|
1071
|
+
["speechmatics", new SpeechmaticsWebhookHandler()],
|
|
1072
|
+
["elevenlabs", new ElevenLabsWebhookHandler()]
|
|
774
1073
|
]);
|
|
775
1074
|
}
|
|
776
1075
|
/**
|
|
@@ -949,12 +1248,14 @@ export {
|
|
|
949
1248
|
AzureWebhookHandler,
|
|
950
1249
|
BaseWebhookHandler,
|
|
951
1250
|
DeepgramWebhookHandler,
|
|
1251
|
+
ElevenLabsWebhookHandler,
|
|
952
1252
|
GladiaWebhookHandler,
|
|
953
1253
|
SpeechmaticsWebhookHandler,
|
|
954
1254
|
WebhookRouter,
|
|
955
1255
|
createAssemblyAIWebhookHandler,
|
|
956
1256
|
createAzureWebhookHandler,
|
|
957
1257
|
createDeepgramWebhookHandler,
|
|
1258
|
+
createElevenLabsWebhookHandler,
|
|
958
1259
|
createGladiaWebhookHandler,
|
|
959
1260
|
createWebhookRouter
|
|
960
1261
|
};
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "voice-router-dev",
|
|
3
|
-
"version": "0.8.
|
|
4
|
-
"description": "Universal speech-to-text router for Gladia, AssemblyAI, Deepgram, Azure, OpenAI Whisper, Speechmatics, and
|
|
3
|
+
"version": "0.8.2",
|
|
4
|
+
"description": "Universal speech-to-text router for Gladia, AssemblyAI, Deepgram, Azure, OpenAI Whisper, Speechmatics, Soniox, and ElevenLabs",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
@@ -100,6 +100,7 @@
|
|
|
100
100
|
"whisper",
|
|
101
101
|
"speechmatics",
|
|
102
102
|
"soniox",
|
|
103
|
+
"elevenlabs",
|
|
103
104
|
"multi-provider",
|
|
104
105
|
"router",
|
|
105
106
|
"sdk",
|
|
@@ -155,14 +156,16 @@
|
|
|
155
156
|
"openapi:sync:deepgram": "node scripts/sync-specs.js --provider deepgram",
|
|
156
157
|
"openapi:sync:speechmatics": "node scripts/sync-specs.js --provider speechmatics && node scripts/sync-specs.js --provider speechmaticsAsync",
|
|
157
158
|
"openapi:sync:soniox": "node scripts/sync-specs.js --provider soniox",
|
|
159
|
+
"openapi:sync:elevenlabs": "node scripts/sync-specs.js --provider elevenlabs",
|
|
158
160
|
"openapi:validate": "node scripts/sync-specs.js --validate-only",
|
|
159
|
-
"openapi:fix-specs": "node scripts/fix-openai-spec.js && node scripts/fix-speechmatics-spec.js && node scripts/fix-deepgram-spec.js",
|
|
161
|
+
"openapi:fix-specs": "node scripts/fix-openai-spec.js && node scripts/fix-speechmatics-spec.js && node scripts/fix-deepgram-spec.js && node scripts/fix-elevenlabs-spec.js",
|
|
160
162
|
"openapi:fix": "node scripts/fix-generated.js && node scripts/fix-assemblyai-missing-schemas.js && sed -i \"s/'\\([a-z_]*\\)\\.'\\([a-z_]*\\)'/'\\1.\\2'/g\" src/generated/speechmatics/api/speechmaticsASRRESTAPI.zod.ts 2>/dev/null || true",
|
|
161
163
|
"openapi:clean": "rm -rf src/generated",
|
|
162
164
|
"openapi:clean:gladia": "rm -rf src/generated/gladia",
|
|
163
165
|
"openapi:clean:assemblyai": "rm -rf src/generated/assemblyai",
|
|
164
166
|
"openapi:clean:deepgram": "rm -rf src/generated/deepgram",
|
|
165
167
|
"openapi:clean:soniox": "rm -rf src/generated/soniox",
|
|
168
|
+
"openapi:clean:elevenlabs": "rm -rf src/generated/elevenlabs",
|
|
166
169
|
"openapi:clean:speechmatics": "rm -rf src/generated/speechmatics",
|
|
167
170
|
"openapi:sync-assemblyai-streaming": "node scripts/sync-assemblyai-streaming-types.js",
|
|
168
171
|
"openapi:sync-deepgram-streaming": "node scripts/sync-deepgram-streaming-types.js",
|
|
@@ -175,7 +178,9 @@
|
|
|
175
178
|
"openapi:sync-openai-models": "node scripts/generate-openai-models.js",
|
|
176
179
|
"openapi:sync-speechmatics-languages": "node scripts/generate-speechmatics-languages.js",
|
|
177
180
|
"openapi:sync-azure-locales": "node scripts/generate-azure-locales.js",
|
|
178
|
-
"openapi:
|
|
181
|
+
"openapi:sync-elevenlabs-languages": "node scripts/generate-elevenlabs-languages.js",
|
|
182
|
+
"openapi:sync-elevenlabs-models": "node scripts/generate-elevenlabs-models.js",
|
|
183
|
+
"openapi:generate": "pnpm openapi:fix-specs && pnpm openapi:clean && orval && pnpm openapi:sync-assemblyai-streaming && pnpm openapi:sync-deepgram-streaming && pnpm openapi:sync-deepgram-languages && pnpm openapi:sync-deepgram-models && pnpm openapi:sync-speechmatics-streaming && pnpm openapi:sync-speechmatics-languages && pnpm openapi:sync-soniox-streaming && pnpm openapi:sync-soniox-languages && pnpm openapi:sync-soniox-models && pnpm openapi:sync-openai-models && pnpm openapi:sync-azure-locales && pnpm openapi:sync-elevenlabs-languages && pnpm openapi:sync-elevenlabs-models && pnpm openapi:fix",
|
|
179
184
|
"openapi:generate:gladia": "pnpm openapi:clean:gladia && orval --config orval.config.ts --project gladiaApi && orval --config orval.config.ts --project gladiaZod && pnpm openapi:fix",
|
|
180
185
|
"openapi:generate:assemblyai": "pnpm openapi:clean:assemblyai && orval --config orval.config.ts --project assemblyaiApi && orval --config orval.config.ts --project assemblyaiZod && pnpm openapi:sync-assemblyai-streaming && pnpm openapi:fix",
|
|
181
186
|
"openapi:generate:deepgram": "pnpm openapi:clean:deepgram && orval --config orval.config.ts --project deepgramApi && orval --config orval.config.ts --project deepgramZod && pnpm openapi:sync-deepgram-streaming && pnpm openapi:sync-deepgram-languages && pnpm openapi:sync-deepgram-models && pnpm openapi:fix",
|
|
@@ -183,6 +188,7 @@
|
|
|
183
188
|
"openapi:generate:speechmatics-streaming": "pnpm openapi:sync-speechmatics-streaming",
|
|
184
189
|
"openapi:generate:soniox": "pnpm openapi:clean:soniox && orval --config orval.config.ts --project sonioxApi && orval --config orval.config.ts --project sonioxZod && pnpm openapi:sync-soniox-streaming && pnpm openapi:sync-soniox-languages && pnpm openapi:sync-soniox-models && pnpm openapi:fix",
|
|
185
190
|
"openapi:generate:soniox-streaming": "pnpm openapi:sync-soniox-streaming",
|
|
191
|
+
"openapi:generate:elevenlabs": "node scripts/fix-elevenlabs-spec.js && pnpm openapi:clean:elevenlabs && orval --config orval.config.ts --project elevenlabsApi && orval --config orval.config.ts --project elevenlabsZod && pnpm openapi:sync-elevenlabs-languages && pnpm openapi:sync-elevenlabs-models && pnpm openapi:fix",
|
|
186
192
|
"openapi:generate-field-metadata": "node scripts/generate-field-metadata.js",
|
|
187
193
|
"openapi:clean:csharp": "rm -rf generated-csharp",
|
|
188
194
|
"openapi:clean:csharp:gladia": "rm -rf generated-csharp/gladia",
|
|
@@ -191,7 +197,7 @@
|
|
|
191
197
|
"openapi:diagram": "node scripts/generate-pipeline-diagram.js",
|
|
192
198
|
"openapi:rebuild": "pnpm openapi:sync && pnpm openapi:generate && pnpm build",
|
|
193
199
|
"docs:clean": "rm -rf docs/generated",
|
|
194
|
-
"docs:generate": "pnpm docs:clean && pnpm docs:generate:router && pnpm docs:generate:webhooks && pnpm docs:generate:gladia && pnpm docs:generate:assemblyai && pnpm docs:generate:deepgram && pnpm docs:generate:azure && pnpm docs:generate:openai && pnpm docs:generate:speechmatics && pnpm docs:field-equivalences",
|
|
200
|
+
"docs:generate": "pnpm docs:clean && pnpm docs:generate:router && pnpm docs:generate:webhooks && pnpm docs:generate:gladia && pnpm docs:generate:assemblyai && pnpm docs:generate:deepgram && pnpm docs:generate:azure && pnpm docs:generate:openai && pnpm docs:generate:speechmatics && pnpm docs:generate:soniox && pnpm docs:generate:elevenlabs && pnpm docs:field-equivalences",
|
|
195
201
|
"docs:generate:router": "typedoc --options typedoc.router.config.mjs",
|
|
196
202
|
"docs:generate:webhooks": "typedoc --options typedoc.webhooks.config.mjs",
|
|
197
203
|
"docs:generate:gladia": "typedoc --options typedoc.gladia.config.mjs",
|
|
@@ -200,7 +206,9 @@
|
|
|
200
206
|
"docs:generate:azure": "typedoc --options typedoc.azure.config.mjs",
|
|
201
207
|
"docs:generate:openai": "typedoc --options typedoc.openai.config.mjs",
|
|
202
208
|
"docs:generate:speechmatics": "typedoc --options typedoc.speechmatics.config.mjs",
|
|
209
|
+
"docs:generate:soniox": "typedoc --options typedoc.soniox.config.mjs",
|
|
210
|
+
"docs:generate:elevenlabs": "typedoc --options typedoc.elevenlabs.config.mjs",
|
|
203
211
|
"docs:field-equivalences": "node scripts/generate-field-equivalences.js",
|
|
204
|
-
"prebuild": "pnpm openapi:fix-specs && pnpm openapi:fix && pnpm openapi:sync-soniox-languages && pnpm openapi:sync-soniox-models && pnpm openapi:sync-speechmatics-languages && pnpm openapi:sync-azure-locales && pnpm openapi:sync-deepgram-languages && pnpm openapi:sync-deepgram-models && pnpm openapi:sync-openai-models"
|
|
212
|
+
"prebuild": "pnpm openapi:fix-specs && pnpm openapi:fix && pnpm openapi:sync-soniox-languages && pnpm openapi:sync-soniox-models && pnpm openapi:sync-speechmatics-languages && pnpm openapi:sync-azure-locales && pnpm openapi:sync-deepgram-languages && pnpm openapi:sync-deepgram-models && pnpm openapi:sync-openai-models && pnpm openapi:sync-elevenlabs-languages && pnpm openapi:sync-elevenlabs-models"
|
|
205
213
|
}
|
|
206
214
|
}
|