voice-router-dev 0.7.9 → 0.8.1

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
package/dist/webhooks.js CHANGED
@@ -35,12 +35,14 @@ __export(webhooks_exports, {
  AzureWebhookHandler: () => AzureWebhookHandler,
  BaseWebhookHandler: () => BaseWebhookHandler,
  DeepgramWebhookHandler: () => DeepgramWebhookHandler,
+ ElevenLabsWebhookHandler: () => ElevenLabsWebhookHandler,
  GladiaWebhookHandler: () => GladiaWebhookHandler,
  SpeechmaticsWebhookHandler: () => SpeechmaticsWebhookHandler,
  WebhookRouter: () => WebhookRouter,
  createAssemblyAIWebhookHandler: () => createAssemblyAIWebhookHandler,
  createAzureWebhookHandler: () => createAzureWebhookHandler,
  createDeepgramWebhookHandler: () => createDeepgramWebhookHandler,
+ createElevenLabsWebhookHandler: () => createElevenLabsWebhookHandler,
  createGladiaWebhookHandler: () => createGladiaWebhookHandler,
  createWebhookRouter: () => createWebhookRouter
  });
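
The hunk above adds the new ElevenLabs handler class and its factory to the module's export map. A minimal sketch of importing them, assuming the bundle is reachable at a webhooks entry point (the exact subpath is an assumption, not confirmed by this diff):

import {
  ElevenLabsWebhookHandler,
  createElevenLabsWebhookHandler,
} from "voice-router-dev/dist/webhooks"; // entry-point path is an assumption

const handler = createElevenLabsWebhookHandler(); // equivalent to new ElevenLabsWebhookHandler()
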
@@ -134,7 +136,7 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
  end: utterance.end,
  confidence: utterance.confidence,
  speaker: utterance.speaker !== void 0 ? String(utterance.speaker) : void 0,
- words: utterance.words?.map((w) => this.mapWord(w))
+ words: utterance.words?.map((w) => this.mapWord(w)) ?? []
  };
  }
  /**
@@ -196,7 +198,7 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
  speakerIds.add(u.speaker);
  }
  });
- const speakers = speakerIds.size > 0 ? Array.from(speakerIds).map((id) => ({ id: String(id) })) : void 0;
+ const speakers = speakerIds.size > 0 ? Array.from(speakerIds).map((id) => ({ id: String(id), label: `Speaker ${id}` })) : void 0;
  const summary = result.summarization?.success && result.summarization.results ? result.summarization.results : void 0;
  return {
  success: true,
@@ -274,34 +276,68 @@ var AssemblyAIWebhookHandler = class extends BaseWebhookHandler {
  }
  /**
  * Check if payload matches AssemblyAI webhook format
+ *
+ * Supports two formats:
+ * - Notification format: `{ transcript_id, status }` (lightweight callback)
+ * - Full transcript format: `{ id, status, audio_url, text, words, ... }` (complete response)
  */
  matches(payload, _options) {
  if (!payload || typeof payload !== "object") {
  return false;
  }
  const obj = payload;
- if (!("transcript_id" in obj) || !("status" in obj)) {
- return false;
- }
- if (typeof obj.transcript_id !== "string") {
- return false;
+ if ("transcript_id" in obj && "status" in obj) {
+ if (typeof obj.transcript_id !== "string") return false;
+ if (obj.status !== "completed" && obj.status !== "error") return false;
+ return true;
  }
- if (obj.status !== "completed" && obj.status !== "error") {
- return false;
+ if ("id" in obj && "status" in obj && "audio_url" in obj) {
+ if (typeof obj.id !== "string") return false;
+ if (obj.status !== "completed" && obj.status !== "error") return false;
+ return true;
  }
- return true;
+ return false;
+ }
+ /**
+ * Determine if the payload is a full transcript (vs a lightweight notification)
+ */
+ isFullTranscript(payload) {
+ return "audio_url" in payload && "id" in payload;
  }
  /**
  * Parse AssemblyAI webhook payload to unified format
+ *
+ * Supports two payload formats:
+ * - Notification: `{ transcript_id, status }` — returns minimal event (ID + status only)
+ * - Full transcript: `{ id, status, text, words, utterances, ... }` — returns complete data
  */
  parse(payload, _options) {
  if (!this.matches(payload)) {
  return this.createErrorEvent(payload, "Invalid AssemblyAI webhook payload");
  }
- const notification = payload;
- const transcriptId = notification.transcript_id;
- const status = notification.status;
+ const obj = payload;
+ const isFullFormat = this.isFullTranscript(obj);
+ const transcriptId = isFullFormat ? payload.id : payload.transcript_id;
+ const status = obj.status;
+ if (status === "error") {
+ const error = isFullFormat ? payload.error : void 0;
+ return {
+ success: false,
+ provider: this.provider,
+ eventType: "transcription.failed",
+ data: {
+ id: transcriptId,
+ status: "error",
+ error: error || "Transcription failed"
+ },
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+ raw: payload
+ };
+ }
  if (status === "completed") {
+ if (isFullFormat) {
+ return this.parseFullTranscript(payload, payload);
+ }
  return {
  success: true,
  provider: this.provider,
@@ -309,28 +345,76 @@ var AssemblyAIWebhookHandler = class extends BaseWebhookHandler {
  data: {
  id: transcriptId,
  status: "completed"
- // Note: Full transcript data needs to be fetched via API
- // using AssemblyAIAdapter.getTranscript(transcriptId)
  },
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
  raw: payload
  };
  }
- if (status === "error") {
+ return this.createErrorEvent(payload, `Unknown AssemblyAI status: ${status}`);
+ }
+ /**
+ * Parse a full AssemblyAI transcript response into unified format
+ *
+ * AssemblyAI times are in milliseconds — converted to seconds for unified format.
+ */
+ parseFullTranscript(transcript, raw) {
+ try {
+ const words = transcript.words ? transcript.words.map((w) => ({
+ word: w.text,
+ start: w.start / 1e3,
+ end: w.end / 1e3,
+ confidence: w.confidence,
+ speaker: w.speaker ?? void 0
+ })) : void 0;
+ const utterances = transcript.utterances ? transcript.utterances.map((u) => ({
+ text: u.text,
+ start: u.start / 1e3,
+ end: u.end / 1e3,
+ speaker: u.speaker,
+ confidence: u.confidence,
+ words: u.words.map((w) => ({
+ word: w.text,
+ start: w.start / 1e3,
+ end: w.end / 1e3,
+ confidence: w.confidence,
+ speaker: w.speaker ?? void 0
+ }))
+ })) : void 0;
+ const speakerIds = /* @__PURE__ */ new Set();
+ transcript.utterances?.forEach((u) => {
+ if (u.speaker) speakerIds.add(u.speaker);
+ });
+ const speakers = speakerIds.size > 0 ? Array.from(speakerIds).map((id) => ({ id, label: `Speaker ${id}` })) : void 0;
  return {
- success: false,
+ success: true,
  provider: this.provider,
- eventType: "transcription.failed",
+ eventType: "transcription.completed",
  data: {
- id: transcriptId,
- status: "error",
- error: "Transcription failed"
+ id: transcript.id,
+ status: "completed",
+ text: transcript.text ?? void 0,
+ confidence: transcript.confidence ?? void 0,
+ duration: transcript.audio_duration ?? void 0,
+ language: transcript.language_code ?? void 0,
+ speakers,
+ words,
+ utterances,
+ summary: transcript.summary ?? void 0,
+ metadata: {
+ speech_model: transcript.speech_model,
+ audio_channels: transcript.audio_channels,
+ webhook_status_code: transcript.webhook_status_code
+ }
  },
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
- raw: payload
+ raw
  };
+ } catch (error) {
+ return this.createErrorEvent(
+ raw,
+ `Failed to parse AssemblyAI transcript: ${error instanceof Error ? error.message : "Unknown error"}`
+ );
  }
- return this.createErrorEvent(payload, `Unknown AssemblyAI status: ${status}`);
  }
  /**
  * Verify AssemblyAI webhook signature
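
The reworked AssemblyAI handler now accepts both the lightweight notification payload and a full transcript payload, and converts AssemblyAI's millisecond word timings to seconds. A hedged sketch of both paths, using only the handler API shown in this diff; the payload values and the import path are invented for illustration:

import { createAssemblyAIWebhookHandler } from "voice-router-dev/dist/webhooks"; // path is an assumption

const handler = createAssemblyAIWebhookHandler();

// Notification format: only an ID and status are available, so the parsed
// event carries no transcript text.
const minimalEvent = handler.parse({ transcript_id: "abc123", status: "completed" });
// minimalEvent.eventType === "transcription.completed"; minimalEvent.data.text is undefined

// Full transcript format: routed through parseFullTranscript, which converts
// millisecond timings to seconds.
const fullEvent = handler.parse({
  id: "abc123",
  status: "completed",
  audio_url: "https://example.com/audio.wav",
  text: "hello world",
  words: [{ text: "hello", start: 0, end: 480, confidence: 0.99, speaker: "A" }],
});
// fullEvent.data.words?.[0].end === 0.48 (480 ms -> 0.48 s)
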
@@ -465,24 +549,33 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
  end: w.end || 0,
  confidence: w.confidence
  })) : void 0;
- const speakers = response.results.utterances && response.results.utterances.length > 0 ? response.results.utterances.map((utterance) => ({
- id: utterance.speaker?.toString() || "unknown",
- speaker: utterance.speaker?.toString() || "unknown",
- text: utterance.transcript || "",
- confidence: utterance.confidence
+ const speakerIds = /* @__PURE__ */ new Set();
+ if (response.results.utterances) {
+ for (const utterance of response.results.utterances) {
+ if (utterance.speaker !== void 0) {
+ speakerIds.add(utterance.speaker.toString());
+ }
+ }
+ }
+ const speakers = speakerIds.size > 0 ? Array.from(speakerIds).map((id) => ({
+ id,
+ label: `Speaker ${id}`
  })) : void 0;
  const utterances = response.results.utterances && response.results.utterances.length > 0 ? response.results.utterances.map((utterance) => ({
+ id: utterance.id,
  text: utterance.transcript || "",
  start: utterance.start || 0,
  end: utterance.end || 0,
  speaker: utterance.speaker?.toString(),
+ channel: utterance.channel,
  confidence: utterance.confidence,
- words: utterance.words && utterance.words.length > 0 ? utterance.words.map((w) => ({
+ words: utterance.words?.map((w) => ({
  word: w.word || "",
  start: w.start || 0,
  end: w.end || 0,
- confidence: w.confidence
- })) : void 0
+ confidence: w.confidence,
+ speaker: w.speaker?.toString()
+ })) ?? []
  })) : void 0;
  const summary = alternative.summaries?.[0]?.summary;
  return {
@@ -495,7 +588,7 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
  text: transcript,
  confidence: alternative.confidence,
  duration,
- language: response.metadata.models?.[0] || void 0,
+ language: channel.detected_language,
  speakers: speakers && speakers.length > 0 ? speakers : void 0,
  words: words && words.length > 0 ? words : void 0,
  utterances: utterances && utterances.length > 0 ? utterances : void 0,
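
For Deepgram payloads, data.speakers is now a de-duplicated list of speaker IDs with a label (one entry per distinct speaker rather than per utterance), and language is taken from channel.detected_language instead of the model list. Illustrative shape only, with invented values:

// One entry per distinct speaker ID after this change:
const speakersExample = [
  { id: "0", label: "Speaker 0" },
  { id: "1", label: "Speaker 1" },
];
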
@@ -682,6 +775,72 @@ function createAzureWebhookHandler() {
  return new AzureWebhookHandler();
  }

+ // src/utils/transcription-helpers.ts
+ function buildUtterancesFromWords(words) {
+ const utterances = [];
+ let currentSpeaker;
+ let currentWords = [];
+ let utteranceStart = 0;
+ for (const word of words) {
+ if (!word.speaker) continue;
+ if (word.speaker !== currentSpeaker) {
+ if (currentSpeaker && currentWords.length > 0) {
+ utterances.push({
+ text: currentWords.map((w) => w.word).join(" "),
+ start: utteranceStart,
+ end: currentWords[currentWords.length - 1].end,
+ speaker: currentSpeaker,
+ words: currentWords
+ });
+ }
+ currentSpeaker = word.speaker;
+ currentWords = [word];
+ utteranceStart = word.start;
+ } else {
+ currentWords.push(word);
+ }
+ }
+ if (currentSpeaker && currentWords.length > 0) {
+ utterances.push({
+ text: currentWords.map((w) => w.word).join(" "),
+ start: utteranceStart,
+ end: currentWords[currentWords.length - 1].end,
+ speaker: currentSpeaker,
+ words: currentWords
+ });
+ }
+ return utterances;
+ }
+ function buildTextFromSpeechmaticsResults(results) {
+ const parts = [];
+ let attachNext = false;
+ for (const result of results) {
+ if (result.type !== "word" && result.type !== "punctuation") continue;
+ const content = result.alternatives?.[0]?.content;
+ if (!content) continue;
+ if (result.type === "punctuation") {
+ const attaches = result.attaches_to;
+ if (attaches === "previous" || attaches === "both") {
+ parts.push(content);
+ attachNext = attaches === "both";
+ } else if (attaches === "next") {
+ if (parts.length > 0) parts.push(" ");
+ parts.push(content);
+ attachNext = true;
+ } else {
+ if (parts.length > 0 && !attachNext) parts.push(" ");
+ parts.push(content);
+ attachNext = false;
+ }
+ } else {
+ if (parts.length > 0 && !attachNext) parts.push(" ");
+ parts.push(content);
+ attachNext = false;
+ }
+ }
+ return parts.join("");
+ }
+
  // src/webhooks/speechmatics-webhook.ts
  var SpeechmaticsWebhookHandler = class extends BaseWebhookHandler {
  constructor() {
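
The new buildUtterancesFromWords helper groups consecutive words by speaker into utterances and skips words that carry no speaker, while buildTextFromSpeechmaticsResults rebuilds readable text from Speechmatics word and punctuation results. Both helpers are bundled but not exported, so the following only illustrates the grouping behavior with invented values:

const words = [
  { word: "hello", start: 0.0, end: 0.4, speaker: "S1" },
  { word: "there", start: 0.5, end: 0.9, speaker: "S1" },
  { word: "hi", start: 1.2, end: 1.4, speaker: "S2" },
];
// buildUtterancesFromWords(words) would yield two utterances:
//   { text: "hello there", start: 0.0, end: 0.9, speaker: "S1", words: [...] }
//   { text: "hi", start: 1.2, end: 1.4, speaker: "S2", words: [...] }
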
@@ -763,18 +922,25 @@ var SpeechmaticsWebhookHandler = class extends BaseWebhookHandler {
  if (status === "success" && payload && typeof payload === "object") {
  const transcript = payload;
  if (transcript.results && transcript.job) {
- const text = transcript.results.filter((r) => r.type === "word" && r.alternatives).map((r) => r.alternatives[0]?.content || "").join(" ");
+ const text = buildTextFromSpeechmaticsResults(transcript.results);
+ const wordResults = transcript.results.filter((r) => r.type === "word" && r.alternatives);
+ const words = wordResults.filter((r) => r.start_time !== void 0 && r.end_time !== void 0).map((r) => ({
+ word: r.alternatives[0]?.content || "",
+ start: r.start_time,
+ end: r.end_time,
+ confidence: r.alternatives[0]?.confidence,
+ speaker: r.alternatives[0]?.speaker
+ }));
  const speakerSet = /* @__PURE__ */ new Set();
- transcript.results.forEach((r) => {
- if (r.alternatives) {
- const speaker = r.alternatives[0]?.speaker;
- if (speaker) speakerSet.add(speaker);
- }
+ wordResults.forEach((r) => {
+ const speaker = r.alternatives[0]?.speaker;
+ if (speaker) speakerSet.add(speaker);
  });
  const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
  id,
  label: `Speaker ${id}`
  })) : void 0;
+ const utterances = buildUtterancesFromWords(words);
  return {
  success: true,
  provider: this.provider,
@@ -787,6 +953,8 @@ var SpeechmaticsWebhookHandler = class extends BaseWebhookHandler {
  language: transcript.metadata.transcription_config?.language,
  duration: transcript.job.duration,
  speakers,
+ words: words.length > 0 ? words : void 0,
+ utterances: utterances.length > 0 ? utterances : void 0,
  createdAt: transcript.job.created_at
  },
  raw: payload
@@ -808,6 +976,138 @@ var SpeechmaticsWebhookHandler = class extends BaseWebhookHandler {
  }
  };

+ // src/webhooks/elevenlabs-webhook.ts
+ var ElevenLabsWebhookHandler = class extends BaseWebhookHandler {
+ constructor() {
+ super(...arguments);
+ this.provider = "elevenlabs";
+ }
+ /**
+ * Check if payload matches ElevenLabs webhook format
+ *
+ * ElevenLabs webhook payloads contain the full transcription result
+ * with `words` array and `language_code` / `language_probability` fields.
+ */
+ matches(payload, _options) {
+ if (!payload || typeof payload !== "object") {
+ return false;
+ }
+ const obj = payload;
+ if (!("words" in obj) || !("language_code" in obj) || !("language_probability" in obj)) {
+ return false;
+ }
+ if (!Array.isArray(obj.words)) {
+ return false;
+ }
+ if (!("text" in obj)) {
+ return false;
+ }
+ if (obj.words.length > 0) {
+ const firstWord = obj.words[0];
+ if (!("logprob" in firstWord) || !("type" in firstWord)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ /**
+ * Parse ElevenLabs webhook payload to unified format
+ */
+ parse(payload, _options) {
+ if (!this.matches(payload)) {
+ return this.createErrorEvent(payload, "Invalid ElevenLabs webhook payload");
+ }
+ const response = payload;
+ try {
+ const transcriptionId = response.transcription_id?.toString() || "";
+ const transcript = response.text;
+ if (!transcript) {
+ return {
+ success: false,
+ provider: this.provider,
+ eventType: "transcription.failed",
+ data: {
+ id: transcriptionId,
+ status: "error",
+ error: "Empty transcript"
+ },
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+ raw: payload
+ };
+ }
+ const words = response.words && response.words.length > 0 ? response.words.filter((w) => w.type === "word").map((w) => ({
+ word: w.text || "",
+ start: typeof w.start === "number" ? w.start : 0,
+ end: typeof w.end === "number" ? w.end : 0,
+ confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
+ speaker: w.speaker_id?.toString()
+ })) : void 0;
+ const speakerIds = /* @__PURE__ */ new Set();
+ if (response.words) {
+ for (const w of response.words) {
+ if (w.speaker_id !== void 0 && w.speaker_id !== null) {
+ speakerIds.add(w.speaker_id.toString());
+ }
+ }
+ }
+ const speakers = speakerIds.size > 0 ? Array.from(speakerIds).map((id) => ({
+ id,
+ label: `Speaker ${id}`
+ })) : void 0;
+ const utterances = words && words.length > 0 ? buildUtterancesFromWords(
+ words.map((w) => ({
+ word: w.word,
+ start: w.start,
+ end: w.end,
+ confidence: w.confidence,
+ speaker: w.speaker
+ }))
+ ) : void 0;
+ return {
+ success: true,
+ provider: this.provider,
+ eventType: "transcription.completed",
+ data: {
+ id: transcriptionId,
+ status: "completed",
+ text: transcript,
+ language: response.language_code,
+ speakers: speakers && speakers.length > 0 ? speakers : void 0,
+ words: words && words.length > 0 ? words : void 0,
+ utterances: utterances && utterances.length > 0 ? utterances : void 0,
+ metadata: {
+ language_probability: response.language_probability,
+ entities: response.entities,
+ channel_index: response.channel_index
+ }
+ },
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+ raw: payload
+ };
+ } catch (error) {
+ return this.createErrorEvent(
+ payload,
+ `Failed to parse ElevenLabs webhook: ${error instanceof Error ? error.message : "Unknown error"}`
+ );
+ }
+ }
+ /**
+ * Verify ElevenLabs webhook signature
+ *
+ * Note: ElevenLabs does not currently provide a standard webhook signature
+ * verification mechanism for STT webhooks. For security, use HTTPS and
+ * validate the request source.
+ *
+ * @returns Always returns true (no verification available)
+ */
+ verify() {
+ return true;
+ }
+ };
+ function createElevenLabsWebhookHandler() {
+ return new ElevenLabsWebhookHandler();
+ }
+
  // src/webhooks/webhook-router.ts
  var WebhookRouter = class {
  constructor() {
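
The new ElevenLabs handler matches payloads that carry text, a words array, language_code and language_probability, derives per-word confidence from logprob via Math.exp, and builds speakers and utterances from speaker_id. A hedged sketch with an invented payload (field names mirror what matches()/parse() read in this diff; the import path is an assumption):

import { createElevenLabsWebhookHandler } from "voice-router-dev/dist/webhooks"; // path is an assumption

const handler = createElevenLabsWebhookHandler();
const payload = {
  transcription_id: "txn_123",
  text: "hello world",
  language_code: "en",
  language_probability: 0.98,
  words: [
    { type: "word", text: "hello", start: 0.0, end: 0.4, logprob: -0.05, speaker_id: "speaker_0" },
    { type: "spacing", text: " ", start: 0.4, end: 0.5, logprob: 0 },
    { type: "word", text: "world", start: 0.5, end: 0.9, logprob: -0.1, speaker_id: "speaker_0" },
  ],
};

if (handler.matches(payload)) {
  const event = handler.parse(payload);
  // event.data.words keeps only type === "word" entries;
  // confidence === Math.exp(logprob), e.g. Math.exp(-0.05) ≈ 0.951
}
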
@@ -816,7 +1116,8 @@ var WebhookRouter = class {
  ["assemblyai", new AssemblyAIWebhookHandler()],
  ["deepgram", new DeepgramWebhookHandler()],
  ["azure-stt", new AzureWebhookHandler()],
- ["speechmatics", new SpeechmaticsWebhookHandler()]
+ ["speechmatics", new SpeechmaticsWebhookHandler()],
+ ["elevenlabs", new ElevenLabsWebhookHandler()]
  ]);
  }
  /**
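
The router's handler map now registers "elevenlabs" alongside the existing providers. The router's public dispatch method is not shown in this hunk, so the sketch below routes a request body manually using only the handler API visible in this diff (matches/parse); WebhookRouter presumably does something similar internally:

import {
  createAssemblyAIWebhookHandler,
  createDeepgramWebhookHandler,
  createElevenLabsWebhookHandler,
  createGladiaWebhookHandler,
} from "voice-router-dev/dist/webhooks"; // path is an assumption

const handlers = [
  createAssemblyAIWebhookHandler(),
  createDeepgramWebhookHandler(),
  createGladiaWebhookHandler(),
  createElevenLabsWebhookHandler(), // new in 0.8.1
];

function parseWebhook(body: unknown) {
  const handler = handlers.find((h) => h.matches(body));
  return handler ? handler.parse(body) : undefined;
}
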
@@ -996,12 +1297,14 @@ function createWebhookRouter() {
  AzureWebhookHandler,
  BaseWebhookHandler,
  DeepgramWebhookHandler,
+ ElevenLabsWebhookHandler,
  GladiaWebhookHandler,
  SpeechmaticsWebhookHandler,
  WebhookRouter,
  createAssemblyAIWebhookHandler,
  createAzureWebhookHandler,
  createDeepgramWebhookHandler,
+ createElevenLabsWebhookHandler,
  createGladiaWebhookHandler,
  createWebhookRouter
  });