npm - @pompeii-labs/audio - Versions diffs - 0.2.2 → 0.3.1 - Mend

@pompeii-labs/audio 0.2.2 → 0.3.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/voice.d.mts CHANGED Viewed

@@ -43,6 +43,7 @@ type MagmaFlowArgs = {
     onSpeechDetected: () => void;
     onTranscription: (transcription: MagmaFlowSTTOutput) => void;
     onAudioOutput: (audio: Buffer) => void;
+    onNormalizedAudio?: (audio: Buffer) => void;
     config?: MagmaFlowConfig;
 };
 declare class MagmaFlow {
@@ -51,6 +52,7 @@ declare class MagmaFlow {
     private inputFormat;
     private outputFormat;
     private onAudioOutput;
+    private onNormalizedAudio?;
     private textBuffer;
     private textQueue;
     private generatingAudio;
@@ -107,6 +109,36 @@ declare class DeepgramSTT extends MagmaFlowSpeechToText {
     private computeTurns;
 }
+type DeepgramFluxConfig = {
+    apiKey?: string;
+    eotThreshold?: number;
+    eagerEotThreshold?: number;
+    eotTimeoutMs?: number;
+};
+declare class DeepgramFluxSTT extends MagmaFlowSpeechToText {
+    private ws;
+    private apiKey;
+    private eotThreshold;
+    private eagerEotThreshold?;
+    private eotTimeoutMs;
+    private audioQueue;
+    private connecting;
+    private killed;
+    private reconnectAttempts;
+    private reconnectTimer?;
+    constructor(config?: DeepgramFluxConfig);
+    private connect;
+    private scheduleReconnect;
+    private clearConnection;
+    private handleMessage;
+    private handleTurnInfo;
+    input(audio: Buffer): void;
+    flush(): void;
+    kill(): void;
+    onEagerEndOfTurn(transcript: string): void;
+    onTurnResumed(): void;
+}
 declare enum GladiaModel {
     SOLARIA_1 = "solaria-1"
 }
@@ -278,9 +310,21 @@ declare class ElevenLabsTTS extends MagmaFlowTextToSpeech {
 type HumeTTSArgs = {
     client?: HumeClient;
+    voice?: {
+        name?: string;
+        id?: string;
+        provider?: string;
+    };
+    description?: string;
+    speed?: number;
+    version?: string;
 };
 declare class HumeTTS extends MagmaFlowTextToSpeech {
     private client;
+    private voice?;
+    private description?;
+    private speed?;
+    private version?;
     constructor(args: HumeTTSArgs);
     setup(): Promise<void>;
     input(text: string | null, requestId: string): void;
@@ -302,4 +346,4 @@ declare class WhisperTTS extends MagmaFlowTextToSpeech {
 declare function splitTextIntoChunks(text: string, targetLength?: number): string[];
-export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenLabsVoice, GladiaLanguage, GladiaModel, GladiaSTT, type GladiaSTTArgs, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, type MagmaFlowSTTOutput, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, type Turn, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };
+export { type DeepgramFluxConfig, DeepgramFluxSTT, DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenLabsVoice, GladiaLanguage, GladiaModel, GladiaSTT, type GladiaSTTArgs, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, type MagmaFlowSTTOutput, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, type Turn, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };

package/dist/voice.d.ts CHANGED Viewed

@@ -43,6 +43,7 @@ type MagmaFlowArgs = {
     onSpeechDetected: () => void;
     onTranscription: (transcription: MagmaFlowSTTOutput) => void;
     onAudioOutput: (audio: Buffer) => void;
+    onNormalizedAudio?: (audio: Buffer) => void;
     config?: MagmaFlowConfig;
 };
 declare class MagmaFlow {
@@ -51,6 +52,7 @@ declare class MagmaFlow {
     private inputFormat;
     private outputFormat;
     private onAudioOutput;
+    private onNormalizedAudio?;
     private textBuffer;
     private textQueue;
     private generatingAudio;
@@ -107,6 +109,36 @@ declare class DeepgramSTT extends MagmaFlowSpeechToText {
     private computeTurns;
 }
+type DeepgramFluxConfig = {
+    apiKey?: string;
+    eotThreshold?: number;
+    eagerEotThreshold?: number;
+    eotTimeoutMs?: number;
+};
+declare class DeepgramFluxSTT extends MagmaFlowSpeechToText {
+    private ws;
+    private apiKey;
+    private eotThreshold;
+    private eagerEotThreshold?;
+    private eotTimeoutMs;
+    private audioQueue;
+    private connecting;
+    private killed;
+    private reconnectAttempts;
+    private reconnectTimer?;
+    constructor(config?: DeepgramFluxConfig);
+    private connect;
+    private scheduleReconnect;
+    private clearConnection;
+    private handleMessage;
+    private handleTurnInfo;
+    input(audio: Buffer): void;
+    flush(): void;
+    kill(): void;
+    onEagerEndOfTurn(transcript: string): void;
+    onTurnResumed(): void;
+}
 declare enum GladiaModel {
     SOLARIA_1 = "solaria-1"
 }
@@ -278,9 +310,21 @@ declare class ElevenLabsTTS extends MagmaFlowTextToSpeech {
 type HumeTTSArgs = {
     client?: HumeClient;
+    voice?: {
+        name?: string;
+        id?: string;
+        provider?: string;
+    };
+    description?: string;
+    speed?: number;
+    version?: string;
 };
 declare class HumeTTS extends MagmaFlowTextToSpeech {
     private client;
+    private voice?;
+    private description?;
+    private speed?;
+    private version?;
     constructor(args: HumeTTSArgs);
     setup(): Promise<void>;
     input(text: string | null, requestId: string): void;
@@ -302,4 +346,4 @@ declare class WhisperTTS extends MagmaFlowTextToSpeech {
 declare function splitTextIntoChunks(text: string, targetLength?: number): string[];
-export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenLabsVoice, GladiaLanguage, GladiaModel, GladiaSTT, type GladiaSTTArgs, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, type MagmaFlowSTTOutput, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, type Turn, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };
+export { type DeepgramFluxConfig, DeepgramFluxSTT, DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenLabsVoice, GladiaLanguage, GladiaModel, GladiaSTT, type GladiaSTTArgs, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, type MagmaFlowSTTOutput, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, type Turn, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };

package/dist/voice.js CHANGED Viewed

@@ -1,12 +1,13 @@
 'use strict';
 var sdk = require('@deepgram/sdk');
-var ws = require('ws');
+var WebSocket2 = require('ws');
 var hume = require('hume');
 var OpenAI = require('openai');
 function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
+var WebSocket2__default = /*#__PURE__*/_interopDefault(WebSocket2);
 var OpenAI__default = /*#__PURE__*/_interopDefault(OpenAI);
 // src/helpers/bufferToInt16Array.ts
@@ -424,6 +425,7 @@ var MagmaFlow = class {
   inputFormat;
   outputFormat;
   onAudioOutput;
+  onNormalizedAudio;
   textBuffer = "";
   textQueue = [];
   generatingAudio = false;
@@ -440,6 +442,7 @@ var MagmaFlow = class {
     this.inputFormat = args.inputFormat;
     this.outputFormat = args.outputFormat;
     this.onAudioOutput = args.onAudioOutput;
+    this.onNormalizedAudio = args.onNormalizedAudio;
     this.config = { ...this.config, ...args.config };
     this.tts.onOutput = (audio, requestId) => {
       if (this.currentRequestId !== requestId) {
@@ -478,7 +481,9 @@ var MagmaFlow = class {
   inputAudio(audio) {
     const decodedAudio = decodeToPcm(audio, this.inputFormat.encoding);
     const resampledPCM = resamplePcm(decodedAudio, this.inputFormat.sampleRate, 48e3);
-    this.stt.input(int16ArrayToBuffer(resampledPCM));
+    const pcmBuffer = int16ArrayToBuffer(resampledPCM);
+    this.onNormalizedAudio?.(pcmBuffer);
+    this.stt.input(pcmBuffer);
   }
   inputText(text) {
     if (text === void 0 || text === null) {
@@ -694,6 +699,11 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
         } else {
           if (currentTurn) {
             currentTurn.confidence = currentTurnConfidence / currentTurnWordCount;
+            if (currentTurn.confidence < 0.5) {
+              currentTurn.text = "[inaudible]";
+            } else if (currentTurn.confidence < 0.75) {
+              currentTurn.text = `[unclear, confidence=${currentTurn.confidence.toFixed(2)}] ${currentTurn.text}`;
+            }
             turns.push(currentTurn);
           }
           currentTurn = { speaker, text: utterance, confidence: 0 };
@@ -712,6 +722,156 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
     }
   }
 };
+var kReconnectBaseMs = 500;
+var kReconnectMaxMs = 1e4;
+var DeepgramFluxSTT = class extends MagmaFlowSpeechToText {
+  ws = null;
+  apiKey;
+  eotThreshold;
+  eagerEotThreshold;
+  eotTimeoutMs;
+  audioQueue = [];
+  connecting = false;
+  killed = false;
+  reconnectAttempts = 0;
+  reconnectTimer;
+  constructor(config) {
+    super();
+    this.apiKey = config?.apiKey ?? process.env.DEEPGRAM_API_KEY;
+    this.eotThreshold = config?.eotThreshold ?? 0.7;
+    this.eagerEotThreshold = config?.eagerEotThreshold;
+    this.eotTimeoutMs = config?.eotTimeoutMs ?? 5e3;
+  }
+  connect() {
+    if (this.connecting || this.killed) return;
+    this.connecting = true;
+    const params = new URLSearchParams({
+      model: "flux-general-en",
+      encoding: "linear16",
+      sample_rate: "48000",
+      eot_threshold: this.eotThreshold.toString(),
+      eot_timeout_ms: this.eotTimeoutMs.toString()
+    });
+    if (this.eagerEotThreshold !== void 0) {
+      params.set("eager_eot_threshold", this.eagerEotThreshold.toString());
+    }
+    const url = `wss://api.deepgram.com/v2/listen?${params.toString()}`;
+    this.ws = new WebSocket2__default.default(url, {
+      headers: { Authorization: `Token ${this.apiKey}` }
+    });
+    this.ws.on("open", () => {
+      console.log("[DeepgramFlux] Connected");
+      this.connecting = false;
+      this.reconnectAttempts = 0;
+      for (const queued of this.audioQueue) {
+        this.ws.send(queued);
+      }
+      this.audioQueue = [];
+    });
+    this.ws.on("message", (data) => {
+      try {
+        const msg = JSON.parse(data.toString());
+        this.handleMessage(msg);
+      } catch (err) {
+        console.error(`[DeepgramFlux] Parse error: ${err.message}`);
+      }
+    });
+    this.ws.on("error", (err) => {
+      console.error(`[DeepgramFlux] Error: ${err.message}`);
+      this.connecting = false;
+    });
+    this.ws.on("close", (code, reason) => {
+      console.log(`[DeepgramFlux] Closed: ${code} ${reason.toString()}`);
+      this.clearConnection();
+      if (!this.killed) {
+        this.scheduleReconnect();
+      }
+    });
+  }
+  scheduleReconnect() {
+    const delay = Math.min(
+      kReconnectBaseMs * Math.pow(2, this.reconnectAttempts),
+      kReconnectMaxMs
+    );
+    this.reconnectAttempts++;
+    console.log(
+      `[DeepgramFlux] Reconnecting in ${delay}ms (attempt ${this.reconnectAttempts})`
+    );
+    this.reconnectTimer = setTimeout(() => {
+      this.reconnectTimer = void 0;
+      if (!this.killed && !this.ws) {
+        this.connect();
+      }
+    }, delay);
+  }
+  clearConnection() {
+    this.ws = null;
+    this.connecting = false;
+  }
+  handleMessage(msg) {
+    switch (msg.type) {
+      case "TurnInfo":
+        this.handleTurnInfo(msg);
+        break;
+      case "Connected":
+        break;
+      case "Error":
+        console.error(`[DeepgramFlux] Server error: ${JSON.stringify(msg)}`);
+        break;
+      default:
+        console.log(`[DeepgramFlux] Unknown message: ${JSON.stringify(msg)}`);
+        break;
+    }
+  }
+  handleTurnInfo(msg) {
+    switch (msg.event) {
+      case "StartOfTurn":
+        this.onSpeechDetected();
+        break;
+      case "EndOfTurn":
+        this.onOutput({ text: msg.transcript });
+        break;
+      case "EagerEndOfTurn":
+        this.onEagerEndOfTurn(msg.transcript);
+        break;
+      case "TurnResumed":
+        this.onTurnResumed();
+        break;
+    }
+  }
+  input(audio) {
+    if (!this.ws && !this.connecting && !this.reconnectTimer) {
+      this.connect();
+    }
+    if (this.ws?.readyState === WebSocket2__default.default.OPEN) {
+      this.ws.send(audio);
+    } else {
+      this.audioQueue.push(audio);
+    }
+  }
+  flush() {
+    if (this.ws?.readyState === WebSocket2__default.default.OPEN) {
+      this.ws.send(JSON.stringify({ type: "Finalize" }));
+    }
+  }
+  kill() {
+    this.killed = true;
+    if (this.reconnectTimer) {
+      clearTimeout(this.reconnectTimer);
+      this.reconnectTimer = void 0;
+    }
+    if (this.ws?.readyState === WebSocket2__default.default.OPEN) {
+      this.ws.send(JSON.stringify({ type: "CloseStream" }));
+      this.ws.close();
+    }
+    this.clearConnection();
+    this.audioQueue = [];
+  }
+  onEagerEndOfTurn(transcript) {
+  }
+  onTurnResumed() {
+  }
+};
 var NATIVE_WEBSOCKET_AVAILABLE = typeof WebSocket !== "undefined";
 var DummyWebSocket = class {
   url;
@@ -740,7 +900,7 @@ var QueueWebSocket = class {
       if (NATIVE_WEBSOCKET_AVAILABLE) {
         this.ws = new WebSocket(this.url);
       } else {
-        this.ws = new ws.WebSocket(this.url);
+        this.ws = new WebSocket2.WebSocket(this.url);
       }
     } else {
       this.ws = new DummyWebSocket(null);
@@ -754,7 +914,7 @@ var QueueWebSocket = class {
       if (NATIVE_WEBSOCKET_AVAILABLE) {
         this.ws = new WebSocket(this.url);
       } else {
-        this.ws = new ws.WebSocket(this.url);
+        this.ws = new WebSocket2.WebSocket(this.url);
       }
     } else {
       this.ws = new DummyWebSocket(null);
@@ -1079,7 +1239,6 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
     ).then(async (response) => {
       const reader = response.body?.getReader();
       if (!reader) return;
-      new TextDecoder();
       while (true) {
         const { done, value } = await reader.read();
         if (done) break;
@@ -1096,9 +1255,17 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
 };
 var HumeTTS = class extends MagmaFlowTextToSpeech {
   client;
+  voice;
+  description;
+  speed;
+  version;
   constructor(args) {
     super();
     this.client = args.client ?? new hume.HumeClient({ apiKey: process.env.HUME_API_KEY });
+    this.voice = args.voice;
+    this.description = args.description;
+    this.speed = args.speed;
+    this.version = args.version;
   }
   async setup() {
   }
@@ -1106,22 +1273,28 @@ var HumeTTS = class extends MagmaFlowTextToSpeech {
     if (!text) {
       return;
     }
-    this.client.tts.synthesizeJsonStreaming({
-      utterances: [
-        {
-          text
-        }
-      ],
-      format: {
-        type: "pcm"
-      },
-      instantMode: true
-    }).then(async (stream) => {
+    const utterance = { text };
+    if (this.voice) utterance.voice = this.voice;
+    if (this.description) utterance.description = this.description;
+    if (this.speed !== void 0) utterance.speed = this.speed;
+    const params = {
+      utterances: [utterance],
+      format: { type: "pcm" },
+      instantMode: true,
+      stripHeaders: true
+    };
+    if (this.version) params.version = this.version;
+    this.client.tts.synthesizeJsonStreaming(params).then(async (stream) => {
       for await (const chunk of stream) {
-        this.onOutput(Buffer.from(chunk.audio, "base64"), requestId);
+        if (chunk.type === "audio") {
+          this.onOutput(Buffer.from(chunk.audio, "base64"), requestId);
+        }
       }
       this.onOutput(null, requestId);
       console.log("[Hume] Finished:", text);
+    }).catch((err) => {
+      console.error(`[Hume] Error: ${err.message}`);
+      this.onOutput(null, requestId);
     });
   }
   kill() {
@@ -1164,6 +1337,7 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
   }
 };
+exports.DeepgramFluxSTT = DeepgramFluxSTT;
 exports.DeepgramLanguage = DeepgramLanguage;
 exports.DeepgramModel = DeepgramModel;
 exports.DeepgramSTT = DeepgramSTT;

package/dist/voice.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
 import { DeepgramClient, LiveTranscriptionEvents } from '@deepgram/sdk';
-import { WebSocket as WebSocket$1 } from 'ws';
+import WebSocket2, { WebSocket as WebSocket$1 } from 'ws';
 import { HumeClient } from 'hume';
 import OpenAI from 'openai';
@@ -418,6 +418,7 @@ var MagmaFlow = class {
   inputFormat;
   outputFormat;
   onAudioOutput;
+  onNormalizedAudio;
   textBuffer = "";
   textQueue = [];
   generatingAudio = false;
@@ -434,6 +435,7 @@ var MagmaFlow = class {
     this.inputFormat = args.inputFormat;
     this.outputFormat = args.outputFormat;
     this.onAudioOutput = args.onAudioOutput;
+    this.onNormalizedAudio = args.onNormalizedAudio;
     this.config = { ...this.config, ...args.config };
     this.tts.onOutput = (audio, requestId) => {
       if (this.currentRequestId !== requestId) {
@@ -472,7 +474,9 @@ var MagmaFlow = class {
   inputAudio(audio) {
     const decodedAudio = decodeToPcm(audio, this.inputFormat.encoding);
     const resampledPCM = resamplePcm(decodedAudio, this.inputFormat.sampleRate, 48e3);
-    this.stt.input(int16ArrayToBuffer(resampledPCM));
+    const pcmBuffer = int16ArrayToBuffer(resampledPCM);
+    this.onNormalizedAudio?.(pcmBuffer);
+    this.stt.input(pcmBuffer);
   }
   inputText(text) {
     if (text === void 0 || text === null) {
@@ -688,6 +692,11 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
         } else {
           if (currentTurn) {
             currentTurn.confidence = currentTurnConfidence / currentTurnWordCount;
+            if (currentTurn.confidence < 0.5) {
+              currentTurn.text = "[inaudible]";
+            } else if (currentTurn.confidence < 0.75) {
+              currentTurn.text = `[unclear, confidence=${currentTurn.confidence.toFixed(2)}] ${currentTurn.text}`;
+            }
             turns.push(currentTurn);
           }
           currentTurn = { speaker, text: utterance, confidence: 0 };
@@ -706,6 +715,156 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
     }
   }
 };
+var kReconnectBaseMs = 500;
+var kReconnectMaxMs = 1e4;
+var DeepgramFluxSTT = class extends MagmaFlowSpeechToText {
+  ws = null;
+  apiKey;
+  eotThreshold;
+  eagerEotThreshold;
+  eotTimeoutMs;
+  audioQueue = [];
+  connecting = false;
+  killed = false;
+  reconnectAttempts = 0;
+  reconnectTimer;
+  constructor(config) {
+    super();
+    this.apiKey = config?.apiKey ?? process.env.DEEPGRAM_API_KEY;
+    this.eotThreshold = config?.eotThreshold ?? 0.7;
+    this.eagerEotThreshold = config?.eagerEotThreshold;
+    this.eotTimeoutMs = config?.eotTimeoutMs ?? 5e3;
+  }
+  connect() {
+    if (this.connecting || this.killed) return;
+    this.connecting = true;
+    const params = new URLSearchParams({
+      model: "flux-general-en",
+      encoding: "linear16",
+      sample_rate: "48000",
+      eot_threshold: this.eotThreshold.toString(),
+      eot_timeout_ms: this.eotTimeoutMs.toString()
+    });
+    if (this.eagerEotThreshold !== void 0) {
+      params.set("eager_eot_threshold", this.eagerEotThreshold.toString());
+    }
+    const url = `wss://api.deepgram.com/v2/listen?${params.toString()}`;
+    this.ws = new WebSocket2(url, {
+      headers: { Authorization: `Token ${this.apiKey}` }
+    });
+    this.ws.on("open", () => {
+      console.log("[DeepgramFlux] Connected");
+      this.connecting = false;
+      this.reconnectAttempts = 0;
+      for (const queued of this.audioQueue) {
+        this.ws.send(queued);
+      }
+      this.audioQueue = [];
+    });
+    this.ws.on("message", (data) => {
+      try {
+        const msg = JSON.parse(data.toString());
+        this.handleMessage(msg);
+      } catch (err) {
+        console.error(`[DeepgramFlux] Parse error: ${err.message}`);
+      }
+    });
+    this.ws.on("error", (err) => {
+      console.error(`[DeepgramFlux] Error: ${err.message}`);
+      this.connecting = false;
+    });
+    this.ws.on("close", (code, reason) => {
+      console.log(`[DeepgramFlux] Closed: ${code} ${reason.toString()}`);
+      this.clearConnection();
+      if (!this.killed) {
+        this.scheduleReconnect();
+      }
+    });
+  }
+  scheduleReconnect() {
+    const delay = Math.min(
+      kReconnectBaseMs * Math.pow(2, this.reconnectAttempts),
+      kReconnectMaxMs
+    );
+    this.reconnectAttempts++;
+    console.log(
+      `[DeepgramFlux] Reconnecting in ${delay}ms (attempt ${this.reconnectAttempts})`
+    );
+    this.reconnectTimer = setTimeout(() => {
+      this.reconnectTimer = void 0;
+      if (!this.killed && !this.ws) {
+        this.connect();
+      }
+    }, delay);
+  }
+  clearConnection() {
+    this.ws = null;
+    this.connecting = false;
+  }
+  handleMessage(msg) {
+    switch (msg.type) {
+      case "TurnInfo":
+        this.handleTurnInfo(msg);
+        break;
+      case "Connected":
+        break;
+      case "Error":
+        console.error(`[DeepgramFlux] Server error: ${JSON.stringify(msg)}`);
+        break;
+      default:
+        console.log(`[DeepgramFlux] Unknown message: ${JSON.stringify(msg)}`);
+        break;
+    }
+  }
+  handleTurnInfo(msg) {
+    switch (msg.event) {
+      case "StartOfTurn":
+        this.onSpeechDetected();
+        break;
+      case "EndOfTurn":
+        this.onOutput({ text: msg.transcript });
+        break;
+      case "EagerEndOfTurn":
+        this.onEagerEndOfTurn(msg.transcript);
+        break;
+      case "TurnResumed":
+        this.onTurnResumed();
+        break;
+    }
+  }
+  input(audio) {
+    if (!this.ws && !this.connecting && !this.reconnectTimer) {
+      this.connect();
+    }
+    if (this.ws?.readyState === WebSocket2.OPEN) {
+      this.ws.send(audio);
+    } else {
+      this.audioQueue.push(audio);
+    }
+  }
+  flush() {
+    if (this.ws?.readyState === WebSocket2.OPEN) {
+      this.ws.send(JSON.stringify({ type: "Finalize" }));
+    }
+  }
+  kill() {
+    this.killed = true;
+    if (this.reconnectTimer) {
+      clearTimeout(this.reconnectTimer);
+      this.reconnectTimer = void 0;
+    }
+    if (this.ws?.readyState === WebSocket2.OPEN) {
+      this.ws.send(JSON.stringify({ type: "CloseStream" }));
+      this.ws.close();
+    }
+    this.clearConnection();
+    this.audioQueue = [];
+  }
+  onEagerEndOfTurn(transcript) {
+  }
+  onTurnResumed() {
+  }
+};
 var NATIVE_WEBSOCKET_AVAILABLE = typeof WebSocket !== "undefined";
 var DummyWebSocket = class {
   url;
@@ -1073,7 +1232,6 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
     ).then(async (response) => {
       const reader = response.body?.getReader();
       if (!reader) return;
-      new TextDecoder();
       while (true) {
         const { done, value } = await reader.read();
         if (done) break;
@@ -1090,9 +1248,17 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
 };
 var HumeTTS = class extends MagmaFlowTextToSpeech {
   client;
+  voice;
+  description;
+  speed;
+  version;
   constructor(args) {
     super();
     this.client = args.client ?? new HumeClient({ apiKey: process.env.HUME_API_KEY });
+    this.voice = args.voice;
+    this.description = args.description;
+    this.speed = args.speed;
+    this.version = args.version;
   }
   async setup() {
   }
@@ -1100,22 +1266,28 @@ var HumeTTS = class extends MagmaFlowTextToSpeech {
     if (!text) {
       return;
     }
-    this.client.tts.synthesizeJsonStreaming({
-      utterances: [
-        {
-          text
-        }
-      ],
-      format: {
-        type: "pcm"
-      },
-      instantMode: true
-    }).then(async (stream) => {
+    const utterance = { text };
+    if (this.voice) utterance.voice = this.voice;
+    if (this.description) utterance.description = this.description;
+    if (this.speed !== void 0) utterance.speed = this.speed;
+    const params = {
+      utterances: [utterance],
+      format: { type: "pcm" },
+      instantMode: true,
+      stripHeaders: true
+    };
+    if (this.version) params.version = this.version;
+    this.client.tts.synthesizeJsonStreaming(params).then(async (stream) => {
       for await (const chunk of stream) {
-        this.onOutput(Buffer.from(chunk.audio, "base64"), requestId);
+        if (chunk.type === "audio") {
+          this.onOutput(Buffer.from(chunk.audio, "base64"), requestId);
+        }
       }
       this.onOutput(null, requestId);
       console.log("[Hume] Finished:", text);
+    }).catch((err) => {
+      console.error(`[Hume] Error: ${err.message}`);
+      this.onOutput(null, requestId);
     });
   }
   kill() {
@@ -1158,4 +1330,4 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
   }
 };
-export { DeepgramLanguage, DeepgramModel, DeepgramSTT, DeepgramTTS, ElevenLabsTTS, ElevenLabsVoice, GladiaLanguage, GladiaModel, GladiaSTT, HumeTTS, MagmaFlow, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, WhisperTTS, splitTextIntoChunks };
+export { DeepgramFluxSTT, DeepgramLanguage, DeepgramModel, DeepgramSTT, DeepgramTTS, ElevenLabsTTS, ElevenLabsVoice, GladiaLanguage, GladiaModel, GladiaSTT, HumeTTS, MagmaFlow, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, WhisperTTS, splitTextIntoChunks };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pompeii-labs/audio",
-  "version": "0.2.2",
+  "version": "0.3.1",
   "description": "The Audio SDK from Pompeii Labs",
   "keywords": [
     "Pompeii",
@@ -42,7 +42,7 @@
   },
   "dependencies": {
     "@deepgram/sdk": "4.2.0",
-    "hume": "0.11.1",
+    "hume": "0.15.13",
     "openai": "4.86.2"
   },
   "devDependencies": {