npm - @pompeii-labs/audio - Versions diffs - 0.2.0 → 0.2.2 - Mend

@pompeii-labs/audio 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/voice.d.mts CHANGED Viewed

@@ -107,6 +107,115 @@ declare class DeepgramSTT extends MagmaFlowSpeechToText {
     private computeTurns;
 }
+declare enum GladiaModel {
+    SOLARIA_1 = "solaria-1"
+}
+declare enum GladiaLanguage {
+    EN = "en"
+}
+type GladiaConfig = {
+    model: GladiaModel;
+    encoding?: 'wav/pcm' | 'wav/alaw' | 'wav/ulaw';
+    sample_rate?: number;
+    bit_depth?: number;
+    channels?: number;
+    custom_metadata?: Record<string, unknown>;
+    endpointing?: number;
+    maximum_duration_without_endpointing?: number;
+    language_config?: {
+        languages?: GladiaLanguage[];
+    };
+    pre_processing?: {
+        audio_enhancer?: boolean;
+        speech_threshold?: number;
+    };
+    realtime_processing?: {
+        custom_vocabulary?: boolean;
+        custom_vocabulary_config?: {
+            vocabulary: {
+                value: string;
+                intensity?: number;
+                pronunciations?: string[];
+                language?: string;
+            }[];
+            default_intensity?: number;
+        };
+        custom_spelling?: boolean;
+        custom_spelling_config?: {
+            spelling_dictionary: Record<string, string[]>;
+        };
+        translation?: boolean;
+        translation_config?: {
+            target_languages: GladiaLanguage[];
+            model?: 'base' | 'enhanced';
+            match_original_utterances?: boolean;
+            lipsync?: boolean;
+            context_adaptation?: boolean;
+            context?: string;
+            informal?: boolean;
+        };
+        named_entity_recognition?: boolean;
+        sentiment_analysis?: boolean;
+    };
+    post_processing?: {
+        summarization?: boolean;
+        summarization_config?: {
+            type?: 'general' | 'bullet_points' | 'concise';
+        };
+        chapterization?: boolean;
+    };
+    messages_config?: {
+        receive_partial_transcripts?: boolean;
+        receive_final_transcripts?: boolean;
+        receive_speech_events?: boolean;
+        receive_pre_processing_events?: boolean;
+        receive_realtime_processing_events?: boolean;
+        receive_post_processing_events?: boolean;
+        receive_acknowledgements?: boolean;
+        receive_errors?: boolean;
+        receive_lifecycle_events?: boolean;
+    };
+    callback?: boolean;
+    callback_config?: {
+        url: string;
+        receive_partial_transcripts?: boolean;
+        receive_final_transcripts?: boolean;
+        receive_speech_events?: boolean;
+        receive_pre_processing_events?: boolean;
+        receive_realtime_processing_events?: boolean;
+        receive_post_processing_events?: boolean;
+        receive_acknowledgements?: boolean;
+        receive_errors?: boolean;
+        receive_lifecycle_events?: boolean;
+    };
+};
+type GladiaSTTArgs = {
+    model: GladiaModel;
+    apiKey?: string;
+    config?: Omit<GladiaConfig, 'model' | 'encoding' | 'sample_rate' | 'channels'>;
+};
+declare class GladiaSTT extends MagmaFlowSpeechToText {
+    private connection;
+    private connectionUrl;
+    private settingUp;
+    private apiKey;
+    private config;
+    private turnBuffer;
+    private utteranceEnded;
+    constructor(args: GladiaSTTArgs);
+    private setup;
+    private connectWS;
+    input(audio: Buffer): void;
+    flush(): void;
+    kill(): void;
+    private handleTranscriptionEvent;
+    private handleUtteranceEnd;
+    private sendOutput;
+    private onOpen;
+    private computeTurns;
+    private handleMessage;
+}
 type DeepgramTTSArgs = {
     client?: DeepgramClient;
 };
@@ -193,4 +302,4 @@ declare class WhisperTTS extends MagmaFlowTextToSpeech {
 declare function splitTextIntoChunks(text: string, targetLength?: number): string[];
-export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenLabsVoice, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, type MagmaFlowSTTOutput, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, type Turn, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };
+export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenLabsVoice, GladiaLanguage, GladiaModel, GladiaSTT, type GladiaSTTArgs, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, type MagmaFlowSTTOutput, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, type Turn, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };

package/dist/voice.d.ts CHANGED Viewed

@@ -107,6 +107,115 @@ declare class DeepgramSTT extends MagmaFlowSpeechToText {
     private computeTurns;
 }
+declare enum GladiaModel {
+    SOLARIA_1 = "solaria-1"
+}
+declare enum GladiaLanguage {
+    EN = "en"
+}
+type GladiaConfig = {
+    model: GladiaModel;
+    encoding?: 'wav/pcm' | 'wav/alaw' | 'wav/ulaw';
+    sample_rate?: number;
+    bit_depth?: number;
+    channels?: number;
+    custom_metadata?: Record<string, unknown>;
+    endpointing?: number;
+    maximum_duration_without_endpointing?: number;
+    language_config?: {
+        languages?: GladiaLanguage[];
+    };
+    pre_processing?: {
+        audio_enhancer?: boolean;
+        speech_threshold?: number;
+    };
+    realtime_processing?: {
+        custom_vocabulary?: boolean;
+        custom_vocabulary_config?: {
+            vocabulary: {
+                value: string;
+                intensity?: number;
+                pronunciations?: string[];
+                language?: string;
+            }[];
+            default_intensity?: number;
+        };
+        custom_spelling?: boolean;
+        custom_spelling_config?: {
+            spelling_dictionary: Record<string, string[]>;
+        };
+        translation?: boolean;
+        translation_config?: {
+            target_languages: GladiaLanguage[];
+            model?: 'base' | 'enhanced';
+            match_original_utterances?: boolean;
+            lipsync?: boolean;
+            context_adaptation?: boolean;
+            context?: string;
+            informal?: boolean;
+        };
+        named_entity_recognition?: boolean;
+        sentiment_analysis?: boolean;
+    };
+    post_processing?: {
+        summarization?: boolean;
+        summarization_config?: {
+            type?: 'general' | 'bullet_points' | 'concise';
+        };
+        chapterization?: boolean;
+    };
+    messages_config?: {
+        receive_partial_transcripts?: boolean;
+        receive_final_transcripts?: boolean;
+        receive_speech_events?: boolean;
+        receive_pre_processing_events?: boolean;
+        receive_realtime_processing_events?: boolean;
+        receive_post_processing_events?: boolean;
+        receive_acknowledgements?: boolean;
+        receive_errors?: boolean;
+        receive_lifecycle_events?: boolean;
+    };
+    callback?: boolean;
+    callback_config?: {
+        url: string;
+        receive_partial_transcripts?: boolean;
+        receive_final_transcripts?: boolean;
+        receive_speech_events?: boolean;
+        receive_pre_processing_events?: boolean;
+        receive_realtime_processing_events?: boolean;
+        receive_post_processing_events?: boolean;
+        receive_acknowledgements?: boolean;
+        receive_errors?: boolean;
+        receive_lifecycle_events?: boolean;
+    };
+};
+type GladiaSTTArgs = {
+    model: GladiaModel;
+    apiKey?: string;
+    config?: Omit<GladiaConfig, 'model' | 'encoding' | 'sample_rate' | 'channels'>;
+};
+declare class GladiaSTT extends MagmaFlowSpeechToText {
+    private connection;
+    private connectionUrl;
+    private settingUp;
+    private apiKey;
+    private config;
+    private turnBuffer;
+    private utteranceEnded;
+    constructor(args: GladiaSTTArgs);
+    private setup;
+    private connectWS;
+    input(audio: Buffer): void;
+    flush(): void;
+    kill(): void;
+    private handleTranscriptionEvent;
+    private handleUtteranceEnd;
+    private sendOutput;
+    private onOpen;
+    private computeTurns;
+    private handleMessage;
+}
 type DeepgramTTSArgs = {
     client?: DeepgramClient;
 };
@@ -193,4 +302,4 @@ declare class WhisperTTS extends MagmaFlowTextToSpeech {
 declare function splitTextIntoChunks(text: string, targetLength?: number): string[];
-export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenLabsVoice, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, type MagmaFlowSTTOutput, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, type Turn, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };
+export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenLabsVoice, GladiaLanguage, GladiaModel, GladiaSTT, type GladiaSTTArgs, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, type MagmaFlowSTTOutput, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, type Turn, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };

package/dist/voice.js CHANGED Viewed

@@ -1,6 +1,7 @@
 'use strict';
 var sdk = require('@deepgram/sdk');
+var ws = require('ws');
 var hume = require('hume');
 var OpenAI = require('openai');
@@ -693,11 +694,6 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
         } else {
           if (currentTurn) {
             currentTurn.confidence = currentTurnConfidence / currentTurnWordCount;
-            if (currentTurn.confidence < 0.5) {
-              currentTurn.text = "[inaudible]";
-            } else if (currentTurn.confidence < 0.75) {
-              currentTurn.text = `[unclear, confidence=${currentTurn.confidence.toFixed(2)}] ${currentTurn.text}`;
-            }
             turns.push(currentTurn);
           }
           currentTurn = { speaker, text: utterance, confidence: 0 };
@@ -716,6 +712,279 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
     }
   }
 };
+var NATIVE_WEBSOCKET_AVAILABLE = typeof WebSocket !== "undefined";
+var DummyWebSocket = class {
+  url;
+  constructor(url) {
+    this.url = null;
+  }
+  send(data) {
+    console.error("Data send attempted through dummy websocket");
+  }
+  close() {
+  }
+  addEventListener() {
+  }
+  get readyState() {
+    return 3 /* CLOSED */;
+  }
+};
+var QueueWebSocket = class {
+  wsQueue = [];
+  handlingQueue = false;
+  url;
+  ws;
+  constructor(url) {
+    this.url = url;
+    if (this.url) {
+      if (NATIVE_WEBSOCKET_AVAILABLE) {
+        this.ws = new WebSocket(this.url);
+      } else {
+        this.ws = new ws.WebSocket(this.url);
+      }
+    } else {
+      this.ws = new DummyWebSocket(null);
+    }
+    this.ws.addEventListener("open", () => {
+      this.handleQueue();
+    });
+  }
+  connect() {
+    if (this.url) {
+      if (NATIVE_WEBSOCKET_AVAILABLE) {
+        this.ws = new WebSocket(this.url);
+      } else {
+        this.ws = new ws.WebSocket(this.url);
+      }
+    } else {
+      this.ws = new DummyWebSocket(null);
+    }
+  }
+  close(code, reason) {
+    this.ws.close(code, reason);
+  }
+  handleQueue(cb = this.handleQueue.bind(this)) {
+    if (this.handlingQueue) {
+      console.log(`[Gladia] handleQueue: Already running`);
+      return;
+    }
+    this.handlingQueue = true;
+    const data = this.wsQueue.shift();
+    if (!data) {
+      this.handlingQueue = false;
+      return;
+    }
+    if (this.readyState === 1 /* OPEN */) {
+      this.ws.send(data);
+      this.handlingQueue = false;
+      this.handleQueue(cb);
+    } else {
+      this.wsQueue.unshift(data);
+    }
+    this.handlingQueue = false;
+  }
+  send(data) {
+    this.wsQueue.push(data);
+    this.handleQueue();
+  }
+  get readyState() {
+    return this.ws.readyState;
+  }
+};
+// src/voice/speechToText/gladia.ts
+var GladiaModel = /* @__PURE__ */ ((GladiaModel2) => {
+  GladiaModel2["SOLARIA_1"] = "solaria-1";
+  return GladiaModel2;
+})(GladiaModel || {});
+var GladiaLanguage = /* @__PURE__ */ ((GladiaLanguage2) => {
+  GladiaLanguage2["EN"] = "en";
+  return GladiaLanguage2;
+})(GladiaLanguage || {});
+var GladiaSTT = class extends MagmaFlowSpeechToText {
+  connection;
+  connectionUrl = null;
+  settingUp = false;
+  apiKey;
+  config;
+  turnBuffer = [];
+  utteranceEnded = false;
+  constructor(args) {
+    super();
+    this.config = {
+      model: args.model,
+      encoding: "wav/pcm",
+      sample_rate: 48e3,
+      channels: 1,
+      maximum_duration_without_endpointing: 1,
+      ...args.config
+    };
+    if (!args.apiKey && !process.env.GLADIA_API_KEY) {
+      throw new Error("GLADIA_API_KEY not supplied and not found in env");
+    }
+    this.apiKey = args.apiKey ?? process.env.GLADIA_API_KEY;
+    this.connection = new QueueWebSocket(null);
+  }
+  setup() {
+    if (this.settingUp) {
+      return;
+    }
+    this.settingUp = true;
+    if (this.connectionUrl) {
+      this.connectWS();
+      return;
+    }
+    console.log("[Gladia] Setup: Getting connection url");
+    fetch("https://api.gladia.io/v2/live", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "X-Gladia-Key": this.apiKey
+      },
+      body: JSON.stringify({
+        encoding: "wav/pcm",
+        sample_rate: 48e3,
+        bit_depth: 16,
+        channels: 1
+      })
+    }).then(async (response) => {
+      if (!response.ok) {
+        console.error("[Gladia] Could not get WS url");
+        const errorMessage = `${response.status}: ${await response.text() || response.statusText}`;
+        console.error(errorMessage);
+        this.settingUp = false;
+        throw new Error(errorMessage);
+      }
+      const json = await response.json();
+      this.connectionUrl = json.url;
+      this.connectWS();
+    });
+  }
+  connectWS() {
+    if (!this.connectionUrl) {
+      console.log(`[Gladia] ConnectWS: No connection url`);
+      return;
+    }
+    if (this.connection.readyState === 0 /* CONNECTING */) {
+      return;
+    }
+    console.log("[Gladia] ConnectWS: Connecting to", this.connectionUrl);
+    this.connection = new QueueWebSocket(this.connectionUrl);
+    this.settingUp = false;
+    this.connection.ws.addEventListener("error", (event) => {
+      console.log(`[Gladia] Error: ${JSON.stringify(event)}`);
+    });
+    this.connection.ws.addEventListener("close", (event) => {
+      console.log(`[Gladia] Close: ${JSON.stringify(event)}`);
+    });
+    this.connection.ws.addEventListener("open", this.onOpen.bind(this));
+    this.connection.ws.addEventListener("message", this.handleMessage.bind(this));
+  }
+  input(audio) {
+    this.connection.send(audio.buffer);
+    if (this.connection.readyState !== 1 /* OPEN */) {
+      this.setup();
+    }
+  }
+  flush() {
+    console.log("[Gladia] Flush: sending stop_recording messsage");
+    this.connection.send(
+      JSON.stringify({
+        type: "stop_recording"
+      })
+    );
+  }
+  kill() {
+    console.log("[Gladia] Kill: Closing connection");
+    this.connection?.close(1e3);
+    this.connection = new QueueWebSocket(null);
+  }
+  handleTranscriptionEvent(transcriptionEvent) {
+    if (transcriptionEvent.utterance.text.trim() === "") {
+      return;
+    }
+    this.onSpeechDetected();
+    if (transcriptionEvent.is_final) {
+      const turns = this.computeTurns(transcriptionEvent);
+      this.turnBuffer = this.turnBuffer.concat(turns);
+      this.sendOutput();
+    }
+  }
+  handleUtteranceEnd() {
+    this.utteranceEnded = true;
+    this.sendOutput();
+  }
+  sendOutput() {
+    if (!this.utteranceEnded) {
+      return;
+    }
+    if (this.turnBuffer.length === 0) {
+      return;
+    }
+    const text = this.turnBuffer.map((turn) => turn.text).join(" ");
+    let turns = void 0;
+    if (this.turnBuffer.every((turn) => turn.speaker !== void 0 && turn.speaker !== null)) {
+      turns = this.turnBuffer.reduce((acc, turn) => {
+        if (acc.at(-1)?.speaker === turn.speaker) {
+          acc.at(-1).text += turn.text;
+        } else {
+          acc.push(turn);
+        }
+        return acc;
+      }, []);
+    }
+    this.onOutput({
+      text,
+      turns
+    });
+    this.turnBuffer = [];
+    this.utteranceEnded = false;
+  }
+  onOpen() {
+    console.log(`[Gladia] Connected`);
+  }
+  computeTurns(transcript) {
+    try {
+      const nowMs = (/* @__PURE__ */ new Date()).getTime();
+      const durationMs = (transcript.utterance.end - transcript.utterance.start) * 1e3;
+      const startMs = nowMs - durationMs;
+      const turns = [
+        {
+          text: transcript.utterance.text,
+          confidence: transcript.utterance.confidence,
+          durationMs,
+          start: new Date(startMs),
+          end: new Date(nowMs),
+          speaker: transcript.utterance.speaker
+        }
+      ];
+      return turns;
+    } catch (error) {
+      console.error(error);
+      return [];
+    }
+  }
+  handleMessage(event) {
+    const message = JSON.parse(event.data.toString());
+    switch (message.type) {
+      case "audio_chunk":
+        break;
+      case "speech_start":
+        this.utteranceEnded = false;
+        this.onSpeechDetected();
+        break;
+      case "speech_end":
+        this.handleUtteranceEnd();
+        break;
+      case "transcript":
+        this.handleTranscriptionEvent(message.data);
+        break;
+      default:
+        console.log(`[Deepgram] Unhandled event: ${JSON.stringify(event)}`);
+        break;
+    }
+  }
+};
 // src/voice/textToSpeech/base.ts
 var MagmaFlowTextToSpeech = class {
@@ -901,6 +1170,9 @@ exports.DeepgramSTT = DeepgramSTT;
 exports.DeepgramTTS = DeepgramTTS;
 exports.ElevenLabsTTS = ElevenLabsTTS;
 exports.ElevenLabsVoice = ElevenLabsVoice;
+exports.GladiaLanguage = GladiaLanguage;
+exports.GladiaModel = GladiaModel;
+exports.GladiaSTT = GladiaSTT;
 exports.HumeTTS = HumeTTS;
 exports.MagmaFlow = MagmaFlow;
 exports.MagmaFlowSpeechToText = MagmaFlowSpeechToText;

package/dist/voice.mjs CHANGED Viewed

@@ -1,4 +1,5 @@
 import { DeepgramClient, LiveTranscriptionEvents } from '@deepgram/sdk';
+import { WebSocket as WebSocket$1 } from 'ws';
 import { HumeClient } from 'hume';
 import OpenAI from 'openai';
@@ -687,11 +688,6 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
         } else {
           if (currentTurn) {
             currentTurn.confidence = currentTurnConfidence / currentTurnWordCount;
-            if (currentTurn.confidence < 0.5) {
-              currentTurn.text = "[inaudible]";
-            } else if (currentTurn.confidence < 0.75) {
-              currentTurn.text = `[unclear, confidence=${currentTurn.confidence.toFixed(2)}] ${currentTurn.text}`;
-            }
             turns.push(currentTurn);
           }
           currentTurn = { speaker, text: utterance, confidence: 0 };
@@ -710,6 +706,279 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
     }
   }
 };
+var NATIVE_WEBSOCKET_AVAILABLE = typeof WebSocket !== "undefined";
+var DummyWebSocket = class {
+  url;
+  constructor(url) {
+    this.url = null;
+  }
+  send(data) {
+    console.error("Data send attempted through dummy websocket");
+  }
+  close() {
+  }
+  addEventListener() {
+  }
+  get readyState() {
+    return 3 /* CLOSED */;
+  }
+};
+var QueueWebSocket = class {
+  wsQueue = [];
+  handlingQueue = false;
+  url;
+  ws;
+  constructor(url) {
+    this.url = url;
+    if (this.url) {
+      if (NATIVE_WEBSOCKET_AVAILABLE) {
+        this.ws = new WebSocket(this.url);
+      } else {
+        this.ws = new WebSocket$1(this.url);
+      }
+    } else {
+      this.ws = new DummyWebSocket(null);
+    }
+    this.ws.addEventListener("open", () => {
+      this.handleQueue();
+    });
+  }
+  connect() {
+    if (this.url) {
+      if (NATIVE_WEBSOCKET_AVAILABLE) {
+        this.ws = new WebSocket(this.url);
+      } else {
+        this.ws = new WebSocket$1(this.url);
+      }
+    } else {
+      this.ws = new DummyWebSocket(null);
+    }
+  }
+  close(code, reason) {
+    this.ws.close(code, reason);
+  }
+  handleQueue(cb = this.handleQueue.bind(this)) {
+    if (this.handlingQueue) {
+      console.log(`[Gladia] handleQueue: Already running`);
+      return;
+    }
+    this.handlingQueue = true;
+    const data = this.wsQueue.shift();
+    if (!data) {
+      this.handlingQueue = false;
+      return;
+    }
+    if (this.readyState === 1 /* OPEN */) {
+      this.ws.send(data);
+      this.handlingQueue = false;
+      this.handleQueue(cb);
+    } else {
+      this.wsQueue.unshift(data);
+    }
+    this.handlingQueue = false;
+  }
+  send(data) {
+    this.wsQueue.push(data);
+    this.handleQueue();
+  }
+  get readyState() {
+    return this.ws.readyState;
+  }
+};
+// src/voice/speechToText/gladia.ts
+var GladiaModel = /* @__PURE__ */ ((GladiaModel2) => {
+  GladiaModel2["SOLARIA_1"] = "solaria-1";
+  return GladiaModel2;
+})(GladiaModel || {});
+var GladiaLanguage = /* @__PURE__ */ ((GladiaLanguage2) => {
+  GladiaLanguage2["EN"] = "en";
+  return GladiaLanguage2;
+})(GladiaLanguage || {});
+var GladiaSTT = class extends MagmaFlowSpeechToText {
+  connection;
+  connectionUrl = null;
+  settingUp = false;
+  apiKey;
+  config;
+  turnBuffer = [];
+  utteranceEnded = false;
+  constructor(args) {
+    super();
+    this.config = {
+      model: args.model,
+      encoding: "wav/pcm",
+      sample_rate: 48e3,
+      channels: 1,
+      maximum_duration_without_endpointing: 1,
+      ...args.config
+    };
+    if (!args.apiKey && !process.env.GLADIA_API_KEY) {
+      throw new Error("GLADIA_API_KEY not supplied and not found in env");
+    }
+    this.apiKey = args.apiKey ?? process.env.GLADIA_API_KEY;
+    this.connection = new QueueWebSocket(null);
+  }
+  setup() {
+    if (this.settingUp) {
+      return;
+    }
+    this.settingUp = true;
+    if (this.connectionUrl) {
+      this.connectWS();
+      return;
+    }
+    console.log("[Gladia] Setup: Getting connection url");
+    fetch("https://api.gladia.io/v2/live", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "X-Gladia-Key": this.apiKey
+      },
+      body: JSON.stringify({
+        encoding: "wav/pcm",
+        sample_rate: 48e3,
+        bit_depth: 16,
+        channels: 1
+      })
+    }).then(async (response) => {
+      if (!response.ok) {
+        console.error("[Gladia] Could not get WS url");
+        const errorMessage = `${response.status}: ${await response.text() || response.statusText}`;
+        console.error(errorMessage);
+        this.settingUp = false;
+        throw new Error(errorMessage);
+      }
+      const json = await response.json();
+      this.connectionUrl = json.url;
+      this.connectWS();
+    });
+  }
+  connectWS() {
+    if (!this.connectionUrl) {
+      console.log(`[Gladia] ConnectWS: No connection url`);
+      return;
+    }
+    if (this.connection.readyState === 0 /* CONNECTING */) {
+      return;
+    }
+    console.log("[Gladia] ConnectWS: Connecting to", this.connectionUrl);
+    this.connection = new QueueWebSocket(this.connectionUrl);
+    this.settingUp = false;
+    this.connection.ws.addEventListener("error", (event) => {
+      console.log(`[Gladia] Error: ${JSON.stringify(event)}`);
+    });
+    this.connection.ws.addEventListener("close", (event) => {
+      console.log(`[Gladia] Close: ${JSON.stringify(event)}`);
+    });
+    this.connection.ws.addEventListener("open", this.onOpen.bind(this));
+    this.connection.ws.addEventListener("message", this.handleMessage.bind(this));
+  }
+  input(audio) {
+    this.connection.send(audio.buffer);
+    if (this.connection.readyState !== 1 /* OPEN */) {
+      this.setup();
+    }
+  }
+  flush() {
+    console.log("[Gladia] Flush: sending stop_recording messsage");
+    this.connection.send(
+      JSON.stringify({
+        type: "stop_recording"
+      })
+    );
+  }
+  kill() {
+    console.log("[Gladia] Kill: Closing connection");
+    this.connection?.close(1e3);
+    this.connection = new QueueWebSocket(null);
+  }
+  handleTranscriptionEvent(transcriptionEvent) {
+    if (transcriptionEvent.utterance.text.trim() === "") {
+      return;
+    }
+    this.onSpeechDetected();
+    if (transcriptionEvent.is_final) {
+      const turns = this.computeTurns(transcriptionEvent);
+      this.turnBuffer = this.turnBuffer.concat(turns);
+      this.sendOutput();
+    }
+  }
+  handleUtteranceEnd() {
+    this.utteranceEnded = true;
+    this.sendOutput();
+  }
+  sendOutput() {
+    if (!this.utteranceEnded) {
+      return;
+    }
+    if (this.turnBuffer.length === 0) {
+      return;
+    }
+    const text = this.turnBuffer.map((turn) => turn.text).join(" ");
+    let turns = void 0;
+    if (this.turnBuffer.every((turn) => turn.speaker !== void 0 && turn.speaker !== null)) {
+      turns = this.turnBuffer.reduce((acc, turn) => {
+        if (acc.at(-1)?.speaker === turn.speaker) {
+          acc.at(-1).text += turn.text;
+        } else {
+          acc.push(turn);
+        }
+        return acc;
+      }, []);
+    }
+    this.onOutput({
+      text,
+      turns
+    });
+    this.turnBuffer = [];
+    this.utteranceEnded = false;
+  }
+  onOpen() {
+    console.log(`[Gladia] Connected`);
+  }
+  computeTurns(transcript) {
+    try {
+      const nowMs = (/* @__PURE__ */ new Date()).getTime();
+      const durationMs = (transcript.utterance.end - transcript.utterance.start) * 1e3;
+      const startMs = nowMs - durationMs;
+      const turns = [
+        {
+          text: transcript.utterance.text,
+          confidence: transcript.utterance.confidence,
+          durationMs,
+          start: new Date(startMs),
+          end: new Date(nowMs),
+          speaker: transcript.utterance.speaker
+        }
+      ];
+      return turns;
+    } catch (error) {
+      console.error(error);
+      return [];
+    }
+  }
+  handleMessage(event) {
+    const message = JSON.parse(event.data.toString());
+    switch (message.type) {
+      case "audio_chunk":
+        break;
+      case "speech_start":
+        this.utteranceEnded = false;
+        this.onSpeechDetected();
+        break;
+      case "speech_end":
+        this.handleUtteranceEnd();
+        break;
+      case "transcript":
+        this.handleTranscriptionEvent(message.data);
+        break;
+      default:
+        console.log(`[Deepgram] Unhandled event: ${JSON.stringify(event)}`);
+        break;
+    }
+  }
+};
 // src/voice/textToSpeech/base.ts
 var MagmaFlowTextToSpeech = class {
@@ -889,4 +1158,4 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
   }
 };
-export { DeepgramLanguage, DeepgramModel, DeepgramSTT, DeepgramTTS, ElevenLabsTTS, ElevenLabsVoice, HumeTTS, MagmaFlow, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, WhisperTTS, splitTextIntoChunks };
+export { DeepgramLanguage, DeepgramModel, DeepgramSTT, DeepgramTTS, ElevenLabsTTS, ElevenLabsVoice, GladiaLanguage, GladiaModel, GladiaSTT, HumeTTS, MagmaFlow, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, WhisperTTS, splitTextIntoChunks };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pompeii-labs/audio",
-  "version": "0.2.0",
+  "version": "0.2.2",
   "description": "The Audio SDK from Pompeii Labs",
   "keywords": [
     "Pompeii",
@@ -47,6 +47,7 @@
   },
   "devDependencies": {
     "@types/node": "^22.16.0",
+    "@types/ws": "^8.18.1",
     "@typescript-eslint/eslint-plugin": "^6.21.0",
     "@typescript-eslint/parser": "^6.21.0",
     "eslint": "^8.57.1",
@@ -55,5 +56,8 @@
     "prettier": "^3.6.2",
     "tsup": "^8.5.0",
     "typescript": "^5.8.3"
+  },
+  "peerDependencies": {
+    "ws": "^8.0.0"
   }
 }