npm - @micdrop/server - Versions diffs - 2.0.13 → 2.2.0 - Mend

@micdrop/server 2.0.13 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/LICENSE ADDED

@@ -0,0 +1,9 @@
+MIT License
+Copyright (c) 2022 Rolebase
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package/dist/index.d.mts CHANGED

@@ -4,7 +4,7 @@ import { z } from 'zod';
 import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
 declare class Logger {
-    private readonly name;
+    name: string;
     constructor(name: string);
     log(...message: any[]): void;
 }
@@ -145,10 +145,6 @@ declare class MockAgent extends Agent {
     cancel(): void;
 }
-declare function convertToPCM(audioStream: Readable, sampleRate?: number, bitDepth?: number): PassThrough;
-declare function convertToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
-declare function convertPCMToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
 declare enum MicdropErrorCode {
     BadRequest = 4400,
     Unauthorized = 4401,
@@ -160,43 +156,44 @@ declare class MicdropError extends Error {
 }
 declare function handleError(socket: WebSocket, error: unknown): void;
-declare const MIME_TYPE_TO_EXTENSION: {
-    readonly 'audio/wav': "wav";
-    readonly 'audio/ogg': "ogg";
-    readonly 'audio/mpeg': "mp3";
-    readonly 'audio/webm': "webm";
-    readonly 'audio/mp4': "mp4";
-    readonly 'audio/flac': "flac";
-};
 interface STTEvents {
     Transcript: [string];
+    Failed: [Buffer[]];
 }
 declare abstract class STT extends EventEmitter<STTEvents> {
-    protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
     logger?: Logger;
-    transcribe(audioStream: Readable): void;
+    abstract transcribe(audioStream: Readable): void;
     protected log(...message: any[]): void;
     destroy(): void;
-    protected get extension(): string;
-    private detectMimeType;
 }
-/**
- * Abstract class for STT, converting stream to file before transcribing
- */
-declare abstract class FileSTT extends STT {
-    abstract transcribeFile(file: File): Promise<string>;
-    transcribe(audioStream: Readable): void;
+declare class MockSTT extends STT {
+    private i;
+    transcribe(): Promise<void>;
 }
-declare class MockSTT extends FileSTT {
-    private i;
-    transcribeFile(file: File): Promise<string>;
+interface FallbackSTTOptions {
+    factories: Array<() => STT>;
+}
+declare class FallbackSTT extends STT {
+    private readonly options;
+    private stt;
+    private sttIndex;
+    constructor(options: FallbackSTTOptions);
+    transcribe(audioStream: Readable): void;
+    destroy(): void;
+    private startNextSTT;
+    private onTranscript;
+    private onFailed;
 }
-declare abstract class TTS {
+interface TTSEvents {
+    Audio: [Buffer];
+    Failed: [string[]];
+}
+declare abstract class TTS extends EventEmitter<TTSEvents> {
     logger?: Logger;
-    abstract speak(textStream: Readable): Readable;
+    abstract speak(textStream: Readable): void;
     abstract cancel(): void;
     protected log(...message: any[]): void;
     destroy(): void;
@@ -209,6 +206,22 @@ declare class MockTTS extends TTS {
     cancel(): void;
 }
+interface FallbackTTSOptions {
+    factories: Array<() => TTS>;
+}
+declare class FallbackTTS extends TTS {
+    private readonly options;
+    private tts;
+    private ttsIndex;
+    constructor(options: FallbackTTSOptions);
+    speak(textStream: Readable): void;
+    cancel(): void;
+    destroy(): void;
+    private startNextTTS;
+    private onAudio;
+    private onFailed;
+}
 interface MicdropConfig {
     firstMessage?: string;
     generateFirstMessage?: boolean;
@@ -238,16 +251,15 @@ declare class MicdropServer {
     private onMute;
     private onStartSpeaking;
     private onStopSpeaking;
-    private onTranscript;
+    private onTranscriptSTT;
+    private onAudioTTS;
     private sendFirstMessage;
     answer(): void;
     private _answer;
     speak(message: string | Readable): void;
     private _speak;
-    sendAudio(audio: Readable): void;
-    private _sendAudio;
 }
 declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
-export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type Tool, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
+export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FallbackSTT, type FallbackSTTOptions, FallbackTTS, type FallbackTTSOptions, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type TTSEvents, type Tool, handleError, waitForParams };

package/dist/index.d.ts CHANGED

@@ -4,7 +4,7 @@ import { z } from 'zod';
 import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
 declare class Logger {
-    private readonly name;
+    name: string;
     constructor(name: string);
     log(...message: any[]): void;
 }
@@ -145,10 +145,6 @@ declare class MockAgent extends Agent {
     cancel(): void;
 }
-declare function convertToPCM(audioStream: Readable, sampleRate?: number, bitDepth?: number): PassThrough;
-declare function convertToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
-declare function convertPCMToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
 declare enum MicdropErrorCode {
     BadRequest = 4400,
     Unauthorized = 4401,
@@ -160,43 +156,44 @@ declare class MicdropError extends Error {
 }
 declare function handleError(socket: WebSocket, error: unknown): void;
-declare const MIME_TYPE_TO_EXTENSION: {
-    readonly 'audio/wav': "wav";
-    readonly 'audio/ogg': "ogg";
-    readonly 'audio/mpeg': "mp3";
-    readonly 'audio/webm': "webm";
-    readonly 'audio/mp4': "mp4";
-    readonly 'audio/flac': "flac";
-};
 interface STTEvents {
     Transcript: [string];
+    Failed: [Buffer[]];
 }
 declare abstract class STT extends EventEmitter<STTEvents> {
-    protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
     logger?: Logger;
-    transcribe(audioStream: Readable): void;
+    abstract transcribe(audioStream: Readable): void;
     protected log(...message: any[]): void;
     destroy(): void;
-    protected get extension(): string;
-    private detectMimeType;
 }
-/**
- * Abstract class for STT, converting stream to file before transcribing
- */
-declare abstract class FileSTT extends STT {
-    abstract transcribeFile(file: File): Promise<string>;
-    transcribe(audioStream: Readable): void;
+declare class MockSTT extends STT {
+    private i;
+    transcribe(): Promise<void>;
 }
-declare class MockSTT extends FileSTT {
-    private i;
-    transcribeFile(file: File): Promise<string>;
+interface FallbackSTTOptions {
+    factories: Array<() => STT>;
+}
+declare class FallbackSTT extends STT {
+    private readonly options;
+    private stt;
+    private sttIndex;
+    constructor(options: FallbackSTTOptions);
+    transcribe(audioStream: Readable): void;
+    destroy(): void;
+    private startNextSTT;
+    private onTranscript;
+    private onFailed;
 }
-declare abstract class TTS {
+interface TTSEvents {
+    Audio: [Buffer];
+    Failed: [string[]];
+}
+declare abstract class TTS extends EventEmitter<TTSEvents> {
     logger?: Logger;
-    abstract speak(textStream: Readable): Readable;
+    abstract speak(textStream: Readable): void;
     abstract cancel(): void;
     protected log(...message: any[]): void;
     destroy(): void;
@@ -209,6 +206,22 @@ declare class MockTTS extends TTS {
     cancel(): void;
 }
+interface FallbackTTSOptions {
+    factories: Array<() => TTS>;
+}
+declare class FallbackTTS extends TTS {
+    private readonly options;
+    private tts;
+    private ttsIndex;
+    constructor(options: FallbackTTSOptions);
+    speak(textStream: Readable): void;
+    cancel(): void;
+    destroy(): void;
+    private startNextTTS;
+    private onAudio;
+    private onFailed;
+}
 interface MicdropConfig {
     firstMessage?: string;
     generateFirstMessage?: boolean;
@@ -238,16 +251,15 @@ declare class MicdropServer {
     private onMute;
     private onStartSpeaking;
     private onStopSpeaking;
-    private onTranscript;
+    private onTranscriptSTT;
+    private onAudioTTS;
     private sendFirstMessage;
     answer(): void;
     private _answer;
     speak(message: string | Readable): void;
     private _speak;
-    sendAudio(audio: Readable): void;
-    private _sendAudio;
 }
 declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
-export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type Tool, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
+export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FallbackSTT, type FallbackSTTOptions, FallbackTTS, type FallbackTTSOptions, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type TTSEvents, type Tool, handleError, waitForParams };

package/dist/index.js CHANGED

@@ -37,7 +37,8 @@ __export(index_exports, {
   AUTO_SEMANTIC_TURN_PROMPT: () => AUTO_SEMANTIC_TURN_PROMPT,
   AUTO_SEMANTIC_TURN_TOOL_NAME: () => AUTO_SEMANTIC_TURN_TOOL_NAME,
   Agent: () => Agent,
-  FileSTT: () => FileSTT,
+  FallbackSTT: () => FallbackSTT,
+  FallbackTTS: () => FallbackTTS,
   Logger: () => Logger,
   MicdropClientCommands: () => MicdropClientCommands,
   MicdropError: () => MicdropError,
@@ -49,9 +50,6 @@ __export(index_exports, {
   MockTTS: () => MockTTS,
   STT: () => STT,
   TTS: () => TTS,
-  convertPCMToOpus: () => convertPCMToOpus,
-  convertToOpus: () => convertToOpus,
-  convertToPCM: () => convertToPCM,
   handleError: () => handleError,
   waitForParams: () => waitForParams
 });
@@ -276,41 +274,6 @@ var MockAgent = class extends Agent {
   }
 };
-// src/audio-convert.ts
-var import_ffmpeg = __toESM(require("@ffmpeg-installer/ffmpeg"));
-var import_fluent_ffmpeg = __toESM(require("fluent-ffmpeg"));
-var import_stream2 = require("stream");
-import_fluent_ffmpeg.default.setFfmpegPath(import_ffmpeg.default.path);
-function convertToPCM(audioStream, sampleRate = 16e3, bitDepth = 16) {
-  const pcmStream = new import_stream2.PassThrough();
-  (0, import_fluent_ffmpeg.default)(audioStream).audioChannels(1).audioFrequency(sampleRate).audioCodec(`pcm_s${bitDepth}le`).format(`s${bitDepth}le`).on("error", (error) => {
-    console.error("Error converting audio stream:", error.message);
-  }).pipe(pcmStream);
-  return pcmStream;
-}
-function convertToOpus(audioStream, sampleRate = 16e3) {
-  const webmStream = new import_stream2.PassThrough();
-  ffmpegToOpus((0, import_fluent_ffmpeg.default)(audioStream), sampleRate).pipe(webmStream);
-  return webmStream;
-}
-function convertPCMToOpus(audioStream, sampleRate = 16e3) {
-  const webmStream = new import_stream2.PassThrough();
-  ffmpegToOpus((0, import_fluent_ffmpeg.default)(audioStream), sampleRate).inputFormat("s16le").inputOptions(["-f s16le", "-ar 16000", "-ac 1"]).pipe(webmStream);
-  return webmStream;
-}
-function ffmpegToOpus(ffmpegCommand, sampleRate = 16e3) {
-  return ffmpegCommand.audioChannels(1).audioFrequency(sampleRate).audioCodec("libopus").format("webm").outputOptions([
-    "-application audio",
-    `-ac 1`,
-    `-ar ${sampleRate}`,
-    `-b:a 64k`,
-    `-f webm`,
-    `-map_metadata -1`
-  ]).on("error", (error) => {
-    console.error("Error converting to Opus: ", error.message);
-  });
-}
 // src/errors.ts
 var MicdropErrorCode = /* @__PURE__ */ ((MicdropErrorCode2) => {
   MicdropErrorCode2[MicdropErrorCode2["BadRequest"] = 4400] = "BadRequest";
@@ -346,7 +309,7 @@ var Logger = class {
 };
 // src/MicdropServer.ts
-var import_stream3 = require("stream");
+var import_stream2 = require("stream");
 // src/types.ts
 var MicdropClientCommands = /* @__PURE__ */ ((MicdropClientCommands2) => {
@@ -409,8 +372,12 @@ var MicdropServer = class {
         this.onAudioChunk(message);
       }
     };
-    this.onTranscript = async (transcript) => {
+    this.onTranscriptSTT = async (transcript) => {
       if (!this.config) return;
+      if (transcript === "") {
+        this.socket?.send("SkipAnswer" /* SkipAnswer */);
+        return;
+      }
       this.log(`User transcript: "${transcript}"`);
       this.config.agent.addUserMessage(transcript);
       if (!this.currentUserStream) {
@@ -419,10 +386,16 @@ var MicdropServer = class {
         this.answer();
       }
     };
+    this.onAudioTTS = (audio) => {
+      if (!this.socket) return;
+      this.log(`Send audio chunk (${audio.byteLength} bytes)`);
+      this.socket.send(audio);
+    };
     this.socket = socket;
     this.config = config;
     this.log(`Call started`);
-    this.config.stt.on("Transcript", this.onTranscript);
+    this.config.stt.on("Transcript", this.onTranscriptSTT);
+    this.config.tts.on("Audio", this.onAudioTTS);
     this.config.agent.on(
       "Message",
       (message) => this.socket?.send(
@@ -493,7 +466,7 @@ var MicdropServer = class {
     if (!this.config) return;
     this.userSpeechChunks = 0;
     this.currentUserStream?.end();
-    this.currentUserStream = new import_stream3.PassThrough();
+    this.currentUserStream = new import_stream2.PassThrough();
     this.config.stt.transcribe(this.currentUserStream);
     this.cancel();
   }
@@ -558,61 +531,20 @@ var MicdropServer = class {
     if (!this.socket || !this.config) return;
     let textStream;
     if (typeof message === "string") {
-      const stream = new import_stream3.PassThrough();
+      const stream = new import_stream2.PassThrough();
       stream.write(message);
       stream.end();
       textStream = stream;
     } else {
       textStream = message;
     }
-    const audio = this.config.tts.speak(textStream);
-    await this._sendAudio(audio);
-  }
-  sendAudio(audio) {
-    this.queueOperation(async () => {
-      await this._sendAudio(audio);
-    });
-  }
-  async _sendAudio(audio) {
-    if (!this.socket) return;
-    if (!audio.readable) {
-      this.log("Non readable audio, skipping", audio);
-      return;
-    }
-    await new Promise((resolve, reject) => {
-      audio.on("data", (chunk) => {
-        this.log(`Send audio chunk (${chunk.byteLength} bytes)`);
-        this.socket?.send(chunk);
-      });
-      audio.on("error", (error) => {
-        this.log("Error in audio stream", error);
-        reject(error);
-      });
-      audio.on("end", () => {
-        this.log("Audio stream ended");
-        resolve();
-      });
-    });
+    this.config.tts.speak(textStream);
   }
 };
 // src/stt/STT.ts
 var import_eventemitter32 = require("eventemitter3");
-var MIME_TYPE_TO_EXTENSION = {
-  "audio/wav": "wav",
-  "audio/ogg": "ogg",
-  "audio/mpeg": "mp3",
-  "audio/webm": "webm",
-  "audio/mp4": "mp4",
-  "audio/flac": "flac"
-};
 var STT = class extends import_eventemitter32.EventEmitter {
-  // Set stream of audio to transcribe
-  transcribe(audioStream) {
-    audioStream.once("data", (chunk) => {
-      this.mimeType = this.detectMimeType(chunk);
-    });
-  }
   log(...message) {
     this.logger?.log(...message);
   }
@@ -620,67 +552,72 @@ var STT = class extends import_eventemitter32.EventEmitter {
     this.log("Destroyed");
     this.removeAllListeners();
   }
-  get extension() {
-    return this.mimeType && MIME_TYPE_TO_EXTENSION[this.mimeType] || "bin";
-  }
-  detectMimeType(chunk) {
-    if (!chunk || chunk.byteLength === 0) {
-      throw new Error("Unable to detect mime type (empty chunk)");
-    }
-    const arr = new Uint8Array(chunk);
-    if (arr[0] === 26 && arr[1] === 69 && arr[2] === 223 && arr[3] === 163) {
-      return "audio/webm";
-    }
-    if (arr[0] === 79 && arr[1] === 103 && arr[2] === 103 && arr[3] === 83) {
-      return "audio/ogg";
-    }
-    if (arr[0] === 82 && arr[1] === 73 && arr[2] === 70 && arr[3] === 70 && arr[8] === 87 && arr[9] === 65 && arr[10] === 86 && arr[11] === 69) {
-      return "audio/wav";
-    }
-    if (arr[0] === 73 && arr[1] === 68 && arr[2] === 51) {
-      return "audio/mpeg";
-    }
-    if (arr[4] === 102 && arr[5] === 116 && arr[6] === 121 && arr[7] === 112) {
-      return "audio/mp4";
-    }
-    if (arr[0] === 102 && arr[1] === 76 && arr[2] === 97 && arr[3] === 67) {
-      return "audio/flac";
-    }
-    this.log("Unable to detect mime type, using default", chunk);
-    return "audio/wav";
-  }
-};
-// src/stt/FileSTT.ts
-var FileSTT = class extends STT {
-  transcribe(audioStream) {
-    super.transcribe(audioStream);
-    this.log("Converting stream to file...");
-    const chunks = [];
-    audioStream.on("data", (chunk) => {
-      chunks.push(chunk);
-    });
-    audioStream.on("end", async () => {
-      if (chunks.length === 0) return;
-      const arrayBuffer = Buffer.concat(chunks);
-      const file = new File([arrayBuffer], `audio.${this.extension}`, {
-        type: this.mimeType
-      });
-      this.log("Transcribing file...");
-      const transcript = await this.transcribeFile(file);
-      this.emit("Transcript", transcript);
-    });
-  }
 };
 // src/stt/MockSTT.ts
-var MockSTT = class extends FileSTT {
+var MockSTT = class extends STT {
   constructor() {
     super(...arguments);
     this.i = 0;
   }
-  async transcribeFile(file) {
-    return `User Message ${this.i++}`;
+  async transcribe() {
+    setTimeout(() => {
+      this.emit("Transcript", `User Message ${this.i++}`);
+    }, 300);
+  }
+};
+// src/stt/FallbackSTT.ts
+var import_stream3 = require("stream");
+var FallbackSTT = class extends STT {
+  // Start at -1 because we need to increment it before using it
+  constructor(options) {
+    super();
+    this.options = options;
+    this.stt = null;
+    this.sttIndex = -1;
+    this.onTranscript = (transcript) => {
+      this.emit("Transcript", transcript);
+    };
+    this.onFailed = (chunks) => {
+      this.log("STT failed, trying next STT");
+      this.startNextSTT();
+      if (chunks.length > 0) {
+        this.log("Sending audio chunks again");
+        const stream = new import_stream3.PassThrough();
+        this.stt?.transcribe(stream);
+        chunks.forEach((chunk) => stream.write(chunk));
+        stream.end();
+      }
+    };
+    if (this.options.factories.length === 0) {
+      throw new Error("FallbackSTT: No factories provided");
+    }
+    this.startNextSTT();
+  }
+  transcribe(audioStream) {
+    this.stt?.transcribe(audioStream);
+  }
+  destroy() {
+    super.destroy();
+    this.stt?.destroy();
+    this.stt = null;
+    this.sttIndex = -1;
+  }
+  startNextSTT() {
+    this.sttIndex++;
+    if (this.sttIndex >= this.options.factories.length) {
+      this.sttIndex = 0;
+    }
+    this.stt?.destroy();
+    this.stt = this.options.factories[this.sttIndex]();
+    this.stt.on("Transcript", this.onTranscript);
+    this.stt.on("Failed", this.onFailed);
+    setTimeout(() => {
+      if (this.stt && this.logger) {
+        this.stt.logger = new Logger(this.stt.constructor.name);
+      }
+    }, 0);
   }
 };
@@ -689,7 +626,8 @@ var fs = __toESM(require("fs"));
 var import_stream4 = require("stream");
 // src/tts/TTS.ts
-var TTS = class {
+var import_eventemitter33 = require("eventemitter3");
+var TTS = class extends import_eventemitter33.EventEmitter {
   log(...message) {
     this.logger?.log(...message);
   }
@@ -722,6 +660,63 @@ var MockTTS = class extends TTS {
   }
 };
+// src/tts/FallbackTTS.ts
+var import_stream5 = require("stream");
+var FallbackTTS = class extends TTS {
+  // Start at -1 because we need to increment it before using it
+  constructor(options) {
+    super();
+    this.options = options;
+    this.tts = null;
+    this.ttsIndex = -1;
+    this.onAudio = (audio) => {
+      this.emit("Audio", audio);
+    };
+    this.onFailed = (chunks) => {
+      this.log("TTS failed, trying next TTS");
+      this.startNextTTS();
+      if (chunks.length > 0) {
+        this.log("Sending text chunks again");
+        const stream = new import_stream5.PassThrough();
+        this.tts?.speak(stream);
+        chunks.forEach((chunk) => stream.write(chunk));
+        stream.end();
+      }
+    };
+    if (this.options.factories.length === 0) {
+      throw new Error("FallbackTTS: No factories provided");
+    }
+    this.startNextTTS();
+  }
+  speak(textStream) {
+    this.tts?.speak(textStream);
+  }
+  cancel() {
+    this.tts?.cancel();
+  }
+  destroy() {
+    super.destroy();
+    this.tts?.destroy();
+    this.tts = null;
+    this.ttsIndex = -1;
+  }
+  startNextTTS() {
+    this.ttsIndex++;
+    if (this.ttsIndex >= this.options.factories.length) {
+      this.ttsIndex = 0;
+    }
+    this.tts?.destroy();
+    this.tts = this.options.factories[this.ttsIndex]();
+    this.tts.on("Audio", this.onAudio);
+    this.tts.on("Failed", this.onFailed);
+    setTimeout(() => {
+      if (this.tts && this.logger) {
+        this.tts.logger = new Logger(this.tts.constructor.name);
+      }
+    }, 0);
+  }
+};
 // src/waitForParams.ts
 async function waitForParams(socket, validate) {
   return new Promise((resolve, reject) => {
@@ -750,7 +745,8 @@ async function waitForParams(socket, validate) {
   AUTO_SEMANTIC_TURN_PROMPT,
   AUTO_SEMANTIC_TURN_TOOL_NAME,
   Agent,
-  FileSTT,
+  FallbackSTT,
+  FallbackTTS,
   Logger,
   MicdropClientCommands,
   MicdropError,
@@ -762,9 +758,6 @@ async function waitForParams(socket, validate) {
   MockTTS,
   STT,
   TTS,
-  convertPCMToOpus,
-  convertToOpus,
-  convertToPCM,
   handleError,
   waitForParams
 });