@micdrop/server 2.0.13 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,9 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Rolebase
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/dist/index.d.mts CHANGED
@@ -4,7 +4,7 @@ import { z } from 'zod';
4
4
  import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
5
5
 
6
6
  declare class Logger {
7
- private readonly name;
7
+ name: string;
8
8
  constructor(name: string);
9
9
  log(...message: any[]): void;
10
10
  }
@@ -145,10 +145,6 @@ declare class MockAgent extends Agent {
145
145
  cancel(): void;
146
146
  }
147
147
 
148
- declare function convertToPCM(audioStream: Readable, sampleRate?: number, bitDepth?: number): PassThrough;
149
- declare function convertToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
150
- declare function convertPCMToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
151
-
152
148
  declare enum MicdropErrorCode {
153
149
  BadRequest = 4400,
154
150
  Unauthorized = 4401,
@@ -160,43 +156,44 @@ declare class MicdropError extends Error {
160
156
  }
161
157
  declare function handleError(socket: WebSocket, error: unknown): void;
162
158
 
163
- declare const MIME_TYPE_TO_EXTENSION: {
164
- readonly 'audio/wav': "wav";
165
- readonly 'audio/ogg': "ogg";
166
- readonly 'audio/mpeg': "mp3";
167
- readonly 'audio/webm': "webm";
168
- readonly 'audio/mp4': "mp4";
169
- readonly 'audio/flac': "flac";
170
- };
171
159
  interface STTEvents {
172
160
  Transcript: [string];
161
+ Failed: [Buffer[]];
173
162
  }
174
163
  declare abstract class STT extends EventEmitter<STTEvents> {
175
- protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
176
164
  logger?: Logger;
177
- transcribe(audioStream: Readable): void;
165
+ abstract transcribe(audioStream: Readable): void;
178
166
  protected log(...message: any[]): void;
179
167
  destroy(): void;
180
- protected get extension(): string;
181
- private detectMimeType;
182
168
  }
183
169
 
184
- /**
185
- * Abstract class for STT, converting stream to file before transcribing
186
- */
187
- declare abstract class FileSTT extends STT {
188
- abstract transcribeFile(file: File): Promise<string>;
189
- transcribe(audioStream: Readable): void;
170
+ declare class MockSTT extends STT {
171
+ private i;
172
+ transcribe(): Promise<void>;
190
173
  }
191
174
 
192
- declare class MockSTT extends FileSTT {
193
- private i;
194
- transcribeFile(file: File): Promise<string>;
175
+ interface FallbackSTTOptions {
176
+ factories: Array<() => STT>;
177
+ }
178
+ declare class FallbackSTT extends STT {
179
+ private readonly options;
180
+ private stt;
181
+ private sttIndex;
182
+ constructor(options: FallbackSTTOptions);
183
+ transcribe(audioStream: Readable): void;
184
+ destroy(): void;
185
+ private startNextSTT;
186
+ private onTranscript;
187
+ private onFailed;
195
188
  }
196
189
 
197
- declare abstract class TTS {
190
+ interface TTSEvents {
191
+ Audio: [Buffer];
192
+ Failed: [string[]];
193
+ }
194
+ declare abstract class TTS extends EventEmitter<TTSEvents> {
198
195
  logger?: Logger;
199
- abstract speak(textStream: Readable): Readable;
196
+ abstract speak(textStream: Readable): void;
200
197
  abstract cancel(): void;
201
198
  protected log(...message: any[]): void;
202
199
  destroy(): void;
@@ -209,6 +206,22 @@ declare class MockTTS extends TTS {
209
206
  cancel(): void;
210
207
  }
211
208
 
209
+ interface FallbackTTSOptions {
210
+ factories: Array<() => TTS>;
211
+ }
212
+ declare class FallbackTTS extends TTS {
213
+ private readonly options;
214
+ private tts;
215
+ private ttsIndex;
216
+ constructor(options: FallbackTTSOptions);
217
+ speak(textStream: Readable): void;
218
+ cancel(): void;
219
+ destroy(): void;
220
+ private startNextTTS;
221
+ private onAudio;
222
+ private onFailed;
223
+ }
224
+
212
225
  interface MicdropConfig {
213
226
  firstMessage?: string;
214
227
  generateFirstMessage?: boolean;
@@ -238,16 +251,15 @@ declare class MicdropServer {
238
251
  private onMute;
239
252
  private onStartSpeaking;
240
253
  private onStopSpeaking;
241
- private onTranscript;
254
+ private onTranscriptSTT;
255
+ private onAudioTTS;
242
256
  private sendFirstMessage;
243
257
  answer(): void;
244
258
  private _answer;
245
259
  speak(message: string | Readable): void;
246
260
  private _speak;
247
- sendAudio(audio: Readable): void;
248
- private _sendAudio;
249
261
  }
250
262
 
251
263
  declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
252
264
 
253
- export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type Tool, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
265
+ export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FallbackSTT, type FallbackSTTOptions, FallbackTTS, type FallbackTTSOptions, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type TTSEvents, type Tool, handleError, waitForParams };
package/dist/index.d.ts CHANGED
@@ -4,7 +4,7 @@ import { z } from 'zod';
4
4
  import WebSocket, { WebSocket as WebSocket$1 } from 'ws';
5
5
 
6
6
  declare class Logger {
7
- private readonly name;
7
+ name: string;
8
8
  constructor(name: string);
9
9
  log(...message: any[]): void;
10
10
  }
@@ -145,10 +145,6 @@ declare class MockAgent extends Agent {
145
145
  cancel(): void;
146
146
  }
147
147
 
148
- declare function convertToPCM(audioStream: Readable, sampleRate?: number, bitDepth?: number): PassThrough;
149
- declare function convertToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
150
- declare function convertPCMToOpus(audioStream: Readable, sampleRate?: number): PassThrough;
151
-
152
148
  declare enum MicdropErrorCode {
153
149
  BadRequest = 4400,
154
150
  Unauthorized = 4401,
@@ -160,43 +156,44 @@ declare class MicdropError extends Error {
160
156
  }
161
157
  declare function handleError(socket: WebSocket, error: unknown): void;
162
158
 
163
- declare const MIME_TYPE_TO_EXTENSION: {
164
- readonly 'audio/wav': "wav";
165
- readonly 'audio/ogg': "ogg";
166
- readonly 'audio/mpeg': "mp3";
167
- readonly 'audio/webm': "webm";
168
- readonly 'audio/mp4': "mp4";
169
- readonly 'audio/flac': "flac";
170
- };
171
159
  interface STTEvents {
172
160
  Transcript: [string];
161
+ Failed: [Buffer[]];
173
162
  }
174
163
  declare abstract class STT extends EventEmitter<STTEvents> {
175
- protected mimeType?: keyof typeof MIME_TYPE_TO_EXTENSION;
176
164
  logger?: Logger;
177
- transcribe(audioStream: Readable): void;
165
+ abstract transcribe(audioStream: Readable): void;
178
166
  protected log(...message: any[]): void;
179
167
  destroy(): void;
180
- protected get extension(): string;
181
- private detectMimeType;
182
168
  }
183
169
 
184
- /**
185
- * Abstract class for STT, converting stream to file before transcribing
186
- */
187
- declare abstract class FileSTT extends STT {
188
- abstract transcribeFile(file: File): Promise<string>;
189
- transcribe(audioStream: Readable): void;
170
+ declare class MockSTT extends STT {
171
+ private i;
172
+ transcribe(): Promise<void>;
190
173
  }
191
174
 
192
- declare class MockSTT extends FileSTT {
193
- private i;
194
- transcribeFile(file: File): Promise<string>;
175
+ interface FallbackSTTOptions {
176
+ factories: Array<() => STT>;
177
+ }
178
+ declare class FallbackSTT extends STT {
179
+ private readonly options;
180
+ private stt;
181
+ private sttIndex;
182
+ constructor(options: FallbackSTTOptions);
183
+ transcribe(audioStream: Readable): void;
184
+ destroy(): void;
185
+ private startNextSTT;
186
+ private onTranscript;
187
+ private onFailed;
195
188
  }
196
189
 
197
- declare abstract class TTS {
190
+ interface TTSEvents {
191
+ Audio: [Buffer];
192
+ Failed: [string[]];
193
+ }
194
+ declare abstract class TTS extends EventEmitter<TTSEvents> {
198
195
  logger?: Logger;
199
- abstract speak(textStream: Readable): Readable;
196
+ abstract speak(textStream: Readable): void;
200
197
  abstract cancel(): void;
201
198
  protected log(...message: any[]): void;
202
199
  destroy(): void;
@@ -209,6 +206,22 @@ declare class MockTTS extends TTS {
209
206
  cancel(): void;
210
207
  }
211
208
 
209
+ interface FallbackTTSOptions {
210
+ factories: Array<() => TTS>;
211
+ }
212
+ declare class FallbackTTS extends TTS {
213
+ private readonly options;
214
+ private tts;
215
+ private ttsIndex;
216
+ constructor(options: FallbackTTSOptions);
217
+ speak(textStream: Readable): void;
218
+ cancel(): void;
219
+ destroy(): void;
220
+ private startNextTTS;
221
+ private onAudio;
222
+ private onFailed;
223
+ }
224
+
212
225
  interface MicdropConfig {
213
226
  firstMessage?: string;
214
227
  generateFirstMessage?: boolean;
@@ -238,16 +251,15 @@ declare class MicdropServer {
238
251
  private onMute;
239
252
  private onStartSpeaking;
240
253
  private onStopSpeaking;
241
- private onTranscript;
254
+ private onTranscriptSTT;
255
+ private onAudioTTS;
242
256
  private sendFirstMessage;
243
257
  answer(): void;
244
258
  private _answer;
245
259
  speak(message: string | Readable): void;
246
260
  private _speak;
247
- sendAudio(audio: Readable): void;
248
- private _sendAudio;
249
261
  }
250
262
 
251
263
  declare function waitForParams<CallParams>(socket: WebSocket$1, validate: (params: any) => CallParams): Promise<CallParams>;
252
264
 
253
- export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FileSTT, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type Tool, convertPCMToOpus, convertToOpus, convertToPCM, handleError, waitForParams };
265
+ export { AUTO_END_CALL_PROMPT, AUTO_END_CALL_TOOL_NAME, AUTO_IGNORE_USER_NOISE_PROMPT, AUTO_IGNORE_USER_NOISE_TOOL_NAME, AUTO_SEMANTIC_TURN_PROMPT, AUTO_SEMANTIC_TURN_TOOL_NAME, Agent, type AgentEvents, type AgentOptions, type DeepPartial, type ExtractJsonOptions, type ExtractOptions, type ExtractTagOptions, FallbackSTT, type FallbackSTTOptions, FallbackTTS, type FallbackTTSOptions, Logger, type MicdropAnswerMetadata, type MicdropCallSummary, MicdropClientCommands, type MicdropConfig, type MicdropConversation, type MicdropConversationItem, type MicdropConversationMessage, type MicdropConversationToolCall, type MicdropConversationToolResult, MicdropError, MicdropErrorCode, MicdropServer, MicdropServerCommands, type MicdropToolCall, MockAgent, MockSTT, MockTTS, STT, type STTEvents, TTS, type TTSEvents, type Tool, handleError, waitForParams };
package/dist/index.js CHANGED
@@ -37,7 +37,8 @@ __export(index_exports, {
37
37
  AUTO_SEMANTIC_TURN_PROMPT: () => AUTO_SEMANTIC_TURN_PROMPT,
38
38
  AUTO_SEMANTIC_TURN_TOOL_NAME: () => AUTO_SEMANTIC_TURN_TOOL_NAME,
39
39
  Agent: () => Agent,
40
- FileSTT: () => FileSTT,
40
+ FallbackSTT: () => FallbackSTT,
41
+ FallbackTTS: () => FallbackTTS,
41
42
  Logger: () => Logger,
42
43
  MicdropClientCommands: () => MicdropClientCommands,
43
44
  MicdropError: () => MicdropError,
@@ -49,9 +50,6 @@ __export(index_exports, {
49
50
  MockTTS: () => MockTTS,
50
51
  STT: () => STT,
51
52
  TTS: () => TTS,
52
- convertPCMToOpus: () => convertPCMToOpus,
53
- convertToOpus: () => convertToOpus,
54
- convertToPCM: () => convertToPCM,
55
53
  handleError: () => handleError,
56
54
  waitForParams: () => waitForParams
57
55
  });
@@ -276,41 +274,6 @@ var MockAgent = class extends Agent {
276
274
  }
277
275
  };
278
276
 
279
- // src/audio-convert.ts
280
- var import_ffmpeg = __toESM(require("@ffmpeg-installer/ffmpeg"));
281
- var import_fluent_ffmpeg = __toESM(require("fluent-ffmpeg"));
282
- var import_stream2 = require("stream");
283
- import_fluent_ffmpeg.default.setFfmpegPath(import_ffmpeg.default.path);
284
- function convertToPCM(audioStream, sampleRate = 16e3, bitDepth = 16) {
285
- const pcmStream = new import_stream2.PassThrough();
286
- (0, import_fluent_ffmpeg.default)(audioStream).audioChannels(1).audioFrequency(sampleRate).audioCodec(`pcm_s${bitDepth}le`).format(`s${bitDepth}le`).on("error", (error) => {
287
- console.error("Error converting audio stream:", error.message);
288
- }).pipe(pcmStream);
289
- return pcmStream;
290
- }
291
- function convertToOpus(audioStream, sampleRate = 16e3) {
292
- const webmStream = new import_stream2.PassThrough();
293
- ffmpegToOpus((0, import_fluent_ffmpeg.default)(audioStream), sampleRate).pipe(webmStream);
294
- return webmStream;
295
- }
296
- function convertPCMToOpus(audioStream, sampleRate = 16e3) {
297
- const webmStream = new import_stream2.PassThrough();
298
- ffmpegToOpus((0, import_fluent_ffmpeg.default)(audioStream), sampleRate).inputFormat("s16le").inputOptions(["-f s16le", "-ar 16000", "-ac 1"]).pipe(webmStream);
299
- return webmStream;
300
- }
301
- function ffmpegToOpus(ffmpegCommand, sampleRate = 16e3) {
302
- return ffmpegCommand.audioChannels(1).audioFrequency(sampleRate).audioCodec("libopus").format("webm").outputOptions([
303
- "-application audio",
304
- `-ac 1`,
305
- `-ar ${sampleRate}`,
306
- `-b:a 64k`,
307
- `-f webm`,
308
- `-map_metadata -1`
309
- ]).on("error", (error) => {
310
- console.error("Error converting to Opus: ", error.message);
311
- });
312
- }
313
-
314
277
  // src/errors.ts
315
278
  var MicdropErrorCode = /* @__PURE__ */ ((MicdropErrorCode2) => {
316
279
  MicdropErrorCode2[MicdropErrorCode2["BadRequest"] = 4400] = "BadRequest";
@@ -346,7 +309,7 @@ var Logger = class {
346
309
  };
347
310
 
348
311
  // src/MicdropServer.ts
349
- var import_stream3 = require("stream");
312
+ var import_stream2 = require("stream");
350
313
 
351
314
  // src/types.ts
352
315
  var MicdropClientCommands = /* @__PURE__ */ ((MicdropClientCommands2) => {
@@ -409,8 +372,12 @@ var MicdropServer = class {
409
372
  this.onAudioChunk(message);
410
373
  }
411
374
  };
412
- this.onTranscript = async (transcript) => {
375
+ this.onTranscriptSTT = async (transcript) => {
413
376
  if (!this.config) return;
377
+ if (transcript === "") {
378
+ this.socket?.send("SkipAnswer" /* SkipAnswer */);
379
+ return;
380
+ }
414
381
  this.log(`User transcript: "${transcript}"`);
415
382
  this.config.agent.addUserMessage(transcript);
416
383
  if (!this.currentUserStream) {
@@ -419,10 +386,16 @@ var MicdropServer = class {
419
386
  this.answer();
420
387
  }
421
388
  };
389
+ this.onAudioTTS = (audio) => {
390
+ if (!this.socket) return;
391
+ this.log(`Send audio chunk (${audio.byteLength} bytes)`);
392
+ this.socket.send(audio);
393
+ };
422
394
  this.socket = socket;
423
395
  this.config = config;
424
396
  this.log(`Call started`);
425
- this.config.stt.on("Transcript", this.onTranscript);
397
+ this.config.stt.on("Transcript", this.onTranscriptSTT);
398
+ this.config.tts.on("Audio", this.onAudioTTS);
426
399
  this.config.agent.on(
427
400
  "Message",
428
401
  (message) => this.socket?.send(
@@ -493,7 +466,7 @@ var MicdropServer = class {
493
466
  if (!this.config) return;
494
467
  this.userSpeechChunks = 0;
495
468
  this.currentUserStream?.end();
496
- this.currentUserStream = new import_stream3.PassThrough();
469
+ this.currentUserStream = new import_stream2.PassThrough();
497
470
  this.config.stt.transcribe(this.currentUserStream);
498
471
  this.cancel();
499
472
  }
@@ -558,61 +531,20 @@ var MicdropServer = class {
558
531
  if (!this.socket || !this.config) return;
559
532
  let textStream;
560
533
  if (typeof message === "string") {
561
- const stream = new import_stream3.PassThrough();
534
+ const stream = new import_stream2.PassThrough();
562
535
  stream.write(message);
563
536
  stream.end();
564
537
  textStream = stream;
565
538
  } else {
566
539
  textStream = message;
567
540
  }
568
- const audio = this.config.tts.speak(textStream);
569
- await this._sendAudio(audio);
570
- }
571
- sendAudio(audio) {
572
- this.queueOperation(async () => {
573
- await this._sendAudio(audio);
574
- });
575
- }
576
- async _sendAudio(audio) {
577
- if (!this.socket) return;
578
- if (!audio.readable) {
579
- this.log("Non readable audio, skipping", audio);
580
- return;
581
- }
582
- await new Promise((resolve, reject) => {
583
- audio.on("data", (chunk) => {
584
- this.log(`Send audio chunk (${chunk.byteLength} bytes)`);
585
- this.socket?.send(chunk);
586
- });
587
- audio.on("error", (error) => {
588
- this.log("Error in audio stream", error);
589
- reject(error);
590
- });
591
- audio.on("end", () => {
592
- this.log("Audio stream ended");
593
- resolve();
594
- });
595
- });
541
+ this.config.tts.speak(textStream);
596
542
  }
597
543
  };
598
544
 
599
545
  // src/stt/STT.ts
600
546
  var import_eventemitter32 = require("eventemitter3");
601
- var MIME_TYPE_TO_EXTENSION = {
602
- "audio/wav": "wav",
603
- "audio/ogg": "ogg",
604
- "audio/mpeg": "mp3",
605
- "audio/webm": "webm",
606
- "audio/mp4": "mp4",
607
- "audio/flac": "flac"
608
- };
609
547
  var STT = class extends import_eventemitter32.EventEmitter {
610
- // Set stream of audio to transcribe
611
- transcribe(audioStream) {
612
- audioStream.once("data", (chunk) => {
613
- this.mimeType = this.detectMimeType(chunk);
614
- });
615
- }
616
548
  log(...message) {
617
549
  this.logger?.log(...message);
618
550
  }
@@ -620,67 +552,72 @@ var STT = class extends import_eventemitter32.EventEmitter {
620
552
  this.log("Destroyed");
621
553
  this.removeAllListeners();
622
554
  }
623
- get extension() {
624
- return this.mimeType && MIME_TYPE_TO_EXTENSION[this.mimeType] || "bin";
625
- }
626
- detectMimeType(chunk) {
627
- if (!chunk || chunk.byteLength === 0) {
628
- throw new Error("Unable to detect mime type (empty chunk)");
629
- }
630
- const arr = new Uint8Array(chunk);
631
- if (arr[0] === 26 && arr[1] === 69 && arr[2] === 223 && arr[3] === 163) {
632
- return "audio/webm";
633
- }
634
- if (arr[0] === 79 && arr[1] === 103 && arr[2] === 103 && arr[3] === 83) {
635
- return "audio/ogg";
636
- }
637
- if (arr[0] === 82 && arr[1] === 73 && arr[2] === 70 && arr[3] === 70 && arr[8] === 87 && arr[9] === 65 && arr[10] === 86 && arr[11] === 69) {
638
- return "audio/wav";
639
- }
640
- if (arr[0] === 73 && arr[1] === 68 && arr[2] === 51) {
641
- return "audio/mpeg";
642
- }
643
- if (arr[4] === 102 && arr[5] === 116 && arr[6] === 121 && arr[7] === 112) {
644
- return "audio/mp4";
645
- }
646
- if (arr[0] === 102 && arr[1] === 76 && arr[2] === 97 && arr[3] === 67) {
647
- return "audio/flac";
648
- }
649
- this.log("Unable to detect mime type, using default", chunk);
650
- return "audio/wav";
651
- }
652
- };
653
-
654
- // src/stt/FileSTT.ts
655
- var FileSTT = class extends STT {
656
- transcribe(audioStream) {
657
- super.transcribe(audioStream);
658
- this.log("Converting stream to file...");
659
- const chunks = [];
660
- audioStream.on("data", (chunk) => {
661
- chunks.push(chunk);
662
- });
663
- audioStream.on("end", async () => {
664
- if (chunks.length === 0) return;
665
- const arrayBuffer = Buffer.concat(chunks);
666
- const file = new File([arrayBuffer], `audio.${this.extension}`, {
667
- type: this.mimeType
668
- });
669
- this.log("Transcribing file...");
670
- const transcript = await this.transcribeFile(file);
671
- this.emit("Transcript", transcript);
672
- });
673
- }
674
555
  };
675
556
 
676
557
  // src/stt/MockSTT.ts
677
- var MockSTT = class extends FileSTT {
558
+ var MockSTT = class extends STT {
678
559
  constructor() {
679
560
  super(...arguments);
680
561
  this.i = 0;
681
562
  }
682
- async transcribeFile(file) {
683
- return `User Message ${this.i++}`;
563
+ async transcribe() {
564
+ setTimeout(() => {
565
+ this.emit("Transcript", `User Message ${this.i++}`);
566
+ }, 300);
567
+ }
568
+ };
569
+
570
+ // src/stt/FallbackSTT.ts
571
+ var import_stream3 = require("stream");
572
+ var FallbackSTT = class extends STT {
573
+ // Start at -1 because we need to increment it before using it
574
+ constructor(options) {
575
+ super();
576
+ this.options = options;
577
+ this.stt = null;
578
+ this.sttIndex = -1;
579
+ this.onTranscript = (transcript) => {
580
+ this.emit("Transcript", transcript);
581
+ };
582
+ this.onFailed = (chunks) => {
583
+ this.log("STT failed, trying next STT");
584
+ this.startNextSTT();
585
+ if (chunks.length > 0) {
586
+ this.log("Sending audio chunks again");
587
+ const stream = new import_stream3.PassThrough();
588
+ this.stt?.transcribe(stream);
589
+ chunks.forEach((chunk) => stream.write(chunk));
590
+ stream.end();
591
+ }
592
+ };
593
+ if (this.options.factories.length === 0) {
594
+ throw new Error("FallbackSTT: No factories provided");
595
+ }
596
+ this.startNextSTT();
597
+ }
598
+ transcribe(audioStream) {
599
+ this.stt?.transcribe(audioStream);
600
+ }
601
+ destroy() {
602
+ super.destroy();
603
+ this.stt?.destroy();
604
+ this.stt = null;
605
+ this.sttIndex = -1;
606
+ }
607
+ startNextSTT() {
608
+ this.sttIndex++;
609
+ if (this.sttIndex >= this.options.factories.length) {
610
+ this.sttIndex = 0;
611
+ }
612
+ this.stt?.destroy();
613
+ this.stt = this.options.factories[this.sttIndex]();
614
+ this.stt.on("Transcript", this.onTranscript);
615
+ this.stt.on("Failed", this.onFailed);
616
+ setTimeout(() => {
617
+ if (this.stt && this.logger) {
618
+ this.stt.logger = new Logger(this.stt.constructor.name);
619
+ }
620
+ }, 0);
684
621
  }
685
622
  };
686
623
 
@@ -689,7 +626,8 @@ var fs = __toESM(require("fs"));
689
626
  var import_stream4 = require("stream");
690
627
 
691
628
  // src/tts/TTS.ts
692
- var TTS = class {
629
+ var import_eventemitter33 = require("eventemitter3");
630
+ var TTS = class extends import_eventemitter33.EventEmitter {
693
631
  log(...message) {
694
632
  this.logger?.log(...message);
695
633
  }
@@ -722,6 +660,63 @@ var MockTTS = class extends TTS {
722
660
  }
723
661
  };
724
662
 
663
+ // src/tts/FallbackTTS.ts
664
+ var import_stream5 = require("stream");
665
+ var FallbackTTS = class extends TTS {
666
+ // Start at -1 because we need to increment it before using it
667
+ constructor(options) {
668
+ super();
669
+ this.options = options;
670
+ this.tts = null;
671
+ this.ttsIndex = -1;
672
+ this.onAudio = (audio) => {
673
+ this.emit("Audio", audio);
674
+ };
675
+ this.onFailed = (chunks) => {
676
+ this.log("TTS failed, trying next TTS");
677
+ this.startNextTTS();
678
+ if (chunks.length > 0) {
679
+ this.log("Sending text chunks again");
680
+ const stream = new import_stream5.PassThrough();
681
+ this.tts?.speak(stream);
682
+ chunks.forEach((chunk) => stream.write(chunk));
683
+ stream.end();
684
+ }
685
+ };
686
+ if (this.options.factories.length === 0) {
687
+ throw new Error("FallbackTTS: No factories provided");
688
+ }
689
+ this.startNextTTS();
690
+ }
691
+ speak(textStream) {
692
+ this.tts?.speak(textStream);
693
+ }
694
+ cancel() {
695
+ this.tts?.cancel();
696
+ }
697
+ destroy() {
698
+ super.destroy();
699
+ this.tts?.destroy();
700
+ this.tts = null;
701
+ this.ttsIndex = -1;
702
+ }
703
+ startNextTTS() {
704
+ this.ttsIndex++;
705
+ if (this.ttsIndex >= this.options.factories.length) {
706
+ this.ttsIndex = 0;
707
+ }
708
+ this.tts?.destroy();
709
+ this.tts = this.options.factories[this.ttsIndex]();
710
+ this.tts.on("Audio", this.onAudio);
711
+ this.tts.on("Failed", this.onFailed);
712
+ setTimeout(() => {
713
+ if (this.tts && this.logger) {
714
+ this.tts.logger = new Logger(this.tts.constructor.name);
715
+ }
716
+ }, 0);
717
+ }
718
+ };
719
+
725
720
  // src/waitForParams.ts
726
721
  async function waitForParams(socket, validate) {
727
722
  return new Promise((resolve, reject) => {
@@ -750,7 +745,8 @@ async function waitForParams(socket, validate) {
750
745
  AUTO_SEMANTIC_TURN_PROMPT,
751
746
  AUTO_SEMANTIC_TURN_TOOL_NAME,
752
747
  Agent,
753
- FileSTT,
748
+ FallbackSTT,
749
+ FallbackTTS,
754
750
  Logger,
755
751
  MicdropClientCommands,
756
752
  MicdropError,
@@ -762,9 +758,6 @@ async function waitForParams(socket, validate) {
762
758
  MockTTS,
763
759
  STT,
764
760
  TTS,
765
- convertPCMToOpus,
766
- convertToOpus,
767
- convertToPCM,
768
761
  handleError,
769
762
  waitForParams
770
763
  });