@micdrop/server 1.7.1 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -30,271 +30,500 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
30
30
  // src/index.ts
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
- CallClientCommands: () => CallClientCommands,
34
- CallError: () => CallError,
35
- CallErrorCode: () => CallErrorCode,
36
- CallServer: () => CallServer,
37
- CallServerCommands: () => CallServerCommands,
33
+ Agent: () => Agent,
34
+ FileSTT: () => FileSTT,
35
+ Logger: () => Logger,
36
+ MicdropClientCommands: () => MicdropClientCommands,
37
+ MicdropError: () => MicdropError,
38
+ MicdropErrorCode: () => MicdropErrorCode,
39
+ MicdropServer: () => MicdropServer,
40
+ MicdropServerCommands: () => MicdropServerCommands,
41
+ MockAgent: () => MockAgent,
42
+ MockSTT: () => MockSTT,
43
+ MockTTS: () => MockTTS,
44
+ STT: () => STT,
45
+ TTS: () => TTS,
46
+ convertPCMToOpus: () => convertPCMToOpus,
47
+ convertToOpus: () => convertToOpus,
48
+ convertToPCM: () => convertToPCM,
38
49
  handleError: () => handleError,
39
50
  waitForParams: () => waitForParams
40
51
  });
41
52
  module.exports = __toCommonJS(index_exports);
42
53
 
43
- // src/CallServer.ts
44
- var fs = __toESM(require("fs"));
54
+ // src/agent/Agent.ts
55
+ var import_eventemitter3 = __toESM(require("eventemitter3"));
56
+ var Agent = class extends import_eventemitter3.default {
57
+ constructor(options) {
58
+ super();
59
+ this.options = options;
60
+ this.conversation = [{ role: "system", content: options.systemPrompt }];
61
+ }
62
+ addUserMessage(text) {
63
+ this.addMessage("user", text);
64
+ }
65
+ addAssistantMessage(text) {
66
+ this.addMessage("assistant", text);
67
+ }
68
+ addMessage(role, text) {
69
+ this.log(`Adding ${role} message to conversation: ${text}`);
70
+ const message = {
71
+ role,
72
+ content: text
73
+ };
74
+ this.conversation.push(message);
75
+ this.emit("Message", message);
76
+ }
77
+ endCall() {
78
+ this.log("Ending call");
79
+ this.emit("EndCall");
80
+ }
81
+ cancelLastUserMessage() {
82
+ this.log("Cancelling last user message");
83
+ const lastMessage = this.conversation[this.conversation.length - 1];
84
+ if (lastMessage?.role !== "user") return;
85
+ this.conversation.pop();
86
+ this.emit("CancelLastUserMessage");
87
+ }
88
+ cancelLastAssistantMessage() {
89
+ this.log("Cancelling last assistant message");
90
+ const lastMessage = this.conversation[this.conversation.length - 1];
91
+ if (lastMessage?.role !== "assistant") return;
92
+ this.conversation.pop();
93
+ this.emit("CancelLastAssistantMessage");
94
+ }
95
+ skipAnswer() {
96
+ this.log("Skipping answer");
97
+ this.emit("SkipAnswer");
98
+ }
99
+ createTextPromise() {
100
+ const result = {};
101
+ result.promise = new Promise((resolve, reject) => {
102
+ result.resolve = resolve;
103
+ result.reject = reject;
104
+ });
105
+ return result;
106
+ }
107
+ log(...message) {
108
+ this.logger?.log(...message);
109
+ }
110
+ destroy() {
111
+ this.log("Destroyed");
112
+ this.removeAllListeners();
113
+ this.cancel();
114
+ }
115
+ };
116
+
117
+ // src/agent/MockAgent.ts
118
+ var import_stream = require("stream");
119
+ var MockAgent = class extends Agent {
120
+ constructor() {
121
+ super({ systemPrompt: "" });
122
+ this.i = 0;
123
+ }
124
+ answer() {
125
+ const stream = new import_stream.PassThrough();
126
+ const textPromise = this.createTextPromise();
127
+ const message = `Assistant Message ${this.i++}`;
128
+ this.addAssistantMessage(message);
129
+ stream.write(message);
130
+ stream.end();
131
+ textPromise.resolve(message);
132
+ return { message: textPromise.promise, stream };
133
+ }
134
+ cancel() {
135
+ }
136
+ };
137
+
138
+ // src/audio-convert.ts
139
+ var import_ffmpeg = __toESM(require("@ffmpeg-installer/ffmpeg"));
140
+ var import_fluent_ffmpeg = __toESM(require("fluent-ffmpeg"));
141
+ var import_stream2 = require("stream");
142
+ import_fluent_ffmpeg.default.setFfmpegPath(import_ffmpeg.default.path);
143
+ function convertToPCM(audioStream, sampleRate = 16e3, bitDepth = 16) {
144
+ const pcmStream = new import_stream2.PassThrough();
145
+ (0, import_fluent_ffmpeg.default)(audioStream).audioChannels(1).audioFrequency(sampleRate).audioCodec(`pcm_s${bitDepth}le`).format(`s${bitDepth}le`).on("error", (error) => {
146
+ console.error("Error converting audio stream:", error.message);
147
+ }).pipe(pcmStream);
148
+ return pcmStream;
149
+ }
150
+ function convertToOpus(audioStream, sampleRate = 16e3) {
151
+ const webmStream = new import_stream2.PassThrough();
152
+ ffmpegToOpus((0, import_fluent_ffmpeg.default)(audioStream), sampleRate).pipe(webmStream);
153
+ return webmStream;
154
+ }
155
+ function convertPCMToOpus(audioStream, sampleRate = 16e3) {
156
+ const webmStream = new import_stream2.PassThrough();
157
+ ffmpegToOpus((0, import_fluent_ffmpeg.default)(audioStream), sampleRate).inputFormat("s16le").inputOptions(["-f s16le", "-ar 16000", "-ac 1"]).pipe(webmStream);
158
+ return webmStream;
159
+ }
160
+ function ffmpegToOpus(ffmpegCommand, sampleRate = 16e3) {
161
+ return ffmpegCommand.audioChannels(1).audioFrequency(sampleRate).audioCodec("libopus").format("webm").outputOptions([
162
+ "-application audio",
163
+ `-ac 1`,
164
+ `-ar ${sampleRate}`,
165
+ `-b:a 64k`,
166
+ `-f webm`,
167
+ `-map_metadata -1`
168
+ ]).on("error", (error) => {
169
+ console.error("Error converting to Opus: ", error.message);
170
+ });
171
+ }
172
+
173
+ // src/errors.ts
174
+ var MicdropErrorCode = /* @__PURE__ */ ((MicdropErrorCode2) => {
175
+ MicdropErrorCode2[MicdropErrorCode2["BadRequest"] = 4400] = "BadRequest";
176
+ MicdropErrorCode2[MicdropErrorCode2["Unauthorized"] = 4401] = "Unauthorized";
177
+ MicdropErrorCode2[MicdropErrorCode2["NotFound"] = 4404] = "NotFound";
178
+ return MicdropErrorCode2;
179
+ })(MicdropErrorCode || {});
180
+ var MicdropError = class extends Error {
181
+ constructor(code, message) {
182
+ super(message);
183
+ this.code = code;
184
+ }
185
+ };
186
+ function handleError(socket, error) {
187
+ if (error instanceof MicdropError) {
188
+ socket.close(error.code, error.message);
189
+ } else {
190
+ console.error(error);
191
+ socket.close(1011);
192
+ }
193
+ socket.terminate();
194
+ }
195
+
196
+ // src/Logger.ts
197
+ var Logger = class {
198
+ constructor(name) {
199
+ this.name = name;
200
+ }
201
+ log(...message) {
202
+ const time = process.uptime().toFixed(3);
203
+ console.log(`[${this.name} ${time}]`, ...message);
204
+ }
205
+ };
206
+
207
+ // src/MicdropServer.ts
208
+ var import_stream3 = require("stream");
45
209
 
46
210
  // src/types.ts
47
- var CallClientCommands = /* @__PURE__ */ ((CallClientCommands2) => {
48
- CallClientCommands2["StartSpeaking"] = "StartSpeaking";
49
- CallClientCommands2["StopSpeaking"] = "StopSpeaking";
50
- CallClientCommands2["Mute"] = "Mute";
51
- return CallClientCommands2;
52
- })(CallClientCommands || {});
53
- var CallServerCommands = /* @__PURE__ */ ((CallServerCommands2) => {
54
- CallServerCommands2["Message"] = "Message";
55
- CallServerCommands2["CancelLastAssistantMessage"] = "CancelLastAssistantMessage";
56
- CallServerCommands2["CancelLastUserMessage"] = "CancelLastUserMessage";
57
- CallServerCommands2["SkipAnswer"] = "SkipAnswer";
58
- CallServerCommands2["EnableSpeakerStreaming"] = "EnableSpeakerStreaming";
59
- CallServerCommands2["EndCall"] = "EndCall";
60
- return CallServerCommands2;
61
- })(CallServerCommands || {});
211
+ var MicdropClientCommands = /* @__PURE__ */ ((MicdropClientCommands2) => {
212
+ MicdropClientCommands2["StartSpeaking"] = "StartSpeaking";
213
+ MicdropClientCommands2["StopSpeaking"] = "StopSpeaking";
214
+ MicdropClientCommands2["Mute"] = "Mute";
215
+ return MicdropClientCommands2;
216
+ })(MicdropClientCommands || {});
217
+ var MicdropServerCommands = /* @__PURE__ */ ((MicdropServerCommands2) => {
218
+ MicdropServerCommands2["Message"] = "Message";
219
+ MicdropServerCommands2["CancelLastAssistantMessage"] = "CancelLastAssistantMessage";
220
+ MicdropServerCommands2["CancelLastUserMessage"] = "CancelLastUserMessage";
221
+ MicdropServerCommands2["SkipAnswer"] = "SkipAnswer";
222
+ MicdropServerCommands2["EndCall"] = "EndCall";
223
+ return MicdropServerCommands2;
224
+ })(MicdropServerCommands || {});
62
225
 
63
- // src/CallServer.ts
64
- var CallServer = class {
226
+ // src/MicdropServer.ts
227
+ var MicdropServer = class {
65
228
  constructor(socket, config) {
66
229
  this.socket = null;
67
230
  this.config = null;
68
231
  this.startTime = Date.now();
69
- this.lastDebug = Date.now();
70
- // When user is speaking, we're waiting to chunks or to stop
71
- this.isSpeaking = false;
72
- // Chunks of user speech since user started speaking
73
- this.chunks = [];
74
- // Enable speaker streaming
75
- this.speakerStreamingEnabled = false;
232
+ this.onClose = () => {
233
+ if (!this.config) return;
234
+ this.log("Connection closed");
235
+ const duration = Math.round((Date.now() - this.startTime) / 1e3);
236
+ this.config.agent.destroy();
237
+ this.config.stt.destroy();
238
+ this.config.tts.destroy();
239
+ this.config.onEnd?.({
240
+ conversation: this.config.agent.conversation.slice(1),
241
+ // Remove system message
242
+ duration
243
+ });
244
+ this.socket = null;
245
+ this.config = null;
246
+ };
247
+ this.onMessage = async (message) => {
248
+ if (message.byteLength === 0) return;
249
+ if (!Buffer.isBuffer(message)) {
250
+ this.log("Message is not a buffer");
251
+ return;
252
+ }
253
+ if (message.byteLength < 15) {
254
+ const cmd = message.toString();
255
+ this.log(`Command: ${cmd}`);
256
+ if (cmd === "StartSpeaking" /* StartSpeaking */) {
257
+ await this.onStartSpeaking();
258
+ } else if (cmd === "Mute" /* Mute */) {
259
+ await this.onMute();
260
+ } else if (cmd === "StopSpeaking" /* StopSpeaking */) {
261
+ await this.onStopSpeaking();
262
+ }
263
+ } else if (this.currentUserStream) {
264
+ this.log(`Received chunk (${message.byteLength} bytes)`);
265
+ this.currentUserStream.write(message);
266
+ }
267
+ };
268
+ this.onTranscript = async (transcript) => {
269
+ if (!this.config) return;
270
+ this.log(`User transcript: "${transcript}"`);
271
+ this.config.agent.addUserMessage(transcript);
272
+ if (!this.currentUserStream) {
273
+ this.log("User stopped speaking, answering");
274
+ this.answer();
275
+ }
276
+ };
76
277
  this.socket = socket;
77
278
  this.config = config;
78
- this.conversation = [{ role: "system", content: config.systemPrompt }];
79
279
  this.log(`Call started`);
80
- if (config.firstMessage) {
81
- this.answer(config.firstMessage);
82
- } else {
83
- this.config.generateAnswer(this.conversation).then((answer) => this.answer(answer)).catch((error) => {
84
- console.error("[CallServer]", error);
85
- socket?.close();
86
- });
87
- }
88
- socket.on("close", this.onClose.bind(this));
89
- socket.on("message", this.onMessage.bind(this));
280
+ this.config.stt.on("Transcript", this.onTranscript);
281
+ this.config.agent.on(
282
+ "Message",
283
+ (message) => this.socket?.send(
284
+ `${"Message" /* Message */} ${JSON.stringify(message)}`
285
+ )
286
+ );
287
+ this.config.agent.on(
288
+ "CancelLastUserMessage",
289
+ () => this.socket?.send("CancelLastUserMessage" /* CancelLastUserMessage */)
290
+ );
291
+ this.config.agent.on(
292
+ "CancelLastAssistantMessage",
293
+ () => this.socket?.send("CancelLastAssistantMessage" /* CancelLastAssistantMessage */)
294
+ );
295
+ this.config.agent.on(
296
+ "SkipAnswer",
297
+ () => this.socket?.send("SkipAnswer" /* SkipAnswer */)
298
+ );
299
+ this.config.agent.on(
300
+ "EndCall",
301
+ () => this.socket?.send("EndCall" /* EndCall */)
302
+ );
303
+ this.sendFirstMessage();
304
+ socket.on("close", this.onClose);
305
+ socket.on("message", this.onMessage);
90
306
  }
91
- // Reset conversation
92
- resetConversation(conversation) {
93
- this.log("Reset conversation");
94
- this.conversation = conversation;
307
+ log(...message) {
308
+ this.logger?.log(...message);
95
309
  }
96
- abortProcessing() {
97
- if (!this.processing) return;
98
- this.processing.aborted = true;
99
- this.processing = void 0;
310
+ cancel() {
311
+ this.config?.tts.cancel();
312
+ this.config?.agent.cancel();
100
313
  }
101
- addMessage(message) {
102
- if (!this.socket || !this.config) return;
103
- this.conversation.push(message);
104
- this.socket.send(`${"Message" /* Message */} ${JSON.stringify(message)}`);
105
- this.config.onMessage?.(message);
314
+ async onMute() {
315
+ this.currentUserStream?.end();
316
+ this.currentUserStream = void 0;
317
+ this.cancel();
106
318
  }
107
- async sendAudio(audio, processing, onAbort) {
108
- if (!this.socket) return;
109
- if (processing.aborted) {
110
- onAbort?.();
111
- return;
112
- }
113
- if (Buffer.isBuffer(audio) || audio instanceof ArrayBuffer) {
114
- this.log(`Send audio: (${audio.byteLength} bytes)`);
115
- this.socket.send(audio);
116
- } else if ("paused" in audio) {
117
- if (!this.speakerStreamingEnabled) {
118
- this.socket.send("EnableSpeakerStreaming" /* EnableSpeakerStreaming */);
119
- this.speakerStreamingEnabled = true;
120
- }
121
- for await (const chunk of audio) {
122
- if (processing.aborted) {
123
- onAbort?.();
124
- return;
125
- }
126
- this.log(`Send audio chunk (${chunk.length} bytes)`);
127
- this.socket.send(chunk);
128
- }
129
- } else {
130
- this.log(`Unknown audio type: ${audio}`);
131
- }
132
- }
133
- onClose() {
319
+ async onStartSpeaking() {
134
320
  if (!this.config) return;
135
- this.log("Connection closed");
136
- this.abortProcessing();
137
- const duration = Math.round((Date.now() - this.startTime) / 1e3);
138
- this.config.onEnd?.({
139
- conversation: this.conversation.slice(1),
140
- // Remove system message
141
- duration
142
- });
143
- this.socket = null;
144
- this.config = null;
321
+ this.currentUserStream?.end();
322
+ this.currentUserStream = new import_stream3.PassThrough();
323
+ this.config.stt.transcribe(this.currentUserStream);
324
+ this.cancel();
145
325
  }
146
- async onMessage(message) {
147
- if (!Buffer.isBuffer(message)) {
148
- console.warn(`[CallServer] Message is not a buffer`);
149
- return;
326
+ async onStopSpeaking() {
327
+ this.currentUserStream?.end();
328
+ this.currentUserStream = void 0;
329
+ const conversation = this.config?.agent.conversation;
330
+ if (conversation && conversation[conversation.length - 1].role === "user") {
331
+ this.log(
332
+ "User stopped speaking and a transcript already exists, answering"
333
+ );
334
+ this.answer();
150
335
  }
151
- if (message.byteLength < 15) {
152
- const cmd = message.toString();
153
- this.log(`Command: ${cmd}`);
154
- if (cmd === "StartSpeaking" /* StartSpeaking */) {
155
- this.isSpeaking = true;
156
- this.abortProcessing();
157
- } else if (cmd === "Mute" /* Mute */) {
158
- this.isSpeaking = false;
159
- this.chunks.length = 0;
160
- this.abortProcessing();
161
- } else if (cmd === "StopSpeaking" /* StopSpeaking */) {
162
- this.isSpeaking = false;
163
- await this.onStopSpeaking();
336
+ }
337
+ async sendFirstMessage() {
338
+ if (!this.config) return;
339
+ try {
340
+ if (this.config.firstMessage) {
341
+ this.config.agent.addAssistantMessage(this.config.firstMessage);
342
+ await this.speak(this.config.firstMessage);
343
+ } else if (this.config.generateFirstMessage) {
344
+ await this.answer();
164
345
  }
165
- } else if (Buffer.isBuffer(message) && this.isSpeaking) {
166
- this.log(`Received chunk (${message.byteLength} bytes)`);
167
- this.chunks.push(message);
346
+ } catch (error) {
347
+ console.error("[MicdropServer]", error);
348
+ this.socket?.send("SkipAnswer" /* SkipAnswer */);
168
349
  }
169
350
  }
170
- async onStopSpeaking() {
351
+ async answer() {
171
352
  if (!this.config) return;
172
- if (this.chunks.length === 0) return;
173
- this.abortProcessing();
174
- const processing = this.processing = { aborted: false };
175
- const blob = new Blob(this.chunks, { type: "audio/ogg" });
176
- this.chunks.length = 0;
353
+ this.cancel();
177
354
  try {
178
- if (this.config.debugSaveSpeech) {
179
- const filename = `speech-${Date.now()}.ogg`;
180
- fs.writeFileSync(filename, Buffer.from(await blob.arrayBuffer()));
181
- this.log(`Saved speech: ${filename}`);
182
- }
183
- const transcript = await this.config.speech2Text(
184
- blob,
185
- this.conversation[this.conversation.length - 1]?.content
186
- );
187
- if (!transcript) {
188
- this.log("Ignoring empty transcript");
189
- this.socket?.send("SkipAnswer" /* SkipAnswer */);
190
- return;
191
- }
192
- this.log("User transcript:", transcript);
193
- this.addMessage({ role: "user", content: transcript });
194
- if (processing.aborted) {
195
- this.log("Answer aborted, no answer generated");
196
- return;
197
- }
198
- const answer = await this.config.generateAnswer(this.conversation);
199
- if (processing.aborted) {
200
- this.log("Answer aborted, ignoring answer");
201
- return;
202
- }
203
- await this.answer(answer, processing);
355
+ const { stream } = this.config.agent.answer();
356
+ await this.speak(stream);
204
357
  } catch (error) {
205
- console.error("[CallServer]", error);
358
+ console.error("[MicdropServer]", error);
206
359
  this.socket?.send("SkipAnswer" /* SkipAnswer */);
207
360
  }
208
361
  }
209
- // Add assistant message and send to client with audio (TTS)
210
- async answer(message, processing) {
362
+ // Run text-to-speech and send to client
363
+ async speak(message) {
211
364
  if (!this.socket || !this.config) return;
212
- if (!processing) {
213
- this.abortProcessing();
214
- processing = this.processing = { aborted: false };
215
- }
365
+ let textStream;
216
366
  if (typeof message === "string") {
217
- message = { role: "assistant", content: message };
367
+ const stream = new import_stream3.PassThrough();
368
+ stream.write(message);
369
+ stream.end();
370
+ textStream = stream;
371
+ } else {
372
+ textStream = message;
218
373
  }
219
- if (message.commands?.cancelLastUserMessage) {
220
- this.log("Cancelling last user message");
221
- const lastMessage = this.conversation[this.conversation.length - 1];
222
- if (lastMessage?.role === "user") {
223
- this.conversation.pop();
224
- this.socket?.send("CancelLastUserMessage" /* CancelLastUserMessage */);
225
- }
374
+ const audio = this.config.tts.speak(textStream);
375
+ await this.sendAudio(audio);
376
+ }
377
+ async sendAudio(audio) {
378
+ if (!this.socket) return;
379
+ if (!audio.readable) {
380
+ this.log("Non readable audio, skipping", audio);
226
381
  return;
227
382
  }
228
- if (!message.content.length || message.commands?.skipAnswer) {
229
- this.log("Skipping answer");
230
- this.socket?.send("SkipAnswer" /* SkipAnswer */);
231
- return;
383
+ audio.on("data", (chunk) => {
384
+ this.log(`Send audio chunk (${chunk.byteLength} bytes)`);
385
+ this.socket?.send(chunk);
386
+ });
387
+ audio.on("error", (error) => {
388
+ this.log("Error in audio stream", error);
389
+ });
390
+ audio.on("end", () => {
391
+ this.log("Audio stream ended");
392
+ });
393
+ }
394
+ };
395
+
396
+ // src/stt/STT.ts
397
+ var import_eventemitter32 = __toESM(require("eventemitter3"));
398
+ var MIME_TYPE_TO_EXTENSION = {
399
+ "audio/wav": "wav",
400
+ "audio/ogg": "ogg",
401
+ "audio/mpeg": "mp3",
402
+ "audio/webm": "webm",
403
+ "audio/mp4": "mp4",
404
+ "audio/flac": "flac"
405
+ };
406
+ var STT = class extends import_eventemitter32.default {
407
+ // Set stream of audio to transcribe
408
+ transcribe(audioStream) {
409
+ audioStream.once("data", (chunk) => {
410
+ this.mimeType = this.detectMimeType(chunk);
411
+ });
412
+ }
413
+ log(...message) {
414
+ this.logger?.log(...message);
415
+ }
416
+ destroy() {
417
+ this.log("Destroyed");
418
+ this.removeAllListeners();
419
+ }
420
+ get extension() {
421
+ return this.mimeType && MIME_TYPE_TO_EXTENSION[this.mimeType] || "bin";
422
+ }
423
+ detectMimeType(chunk) {
424
+ if (!chunk || chunk.byteLength === 0) {
425
+ throw new Error("Unable to detect mime type (empty chunk)");
232
426
  }
233
- this.log("Assistant message:", message);
234
- this.addMessage(message);
235
- if (!this.config.disableTTS) {
236
- try {
237
- const onAbort = () => {
238
- this.log("Answer aborted, removing last assistant message");
239
- const lastMessage = this.conversation[this.conversation.length - 1];
240
- if (lastMessage?.role === "assistant") {
241
- this.conversation.pop();
242
- this.socket?.send("CancelLastAssistantMessage" /* CancelLastAssistantMessage */);
243
- }
244
- };
245
- if (processing.aborted) {
246
- onAbort();
247
- return;
248
- }
249
- const audio = await this.config.text2Speech(message.content);
250
- await this.sendAudio(audio, processing, onAbort);
251
- } catch (error) {
252
- console.error("[CallServer]", error);
253
- this.socket?.send("SkipAnswer" /* SkipAnswer */);
254
- }
427
+ const arr = new Uint8Array(chunk);
428
+ if (arr[0] === 26 && arr[1] === 69 && arr[2] === 223 && arr[3] === 163) {
429
+ return "audio/webm";
430
+ }
431
+ if (arr[0] === 79 && arr[1] === 103 && arr[2] === 103 && arr[3] === 83) {
432
+ return "audio/ogg";
433
+ }
434
+ if (arr[0] === 82 && arr[1] === 73 && arr[2] === 70 && arr[3] === 70 && arr[8] === 87 && arr[9] === 65 && arr[10] === 86 && arr[11] === 69) {
435
+ return "audio/wav";
255
436
  }
256
- if (message.commands?.endCall) {
257
- this.log("Call ended");
258
- this.socket.send("EndCall" /* EndCall */);
437
+ if (arr[0] === 73 && arr[1] === 68 && arr[2] === 51) {
438
+ return "audio/mpeg";
259
439
  }
440
+ if (arr[4] === 102 && arr[5] === 116 && arr[6] === 121 && arr[7] === 112) {
441
+ return "audio/mp4";
442
+ }
443
+ if (arr[0] === 102 && arr[1] === 76 && arr[2] === 97 && arr[3] === 67) {
444
+ return "audio/flac";
445
+ }
446
+ this.log("Unable to detect mime type, using default", chunk);
447
+ return "audio/wav";
260
448
  }
261
- log(...message) {
262
- if (!this.config?.debugLog) return;
263
- const now = Date.now();
264
- const delta = now - this.lastDebug;
265
- this.lastDebug = now;
266
- console.log(`[Debug +${delta}ms]`, ...message);
449
+ };
450
+
451
+ // src/stt/FileSTT.ts
452
+ var FileSTT = class extends STT {
453
+ transcribe(audioStream) {
454
+ super.transcribe(audioStream);
455
+ this.log("Converting stream to file...");
456
+ const chunks = [];
457
+ audioStream.on("data", (chunk) => {
458
+ chunks.push(chunk);
459
+ });
460
+ audioStream.on("end", async () => {
461
+ if (chunks.length === 0) return;
462
+ const arrayBuffer = Buffer.concat(chunks);
463
+ const file = new File([arrayBuffer], `audio.${this.extension}`, {
464
+ type: this.mimeType
465
+ });
466
+ this.log("Transcribing file...");
467
+ const transcript = await this.transcribeFile(file);
468
+ this.emit("Transcript", transcript);
469
+ });
267
470
  }
268
471
  };
269
472
 
270
- // src/errors.ts
271
- var CallErrorCode = /* @__PURE__ */ ((CallErrorCode2) => {
272
- CallErrorCode2[CallErrorCode2["BadRequest"] = 4400] = "BadRequest";
273
- CallErrorCode2[CallErrorCode2["Unauthorized"] = 4401] = "Unauthorized";
274
- CallErrorCode2[CallErrorCode2["NotFound"] = 4404] = "NotFound";
275
- return CallErrorCode2;
276
- })(CallErrorCode || {});
277
- var CallError = class extends Error {
278
- constructor(code, message) {
279
- super(message);
280
- this.code = code;
473
+ // src/stt/MockSTT.ts
474
+ var MockSTT = class extends FileSTT {
475
+ constructor() {
476
+ super(...arguments);
477
+ this.i = 0;
478
+ }
479
+ async transcribeFile(file) {
480
+ return `User Message ${this.i++}`;
281
481
  }
282
482
  };
283
- function handleError(socket, error) {
284
- if (error instanceof CallError) {
285
- socket.close(error.code, error.message);
286
- } else {
287
- console.error(error);
288
- socket.close(1011);
483
+
484
+ // src/tts/MockTTS.ts
485
+ var fs = __toESM(require("fs"));
486
+ var import_stream4 = require("stream");
487
+
488
+ // src/tts/TTS.ts
489
+ var TTS = class {
490
+ log(...message) {
491
+ this.logger?.log(...message);
289
492
  }
290
- socket.terminate();
291
- }
493
+ destroy() {
494
+ this.log("Destroyed");
495
+ this.cancel();
496
+ }
497
+ };
498
+
499
+ // src/tts/MockTTS.ts
500
+ var MockTTS = class extends TTS {
501
+ constructor(audioFilePaths) {
502
+ super();
503
+ this.audioFilePaths = audioFilePaths;
504
+ }
505
+ speak(textStream) {
506
+ const audioStream = new import_stream4.PassThrough();
507
+ textStream.once("data", async () => {
508
+ for (const filePath of this.audioFilePaths) {
509
+ await new Promise((resolve) => setTimeout(resolve, 200));
510
+ const audioBuffer = fs.readFileSync(filePath);
511
+ this.log(`Loaded chunk (${audioBuffer.length} bytes)`);
512
+ audioStream.write(audioBuffer);
513
+ }
514
+ audioStream.end();
515
+ });
516
+ return audioStream;
517
+ }
518
+ cancel() {
519
+ }
520
+ };
292
521
 
293
522
  // src/waitForParams.ts
294
523
  async function waitForParams(socket, validate) {
295
524
  return new Promise((resolve, reject) => {
296
525
  const timeout = setTimeout(() => {
297
- reject(new CallError(4400 /* BadRequest */, "Missing params"));
526
+ reject(new MicdropError(4400 /* BadRequest */, "Missing params"));
298
527
  }, 3e3);
299
528
  const onParams = (payload) => {
300
529
  clearTimeout(timeout);
@@ -303,7 +532,7 @@ async function waitForParams(socket, validate) {
303
532
  const params = validate(JSON.parse(payload));
304
533
  resolve(params);
305
534
  } catch (error) {
306
- reject(new CallError(4400 /* BadRequest */, "Invalid params"));
535
+ reject(new MicdropError(4400 /* BadRequest */, "Invalid params"));
307
536
  }
308
537
  };
309
538
  socket.on("message", onParams);
@@ -311,11 +540,22 @@ async function waitForParams(socket, validate) {
311
540
  }
312
541
  // Annotate the CommonJS export names for ESM import in node:
313
542
  0 && (module.exports = {
314
- CallClientCommands,
315
- CallError,
316
- CallErrorCode,
317
- CallServer,
318
- CallServerCommands,
543
+ Agent,
544
+ FileSTT,
545
+ Logger,
546
+ MicdropClientCommands,
547
+ MicdropError,
548
+ MicdropErrorCode,
549
+ MicdropServer,
550
+ MicdropServerCommands,
551
+ MockAgent,
552
+ MockSTT,
553
+ MockTTS,
554
+ STT,
555
+ TTS,
556
+ convertPCMToOpus,
557
+ convertToOpus,
558
+ convertToPCM,
319
559
  handleError,
320
560
  waitForParams
321
561
  });