@micdrop/server 1.7.0 → 2.0.0

This diff compares publicly available package versions as published to their public registries and is provided for informational purposes only.
package/dist/index.js CHANGED
@@ -30,269 +30,500 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
  // src/index.ts
  var index_exports = {};
  __export(index_exports, {
- CallClientCommands: () => CallClientCommands,
- CallError: () => CallError,
- CallErrorCode: () => CallErrorCode,
- CallServer: () => CallServer,
- CallServerCommands: () => CallServerCommands,
+ Agent: () => Agent,
+ FileSTT: () => FileSTT,
+ Logger: () => Logger,
+ MicdropClientCommands: () => MicdropClientCommands,
+ MicdropError: () => MicdropError,
+ MicdropErrorCode: () => MicdropErrorCode,
+ MicdropServer: () => MicdropServer,
+ MicdropServerCommands: () => MicdropServerCommands,
+ MockAgent: () => MockAgent,
+ MockSTT: () => MockSTT,
+ MockTTS: () => MockTTS,
+ STT: () => STT,
+ TTS: () => TTS,
+ convertPCMToOpus: () => convertPCMToOpus,
+ convertToOpus: () => convertToOpus,
+ convertToPCM: () => convertToPCM,
  handleError: () => handleError,
  waitForParams: () => waitForParams
  });
  module.exports = __toCommonJS(index_exports);
 
- // src/CallServer.ts
- var fs = __toESM(require("fs"));
+ // src/agent/Agent.ts
+ var import_eventemitter3 = __toESM(require("eventemitter3"));
+ var Agent = class extends import_eventemitter3.default {
+ constructor(options) {
+ super();
+ this.options = options;
+ this.conversation = [{ role: "system", content: options.systemPrompt }];
+ }
+ addUserMessage(text) {
+ this.addMessage("user", text);
+ }
+ addAssistantMessage(text) {
+ this.addMessage("assistant", text);
+ }
+ addMessage(role, text) {
+ this.log(`Adding ${role} message to conversation: ${text}`);
+ const message = {
+ role,
+ content: text
+ };
+ this.conversation.push(message);
+ this.emit("Message", message);
+ }
+ endCall() {
+ this.log("Ending call");
+ this.emit("EndCall");
+ }
+ cancelLastUserMessage() {
+ this.log("Cancelling last user message");
+ const lastMessage = this.conversation[this.conversation.length - 1];
+ if (lastMessage?.role !== "user") return;
+ this.conversation.pop();
+ this.emit("CancelLastUserMessage");
+ }
+ cancelLastAssistantMessage() {
+ this.log("Cancelling last assistant message");
+ const lastMessage = this.conversation[this.conversation.length - 1];
+ if (lastMessage?.role !== "assistant") return;
+ this.conversation.pop();
+ this.emit("CancelLastAssistantMessage");
+ }
+ skipAnswer() {
+ this.log("Skipping answer");
+ this.emit("SkipAnswer");
+ }
+ createTextPromise() {
+ const result = {};
+ result.promise = new Promise((resolve, reject) => {
+ result.resolve = resolve;
+ result.reject = reject;
+ });
+ return result;
+ }
+ log(...message) {
+ this.logger?.log(...message);
+ }
+ destroy() {
+ this.log("Destroyed");
+ this.removeAllListeners();
+ this.cancel();
+ }
+ };
+
+ // src/agent/MockAgent.ts
+ var import_stream = require("stream");
+ var MockAgent = class extends Agent {
+ constructor() {
+ super({ systemPrompt: "" });
+ this.i = 0;
+ }
+ answer() {
+ const stream = new import_stream.PassThrough();
+ const textPromise = this.createTextPromise();
+ const message = `Assistant Message ${this.i++}`;
+ this.addAssistantMessage(message);
+ stream.write(message);
+ stream.end();
+ textPromise.resolve(message);
+ return { message: textPromise.promise, stream };
+ }
+ cancel() {
+ }
+ };
+
+ // src/audio-convert.ts
+ var import_ffmpeg = __toESM(require("@ffmpeg-installer/ffmpeg"));
+ var import_fluent_ffmpeg = __toESM(require("fluent-ffmpeg"));
+ var import_stream2 = require("stream");
+ import_fluent_ffmpeg.default.setFfmpegPath(import_ffmpeg.default.path);
+ function convertToPCM(audioStream, sampleRate = 16e3, bitDepth = 16) {
+ const pcmStream = new import_stream2.PassThrough();
+ (0, import_fluent_ffmpeg.default)(audioStream).audioChannels(1).audioFrequency(sampleRate).audioCodec(`pcm_s${bitDepth}le`).format(`s${bitDepth}le`).on("error", (error) => {
+ console.error("Error converting audio stream:", error.message);
+ }).pipe(pcmStream);
+ return pcmStream;
+ }
+ function convertToOpus(audioStream, sampleRate = 16e3) {
+ const webmStream = new import_stream2.PassThrough();
+ ffmpegToOpus((0, import_fluent_ffmpeg.default)(audioStream), sampleRate).pipe(webmStream);
+ return webmStream;
+ }
+ function convertPCMToOpus(audioStream, sampleRate = 16e3) {
+ const webmStream = new import_stream2.PassThrough();
+ ffmpegToOpus((0, import_fluent_ffmpeg.default)(audioStream), sampleRate).inputFormat("s16le").inputOptions(["-f s16le", "-ar 16000", "-ac 1"]).pipe(webmStream);
+ return webmStream;
+ }
+ function ffmpegToOpus(ffmpegCommand, sampleRate = 16e3) {
+ return ffmpegCommand.audioChannels(1).audioFrequency(sampleRate).audioCodec("libopus").format("webm").outputOptions([
+ "-application audio",
+ `-ac 1`,
+ `-ar ${sampleRate}`,
+ `-b:a 64k`,
+ `-f webm`,
+ `-map_metadata -1`
+ ]).on("error", (error) => {
+ console.error("Error converting to Opus: ", error.message);
+ });
+ }
+
+ // src/errors.ts
+ var MicdropErrorCode = /* @__PURE__ */ ((MicdropErrorCode2) => {
+ MicdropErrorCode2[MicdropErrorCode2["BadRequest"] = 4400] = "BadRequest";
+ MicdropErrorCode2[MicdropErrorCode2["Unauthorized"] = 4401] = "Unauthorized";
+ MicdropErrorCode2[MicdropErrorCode2["NotFound"] = 4404] = "NotFound";
+ return MicdropErrorCode2;
+ })(MicdropErrorCode || {});
+ var MicdropError = class extends Error {
+ constructor(code, message) {
+ super(message);
+ this.code = code;
+ }
+ };
+ function handleError(socket, error) {
+ if (error instanceof MicdropError) {
+ socket.close(error.code, error.message);
+ } else {
+ console.error(error);
+ socket.close(1011);
+ }
+ socket.terminate();
+ }
+
+ // src/Logger.ts
+ var Logger = class {
+ constructor(name) {
+ this.name = name;
+ }
+ log(...message) {
+ const time = process.uptime().toFixed(3);
+ console.log(`[${this.name} ${time}]`, ...message);
+ }
+ };
+
+ // src/MicdropServer.ts
+ var import_stream3 = require("stream");
 
  // src/types.ts
- var CallClientCommands = /* @__PURE__ */ ((CallClientCommands2) => {
- CallClientCommands2["StartSpeaking"] = "StartSpeaking";
- CallClientCommands2["StopSpeaking"] = "StopSpeaking";
- CallClientCommands2["Mute"] = "Mute";
- return CallClientCommands2;
- })(CallClientCommands || {});
- var CallServerCommands = /* @__PURE__ */ ((CallServerCommands2) => {
- CallServerCommands2["Message"] = "Message";
- CallServerCommands2["CancelLastAssistantMessage"] = "CancelLastAssistantMessage";
- CallServerCommands2["CancelLastUserMessage"] = "CancelLastUserMessage";
- CallServerCommands2["SkipAnswer"] = "SkipAnswer";
- CallServerCommands2["EnableSpeakerStreaming"] = "EnableSpeakerStreaming";
- CallServerCommands2["EndCall"] = "EndCall";
- return CallServerCommands2;
- })(CallServerCommands || {});
+ var MicdropClientCommands = /* @__PURE__ */ ((MicdropClientCommands2) => {
+ MicdropClientCommands2["StartSpeaking"] = "StartSpeaking";
+ MicdropClientCommands2["StopSpeaking"] = "StopSpeaking";
+ MicdropClientCommands2["Mute"] = "Mute";
+ return MicdropClientCommands2;
+ })(MicdropClientCommands || {});
+ var MicdropServerCommands = /* @__PURE__ */ ((MicdropServerCommands2) => {
+ MicdropServerCommands2["Message"] = "Message";
+ MicdropServerCommands2["CancelLastAssistantMessage"] = "CancelLastAssistantMessage";
+ MicdropServerCommands2["CancelLastUserMessage"] = "CancelLastUserMessage";
+ MicdropServerCommands2["SkipAnswer"] = "SkipAnswer";
+ MicdropServerCommands2["EndCall"] = "EndCall";
+ return MicdropServerCommands2;
+ })(MicdropServerCommands || {});
 
- // src/CallServer.ts
- var CallServer = class {
+ // src/MicdropServer.ts
+ var MicdropServer = class {
  constructor(socket, config) {
  this.socket = null;
  this.config = null;
  this.startTime = Date.now();
- this.lastDebug = Date.now();
- // When user is speaking, we're waiting to chunks or to stop
- this.isSpeaking = false;
- // Chunks of user speech since user started speaking
- this.chunks = [];
- // Enable speaker streaming
- this.speakerStreamingEnabled = false;
+ this.onClose = () => {
+ if (!this.config) return;
+ this.log("Connection closed");
+ const duration = Math.round((Date.now() - this.startTime) / 1e3);
+ this.config.agent.destroy();
+ this.config.stt.destroy();
+ this.config.tts.destroy();
+ this.config.onEnd?.({
+ conversation: this.config.agent.conversation.slice(1),
+ // Remove system message
+ duration
+ });
+ this.socket = null;
+ this.config = null;
+ };
+ this.onMessage = async (message) => {
+ if (message.byteLength === 0) return;
+ if (!Buffer.isBuffer(message)) {
+ this.log("Message is not a buffer");
+ return;
+ }
+ if (message.byteLength < 15) {
+ const cmd = message.toString();
+ this.log(`Command: ${cmd}`);
+ if (cmd === "StartSpeaking" /* StartSpeaking */) {
+ await this.onStartSpeaking();
+ } else if (cmd === "Mute" /* Mute */) {
+ await this.onMute();
+ } else if (cmd === "StopSpeaking" /* StopSpeaking */) {
+ await this.onStopSpeaking();
+ }
+ } else if (this.currentUserStream) {
+ this.log(`Received chunk (${message.byteLength} bytes)`);
+ this.currentUserStream.write(message);
+ }
+ };
+ this.onTranscript = async (transcript) => {
+ if (!this.config) return;
+ this.log(`User transcript: "${transcript}"`);
+ this.config.agent.addUserMessage(transcript);
+ if (!this.currentUserStream) {
+ this.log("User stopped speaking, answering");
+ this.answer();
+ }
+ };
  this.socket = socket;
  this.config = config;
- this.conversation = [{ role: "system", content: config.systemPrompt }];
  this.log(`Call started`);
- if (config.firstMessage) {
- this.answer(config.firstMessage);
- } else {
- this.config.generateAnswer(this.conversation).then((answer) => this.answer(answer)).catch((error) => {
- console.error("[CallServer]", error);
- socket?.close();
- });
- }
- socket.on("close", this.onClose.bind(this));
- socket.on("message", this.onMessage.bind(this));
+ this.config.stt.on("Transcript", this.onTranscript);
+ this.config.agent.on(
+ "Message",
+ (message) => this.socket?.send(
+ `${"Message" /* Message */} ${JSON.stringify(message)}`
+ )
+ );
+ this.config.agent.on(
+ "CancelLastUserMessage",
+ () => this.socket?.send("CancelLastUserMessage" /* CancelLastUserMessage */)
+ );
+ this.config.agent.on(
+ "CancelLastAssistantMessage",
+ () => this.socket?.send("CancelLastAssistantMessage" /* CancelLastAssistantMessage */)
+ );
+ this.config.agent.on(
+ "SkipAnswer",
+ () => this.socket?.send("SkipAnswer" /* SkipAnswer */)
+ );
+ this.config.agent.on(
+ "EndCall",
+ () => this.socket?.send("EndCall" /* EndCall */)
+ );
+ this.sendFirstMessage();
+ socket.on("close", this.onClose);
+ socket.on("message", this.onMessage);
  }
- // Reset conversation
- resetConversation(conversation) {
- this.log("Reset conversation");
- this.conversation = conversation;
+ log(...message) {
+ this.logger?.log(...message);
  }
- abortProcessing() {
- if (!this.processing) return;
- this.processing.aborted = true;
- this.processing = void 0;
+ cancel() {
+ this.config?.tts.cancel();
+ this.config?.agent.cancel();
  }
- addMessage(message) {
- if (!this.socket || !this.config) return;
- this.conversation.push(message);
- this.socket.send(`${"Message" /* Message */} ${JSON.stringify(message)}`);
- this.config.onMessage?.(message);
+ async onMute() {
+ this.currentUserStream?.end();
+ this.currentUserStream = void 0;
+ this.cancel();
  }
- async sendAudio(audio, processing, onAbort) {
- if (!this.socket) return;
- if (processing.aborted) {
- onAbort?.();
- return;
- }
- if (Buffer.isBuffer(audio) || audio instanceof ArrayBuffer) {
- this.log(`Send audio: (${audio.byteLength} bytes)`);
- this.socket.send(audio);
- } else if ("paused" in audio) {
- if (!this.speakerStreamingEnabled) {
- this.socket.send("EnableSpeakerStreaming" /* EnableSpeakerStreaming */);
- this.speakerStreamingEnabled = true;
- }
- for await (const chunk of audio) {
- if (processing.aborted) {
- onAbort?.();
- return;
- }
- this.log(`Send audio chunk (${chunk.length} bytes)`);
- this.socket.send(chunk);
- }
- } else {
- this.log(`Unknown audio type: ${audio}`);
- }
- }
- onClose() {
+ async onStartSpeaking() {
  if (!this.config) return;
- this.log("Connection closed");
- this.abortProcessing();
- const duration = Math.round((Date.now() - this.startTime) / 1e3);
- this.config.onEnd?.({
- conversation: this.conversation.slice(1),
- // Remove system message
- duration
- });
- this.socket = null;
- this.config = null;
+ this.currentUserStream?.end();
+ this.currentUserStream = new import_stream3.PassThrough();
+ this.config.stt.transcribe(this.currentUserStream);
+ this.cancel();
  }
- async onMessage(message) {
- if (!Buffer.isBuffer(message)) {
- console.warn(`[CallServer] Message is not a buffer`);
- return;
+ async onStopSpeaking() {
+ this.currentUserStream?.end();
+ this.currentUserStream = void 0;
+ const conversation = this.config?.agent.conversation;
+ if (conversation && conversation[conversation.length - 1].role === "user") {
+ this.log(
+ "User stopped speaking and a transcript already exists, answering"
+ );
+ this.answer();
  }
- if (message.byteLength < 15) {
- const cmd = message.toString();
- this.log(`Command: ${cmd}`);
- if (cmd === "StartSpeaking" /* StartSpeaking */) {
- this.isSpeaking = true;
- this.abortProcessing();
- } else if (cmd === "Mute" /* Mute */) {
- this.abortProcessing();
- } else if (cmd === "StopSpeaking" /* StopSpeaking */) {
- this.isSpeaking = false;
- await this.onStopSpeaking();
+ }
+ async sendFirstMessage() {
+ if (!this.config) return;
+ try {
+ if (this.config.firstMessage) {
+ this.config.agent.addAssistantMessage(this.config.firstMessage);
+ await this.speak(this.config.firstMessage);
+ } else if (this.config.generateFirstMessage) {
+ await this.answer();
  }
- } else if (Buffer.isBuffer(message) && this.isSpeaking) {
- this.log(`Received chunk (${message.byteLength} bytes)`);
- this.chunks.push(message);
+ } catch (error) {
+ console.error("[MicdropServer]", error);
+ this.socket?.send("SkipAnswer" /* SkipAnswer */);
  }
  }
- async onStopSpeaking() {
+ async answer() {
  if (!this.config) return;
- if (this.chunks.length === 0) return;
- this.abortProcessing();
- const processing = this.processing = { aborted: false };
- const blob = new Blob(this.chunks, { type: "audio/ogg" });
- this.chunks.length = 0;
+ this.cancel();
  try {
- if (this.config.debugSaveSpeech) {
- const filename = `speech-${Date.now()}.ogg`;
- fs.writeFileSync(filename, Buffer.from(await blob.arrayBuffer()));
- this.log(`Saved speech: ${filename}`);
- }
- const transcript = await this.config.speech2Text(
- blob,
- this.conversation[this.conversation.length - 1]?.content
- );
- if (!transcript) {
- this.log("Ignoring empty transcript");
- this.socket?.send("SkipAnswer" /* SkipAnswer */);
- return;
- }
- this.log("User transcript:", transcript);
- this.addMessage({ role: "user", content: transcript });
- if (processing.aborted) {
- this.log("Answer aborted, no answer generated");
- return;
- }
- const answer = await this.config.generateAnswer(this.conversation);
- if (processing.aborted) {
- this.log("Answer aborted, ignoring answer");
- return;
- }
- await this.answer(answer, processing);
+ const { stream } = this.config.agent.answer();
+ await this.speak(stream);
  } catch (error) {
- console.error("[CallServer]", error);
+ console.error("[MicdropServer]", error);
  this.socket?.send("SkipAnswer" /* SkipAnswer */);
  }
  }
- // Add assistant message and send to client with audio (TTS)
- async answer(message, processing) {
+ // Run text-to-speech and send to client
+ async speak(message) {
  if (!this.socket || !this.config) return;
- if (!processing) {
- this.abortProcessing();
- processing = this.processing = { aborted: false };
- }
+ let textStream;
  if (typeof message === "string") {
- message = { role: "assistant", content: message };
+ const stream = new import_stream3.PassThrough();
+ stream.write(message);
+ stream.end();
+ textStream = stream;
+ } else {
+ textStream = message;
  }
- if (message.commands?.cancelLastUserMessage) {
- this.log("Cancelling last user message");
- const lastMessage = this.conversation[this.conversation.length - 1];
- if (lastMessage?.role === "user") {
- this.conversation.pop();
- this.socket?.send("CancelLastUserMessage" /* CancelLastUserMessage */);
- }
+ const audio = this.config.tts.speak(textStream);
+ await this.sendAudio(audio);
+ }
+ async sendAudio(audio) {
+ if (!this.socket) return;
+ if (!audio.readable) {
+ this.log("Non readable audio, skipping", audio);
  return;
  }
- if (!message.content.length || message.commands?.skipAnswer) {
- this.log("Skipping answer");
- this.socket?.send("SkipAnswer" /* SkipAnswer */);
- return;
+ audio.on("data", (chunk) => {
+ this.log(`Send audio chunk (${chunk.byteLength} bytes)`);
+ this.socket?.send(chunk);
+ });
+ audio.on("error", (error) => {
+ this.log("Error in audio stream", error);
+ });
+ audio.on("end", () => {
+ this.log("Audio stream ended");
+ });
+ }
+ };
+
+ // src/stt/STT.ts
+ var import_eventemitter32 = __toESM(require("eventemitter3"));
+ var MIME_TYPE_TO_EXTENSION = {
+ "audio/wav": "wav",
+ "audio/ogg": "ogg",
+ "audio/mpeg": "mp3",
+ "audio/webm": "webm",
+ "audio/mp4": "mp4",
+ "audio/flac": "flac"
+ };
+ var STT = class extends import_eventemitter32.default {
+ // Set stream of audio to transcribe
+ transcribe(audioStream) {
+ audioStream.once("data", (chunk) => {
+ this.mimeType = this.detectMimeType(chunk);
+ });
+ }
+ log(...message) {
+ this.logger?.log(...message);
+ }
+ destroy() {
+ this.log("Destroyed");
+ this.removeAllListeners();
+ }
+ get extension() {
+ return this.mimeType && MIME_TYPE_TO_EXTENSION[this.mimeType] || "bin";
+ }
+ detectMimeType(chunk) {
+ if (!chunk || chunk.byteLength === 0) {
+ throw new Error("Unable to detect mime type (empty chunk)");
  }
- this.log("Assistant message:", message);
- this.addMessage(message);
- if (!this.config.disableTTS) {
- try {
- const onAbort = () => {
- this.log("Answer aborted, removing last assistant message");
- const lastMessage = this.conversation[this.conversation.length - 1];
- if (lastMessage?.role === "assistant") {
- this.conversation.pop();
- this.socket?.send("CancelLastAssistantMessage" /* CancelLastAssistantMessage */);
- }
- };
- if (processing.aborted) {
- onAbort();
- return;
- }
- const audio = await this.config.text2Speech(message.content);
- await this.sendAudio(audio, processing, onAbort);
- } catch (error) {
- console.error("[CallServer]", error);
- this.socket?.send("SkipAnswer" /* SkipAnswer */);
- }
+ const arr = new Uint8Array(chunk);
+ if (arr[0] === 26 && arr[1] === 69 && arr[2] === 223 && arr[3] === 163) {
+ return "audio/webm";
+ }
+ if (arr[0] === 79 && arr[1] === 103 && arr[2] === 103 && arr[3] === 83) {
+ return "audio/ogg";
+ }
+ if (arr[0] === 82 && arr[1] === 73 && arr[2] === 70 && arr[3] === 70 && arr[8] === 87 && arr[9] === 65 && arr[10] === 86 && arr[11] === 69) {
+ return "audio/wav";
  }
- if (message.commands?.endCall) {
- this.log("Call ended");
- this.socket.send("EndCall" /* EndCall */);
+ if (arr[0] === 73 && arr[1] === 68 && arr[2] === 51) {
+ return "audio/mpeg";
  }
+ if (arr[4] === 102 && arr[5] === 116 && arr[6] === 121 && arr[7] === 112) {
+ return "audio/mp4";
+ }
+ if (arr[0] === 102 && arr[1] === 76 && arr[2] === 97 && arr[3] === 67) {
+ return "audio/flac";
+ }
+ this.log("Unable to detect mime type, using default", chunk);
+ return "audio/wav";
  }
- log(...message) {
- if (!this.config?.debugLog) return;
- const now = Date.now();
- const delta = now - this.lastDebug;
- this.lastDebug = now;
- console.log(`[Debug +${delta}ms]`, ...message);
+ };
+
+ // src/stt/FileSTT.ts
+ var FileSTT = class extends STT {
+ transcribe(audioStream) {
+ super.transcribe(audioStream);
+ this.log("Converting stream to file...");
+ const chunks = [];
+ audioStream.on("data", (chunk) => {
+ chunks.push(chunk);
+ });
+ audioStream.on("end", async () => {
+ if (chunks.length === 0) return;
+ const arrayBuffer = Buffer.concat(chunks);
+ const file = new File([arrayBuffer], `audio.${this.extension}`, {
+ type: this.mimeType
+ });
+ this.log("Transcribing file...");
+ const transcript = await this.transcribeFile(file);
+ this.emit("Transcript", transcript);
+ });
  }
  };
 
- // src/errors.ts
- var CallErrorCode = /* @__PURE__ */ ((CallErrorCode2) => {
- CallErrorCode2[CallErrorCode2["BadRequest"] = 4400] = "BadRequest";
- CallErrorCode2[CallErrorCode2["Unauthorized"] = 4401] = "Unauthorized";
- CallErrorCode2[CallErrorCode2["NotFound"] = 4404] = "NotFound";
- return CallErrorCode2;
- })(CallErrorCode || {});
- var CallError = class extends Error {
- constructor(code, message) {
- super(message);
- this.code = code;
+ // src/stt/MockSTT.ts
+ var MockSTT = class extends FileSTT {
+ constructor() {
+ super(...arguments);
+ this.i = 0;
+ }
+ async transcribeFile(file) {
+ return `User Message ${this.i++}`;
  }
  };
- function handleError(socket, error) {
- if (error instanceof CallError) {
- socket.close(error.code, error.message);
- } else {
- console.error(error);
- socket.close(1011);
+
+ // src/tts/MockTTS.ts
+ var fs = __toESM(require("fs"));
+ var import_stream4 = require("stream");
+
+ // src/tts/TTS.ts
+ var TTS = class {
+ log(...message) {
+ this.logger?.log(...message);
  }
- socket.terminate();
- }
+ destroy() {
+ this.log("Destroyed");
+ this.cancel();
+ }
+ };
+
+ // src/tts/MockTTS.ts
+ var MockTTS = class extends TTS {
+ constructor(audioFilePaths) {
+ super();
+ this.audioFilePaths = audioFilePaths;
+ }
+ speak(textStream) {
+ const audioStream = new import_stream4.PassThrough();
+ textStream.once("data", async () => {
+ for (const filePath of this.audioFilePaths) {
+ await new Promise((resolve) => setTimeout(resolve, 200));
+ const audioBuffer = fs.readFileSync(filePath);
+ this.log(`Loaded chunk (${audioBuffer.length} bytes)`);
+ audioStream.write(audioBuffer);
+ }
+ audioStream.end();
+ });
+ return audioStream;
+ }
+ cancel() {
+ }
+ };
 
  // src/waitForParams.ts
  async function waitForParams(socket, validate) {
  return new Promise((resolve, reject) => {
  const timeout = setTimeout(() => {
- reject(new CallError(4400 /* BadRequest */, "Missing params"));
+ reject(new MicdropError(4400 /* BadRequest */, "Missing params"));
  }, 3e3);
  const onParams = (payload) => {
  clearTimeout(timeout);
@@ -301,7 +532,7 @@ async function waitForParams(socket, validate) {
  const params = validate(JSON.parse(payload));
  resolve(params);
  } catch (error) {
- reject(new CallError(4400 /* BadRequest */, "Invalid params"));
+ reject(new MicdropError(4400 /* BadRequest */, "Invalid params"));
  }
  };
  socket.on("message", onParams);
@@ -309,11 +540,22 @@ async function waitForParams(socket, validate) {
  }
  // Annotate the CommonJS export names for ESM import in node:
  0 && (module.exports = {
- CallClientCommands,
- CallError,
- CallErrorCode,
- CallServer,
- CallServerCommands,
+ Agent,
+ FileSTT,
+ Logger,
+ MicdropClientCommands,
+ MicdropError,
+ MicdropErrorCode,
+ MicdropServer,
+ MicdropServerCommands,
+ MockAgent,
+ MockSTT,
+ MockTTS,
+ STT,
+ TTS,
+ convertPCMToOpus,
+ convertToOpus,
+ convertToPCM,
  handleError,
  waitForParams
  });
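
For reference, a minimal sketch of how the reworked 2.0 API shown above could be wired up. Only what appears in this compiled diff is assumed to exist (MicdropServer and its config fields, the Mock* classes, waitForParams, handleError, MicdropError); the "ws" WebSocket server, the params shape ("lang"), and the audio file path are assumptions added for illustration.

// Usage sketch inferred from dist/index.js (2.0.0); not an official example.
import { WebSocketServer } from 'ws' // assumption: any ws-compatible socket works
import {
  MicdropServer,
  MockAgent,
  MockSTT,
  MockTTS,
  handleError,
  waitForParams,
} from '@micdrop/server'

const wss = new WebSocketServer({ port: 8080 })

wss.on('connection', async (socket) => {
  try {
    // Wait for the client's first JSON message; waitForParams rejects with a
    // MicdropError after 3s or when the validator throws.
    const params = await waitForParams(socket, (payload: any) => {
      if (typeof payload?.lang !== 'string') throw new Error('Missing lang') // "lang" is hypothetical
      return payload as { lang: string }
    })

    // In 2.0 the call is assembled from three pluggable parts: an Agent, an STT
    // and a TTS. The mocks exported by the package stand in for real ones here.
    new MicdropServer(socket, {
      firstMessage: `Hello! (${params.lang})`,
      agent: new MockAgent(),
      stt: new MockSTT(),
      tts: new MockTTS(['./mock-audio.webm']), // hypothetical audio file path
      onEnd: ({ conversation, duration }) => {
        console.log(`Call ended after ${duration}s`, conversation)
      },
    })
  } catch (error) {
    handleError(socket, error)
  }
})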