@micdrop/server 1.7.1 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +229 -229
- package/dist/index.d.mts +150 -53
- package/dist/index.d.ts +150 -53
- package/dist/index.js +460 -220
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +444 -215
- package/dist/index.mjs.map +1 -1
- package/package.json +7 -3
package/dist/index.js
CHANGED
|
@@ -30,271 +30,500 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
30
30
|
// src/index.ts
|
|
31
31
|
var index_exports = {};
|
|
32
32
|
__export(index_exports, {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
33
|
+
Agent: () => Agent,
|
|
34
|
+
FileSTT: () => FileSTT,
|
|
35
|
+
Logger: () => Logger,
|
|
36
|
+
MicdropClientCommands: () => MicdropClientCommands,
|
|
37
|
+
MicdropError: () => MicdropError,
|
|
38
|
+
MicdropErrorCode: () => MicdropErrorCode,
|
|
39
|
+
MicdropServer: () => MicdropServer,
|
|
40
|
+
MicdropServerCommands: () => MicdropServerCommands,
|
|
41
|
+
MockAgent: () => MockAgent,
|
|
42
|
+
MockSTT: () => MockSTT,
|
|
43
|
+
MockTTS: () => MockTTS,
|
|
44
|
+
STT: () => STT,
|
|
45
|
+
TTS: () => TTS,
|
|
46
|
+
convertPCMToOpus: () => convertPCMToOpus,
|
|
47
|
+
convertToOpus: () => convertToOpus,
|
|
48
|
+
convertToPCM: () => convertToPCM,
|
|
38
49
|
handleError: () => handleError,
|
|
39
50
|
waitForParams: () => waitForParams
|
|
40
51
|
});
|
|
41
52
|
module.exports = __toCommonJS(index_exports);
|
|
42
53
|
|
|
43
|
-
// src/
|
|
44
|
-
var
|
|
54
|
+
// src/agent/Agent.ts
|
|
55
|
+
var import_eventemitter3 = __toESM(require("eventemitter3"));
|
|
56
|
+
/**
 * Base class for conversational agents.
 *
 * Holds the running conversation (seeded with the system prompt) and emits an
 * event for each change: "Message", "EndCall", "CancelLastUserMessage",
 * "CancelLastAssistantMessage" and "SkipAnswer".
 *
 * `destroy()` calls `this.cancel()`, which this class does not define, so
 * concrete agents have to provide it.
 */
var Agent = class extends import_eventemitter3.default {
  /**
   * @param {object} options - Agent options; `options.systemPrompt` becomes
   *   the first ("system") entry of the conversation.
   */
  constructor(options) {
    super();
    this.options = options;
    const systemMessage = { role: "system", content: options.systemPrompt };
    this.conversation = [systemMessage];
  }

  /** Append a "user" message and emit "Message". */
  addUserMessage(text) {
    this.addMessage("user", text);
  }

  /** Append an "assistant" message and emit "Message". */
  addAssistantMessage(text) {
    this.addMessage("assistant", text);
  }

  /**
   * Append a message with the given role to the conversation and emit it
   * through the "Message" event.
   */
  addMessage(role, text) {
    this.log(`Adding ${role} message to conversation: ${text}`);
    const entry = { role, content: text };
    this.conversation.push(entry);
    this.emit("Message", entry);
  }

  /** Emit "EndCall". */
  endCall() {
    this.log("Ending call");
    this.emit("EndCall");
  }

  /**
   * Drop the last conversation entry when it is a "user" message and emit
   * "CancelLastUserMessage"; otherwise do nothing (besides logging).
   */
  cancelLastUserMessage() {
    this.log("Cancelling last user message");
    const { length } = this.conversation;
    const tail = this.conversation[length - 1];
    if (tail?.role === "user") {
      this.conversation.pop();
      this.emit("CancelLastUserMessage");
    }
  }

  /**
   * Drop the last conversation entry when it is an "assistant" message and
   * emit "CancelLastAssistantMessage"; otherwise do nothing (besides logging).
   */
  cancelLastAssistantMessage() {
    this.log("Cancelling last assistant message");
    const { length } = this.conversation;
    const tail = this.conversation[length - 1];
    if (tail?.role === "assistant") {
      this.conversation.pop();
      this.emit("CancelLastAssistantMessage");
    }
  }

  /** Emit "SkipAnswer". */
  skipAnswer() {
    this.log("Skipping answer");
    this.emit("SkipAnswer");
  }

  /**
   * Create a promise together with its settle functions.
   *
   * @returns {{resolve: Function, reject: Function, promise: Promise}}
   */
  createTextPromise() {
    const deferred = {};
    const executor = (resolve, reject) => {
      deferred.resolve = resolve;
      deferred.reject = reject;
    };
    deferred.promise = new Promise(executor);
    return deferred;
  }

  /** Forward to `this.logger.log` when a logger has been attached. */
  log(...message) {
    this.logger?.log(...message);
  }

  /** Remove every listener, then cancel whatever the agent is doing. */
  destroy() {
    this.log("Destroyed");
    this.removeAllListeners();
    this.cancel();
  }
};
|
|
116
|
+
|
|
117
|
+
// src/agent/MockAgent.ts
|
|
118
|
+
var import_stream = require("stream");
|
|
119
|
+
/**
 * Agent stub: every `answer()` produces the canned text
 * "Assistant Message <n>", where <n> counts up from 0.
 */
var MockAgent = class extends Agent {
  constructor() {
    super({ systemPrompt: "" });
    // Number of the next canned answer.
    this.i = 0;
  }

  /**
   * Record the next canned answer in the conversation and return it both as
   * an already-ended text stream and as an already-resolved promise.
   *
   * @returns {{message: Promise<string>, stream: PassThrough}}
   */
  answer() {
    const stream = new import_stream.PassThrough();
    const { promise, resolve } = this.createTextPromise();
    const turn = this.i++;
    const message = `Assistant Message ${turn}`;
    this.addAssistantMessage(message);
    // end(chunk) writes the chunk and then closes the stream.
    stream.end(message);
    resolve(message);
    return { message: promise, stream };
  }

  /** Nothing to cancel: answers are produced synchronously. */
  cancel() {
  }
};
|
|
137
|
+
|
|
138
|
+
// src/audio-convert.ts
|
|
139
|
+
var import_ffmpeg = __toESM(require("@ffmpeg-installer/ffmpeg"));
|
|
140
|
+
var import_fluent_ffmpeg = __toESM(require("fluent-ffmpeg"));
|
|
141
|
+
var import_stream2 = require("stream");
|
|
142
|
+
import_fluent_ffmpeg.default.setFfmpegPath(import_ffmpeg.default.path);
|
|
143
|
+
/**
 * Pipe an audio stream through ffmpeg to get mono, signed little-endian PCM.
 *
 * @param {Readable} audioStream - Encoded audio handed to ffmpeg as input.
 * @param {number} [sampleRate=16000] - Output sample rate in Hz.
 * @param {number} [bitDepth=16] - Bits per sample; selects the
 *   `pcm_s<bitDepth>le` codec and the `s<bitDepth>le` container format.
 * @returns {PassThrough} Stream that receives the ffmpeg output.
 */
function convertToPCM(audioStream, sampleRate = 16e3, bitDepth = 16) {
  const pcmStream = new import_stream2.PassThrough();
  const createCommand = import_fluent_ffmpeg.default;
  const reportError = (error) => {
    console.error("Error converting audio stream:", error.message);
  };
  createCommand(audioStream)
    .audioChannels(1)
    .audioFrequency(sampleRate)
    .audioCodec(`pcm_s${bitDepth}le`)
    .format(`s${bitDepth}le`)
    .on("error", reportError)
    .pipe(pcmStream);
  return pcmStream;
}
|
|
150
|
+
/**
 * Pipe an audio stream through ffmpeg (configured by `ffmpegToOpus`) and
 * return the stream that receives the result.
 *
 * @param {Readable} audioStream - Audio handed to ffmpeg as input.
 * @param {number} [sampleRate=16000] - Sample rate forwarded to `ffmpegToOpus`.
 * @returns {PassThrough} Stream that receives the ffmpeg output.
 */
function convertToOpus(audioStream, sampleRate = 16e3) {
  const webmStream = new import_stream2.PassThrough();
  const createCommand = import_fluent_ffmpeg.default;
  const command = ffmpegToOpus(createCommand(audioStream), sampleRate);
  command.pipe(webmStream);
  return webmStream;
}
|
|
155
|
+
/**
 * Pipe raw PCM (signed 16-bit little-endian, mono) through ffmpeg, configured
 * by `ffmpegToOpus`, and return the stream that receives the result.
 *
 * Raw PCM has no header, so ffmpeg must be told the input format, rate and
 * channel count. The input rate used to be hard-coded to 16000 Hz, which
 * ignored `sampleRate`; it now follows the argument. The default is still
 * 16000, so callers that omit `sampleRate` are unaffected.
 * NOTE(review): this assumes `sampleRate` describes the incoming PCM as well
 * as the output — confirm against callers that pass a non-default rate.
 *
 * @param {Readable} audioStream - Raw s16le mono PCM.
 * @param {number} [sampleRate=16000] - Sample rate in Hz, used for the PCM
 *   input and forwarded to `ffmpegToOpus`.
 * @returns {PassThrough} Stream that receives the ffmpeg output.
 */
function convertPCMToOpus(audioStream, sampleRate = 16e3) {
  const webmStream = new import_stream2.PassThrough();
  const createCommand = import_fluent_ffmpeg.default;
  ffmpegToOpus(createCommand(audioStream), sampleRate)
    .inputFormat("s16le")
    .inputOptions(["-f s16le", `-ar ${sampleRate}`, "-ac 1"])
    .pipe(webmStream);
  return webmStream;
}
|
|
160
|
+
/**
 * Configure an ffmpeg command for mono libopus output in a WebM container
 * and attach an error logger.
 *
 * @param {object} ffmpegCommand - Chainable ffmpeg command to configure.
 * @param {number} [sampleRate=16000] - Output sample rate in Hz.
 * @returns {object} Whatever the final `.on("error", …)` call returns.
 */
function ffmpegToOpus(ffmpegCommand, sampleRate = 16e3) {
  const outputOptions = [
    "-application audio",
    "-ac 1",
    `-ar ${sampleRate}`,
    "-b:a 64k",
    "-f webm",
    "-map_metadata -1"
  ];
  const reportError = (error) => {
    console.error("Error converting to Opus: ", error.message);
  };
  return ffmpegCommand
    .audioChannels(1)
    .audioFrequency(sampleRate)
    .audioCodec("libopus")
    .format("webm")
    .outputOptions(outputOptions)
    .on("error", reportError);
}
|
|
172
|
+
|
|
173
|
+
// src/errors.ts
|
|
174
|
+
/**
 * Numeric error codes, mapped in both directions (name → code and
 * code → name). `handleError` passes these codes to `socket.close`.
 */
var MicdropErrorCode = /* @__PURE__ */ (() => {
  const codes = {};
  const entries = [
    ["BadRequest", 4400],
    ["Unauthorized", 4401],
    ["NotFound", 4404]
  ];
  for (const [label, value] of entries) {
    codes[label] = value;
    codes[value] = label;
  }
  return codes;
})();
|
|
180
|
+
/**
 * Error carrying a numeric code; `handleError` passes `code` and `message`
 * to `socket.close`.
 */
var MicdropError = class extends Error {
  /**
   * @param {number} code - Numeric code (see `MicdropErrorCode`).
   * @param {string} message - Human-readable reason.
   */
  constructor(code, message) {
    super(message);
    // Without this the inherited name is "Error", so logs and stack traces
    // cannot tell a MicdropError from any other error.
    this.name = "MicdropError";
    this.code = code;
  }
};
|
|
186
|
+
/**
 * Close a socket in response to an error, then terminate it.
 *
 * A `MicdropError` closes the socket with its own code and message. Any
 * other error is printed with `console.error` and the socket is closed with
 * code 1011.
 *
 * @param {object} socket - Socket exposing `close` and `terminate`.
 * @param {unknown} error - The error that ended the session.
 */
function handleError(socket, error) {
  let closeArgs;
  if (error instanceof MicdropError) {
    closeArgs = [error.code, error.message];
  } else {
    console.error(error);
    closeArgs = [1011];
  }
  socket.close(...closeArgs);
  socket.terminate();
}
|
|
195
|
+
|
|
196
|
+
// src/Logger.ts
|
|
197
|
+
/**
 * Console logger that prefixes each line with `[<name> <uptime>]`, where
 * uptime is `process.uptime()` in seconds with three decimals.
 */
var Logger = class {
  /** @param {string} name - Label shown in the prefix. */
  constructor(name) {
    this.name = name;
  }

  /** Print the prefix followed by the given values. */
  log(...message) {
    const elapsed = process.uptime().toFixed(3);
    const prefix = `[${this.name} ${elapsed}]`;
    const line = [prefix, ...message];
    console.log(...line);
  }
};
|
|
206
|
+
|
|
207
|
+
// src/MicdropServer.ts
|
|
208
|
+
var import_stream3 = require("stream");
|
|
45
209
|
|
|
46
210
|
// src/types.ts
|
|
47
|
-
var
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
return
|
|
52
|
-
})(
|
|
53
|
-
var
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
})(CallServerCommands || {});
|
|
211
|
+
/**
 * Command names the client sends to the server; `MicdropServer` compares
 * incoming short messages against these strings. Each key maps to itself.
 */
var MicdropClientCommands = /* @__PURE__ */ (() => {
  const commands = {};
  for (const name of ["StartSpeaking", "StopSpeaking", "Mute"]) {
    commands[name] = name;
  }
  return commands;
})();
|
|
217
|
+
/**
 * Command names the server sends to the client over the socket. Each key
 * maps to itself.
 */
var MicdropServerCommands = /* @__PURE__ */ (() => {
  const names = [
    "Message",
    "CancelLastAssistantMessage",
    "CancelLastUserMessage",
    "SkipAnswer",
    "EndCall"
  ];
  const commands = {};
  for (const name of names) {
    commands[name] = name;
  }
  return commands;
})();
|
|
62
225
|
|
|
63
|
-
// src/
|
|
64
|
-
var
|
|
226
|
+
// src/MicdropServer.ts
|
|
227
|
+
/**
 * Drives one call over a socket by wiring together a speech-to-text engine,
 * an agent and a text-to-speech engine taken from `config`.
 *
 * Flow visible in this class:
 *  - the client sends short command messages ("StartSpeaking",
 *    "StopSpeaking", "Mute") and longer audio chunks;
 *  - audio chunks are written to a stream handed to `config.stt.transcribe`;
 *  - `config.stt` emits "Transcript"; the text is added to the agent's
 *    conversation and, once the user has stopped speaking, the agent answers;
 *  - the answer text stream goes through `config.tts.speak` and the resulting
 *    audio chunks are sent to the client;
 *  - agent events are forwarded to the client as text commands.
 *
 * An optional `logger` property (an object with `log`) enables logging.
 */
var MicdropServer = class {
  /**
   * @param {object} socket - Socket exposing `on("close" | "message")` and `send`.
   * @param {object} config - `{ agent, stt, tts, firstMessage?,
   *   generateFirstMessage?, onEnd? }`.
   */
  constructor(socket, config) {
    this.socket = null;
    this.config = null;
    this.startTime = Date.now();

    // Tear everything down once and report the conversation to `onEnd`.
    this.onClose = () => {
      if (!this.config) return;
      this.log("Connection closed");
      const duration = Math.round((Date.now() - this.startTime) / 1e3);
      this.config.agent.destroy();
      this.config.stt.destroy();
      this.config.tts.destroy();
      this.config.onEnd?.({
        // Remove system message
        conversation: this.config.agent.conversation.slice(1),
        duration
      });
      this.socket = null;
      this.config = null;
    };

    // Dispatch one incoming socket message. The listener is async, so every
    // failure is caught here; otherwise it would surface as an unhandled
    // promise rejection.
    this.onMessage = async (message) => {
      try {
        if (message.byteLength === 0) return;
        if (!Buffer.isBuffer(message)) {
          this.log("Message is not a buffer");
          return;
        }
        // Messages shorter than 15 bytes are treated as commands (the longest
        // command, "StartSpeaking", is 13 bytes); anything else is audio.
        if (message.byteLength < 15) {
          const cmd = message.toString();
          this.log(`Command: ${cmd}`);
          if (cmd === "StartSpeaking") {
            await this.onStartSpeaking();
          } else if (cmd === "Mute") {
            await this.onMute();
          } else if (cmd === "StopSpeaking") {
            await this.onStopSpeaking();
          }
        } else if (this.currentUserStream) {
          this.log(`Received chunk (${message.byteLength} bytes)`);
          this.currentUserStream.write(message);
        }
      } catch (error) {
        console.error("[MicdropServer]", error);
      }
    };

    // Record a transcript; answer right away if the user already stopped
    // speaking (no open user stream).
    this.onTranscript = async (transcript) => {
      if (!this.config) return;
      try {
        this.log(`User transcript: "${transcript}"`);
        this.config.agent.addUserMessage(transcript);
        if (!this.currentUserStream) {
          this.log("User stopped speaking, answering");
          this.answer();
        }
      } catch (error) {
        console.error("[MicdropServer]", error);
      }
    };

    this.socket = socket;
    this.config = config;
    this.log(`Call started`);

    this.config.stt.on("Transcript", this.onTranscript);

    // Forward agent events to the client.
    this.config.agent.on(
      "Message",
      (message) => this.socket?.send(`Message ${JSON.stringify(message)}`)
    );
    const forwardedCommands = [
      "CancelLastUserMessage",
      "CancelLastAssistantMessage",
      "SkipAnswer",
      "EndCall"
    ];
    for (const command of forwardedCommands) {
      this.config.agent.on(command, () => this.socket?.send(command));
    }

    this.sendFirstMessage();
    socket.on("close", this.onClose);
    socket.on("message", this.onMessage);
  }

  /** Forward to `this.logger.log` when a logger has been attached. */
  log(...message) {
    this.logger?.log(...message);
  }

  /** Cancel text-to-speech and the agent. */
  cancel() {
    this.config?.tts.cancel();
    this.config?.agent.cancel();
  }

  /** "Mute" command: close the user audio stream and cancel the answer. */
  async onMute() {
    this.currentUserStream?.end();
    this.currentUserStream = void 0;
    this.cancel();
  }

  /**
   * "StartSpeaking" command: open a fresh user audio stream, hand it to the
   * speech-to-text engine and cancel the answer in progress.
   */
  async onStartSpeaking() {
    if (!this.config) return;
    this.currentUserStream?.end();
    this.currentUserStream = new import_stream3.PassThrough();
    this.config.stt.transcribe(this.currentUserStream);
    this.cancel();
  }

  /**
   * "StopSpeaking" command: close the user audio stream and answer at once
   * when the last conversation entry is already a user message.
   */
  async onStopSpeaking() {
    this.currentUserStream?.end();
    this.currentUserStream = void 0;
    const conversation = this.config?.agent.conversation;
    // `?.` guards an empty conversation, where the last entry is undefined.
    const lastMessage = conversation?.[conversation.length - 1];
    if (lastMessage?.role === "user") {
      this.log(
        "User stopped speaking and a transcript already exists, answering"
      );
      this.answer();
    }
  }

  /**
   * Start the call: speak `config.firstMessage` if set, otherwise ask the
   * agent for an answer when `config.generateFirstMessage` is truthy.
   * On failure the error is printed and "SkipAnswer" is sent to the client.
   */
  async sendFirstMessage() {
    if (!this.config) return;
    try {
      if (this.config.firstMessage) {
        this.config.agent.addAssistantMessage(this.config.firstMessage);
        await this.speak(this.config.firstMessage);
      } else if (this.config.generateFirstMessage) {
        await this.answer();
      }
    } catch (error) {
      console.error("[MicdropServer]", error);
      this.socket?.send("SkipAnswer");
    }
  }

  /**
   * Cancel the answer in progress, request a new one from the agent and
   * speak its text stream. On failure the error is printed and "SkipAnswer"
   * is sent to the client.
   */
  async answer() {
    if (!this.config) return;
    this.cancel();
    try {
      const { stream } = this.config.agent.answer();
      await this.speak(stream);
    } catch (error) {
      console.error("[MicdropServer]", error);
      this.socket?.send("SkipAnswer");
    }
  }

  /**
   * Run text-to-speech and send to client.
   *
   * @param {string | Readable} message - Text, or a stream of text.
   */
  async speak(message) {
    if (!this.socket || !this.config) return;
    let textStream;
    if (typeof message === "string") {
      // Wrap plain text in an already-ended stream.
      textStream = new import_stream3.PassThrough();
      textStream.end(message);
    } else {
      textStream = message;
    }
    const audio = this.config.tts.speak(textStream);
    await this.sendAudio(audio);
  }

  /**
   * Send each chunk of an audio stream to the client. Only attaches the
   * listeners; it does not wait for the stream to end.
   *
   * @param {Readable} audio - Audio stream returned by `config.tts.speak`.
   */
  async sendAudio(audio) {
    if (!this.socket) return;
    if (!audio.readable) {
      this.log("Non readable audio, skipping", audio);
      return;
    }
    audio.on("data", (chunk) => {
      this.log(`Send audio chunk (${chunk.byteLength} bytes)`);
      this.socket?.send(chunk);
    });
    audio.on("error", (error) => {
      this.log("Error in audio stream", error);
    });
    audio.on("end", () => {
      this.log("Audio stream ended");
    });
  }
};
|
|
395
|
+
|
|
396
|
+
// src/stt/STT.ts
|
|
397
|
+
var import_eventemitter32 = __toESM(require("eventemitter3"));
|
|
398
|
+
// File extension for each audio mime type that `STT.detectMimeType` can return.
var MIME_TYPE_TO_EXTENSION = Object.fromEntries(
  [
    ["wav", "wav"],
    ["ogg", "ogg"],
    ["mpeg", "mp3"],
    ["webm", "webm"],
    ["mp4", "mp4"],
    ["flac", "flac"]
  ].map(([subtype, extension]) => [`audio/${subtype}`, extension])
);
|
|
406
|
+
/**
 * Base class for speech-to-text engines. It only records the mime type of
 * the incoming audio (sniffed from the first chunk); `FileSTT` in this file
 * builds on it and emits "Transcript".
 */
var STT = class extends import_eventemitter32.default {
  // Set stream of audio to transcribe
  transcribe(audioStream) {
    const rememberMimeType = (chunk) => {
      this.mimeType = this.detectMimeType(chunk);
    };
    audioStream.once("data", rememberMimeType);
  }

  /** Forward to `this.logger.log` when a logger has been attached. */
  log(...message) {
    this.logger?.log(...message);
  }

  /** Remove every listener. */
  destroy() {
    this.log("Destroyed");
    this.removeAllListeners();
  }

  /** File extension for the detected mime type, or "bin" when unknown. */
  get extension() {
    if (!this.mimeType) return "bin";
    return MIME_TYPE_TO_EXTENSION[this.mimeType] || "bin";
  }

  /**
   * Guess the audio mime type from the leading bytes of a chunk.
   *
   * @param {Buffer|ArrayBuffer} chunk - First chunk of the audio stream.
   * @returns {string} Detected mime type; "audio/wav" when nothing matches.
   * @throws {Error} When the chunk is missing or empty.
   */
  detectMimeType(chunk) {
    if (!chunk || chunk.byteLength === 0) {
      throw new Error("Unable to detect mime type (empty chunk)");
    }
    const bytes = new Uint8Array(chunk);
    const hasBytesAt = (offset, expected) =>
      expected.every((value, index) => bytes[offset + index] === value);

    // 1A 45 DF A3
    if (hasBytesAt(0, [26, 69, 223, 163])) return "audio/webm";
    // "OggS"
    if (hasBytesAt(0, [79, 103, 103, 83])) return "audio/ogg";
    // "RIFF" at 0 and "WAVE" at 8
    if (hasBytesAt(0, [82, 73, 70, 70]) && hasBytesAt(8, [87, 65, 86, 69])) {
      return "audio/wav";
    }
    // "ID3"
    if (hasBytesAt(0, [73, 68, 51])) return "audio/mpeg";
    // "ftyp" at 4
    if (hasBytesAt(4, [102, 116, 121, 112])) return "audio/mp4";
    // "fLaC"
    if (hasBytesAt(0, [102, 76, 97, 67])) return "audio/flac";

    this.log("Unable to detect mime type, using default", chunk);
    return "audio/wav";
  }
};
|
|
450
|
+
|
|
451
|
+
// src/stt/FileSTT.ts
|
|
452
|
+
/**
 * Speech-to-text base for engines that transcribe a whole file: buffers the
 * audio stream, wraps it in a `File` once the stream ends and hands it to
 * `transcribeFile`, which this class does not define — subclasses supply it.
 * The result is emitted as "Transcript".
 */
var FileSTT = class extends STT {
  /**
   * @param {Readable} audioStream - Audio to buffer and transcribe.
   */
  transcribe(audioStream) {
    super.transcribe(audioStream);
    this.log("Converting stream to file...");
    const chunks = [];
    audioStream.on("data", (chunk) => {
      chunks.push(chunk);
    });
    audioStream.on("end", async () => {
      // Nothing was recorded: no file, no transcript.
      if (chunks.length === 0) return;
      // This listener is async, so a failure in `transcribeFile` would be an
      // unhandled promise rejection; report it instead.
      try {
        const audioBuffer = Buffer.concat(chunks);
        const file = new File([audioBuffer], `audio.${this.extension}`, {
          type: this.mimeType
        });
        this.log("Transcribing file...");
        const transcript = await this.transcribeFile(file);
        this.emit("Transcript", transcript);
      } catch (error) {
        console.error("[FileSTT]", error);
      }
    });
  }
};
|
|
269
472
|
|
|
270
|
-
// src/
|
|
271
|
-
var
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
constructor(code, message) {
|
|
279
|
-
super(message);
|
|
280
|
-
this.code = code;
|
|
473
|
+
// src/stt/MockSTT.ts
|
|
474
|
+
/**
 * Speech-to-text stub: ignores the audio and returns the canned text
 * "User Message <n>", where <n> counts up from 0.
 */
var MockSTT = class extends FileSTT {
  constructor(...args) {
    super(...args);
    // Number of the next canned transcript.
    this.i = 0;
  }

  /** @returns {Promise<string>} The next canned transcript. */
  async transcribeFile(file) {
    const turn = this.i++;
    return `User Message ${turn}`;
  }
};
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
483
|
+
|
|
484
|
+
// src/tts/MockTTS.ts
|
|
485
|
+
var fs = __toESM(require("fs"));
|
|
486
|
+
var import_stream4 = require("stream");
|
|
487
|
+
|
|
488
|
+
// src/tts/TTS.ts
|
|
489
|
+
/**
 * Base class for text-to-speech engines. `destroy()` calls `this.cancel()`,
 * which this class does not define, so concrete engines have to provide it.
 */
var TTS = class {
  /** Forward to `this.logger.log` when a logger has been attached. */
  log(...message) {
    const sink = this.logger;
    if (sink === undefined || sink === null) return;
    sink.log(...message);
  }

  /** Log, then cancel whatever is being spoken. */
  destroy() {
    this.log("Destroyed");
    this.cancel();
  }
};
|
|
498
|
+
|
|
499
|
+
// src/tts/MockTTS.ts
|
|
500
|
+
/**
 * Text-to-speech stub: ignores the text and plays back a fixed list of audio
 * files, one chunk per file, 200 ms apart.
 */
var MockTTS = class extends TTS {
  /**
   * @param {string[]} audioFilePaths - Files whose contents are emitted, in order.
   */
  constructor(audioFilePaths) {
    super();
    this.audioFilePaths = audioFilePaths;
  }

  /**
   * @param {Readable} textStream - Text to "speak"; playback starts on its
   *   first "data" event, the content itself is not read.
   * @returns {PassThrough} Audio stream that receives the file contents.
   */
  speak(textStream) {
    const audioStream = new import_stream4.PassThrough();
    textStream.once("data", async () => {
      // This listener is async: a read failure would be an unhandled
      // rejection and would leave `audioStream` open forever. Report it and
      // always close the stream.
      try {
        for (const filePath of this.audioFilePaths) {
          await new Promise((resolve) => setTimeout(resolve, 200));
          const audioBuffer = fs.readFileSync(filePath);
          this.log(`Loaded chunk (${audioBuffer.length} bytes)`);
          audioStream.write(audioBuffer);
        }
      } catch (error) {
        console.error("[MockTTS]", error);
      } finally {
        audioStream.end();
      }
    });
    return audioStream;
  }

  /** No-op: playback that already started is not interrupted. */
  cancel() {
  }
};
|
|
292
521
|
|
|
293
522
|
// src/waitForParams.ts
|
|
294
523
|
async function waitForParams(socket, validate) {
|
|
295
524
|
return new Promise((resolve, reject) => {
|
|
296
525
|
const timeout = setTimeout(() => {
|
|
297
|
-
reject(new
|
|
526
|
+
reject(new MicdropError(4400 /* BadRequest */, "Missing params"));
|
|
298
527
|
}, 3e3);
|
|
299
528
|
const onParams = (payload) => {
|
|
300
529
|
clearTimeout(timeout);
|
|
@@ -303,7 +532,7 @@ async function waitForParams(socket, validate) {
|
|
|
303
532
|
const params = validate(JSON.parse(payload));
|
|
304
533
|
resolve(params);
|
|
305
534
|
} catch (error) {
|
|
306
|
-
reject(new
|
|
535
|
+
reject(new MicdropError(4400 /* BadRequest */, "Invalid params"));
|
|
307
536
|
}
|
|
308
537
|
};
|
|
309
538
|
socket.on("message", onParams);
|
|
@@ -311,11 +540,22 @@ async function waitForParams(socket, validate) {
|
|
|
311
540
|
}
|
|
312
541
|
// Annotate the CommonJS export names for ESM import in node:
|
|
313
542
|
0 && (module.exports = {
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
543
|
+
Agent,
|
|
544
|
+
FileSTT,
|
|
545
|
+
Logger,
|
|
546
|
+
MicdropClientCommands,
|
|
547
|
+
MicdropError,
|
|
548
|
+
MicdropErrorCode,
|
|
549
|
+
MicdropServer,
|
|
550
|
+
MicdropServerCommands,
|
|
551
|
+
MockAgent,
|
|
552
|
+
MockSTT,
|
|
553
|
+
MockTTS,
|
|
554
|
+
STT,
|
|
555
|
+
TTS,
|
|
556
|
+
convertPCMToOpus,
|
|
557
|
+
convertToOpus,
|
|
558
|
+
convertToPCM,
|
|
319
559
|
handleError,
|
|
320
560
|
waitForParams
|
|
321
561
|
});
|