@micdrop/server 1.7.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -232
- package/dist/index.d.mts +150 -53
- package/dist/index.d.ts +150 -53
- package/dist/index.js +460 -218
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +444 -213
- package/dist/index.mjs.map +1 -1
- package/package.json +7 -3
package/dist/index.js
CHANGED
|
@@ -30,269 +30,500 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
30
30
|
// src/index.ts
|
|
31
31
|
var index_exports = {};
|
|
32
32
|
__export(index_exports, {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
33
|
+
Agent: () => Agent,
|
|
34
|
+
FileSTT: () => FileSTT,
|
|
35
|
+
Logger: () => Logger,
|
|
36
|
+
MicdropClientCommands: () => MicdropClientCommands,
|
|
37
|
+
MicdropError: () => MicdropError,
|
|
38
|
+
MicdropErrorCode: () => MicdropErrorCode,
|
|
39
|
+
MicdropServer: () => MicdropServer,
|
|
40
|
+
MicdropServerCommands: () => MicdropServerCommands,
|
|
41
|
+
MockAgent: () => MockAgent,
|
|
42
|
+
MockSTT: () => MockSTT,
|
|
43
|
+
MockTTS: () => MockTTS,
|
|
44
|
+
STT: () => STT,
|
|
45
|
+
TTS: () => TTS,
|
|
46
|
+
convertPCMToOpus: () => convertPCMToOpus,
|
|
47
|
+
convertToOpus: () => convertToOpus,
|
|
48
|
+
convertToPCM: () => convertToPCM,
|
|
38
49
|
handleError: () => handleError,
|
|
39
50
|
waitForParams: () => waitForParams
|
|
40
51
|
});
|
|
41
52
|
module.exports = __toCommonJS(index_exports);
|
|
42
53
|
|
|
43
|
-
// src/
|
|
44
|
-
var
|
|
54
|
+
// src/agent/Agent.ts
|
|
55
|
+
var import_eventemitter3 = __toESM(require("eventemitter3"));
|
|
56
|
+
/**
 * Base class for conversational agents.
 *
 * Keeps the conversation history (seeded with the system prompt) and emits an
 * event for each change: "Message", "EndCall", "CancelLastUserMessage",
 * "CancelLastAssistantMessage" and "SkipAnswer".
 *
 * `destroy()` calls `this.cancel()`, which this class does not define, so
 * subclasses are expected to provide it.
 */
var Agent = class extends import_eventemitter3.default {
  /**
   * @param options Agent options; `options.systemPrompt` becomes the content
   *   of the first (system) conversation entry.
   */
  constructor(options) {
    super();
    this.options = options;
    const systemMessage = { role: "system", content: options.systemPrompt };
    this.conversation = [systemMessage];
  }

  /** Appends a "user" message to the conversation. */
  addUserMessage(text) {
    this.addMessage("user", text);
  }

  /** Appends an "assistant" message to the conversation. */
  addAssistantMessage(text) {
    this.addMessage("assistant", text);
  }

  /**
   * Appends a message with the given role and emits "Message" with it.
   * @param role Message role.
   * @param text Message content.
   */
  addMessage(role, text) {
    this.log(`Adding ${role} message to conversation: ${text}`);
    const entry = { role, content: text };
    this.conversation.push(entry);
    this.emit("Message", entry);
  }

  /** Emits "EndCall". */
  endCall() {
    this.log("Ending call");
    this.emit("EndCall");
  }

  /**
   * Removes the last conversation entry when it is a user message and emits
   * "CancelLastUserMessage"; does nothing otherwise.
   */
  cancelLastUserMessage() {
    this.log("Cancelling last user message");
    const tail = this.conversation[this.conversation.length - 1];
    if (tail?.role === "user") {
      this.conversation.pop();
      this.emit("CancelLastUserMessage");
    }
  }

  /**
   * Removes the last conversation entry when it is an assistant message and
   * emits "CancelLastAssistantMessage"; does nothing otherwise.
   */
  cancelLastAssistantMessage() {
    this.log("Cancelling last assistant message");
    const tail = this.conversation[this.conversation.length - 1];
    if (tail?.role === "assistant") {
      this.conversation.pop();
      this.emit("CancelLastAssistantMessage");
    }
  }

  /** Emits "SkipAnswer". */
  skipAnswer() {
    this.log("Skipping answer");
    this.emit("SkipAnswer");
  }

  /**
   * Creates a promise together with its settle functions.
   * @returns An object exposing `promise`, `resolve` and `reject`.
   */
  createTextPromise() {
    let resolve;
    let reject;
    const promise = new Promise((onResolve, onReject) => {
      resolve = onResolve;
      reject = onReject;
    });
    return { resolve, reject, promise };
  }

  /** Forwards the arguments to `this.logger.log` when a logger is set. */
  log(...message) {
    const sink = this.logger;
    if (sink == null) return;
    sink.log(...message);
  }

  /** Removes every event listener, then calls `this.cancel()`. */
  destroy() {
    this.log("Destroyed");
    this.removeAllListeners();
    this.cancel();
  }
};
|
|
116
|
+
|
|
117
|
+
// src/agent/MockAgent.ts
|
|
118
|
+
var import_stream = require("stream");
|
|
119
|
+
/**
 * Agent stand-in that answers with a numbered canned message
 * ("Assistant Message 0", "Assistant Message 1", ...).
 */
var MockAgent = class extends Agent {
  constructor() {
    super({ systemPrompt: "" });
    // Counter used to number the generated answers.
    this.i = 0;
  }

  /**
   * Produces the next canned answer.
   * @returns `{ message, stream }`: `message` is a promise already resolved
   *   with the answer text, `stream` is an ended PassThrough containing it.
   */
  answer() {
    const stream = new import_stream.PassThrough();
    const { promise, resolve } = this.createTextPromise();
    const reply = `Assistant Message ${this.i++}`;
    this.addAssistantMessage(reply);
    // Write the reply and close the stream in one call.
    stream.end(reply);
    resolve(reply);
    return { message: promise, stream };
  }

  /** Nothing to cancel: answers are produced synchronously. */
  cancel() {
  }
};
|
|
137
|
+
|
|
138
|
+
// src/audio-convert.ts
|
|
139
|
+
var import_ffmpeg = __toESM(require("@ffmpeg-installer/ffmpeg"));
|
|
140
|
+
var import_fluent_ffmpeg = __toESM(require("fluent-ffmpeg"));
|
|
141
|
+
var import_stream2 = require("stream");
|
|
142
|
+
import_fluent_ffmpeg.default.setFfmpegPath(import_ffmpeg.default.path);
|
|
143
|
+
/**
 * Pipes an audio stream through ffmpeg to get mono, signed little-endian PCM.
 *
 * Conversion errors are only logged with `console.error`.
 *
 * @param audioStream Input audio stream handed to ffmpeg.
 * @param sampleRate Output sample rate passed to `audioFrequency` (default 16000).
 * @param bitDepth Bits per sample used to build the codec/format names (default 16).
 * @returns A PassThrough stream receiving the converted audio.
 */
function convertToPCM(audioStream, sampleRate = 16e3, bitDepth = 16) {
  const output = new import_stream2.PassThrough();
  const sampleFormat = `s${bitDepth}le`;
  const reportFailure = (error) => {
    console.error("Error converting audio stream:", error.message);
  };
  const command = (0, import_fluent_ffmpeg.default)(audioStream)
    .audioChannels(1)
    .audioFrequency(sampleRate)
    .audioCodec(`pcm_${sampleFormat}`)
    .format(sampleFormat)
    .on("error", reportFailure);
  command.pipe(output);
  return output;
}
|
|
150
|
+
/**
 * Converts an audio stream to Opus in a WebM container via `ffmpegToOpus`.
 *
 * @param audioStream Input audio stream handed to ffmpeg.
 * @param sampleRate Sample rate forwarded to `ffmpegToOpus` (default 16000).
 * @returns A PassThrough stream receiving the converted audio.
 */
function convertToOpus(audioStream, sampleRate = 16e3) {
  const output = new import_stream2.PassThrough();
  const command = (0, import_fluent_ffmpeg.default)(audioStream);
  const opusCommand = ffmpegToOpus(command, sampleRate);
  opusCommand.pipe(output);
  return output;
}
|
|
155
|
+
/**
 * Converts a raw PCM stream (signed 16-bit little-endian, mono) to Opus in a
 * WebM container via `ffmpegToOpus`.
 *
 * Raw PCM carries no header, so ffmpeg has to be told the input sample rate.
 * It used to be hard-coded to 16000 Hz; it is now configurable while keeping
 * 16000 as the default, so existing callers behave exactly as before.
 *
 * @param audioStream Raw PCM input stream.
 * @param sampleRate Output sample rate forwarded to `ffmpegToOpus` (default 16000).
 * @param inputSampleRate Sample rate of the incoming PCM data (default 16000).
 * @returns A PassThrough stream receiving the converted audio.
 */
function convertPCMToOpus(audioStream, sampleRate = 16e3, inputSampleRate = 16e3) {
  const webmStream = new import_stream2.PassThrough();
  ffmpegToOpus((0, import_fluent_ffmpeg.default)(audioStream), sampleRate)
    .inputFormat("s16le")
    .inputOptions(["-f s16le", `-ar ${inputSampleRate}`, "-ac 1"])
    .pipe(webmStream);
  return webmStream;
}
|
|
160
|
+
/**
 * Configures an ffmpeg command to output mono Opus audio in a WebM container.
 *
 * Conversion errors are only logged with `console.error`.
 *
 * @param ffmpegCommand Command object to configure.
 * @param sampleRate Output sample rate (default 16000).
 * @returns The value returned by the last chained call on the command.
 */
function ffmpegToOpus(ffmpegCommand, sampleRate = 16e3) {
  const outputOptions = [
    "-application audio",
    `-ac 1`,
    `-ar ${sampleRate}`,
    `-b:a 64k`,
    `-f webm`,
    `-map_metadata -1`
  ];
  const reportFailure = (error) => {
    console.error("Error converting to Opus: ", error.message);
  };
  const configured = ffmpegCommand
    .audioChannels(1)
    .audioFrequency(sampleRate)
    .audioCodec("libopus")
    .format("webm")
    .outputOptions(outputOptions);
  return configured.on("error", reportFailure);
}
|
|
172
|
+
|
|
173
|
+
// src/errors.ts
|
|
174
|
+
/**
 * Numeric error codes with a reverse (code -> name) mapping.
 * They are used as WebSocket close codes by `handleError`.
 */
var MicdropErrorCode = /* @__PURE__ */ ((codes) => {
  const entries = [
    ["BadRequest", 4400],
    ["Unauthorized", 4401],
    ["NotFound", 4404]
  ];
  for (const [label, value] of entries) {
    codes[label] = value;
    codes[value] = label;
  }
  return codes;
})(MicdropErrorCode || {});
|
|
180
|
+
/**
 * Error carrying a numeric code in addition to its message.
 * `handleError` uses `code` and `message` as the socket close code and reason.
 */
var MicdropError = class extends Error {
  /**
   * @param code Numeric error code (see `MicdropErrorCode`).
   * @param message Human-readable description.
   */
  constructor(code, message) {
    super(message);
    // Without this, logs and toString() show a generic "Error".
    this.name = "MicdropError";
    this.code = code;
  }
};
|
|
186
|
+
/**
 * Closes a socket in response to an error, then terminates it.
 *
 * A `MicdropError` closes the socket with its own code and message. Any other
 * error is logged with `console.error` and the socket is closed with 1011.
 *
 * @param socket Socket exposing `close` and `terminate`.
 * @param error The error that occurred.
 */
function handleError(socket, error) {
  const isKnown = error instanceof MicdropError;
  if (isKnown) {
    const { code, message } = error;
    socket.close(code, message);
  } else {
    console.error(error);
    socket.close(1011);
  }
  socket.terminate();
}
|
|
195
|
+
|
|
196
|
+
// src/Logger.ts
|
|
197
|
+
/**
 * Console logger that prefixes each line with its name and the process
 * uptime in seconds (three decimals), e.g. `[name 12.345]`.
 */
var Logger = class {
  /** @param name Label shown in the prefix of each log line. */
  constructor(name) {
    this.name = name;
  }

  /** Logs the arguments to the console after the `[name uptime]` prefix. */
  log(...message) {
    const uptime = process.uptime().toFixed(3);
    const prefix = `[${this.name} ${uptime}]`;
    console.log(prefix, ...message);
  }
};
|
|
206
|
+
|
|
207
|
+
// src/MicdropServer.ts
|
|
208
|
+
var import_stream3 = require("stream");
|
|
45
209
|
|
|
46
210
|
// src/types.ts
|
|
47
|
-
var
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
return
|
|
52
|
-
})(
|
|
53
|
-
var
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
})(CallServerCommands || {});
|
|
211
|
+
/**
 * Command names the server compares incoming short messages against
 * (see `MicdropServer.onMessage`). Each value equals its key.
 */
var MicdropClientCommands = /* @__PURE__ */ ((commands) => {
  for (const name of ["StartSpeaking", "StopSpeaking", "Mute"]) {
    commands[name] = name;
  }
  return commands;
})(MicdropClientCommands || {});
|
|
217
|
+
/**
 * Command names the server sends over the socket
 * (see the `MicdropServer` constructor). Each value equals its key.
 */
var MicdropServerCommands = /* @__PURE__ */ ((commands) => {
  const names = [
    "Message",
    "CancelLastAssistantMessage",
    "CancelLastUserMessage",
    "SkipAnswer",
    "EndCall"
  ];
  for (const name of names) {
    commands[name] = name;
  }
  return commands;
})(MicdropServerCommands || {});
|
|
62
225
|
|
|
63
|
-
// src/
|
|
64
|
-
var
|
|
226
|
+
// src/MicdropServer.ts
|
|
227
|
+
/**
 * Drives one call over a socket: incoming audio is streamed to the
 * speech-to-text component, transcripts are appended to the agent's
 * conversation, and the agent's answers are turned into audio by the
 * text-to-speech component and sent back on the socket.
 */
var MicdropServer = class {
  /**
   * @param socket Socket exposing `on` and `send`.
   * @param config Call configuration. Reads `agent`, `stt`, `tts` and the
   *   optional `firstMessage`, `generateFirstMessage` and `onEnd`.
   */
  constructor(socket, config) {
    this.socket = null;
    this.config = null;
    this.startTime = Date.now();

    // Socket "close" handler: destroys the components, reports the call
    // summary through `onEnd`, then drops the socket/config references.
    this.onClose = () => {
      if (!this.config) return;
      this.log("Connection closed");
      const duration = Math.round((Date.now() - this.startTime) / 1e3);
      this.config.agent.destroy();
      this.config.stt.destroy();
      this.config.tts.destroy();
      this.config.onEnd?.({
        conversation: this.config.agent.conversation.slice(1),
        // Remove system message
        duration
      });
      this.socket = null;
      this.config = null;
    };

    // Socket "message" handler. Buffers shorter than 15 bytes are read as
    // text commands; anything else is written to the current user audio
    // stream, if one is open.
    this.onMessage = async (message) => {
      if (message.byteLength === 0) return;
      if (!Buffer.isBuffer(message)) {
        this.log("Message is not a buffer");
        return;
      }
      try {
        if (message.byteLength < 15) {
          const cmd = message.toString();
          this.log(`Command: ${cmd}`);
          if (cmd === "StartSpeaking" /* StartSpeaking */) {
            await this.onStartSpeaking();
          } else if (cmd === "Mute" /* Mute */) {
            await this.onMute();
          } else if (cmd === "StopSpeaking" /* StopSpeaking */) {
            await this.onStopSpeaking();
          }
        } else if (this.currentUserStream) {
          this.log(`Received chunk (${message.byteLength} bytes)`);
          this.currentUserStream.write(message);
        }
      } catch (error) {
        // This handler is invoked by the socket's event emitter, which does
        // not await it: a rejection here would be unhandled. Log it instead.
        console.error("[MicdropServer]", error);
      }
    };

    // STT "Transcript" handler: records the user message and answers right
    // away when the user is no longer speaking.
    this.onTranscript = async (transcript) => {
      if (!this.config) return;
      this.log(`User transcript: "${transcript}"`);
      this.config.agent.addUserMessage(transcript);
      if (!this.currentUserStream) {
        this.log("User stopped speaking, answering");
        this.answer();
      }
    };

    this.socket = socket;
    this.config = config;
    this.log(`Call started`);
    this.config.stt.on("Transcript", this.onTranscript);

    // Relay agent events to the client as server commands.
    this.config.agent.on(
      "Message",
      (message) => this.socket?.send(
        `${"Message" /* Message */} ${JSON.stringify(message)}`
      )
    );
    this.config.agent.on(
      "CancelLastUserMessage",
      () => this.socket?.send("CancelLastUserMessage" /* CancelLastUserMessage */)
    );
    this.config.agent.on(
      "CancelLastAssistantMessage",
      () => this.socket?.send("CancelLastAssistantMessage" /* CancelLastAssistantMessage */)
    );
    this.config.agent.on(
      "SkipAnswer",
      () => this.socket?.send("SkipAnswer" /* SkipAnswer */)
    );
    this.config.agent.on(
      "EndCall",
      () => this.socket?.send("EndCall" /* EndCall */)
    );

    this.sendFirstMessage();
    socket.on("close", this.onClose);
    socket.on("message", this.onMessage);
  }

  /** Forwards the arguments to `this.logger.log` when a logger is set. */
  log(...message) {
    this.logger?.log(...message);
  }

  /** Cancels the text-to-speech and the agent. */
  cancel() {
    this.config?.tts.cancel();
    this.config?.agent.cancel();
  }

  /** "Mute" command: closes the user audio stream and cancels TTS/agent. */
  async onMute() {
    this.currentUserStream?.end();
    this.currentUserStream = void 0;
    this.cancel();
  }

  /**
   * "StartSpeaking" command: opens a fresh user audio stream, hands it to the
   * speech-to-text component and cancels TTS/agent.
   */
  async onStartSpeaking() {
    if (!this.config) return;
    this.currentUserStream?.end();
    this.currentUserStream = new import_stream3.PassThrough();
    this.config.stt.transcribe(this.currentUserStream);
    this.cancel();
  }

  /**
   * "StopSpeaking" command: closes the user audio stream and answers
   * immediately when the last conversation entry is already a user message.
   */
  async onStopSpeaking() {
    this.currentUserStream?.end();
    this.currentUserStream = void 0;
    const conversation = this.config?.agent.conversation;
    // Optional chaining on the last entry guards against an empty conversation.
    if (conversation && conversation[conversation.length - 1]?.role === "user") {
      this.log(
        "User stopped speaking and a transcript already exists, answering"
      );
      this.answer();
    }
  }

  /**
   * Speaks `config.firstMessage` when set; otherwise asks the agent for an
   * answer when `config.generateFirstMessage` is set. On failure the error is
   * logged and "SkipAnswer" is sent to the client.
   */
  async sendFirstMessage() {
    if (!this.config) return;
    try {
      if (this.config.firstMessage) {
        this.config.agent.addAssistantMessage(this.config.firstMessage);
        await this.speak(this.config.firstMessage);
      } else if (this.config.generateFirstMessage) {
        await this.answer();
      }
    } catch (error) {
      console.error("[MicdropServer]", error);
      this.socket?.send("SkipAnswer" /* SkipAnswer */);
    }
  }

  /**
   * Cancels TTS/agent, asks the agent for an answer and speaks its text
   * stream. On failure the error is logged and "SkipAnswer" is sent.
   */
  async answer() {
    if (!this.config) return;
    this.cancel();
    try {
      const { stream } = this.config.agent.answer();
      await this.speak(stream);
    } catch (error) {
      console.error("[MicdropServer]", error);
      this.socket?.send("SkipAnswer" /* SkipAnswer */);
    }
  }

  // Run text-to-speech and send to client
  /**
   * @param message Either a string or a text stream. A string is wrapped in
   *   an ended PassThrough stream before being handed to the TTS.
   */
  async speak(message) {
    if (!this.socket || !this.config) return;
    let textStream;
    if (typeof message === "string") {
      const stream = new import_stream3.PassThrough();
      stream.write(message);
      stream.end();
      textStream = stream;
    } else {
      textStream = message;
    }
    const audio = this.config.tts.speak(textStream);
    await this.sendAudio(audio);
  }

  /**
   * Forwards every chunk of an audio stream to the socket. Only attaches the
   * listeners; it does not wait for the stream to end. Stream errors are
   * logged through `log`.
   * @param audio Readable audio stream; skipped when not readable.
   */
  async sendAudio(audio) {
    if (!this.socket) return;
    if (!audio.readable) {
      this.log("Non readable audio, skipping", audio);
      return;
    }
    audio.on("data", (chunk) => {
      this.log(`Send audio chunk (${chunk.byteLength} bytes)`);
      this.socket?.send(chunk);
    });
    audio.on("error", (error) => {
      this.log("Error in audio stream", error);
    });
    audio.on("end", () => {
      this.log("Audio stream ended");
    });
  }
};
|
|
395
|
+
|
|
396
|
+
// src/stt/STT.ts
|
|
397
|
+
var import_eventemitter32 = __toESM(require("eventemitter3"));
|
|
398
|
+
// File extension associated with each mime type `detectMimeType` can return.
var MIME_TYPE_TO_EXTENSION = {
  "audio/wav": "wav",
  "audio/ogg": "ogg",
  "audio/mpeg": "mp3",
  "audio/webm": "webm",
  "audio/mp4": "mp4",
  "audio/flac": "flac"
};

/**
 * Base class for speech-to-text components.
 * Records the mime type of the audio from the first chunk of the stream.
 */
var STT = class extends import_eventemitter32.default {
  // Set stream of audio to transcribe
  transcribe(audioStream) {
    // Only the first chunk is inspected.
    audioStream.once("data", (firstChunk) => {
      this.mimeType = this.detectMimeType(firstChunk);
    });
  }

  /** Forwards the arguments to `this.logger.log` when a logger is set. */
  log(...message) {
    const sink = this.logger;
    if (sink == null) return;
    sink.log(...message);
  }

  /** Removes every event listener. */
  destroy() {
    this.log("Destroyed");
    this.removeAllListeners();
  }

  /** File extension for the detected mime type, or "bin" when unknown. */
  get extension() {
    const known = this.mimeType ? MIME_TYPE_TO_EXTENSION[this.mimeType] : void 0;
    return known || "bin";
  }

  /**
   * Guesses the audio mime type from the leading bytes of a chunk.
   * @param chunk First chunk of audio data.
   * @returns The detected mime type; "audio/wav" when nothing matches.
   * @throws {Error} When the chunk is missing or empty.
   */
  detectMimeType(chunk) {
    if (!chunk || chunk.byteLength === 0) {
      throw new Error("Unable to detect mime type (empty chunk)");
    }
    const bytes = new Uint8Array(chunk);
    // True when `signature` appears in `bytes` starting at `offset`.
    const startsWith = (offset, signature) => signature.every((value, index) => bytes[offset + index] === value);
    if (startsWith(0, [26, 69, 223, 163])) {
      return "audio/webm";
    }
    // "OggS"
    if (startsWith(0, [79, 103, 103, 83])) {
      return "audio/ogg";
    }
    // "RIFF" at offset 0 and "WAVE" at offset 8
    if (startsWith(0, [82, 73, 70, 70]) && startsWith(8, [87, 65, 86, 69])) {
      return "audio/wav";
    }
    // "ID3"
    if (startsWith(0, [73, 68, 51])) {
      return "audio/mpeg";
    }
    // "ftyp" at offset 4
    if (startsWith(4, [102, 116, 121, 112])) {
      return "audio/mp4";
    }
    // "fLaC"
    if (startsWith(0, [102, 76, 97, 67])) {
      return "audio/flac";
    }
    this.log("Unable to detect mime type, using default", chunk);
    return "audio/wav";
  }
};
|
|
450
|
+
|
|
451
|
+
// src/stt/FileSTT.ts
|
|
452
|
+
/**
 * Speech-to-text base class for file-based transcription: the whole audio
 * stream is buffered, wrapped in a `File` and passed to `transcribeFile`
 * (not defined here, so subclasses are expected to provide it). The result is
 * emitted as a "Transcript" event.
 */
var FileSTT = class extends STT {
  /**
   * Buffers the stream and transcribes it once it ends.
   * Nothing is transcribed when the stream ends without data.
   * @param audioStream Audio stream to transcribe.
   */
  transcribe(audioStream) {
    super.transcribe(audioStream);
    this.log("Converting stream to file...");
    const chunks = [];
    audioStream.on("data", (chunk) => {
      chunks.push(chunk);
    });
    audioStream.on("end", async () => {
      if (chunks.length === 0) return;
      try {
        const arrayBuffer = Buffer.concat(chunks);
        const file = new File([arrayBuffer], `audio.${this.extension}`, {
          type: this.mimeType
        });
        this.log("Transcribing file...");
        const transcript = await this.transcribeFile(file);
        this.emit("Transcript", transcript);
      } catch (error) {
        // The stream does not await this listener, so a failed transcription
        // would otherwise surface as an unhandled rejection.
        console.error("[FileSTT]", error);
      }
    });
  }
};
|
|
267
472
|
|
|
268
|
-
// src/
|
|
269
|
-
var
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
constructor(code, message) {
|
|
277
|
-
super(message);
|
|
278
|
-
this.code = code;
|
|
473
|
+
// src/stt/MockSTT.ts
|
|
474
|
+
/**
 * Speech-to-text stand-in that ignores the audio and returns a numbered
 * canned transcript ("User Message 0", "User Message 1", ...).
 */
var MockSTT = class extends FileSTT {
  constructor(...args) {
    super(...args);
    // Counter used to number the generated transcripts.
    this.i = 0;
  }

  /**
   * @param file Ignored.
   * @returns The next canned transcript.
   */
  async transcribeFile(file) {
    const transcript = `User Message ${this.i++}`;
    return transcript;
  }
};
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
483
|
+
|
|
484
|
+
// src/tts/MockTTS.ts
|
|
485
|
+
var fs = __toESM(require("fs"));
|
|
486
|
+
var import_stream4 = require("stream");
|
|
487
|
+
|
|
488
|
+
// src/tts/TTS.ts
|
|
489
|
+
/**
 * Base class for text-to-speech components.
 * `destroy()` calls `this.cancel()`, which this class does not define, so
 * subclasses are expected to provide it.
 */
var TTS = class {
  /** Forwards the arguments to `this.logger.log` when a logger is set. */
  log(...message) {
    const sink = this.logger;
    if (sink == null) return;
    sink.log(...message);
  }

  /** Logs the teardown, then cancels any ongoing work. */
  destroy() {
    this.log("Destroyed");
    this.cancel();
  }
};
|
|
498
|
+
|
|
499
|
+
// src/tts/MockTTS.ts
|
|
500
|
+
/**
 * Text-to-speech stand-in that ignores the text and plays back a fixed list
 * of audio files.
 */
var MockTTS = class extends TTS {
  /** @param audioFilePaths Paths of the audio files to play, in order. */
  constructor(audioFilePaths) {
    super();
    this.audioFilePaths = audioFilePaths;
  }

  /**
   * Starts playback once the text stream produces its first chunk: each file
   * is written to the output after a 200 ms pause, then the output is ended.
   * @param textStream Text stream; only its first "data" event is used.
   * @returns A PassThrough stream receiving the file contents.
   */
  speak(textStream) {
    const audioStream = new import_stream4.PassThrough();
    const playFiles = async () => {
      for (const filePath of this.audioFilePaths) {
        await new Promise((resume) => setTimeout(resume, 200));
        const contents = fs.readFileSync(filePath);
        this.log(`Loaded chunk (${contents.length} bytes)`);
        audioStream.write(contents);
      }
      audioStream.end();
    };
    textStream.once("data", playFiles);
    return audioStream;
  }

  /** Nothing to cancel. */
  cancel() {
  }
};
|
|
290
521
|
|
|
291
522
|
// src/waitForParams.ts
|
|
292
523
|
async function waitForParams(socket, validate) {
|
|
293
524
|
return new Promise((resolve, reject) => {
|
|
294
525
|
const timeout = setTimeout(() => {
|
|
295
|
-
reject(new
|
|
526
|
+
reject(new MicdropError(4400 /* BadRequest */, "Missing params"));
|
|
296
527
|
}, 3e3);
|
|
297
528
|
const onParams = (payload) => {
|
|
298
529
|
clearTimeout(timeout);
|
|
@@ -301,7 +532,7 @@ async function waitForParams(socket, validate) {
|
|
|
301
532
|
const params = validate(JSON.parse(payload));
|
|
302
533
|
resolve(params);
|
|
303
534
|
} catch (error) {
|
|
304
|
-
reject(new
|
|
535
|
+
reject(new MicdropError(4400 /* BadRequest */, "Invalid params"));
|
|
305
536
|
}
|
|
306
537
|
};
|
|
307
538
|
socket.on("message", onParams);
|
|
@@ -309,11 +540,22 @@ async function waitForParams(socket, validate) {
|
|
|
309
540
|
}
|
|
310
541
|
// Annotate the CommonJS export names for ESM import in node:
|
|
311
542
|
0 && (module.exports = {
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
543
|
+
Agent,
|
|
544
|
+
FileSTT,
|
|
545
|
+
Logger,
|
|
546
|
+
MicdropClientCommands,
|
|
547
|
+
MicdropError,
|
|
548
|
+
MicdropErrorCode,
|
|
549
|
+
MicdropServer,
|
|
550
|
+
MicdropServerCommands,
|
|
551
|
+
MockAgent,
|
|
552
|
+
MockSTT,
|
|
553
|
+
MockTTS,
|
|
554
|
+
STT,
|
|
555
|
+
TTS,
|
|
556
|
+
convertPCMToOpus,
|
|
557
|
+
convertToOpus,
|
|
558
|
+
convertToPCM,
|
|
317
559
|
handleError,
|
|
318
560
|
waitForParams
|
|
319
561
|
});
|