@micdrop/server 1.7.1 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +229 -229
- package/dist/index.d.mts +150 -53
- package/dist/index.d.ts +150 -53
- package/dist/index.js +460 -220
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +444 -215
- package/dist/index.mjs.map +1 -1
- package/package.json +7 -3
package/dist/index.mjs
CHANGED
|
@@ -1,258 +1,476 @@
|
|
|
1
|
-
// src/
|
|
2
|
-
import
|
|
1
|
+
// src/agent/Agent.ts
|
|
2
|
+
import EventEmitter from "eventemitter3";
|
|
3
|
+
/**
 * Base class for conversational agents.
 *
 * Holds the running conversation (seeded with the system prompt) and emits an
 * event for every change so that listeners can mirror it elsewhere.
 * `destroy()` calls `this.cancel()`, which this base class does not define;
 * subclasses are expected to provide it.
 *
 * Events emitted: "Message", "EndCall", "CancelLastUserMessage",
 * "CancelLastAssistantMessage", "SkipAnswer".
 */
var Agent = class extends EventEmitter {
  /**
   * @param {object} options Agent options; `options.systemPrompt` becomes the
   *   content of the first (system) conversation entry.
   */
  constructor(options) {
    super();
    this.options = options;
    const systemMessage = { role: "system", content: options.systemPrompt };
    this.conversation = [systemMessage];
  }

  /** Appends a user message to the conversation. */
  addUserMessage(text) {
    this.addMessage("user", text);
  }

  /** Appends an assistant message to the conversation. */
  addAssistantMessage(text) {
    this.addMessage("assistant", text);
  }

  /**
   * Appends a message with the given role and emits "Message" with it.
   * @param {string} role
   * @param {string} text
   */
  addMessage(role, text) {
    this.log(`Adding ${role} message to conversation: ${text}`);
    const entry = { role, content: text };
    this.conversation.push(entry);
    this.emit("Message", entry);
  }

  /** Emits "EndCall". */
  endCall() {
    this.log("Ending call");
    this.emit("EndCall");
  }

  /**
   * Removes the last conversation entry if (and only if) it is a user
   * message, then emits "CancelLastUserMessage".
   */
  cancelLastUserMessage() {
    this.log("Cancelling last user message");
    const tail = this.conversation[this.conversation.length - 1];
    if (tail?.role === "user") {
      this.conversation.pop();
      this.emit("CancelLastUserMessage");
    }
  }

  /**
   * Removes the last conversation entry if (and only if) it is an assistant
   * message, then emits "CancelLastAssistantMessage".
   */
  cancelLastAssistantMessage() {
    this.log("Cancelling last assistant message");
    const tail = this.conversation[this.conversation.length - 1];
    if (tail?.role === "assistant") {
      this.conversation.pop();
      this.emit("CancelLastAssistantMessage");
    }
  }

  /** Emits "SkipAnswer". */
  skipAnswer() {
    this.log("Skipping answer");
    this.emit("SkipAnswer");
  }

  /**
   * Creates a promise together with its settle functions.
   * @returns {{resolve: Function, reject: Function, promise: Promise}}
   */
  createTextPromise() {
    let settle;
    let fail;
    const promise = new Promise((resolve, reject) => {
      settle = resolve;
      fail = reject;
    });
    return { resolve: settle, reject: fail, promise };
  }

  /** Forwards to `this.logger.log` when a logger has been assigned. */
  log(...message) {
    const { logger } = this;
    if (logger != null) {
      logger.log(...message);
    }
  }

  /** Drops every listener, then cancels whatever the subclass has in flight. */
  destroy() {
    this.log("Destroyed");
    this.removeAllListeners();
    this.cancel();
  }
};
|
|
63
|
+
|
|
64
|
+
// src/agent/MockAgent.ts
|
|
65
|
+
import { PassThrough } from "stream";
|
|
66
|
+
/**
 * Agent stand-in that replies with a numbered canned message
 * ("Assistant Message 0", "Assistant Message 1", ...).
 */
var MockAgent = class extends Agent {
  constructor() {
    super({ systemPrompt: "" });
    // Counter used to number the canned replies.
    this.i = 0;
  }

  /**
   * Produces the next canned reply.
   * @returns {{message: Promise<string>, stream: PassThrough}} the reply as a
   *   promise and as an already-ended text stream.
   */
  answer() {
    const deferred = this.createTextPromise();
    const reply = `Assistant Message ${this.i++}`;
    this.addAssistantMessage(reply);

    const stream = new PassThrough();
    // Write the whole reply and close the stream in one call.
    stream.end(reply);

    deferred.resolve(reply);
    return { message: deferred.promise, stream };
  }

  /** Nothing to cancel: replies are produced synchronously. */
  cancel() {
  }
};
|
|
84
|
+
|
|
85
|
+
// src/audio-convert.ts
|
|
86
|
+
import ffmpegInstaller from "@ffmpeg-installer/ffmpeg";
|
|
87
|
+
import ffmpeg from "fluent-ffmpeg";
|
|
88
|
+
import { PassThrough as PassThrough2 } from "stream";
|
|
89
|
+
ffmpeg.setFfmpegPath(ffmpegInstaller.path);
|
|
90
|
+
/**
 * Pipes an audio stream through ffmpeg to obtain mono signed little-endian PCM.
 *
 * @param audioStream Input stream handed to ffmpeg.
 * @param {number} [sampleRate=16000] Output sample rate.
 * @param {number} [bitDepth=16] Bits per sample; selects the `pcm_s<N>le`
 *   codec and the `s<N>le` container format.
 * @returns {PassThrough} stream receiving ffmpeg's output.
 */
function convertToPCM(audioStream, sampleRate = 16e3, bitDepth = 16) {
  const sampleFormat = `s${bitDepth}le`;
  const pcmOutput = new PassThrough2();
  const reportFailure = (error) => {
    console.error("Error converting audio stream:", error.message);
  };
  ffmpeg(audioStream)
    .audioChannels(1)
    .audioFrequency(sampleRate)
    .audioCodec(`pcm_${sampleFormat}`)
    .format(sampleFormat)
    .on("error", reportFailure)
    .pipe(pcmOutput);
  return pcmOutput;
}
|
|
97
|
+
/**
 * Pipes an audio stream through ffmpeg (configured by `ffmpegToOpus`) to
 * obtain Opus audio in a WebM container.
 *
 * @param audioStream Input stream handed to ffmpeg.
 * @param {number} [sampleRate=16000] Sample rate passed to `ffmpegToOpus`.
 * @returns {PassThrough} stream receiving ffmpeg's output.
 */
function convertToOpus(audioStream, sampleRate = 16e3) {
  const opusOutput = new PassThrough2();
  const command = ffmpegToOpus(ffmpeg(audioStream), sampleRate);
  command.pipe(opusOutput);
  return opusOutput;
}
|
|
102
|
+
/**
 * Pipes a raw PCM stream through ffmpeg (configured by `ffmpegToOpus`) to
 * obtain Opus audio in a WebM container.
 *
 * The input is declared to ffmpeg as s16le, 16000 Hz, one channel.
 * NOTE(review): the input rate is fixed at 16000 regardless of `sampleRate`,
 * which is only forwarded to `ffmpegToOpus` — confirm that is intended.
 *
 * @param audioStream Raw PCM input stream.
 * @param {number} [sampleRate=16000] Sample rate passed to `ffmpegToOpus`.
 * @returns {PassThrough} stream receiving ffmpeg's output.
 */
function convertPCMToOpus(audioStream, sampleRate = 16e3) {
  const rawInputOptions = ["-f s16le", "-ar 16000", "-ac 1"];
  const opusOutput = new PassThrough2();
  const command = ffmpegToOpus(ffmpeg(audioStream), sampleRate)
    .inputFormat("s16le")
    .inputOptions(rawInputOptions);
  command.pipe(opusOutput);
  return opusOutput;
}
|
|
107
|
+
/**
 * Configures an ffmpeg command to emit mono Opus audio in a WebM container
 * and attaches an error logger.
 *
 * @param ffmpegCommand Chainable ffmpeg command object.
 * @param {number} [sampleRate=16000] Output sample rate.
 * @returns whatever the final `.on("error", ...)` call on the chain returns.
 */
function ffmpegToOpus(ffmpegCommand, sampleRate = 16e3) {
  const outputFlags = [
    "-application audio",
    "-ac 1",
    `-ar ${sampleRate}`,
    "-b:a 64k",
    "-f webm",
    "-map_metadata -1"
  ];
  const reportFailure = (error) => {
    console.error("Error converting to Opus: ", error.message);
  };
  const encoded = ffmpegCommand
    .audioChannels(1)
    .audioFrequency(sampleRate)
    .audioCodec("libopus")
    .format("webm");
  const withFlags = encoded.outputOptions(outputFlags);
  return withFlags.on("error", reportFailure);
}
|
|
119
|
+
|
|
120
|
+
// src/errors.ts
|
|
121
|
+
// Numeric error codes, mapped in both directions (name -> code and
// code -> name). handleError passes these codes to `socket.close`.
var MicdropErrorCode = /* @__PURE__ */ ((codes) => {
  const definitions = [
    ["BadRequest", 4400],
    ["Unauthorized", 4401],
    ["NotFound", 4404]
  ];
  for (const [label, value] of definitions) {
    codes[label] = value;
    codes[value] = label;
  }
  return codes;
})(MicdropErrorCode || {});
|
|
127
|
+
/**
 * Error carrying a numeric code. `handleError` uses `code` and `message` as
 * the close code and close reason of the socket.
 */
var MicdropError = class extends Error {
  /**
   * @param {number} code Code stored on the error (see MicdropErrorCode).
   * @param {string} message Human-readable description.
   */
  constructor(code, message) {
    super(message);
    // Without an explicit name the error prints as a generic "Error: ...",
    // which makes it indistinguishable from unrelated failures in logs.
    this.name = "MicdropError";
    this.code = code;
  }
};
|
|
133
|
+
/**
 * Closes and terminates a socket in response to an error.
 *
 * A MicdropError closes the socket with its own code and message; anything
 * else is printed with `console.error` and the socket is closed with 1011.
 *
 * @param socket Object providing `close(code, reason)` and `terminate()`.
 * @param error The error that occurred.
 */
function handleError(socket, error) {
  const isMicdropError = error instanceof MicdropError;
  if (!isMicdropError) {
    console.error(error);
    socket.close(1011);
  } else {
    socket.close(error.code, error.message);
  }
  socket.terminate();
}
|
|
142
|
+
|
|
143
|
+
// src/Logger.ts
|
|
144
|
+
/**
 * Console logger that prefixes each line with `[<name> <uptime>]`, where the
 * uptime is `process.uptime()` formatted with three decimals.
 */
var Logger = class {
  /** @param {string} name Label shown in the prefix. */
  constructor(name) {
    this.name = name;
  }

  /** Prints the prefix followed by the given values via `console.log`. */
  log(...message) {
    const uptime = process.uptime().toFixed(3);
    const prefix = `[${this.name} ${uptime}]`;
    console.log(prefix, ...message);
  }
};
|
|
153
|
+
|
|
154
|
+
// src/MicdropServer.ts
|
|
155
|
+
import { PassThrough as PassThrough3 } from "stream";
|
|
3
156
|
|
|
4
157
|
// src/types.ts
|
|
5
|
-
var
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
return
|
|
10
|
-
})(
|
|
11
|
-
var
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
})(CallServerCommands || {});
|
|
158
|
+
// Text commands that MicdropServer recognizes in incoming socket messages.
// Each key maps to the identical string.
var MicdropClientCommands = /* @__PURE__ */ ((commands) => {
  for (const command of ["StartSpeaking", "StopSpeaking", "Mute"]) {
    commands[command] = command;
  }
  return commands;
})(MicdropClientCommands || {});
|
|
164
|
+
// Text commands that MicdropServer sends over the socket.
// Each key maps to the identical string.
var MicdropServerCommands = /* @__PURE__ */ ((commands) => {
  const names = [
    "Message",
    "CancelLastAssistantMessage",
    "CancelLastUserMessage",
    "SkipAnswer",
    "EndCall"
  ];
  for (const command of names) {
    commands[command] = command;
  }
  return commands;
})(MicdropServerCommands || {});
|
|
20
172
|
|
|
21
|
-
// src/
|
|
22
|
-
var
|
|
173
|
+
// src/MicdropServer.ts
|
|
174
|
+
/**
 * Drives one call over a socket.
 *
 * Incoming socket messages are either short text commands (StartSpeaking,
 * StopSpeaking, Mute) or audio chunks that are written to the current user
 * stream, which is handed to `config.stt`. Transcripts are added to
 * `config.agent`; the agent's answer stream is passed to `config.tts` and the
 * resulting audio chunks are sent back over the socket. Agent events are
 * forwarded to the socket as text commands.
 */
var MicdropServer = class {
  /**
   * @param socket Object providing `on(event, listener)` and `send(data)`.
   * @param config Call configuration. `agent`, `stt` and `tts` are required;
   *   `firstMessage`, `generateFirstMessage` and `onEnd` are optional.
   */
  constructor(socket, config) {
    this.socket = null;
    this.config = null;
    this.startTime = Date.now();

    // Socket closed: tear down the components and report the call summary.
    this.onClose = () => {
      if (!this.config) return;
      this.log("Connection closed");
      const duration = Math.round((Date.now() - this.startTime) / 1e3);
      this.config.agent.destroy();
      this.config.stt.destroy();
      this.config.tts.destroy();
      this.config.onEnd?.({
        // Remove system message
        conversation: this.config.agent.conversation.slice(1),
        duration
      });
      this.socket = null;
      this.config = null;
    };

    // Socket message: payloads shorter than 15 bytes are read as text
    // commands, anything else is audio for the current user stream (dropped
    // when no stream is open).
    this.onMessage = async (message) => {
      if (message.byteLength === 0) return;
      if (!Buffer.isBuffer(message)) {
        this.log("Message is not a buffer");
        return;
      }
      try {
        if (message.byteLength < 15) {
          const cmd = message.toString();
          this.log(`Command: ${cmd}`);
          if (cmd === "StartSpeaking" /* StartSpeaking */) {
            await this.onStartSpeaking();
          } else if (cmd === "Mute" /* Mute */) {
            await this.onMute();
          } else if (cmd === "StopSpeaking" /* StopSpeaking */) {
            await this.onStopSpeaking();
          }
        } else if (this.currentUserStream) {
          this.log(`Received chunk (${message.byteLength} bytes)`);
          this.currentUserStream.write(message);
        }
      } catch (error) {
        // This listener is async and nobody awaits it: without this catch a
        // failing STT/TTS/agent call would surface as an unhandled rejection.
        console.error("[MicdropServer]", error);
      }
    };

    // Transcript from STT: record it and answer right away unless the user
    // is still speaking (a user stream is still open).
    this.onTranscript = async (transcript) => {
      if (!this.config) return;
      this.log(`User transcript: "${transcript}"`);
      this.config.agent.addUserMessage(transcript);
      if (!this.currentUserStream) {
        this.log("User stopped speaking, answering");
        this.answer();
      }
    };

    this.socket = socket;
    this.config = config;
    this.log(`Call started`);
    this.config.stt.on("Transcript", this.onTranscript);

    // Forward agent events to the client as text commands.
    this.config.agent.on(
      "Message",
      (message) => this.socket?.send(
        `${"Message" /* Message */} ${JSON.stringify(message)}`
      )
    );
    this.config.agent.on(
      "CancelLastUserMessage",
      () => this.socket?.send("CancelLastUserMessage" /* CancelLastUserMessage */)
    );
    this.config.agent.on(
      "CancelLastAssistantMessage",
      () => this.socket?.send("CancelLastAssistantMessage" /* CancelLastAssistantMessage */)
    );
    this.config.agent.on(
      "SkipAnswer",
      () => this.socket?.send("SkipAnswer" /* SkipAnswer */)
    );
    this.config.agent.on(
      "EndCall",
      () => this.socket?.send("EndCall" /* EndCall */)
    );

    this.sendFirstMessage();
    socket.on("close", this.onClose);
    socket.on("message", this.onMessage);
  }

  /** Forwards to `this.logger.log` when a logger has been assigned. */
  log(...message) {
    this.logger?.log(...message);
  }

  /** Cancels TTS and agent work in progress. */
  cancel() {
    this.config?.tts.cancel();
    this.config?.agent.cancel();
  }

  /** Mute command: close the user stream and cancel work in progress. */
  async onMute() {
    this.currentUserStream?.end();
    this.currentUserStream = void 0;
    this.cancel();
  }

  /**
   * StartSpeaking command: replace the user stream with a fresh one, hand it
   * to STT and cancel work in progress.
   */
  async onStartSpeaking() {
    if (!this.config) return;
    this.currentUserStream?.end();
    this.currentUserStream = new PassThrough3();
    this.config.stt.transcribe(this.currentUserStream);
    this.cancel();
  }

  /**
   * StopSpeaking command: close the user stream; if the conversation already
   * ends with a user message, answer immediately.
   */
  async onStopSpeaking() {
    this.currentUserStream?.end();
    this.currentUserStream = void 0;
    const conversation = this.config?.agent.conversation;
    // Optional chaining on the last entry: an empty conversation must not throw.
    const lastMessage = conversation?.[conversation.length - 1];
    if (lastMessage?.role === "user") {
      this.log(
        "User stopped speaking and a transcript already exists, answering"
      );
      this.answer();
    }
  }

  /**
   * Speaks `config.firstMessage` when set, otherwise asks the agent for an
   * answer when `config.generateFirstMessage` is set. Failures are logged and
   * SkipAnswer is sent to the client.
   */
  async sendFirstMessage() {
    if (!this.config) return;
    try {
      if (this.config.firstMessage) {
        this.config.agent.addAssistantMessage(this.config.firstMessage);
        await this.speak(this.config.firstMessage);
      } else if (this.config.generateFirstMessage) {
        await this.answer();
      }
    } catch (error) {
      console.error("[MicdropServer]", error);
      this.socket?.send("SkipAnswer" /* SkipAnswer */);
    }
  }

  /**
   * Cancels work in progress, asks the agent for an answer and speaks it.
   * Never rejects: failures are logged and SkipAnswer is sent to the client.
   */
  async answer() {
    if (!this.config) return;
    try {
      // Inside the try block because several callers do not await answer().
      this.cancel();
      const { stream } = this.config.agent.answer();
      await this.speak(stream);
    } catch (error) {
      console.error("[MicdropServer]", error);
      this.socket?.send("SkipAnswer" /* SkipAnswer */);
    }
  }

  /**
   * Run text-to-speech and send to client.
   * @param message Either a string or a text stream.
   */
  async speak(message) {
    if (!this.socket || !this.config) return;
    let textStream;
    if (typeof message === "string") {
      // Wrap plain text in an already-ended stream.
      const stream = new PassThrough3();
      stream.write(message);
      stream.end();
      textStream = stream;
    } else {
      textStream = message;
    }
    const audio = this.config.tts.speak(textStream);
    await this.sendAudio(audio);
  }

  /**
   * Forwards every chunk of the audio stream to the socket.
   * Only attaches listeners; it does not wait for the stream to finish.
   * @param audio Readable audio stream.
   */
  async sendAudio(audio) {
    if (!this.socket) return;
    if (!audio.readable) {
      this.log("Non readable audio, skipping", audio);
      return;
    }
    audio.on("data", (chunk) => {
      this.log(`Send audio chunk (${chunk.byteLength} bytes)`);
      this.socket?.send(chunk);
    });
    audio.on("error", (error) => {
      this.log("Error in audio stream", error);
    });
    audio.on("end", () => {
      this.log("Audio stream ended");
    });
  }
};
|
|
342
|
+
|
|
343
|
+
// src/stt/STT.ts
|
|
344
|
+
import EventEmitter2 from "eventemitter3";
|
|
345
|
+
// File extension used for each audio mime type that STT can detect.
var MIME_TYPE_TO_EXTENSION = {
  "audio/wav": "wav",
  "audio/ogg": "ogg",
  "audio/mpeg": "mp3",
  "audio/webm": "webm",
  "audio/mp4": "mp4",
  "audio/flac": "flac"
};

/**
 * Base class for speech-to-text engines. It records the mime type of the
 * audio it is given, detected from the leading bytes of the first chunk.
 */
var STT = class extends EventEmitter2 {
  // Set stream of audio to transcribe
  transcribe(audioStream) {
    const rememberMimeType = (firstChunk) => {
      this.mimeType = this.detectMimeType(firstChunk);
    };
    audioStream.once("data", rememberMimeType);
  }

  /** Forwards to `this.logger.log` when a logger has been assigned. */
  log(...message) {
    const { logger } = this;
    if (logger != null) {
      logger.log(...message);
    }
  }

  /** Removes every listener registered on this instance. */
  destroy() {
    this.log("Destroyed");
    this.removeAllListeners();
  }

  /** File extension for the detected mime type, "bin" when unknown. */
  get extension() {
    const known = this.mimeType ? MIME_TYPE_TO_EXTENSION[this.mimeType] : void 0;
    return known || "bin";
  }

  /**
   * Guesses the audio mime type from the leading bytes of a chunk.
   * @param chunk First chunk of the audio stream.
   * @returns {string} detected mime type, "audio/wav" when nothing matches.
   * @throws {Error} when the chunk is missing or has a byteLength of 0.
   */
  detectMimeType(chunk) {
    if (!chunk || chunk.byteLength === 0) {
      throw new Error("Unable to detect mime type (empty chunk)");
    }
    const bytes = new Uint8Array(chunk);
    // Each entry lists the byte runs (offset + expected values) that must all
    // match; entries are checked in order and the first match wins.
    const signatures = [
      ["audio/webm", [[0, [26, 69, 223, 163]]]],
      ["audio/ogg", [[0, [79, 103, 103, 83]]]],
      ["audio/wav", [[0, [82, 73, 70, 70]], [8, [87, 65, 86, 69]]]],
      ["audio/mpeg", [[0, [73, 68, 51]]]],
      ["audio/mp4", [[4, [102, 116, 121, 112]]]],
      ["audio/flac", [[0, [102, 76, 97, 67]]]]
    ];
    const matchesRun = ([offset, expected]) => expected.every((value, index) => bytes[offset + index] === value);
    for (const [mimeType, runs] of signatures) {
      if (runs.every(matchesRun)) {
        return mimeType;
      }
    }
    this.log("Unable to detect mime type, using default", chunk);
    return "audio/wav";
  }
};
|
|
397
|
+
|
|
398
|
+
// src/stt/FileSTT.ts
|
|
399
|
+
/**
 * STT variant that buffers the whole audio stream, wraps it in a File and
 * passes it to `transcribeFile(file)`, which subclasses implement. The result
 * is emitted as a "Transcript" event.
 */
var FileSTT = class extends STT {
  /**
   * Collects the stream's chunks and transcribes them once the stream ends.
   * Nothing is transcribed when no chunk was received.
   * @param audioStream Audio stream to transcribe.
   */
  transcribe(audioStream) {
    super.transcribe(audioStream);
    this.log("Converting stream to file...");
    const chunks = [];
    audioStream.on("data", (chunk) => {
      chunks.push(chunk);
    });
    audioStream.on("end", async () => {
      if (chunks.length === 0) return;
      try {
        const arrayBuffer = Buffer.concat(chunks);
        const file = new File([arrayBuffer], `audio.${this.extension}`, {
          type: this.mimeType
        });
        this.log("Transcribing file...");
        const transcript = await this.transcribeFile(file);
        this.emit("Transcript", transcript);
      } catch (error) {
        // Nobody awaits this listener: without the catch a failed
        // transcription would surface as an unhandled promise rejection.
        this.log("Transcription failed", error);
        console.error("[FileSTT]", error);
      }
    });
  }
};
|
|
227
419
|
|
|
228
|
-
// src/
|
|
229
|
-
var
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
constructor(code, message) {
|
|
237
|
-
super(message);
|
|
238
|
-
this.code = code;
|
|
420
|
+
// src/stt/MockSTT.ts
|
|
421
|
+
/**
 * FileSTT stand-in that ignores the audio and returns a numbered canned
 * transcript ("User Message 0", "User Message 1", ...).
 */
var MockSTT = class extends FileSTT {
  constructor(...args) {
    super(...args);
    // Counter used to number the canned transcripts.
    this.i = 0;
  }

  /**
   * @param file Ignored.
   * @returns {Promise<string>} the next canned transcript.
   */
  async transcribeFile(file) {
    const index = this.i++;
    return `User Message ${index}`;
  }
};
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
430
|
+
|
|
431
|
+
// src/tts/MockTTS.ts
|
|
432
|
+
import * as fs from "fs";
|
|
433
|
+
import { PassThrough as PassThrough4 } from "stream";
|
|
434
|
+
|
|
435
|
+
// src/tts/TTS.ts
|
|
436
|
+
/**
 * Base class for text-to-speech engines. `destroy()` calls `this.cancel()`,
 * which this base class does not define; subclasses are expected to provide it.
 */
var TTS = class {
  /** Forwards to `this.logger.log` when a logger has been assigned. */
  log(...message) {
    const { logger } = this;
    if (logger != null) {
      logger.log(...message);
    }
  }

  /** Logs the teardown and cancels whatever the subclass has in flight. */
  destroy() {
    this.log("Destroyed");
    this.cancel();
  }
};
|
|
445
|
+
|
|
446
|
+
// src/tts/MockTTS.ts
|
|
447
|
+
/**
 * TTS stand-in that ignores the text and plays back pre-recorded audio files.
 */
var MockTTS = class extends TTS {
  /** @param {string[]} audioFilePaths Files to emit, in order, for each call to speak(). */
  constructor(audioFilePaths) {
    super();
    this.audioFilePaths = audioFilePaths;
  }

  /**
   * Once the text stream produces its first chunk, writes each configured
   * file to the returned stream (200 ms apart) and then ends it.
   * @param textStream Text stream; its content is not read.
   * @returns {PassThrough} audio stream.
   */
  speak(textStream) {
    const audioStream = new PassThrough4();
    let started = false;
    textStream.once("data", async () => {
      started = true;
      try {
        for (const filePath of this.audioFilePaths) {
          await new Promise((resolve) => setTimeout(resolve, 200));
          const audioBuffer = fs.readFileSync(filePath);
          this.log(`Loaded chunk (${audioBuffer.length} bytes)`);
          audioStream.write(audioBuffer);
        }
      } catch (error) {
        // Nobody awaits this listener: without the catch an unreadable file
        // would surface as an unhandled promise rejection.
        console.error("[MockTTS]", error);
      } finally {
        // Always end the stream so consumers are not left waiting.
        audioStream.end();
      }
    });
    // A text stream that finishes without any data would otherwise leave the
    // audio stream open forever.
    textStream.once("end", () => {
      if (!started) audioStream.end();
    });
    return audioStream;
  }

  /** Nothing to cancel. */
  cancel() {
  }
};
|
|
250
468
|
|
|
251
469
|
// src/waitForParams.ts
|
|
252
470
|
async function waitForParams(socket, validate) {
|
|
253
471
|
return new Promise((resolve, reject) => {
|
|
254
472
|
const timeout = setTimeout(() => {
|
|
255
|
-
reject(new
|
|
473
|
+
reject(new MicdropError(4400 /* BadRequest */, "Missing params"));
|
|
256
474
|
}, 3e3);
|
|
257
475
|
const onParams = (payload) => {
|
|
258
476
|
clearTimeout(timeout);
|
|
@@ -261,18 +479,29 @@ async function waitForParams(socket, validate) {
|
|
|
261
479
|
const params = validate(JSON.parse(payload));
|
|
262
480
|
resolve(params);
|
|
263
481
|
} catch (error) {
|
|
264
|
-
reject(new
|
|
482
|
+
reject(new MicdropError(4400 /* BadRequest */, "Invalid params"));
|
|
265
483
|
}
|
|
266
484
|
};
|
|
267
485
|
socket.on("message", onParams);
|
|
268
486
|
});
|
|
269
487
|
}
|
|
270
488
|
export {
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
489
|
+
Agent,
|
|
490
|
+
FileSTT,
|
|
491
|
+
Logger,
|
|
492
|
+
MicdropClientCommands,
|
|
493
|
+
MicdropError,
|
|
494
|
+
MicdropErrorCode,
|
|
495
|
+
MicdropServer,
|
|
496
|
+
MicdropServerCommands,
|
|
497
|
+
MockAgent,
|
|
498
|
+
MockSTT,
|
|
499
|
+
MockTTS,
|
|
500
|
+
STT,
|
|
501
|
+
TTS,
|
|
502
|
+
convertPCMToOpus,
|
|
503
|
+
convertToOpus,
|
|
504
|
+
convertToPCM,
|
|
276
505
|
handleError,
|
|
277
506
|
waitForParams
|
|
278
507
|
};
|