@micdrop/server 1.7.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -232
- package/dist/index.d.mts +150 -53
- package/dist/index.d.ts +150 -53
- package/dist/index.js +460 -218
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +444 -213
- package/dist/index.mjs.map +1 -1
- package/package.json +7 -3
package/dist/index.mjs
CHANGED
|
@@ -1,256 +1,476 @@
|
|
|
1
|
-
// src/
|
|
2
|
-
import
|
|
1
|
+
// src/agent/Agent.ts
|
|
2
|
+
import EventEmitter from "eventemitter3";
|
|
3
|
+
/**
 * Base class for conversational agents.
 *
 * Owns the conversation history, seeded with a single system message built
 * from `options.systemPrompt`, and emits an event for every change:
 * "Message", "CancelLastUserMessage", "CancelLastAssistantMessage",
 * "SkipAnswer" and "EndCall".
 *
 * `destroy()` calls `this.cancel()`, which is not defined on this class;
 * a concrete agent has to provide it.
 */
var Agent = class extends EventEmitter {
  /**
   * @param options Agent options; `options.systemPrompt` becomes the first
   *   (system) entry of the conversation.
   */
  constructor(options) {
    super();
    this.options = options;
    this.conversation = [{ role: "system", content: options.systemPrompt }];
  }

  /** Append a message with role "user" to the conversation. */
  addUserMessage(text) {
    this.addMessage("user", text);
  }

  /** Append a message with role "assistant" to the conversation. */
  addAssistantMessage(text) {
    this.addMessage("assistant", text);
  }

  /**
   * Append `{ role, content: text }` to the conversation and emit it as a
   * "Message" event.
   */
  addMessage(role, text) {
    this.log(`Adding ${role} message to conversation: ${text}`);
    const entry = { role, content: text };
    this.conversation.push(entry);
    this.emit("Message", entry);
  }

  /** Emit "EndCall". */
  endCall() {
    this.log("Ending call");
    this.emit("EndCall");
  }

  /**
   * Remove the last conversation entry when its role is "user" and emit
   * "CancelLastUserMessage"; otherwise do nothing besides logging.
   */
  cancelLastUserMessage() {
    this.log("Cancelling last user message");
    const { conversation } = this;
    const last = conversation[conversation.length - 1];
    if (!last || last.role !== "user") {
      return;
    }
    conversation.pop();
    this.emit("CancelLastUserMessage");
  }

  /**
   * Remove the last conversation entry when its role is "assistant" and emit
   * "CancelLastAssistantMessage"; otherwise do nothing besides logging.
   */
  cancelLastAssistantMessage() {
    this.log("Cancelling last assistant message");
    const { conversation } = this;
    const last = conversation[conversation.length - 1];
    if (!last || last.role !== "assistant") {
      return;
    }
    conversation.pop();
    this.emit("CancelLastAssistantMessage");
  }

  /** Emit "SkipAnswer". */
  skipAnswer() {
    this.log("Skipping answer");
    this.emit("SkipAnswer");
  }

  /**
   * Create a promise together with the functions that settle it.
   *
   * @returns An object carrying `resolve`, `reject` and `promise`.
   */
  createTextPromise() {
    let resolve;
    let reject;
    const promise = new Promise((onResolve, onReject) => {
      resolve = onResolve;
      reject = onReject;
    });
    return { resolve, reject, promise };
  }

  /** Forward the arguments to `this.logger.log` when a logger is set. */
  log(...message) {
    const { logger } = this;
    if (logger != null) {
      logger.log(...message);
    }
  }

  /** Drop every event listener, then call `this.cancel()`. */
  destroy() {
    this.log("Destroyed");
    this.removeAllListeners();
    this.cancel();
  }
};
|
|
63
|
+
|
|
64
|
+
// src/agent/MockAgent.ts
|
|
65
|
+
import { PassThrough } from "stream";
|
|
66
|
+
/**
 * Agent stand-in that answers with a numbered placeholder sentence
 * ("Assistant Message 0", "Assistant Message 1", ...).
 */
var MockAgent = class extends Agent {
  constructor() {
    super({ systemPrompt: "" });
    // Counter used to number the generated answers.
    this.i = 0;
  }

  /**
   * Produce the next placeholder answer.
   *
   * The answer is recorded in the conversation, written to a stream that is
   * ended right away, and used to resolve the returned promise.
   *
   * @returns `{ message, stream }` where `message` is a promise of the answer
   *   text and `stream` is a PassThrough containing that same text.
   */
  answer() {
    const stream = new PassThrough();
    const { promise, resolve } = this.createTextPromise();
    const text = `Assistant Message ${this.i++}`;
    this.addAssistantMessage(text);
    // end(chunk) writes the chunk and then closes the stream.
    stream.end(text);
    resolve(text);
    return { message: promise, stream };
  }

  /** Nothing to cancel: answers are produced synchronously. */
  cancel() {
  }
};
|
|
84
|
+
|
|
85
|
+
// src/audio-convert.ts
|
|
86
|
+
import ffmpegInstaller from "@ffmpeg-installer/ffmpeg";
|
|
87
|
+
import ffmpeg from "fluent-ffmpeg";
|
|
88
|
+
import { PassThrough as PassThrough2 } from "stream";
|
|
89
|
+
ffmpeg.setFfmpegPath(ffmpegInstaller.path);
|
|
90
|
+
/**
 * Transcode an audio stream to mono signed little-endian PCM through ffmpeg.
 *
 * @param audioStream Input handed to `ffmpeg(...)`.
 * @param sampleRate Output sample rate passed to `audioFrequency` (default 16000).
 * @param bitDepth Bits per sample; selects the `pcm_s<bitDepth>le` codec and
 *   the `s<bitDepth>le` container format (default 16).
 * @returns A PassThrough stream that ffmpeg's output is piped into.
 */
function convertToPCM(audioStream, sampleRate = 16e3, bitDepth = 16) {
  const sampleFormat = `s${bitDepth}le`;
  const output = new PassThrough2();
  const command = ffmpeg(audioStream)
    .audioChannels(1)
    .audioFrequency(sampleRate)
    .audioCodec(`pcm_${sampleFormat}`)
    .format(sampleFormat);
  // Conversion failures are only reported on the console.
  command.on("error", (error) => {
    console.error("Error converting audio stream:", error.message);
  });
  command.pipe(output);
  return output;
}
|
|
97
|
+
/**
 * Transcode an audio stream to Opus in a WebM container.
 *
 * @param audioStream Input handed to `ffmpeg(...)`.
 * @param sampleRate Output sample rate forwarded to `ffmpegToOpus` (default 16000).
 * @returns A PassThrough stream that ffmpeg's output is piped into.
 */
function convertToOpus(audioStream, sampleRate = 16e3) {
  const output = new PassThrough2();
  const command = ffmpegToOpus(ffmpeg(audioStream), sampleRate);
  command.pipe(output);
  return output;
}
|
|
102
|
+
/**
 * Transcode raw mono 16-bit little-endian PCM to Opus in a WebM container.
 *
 * Raw PCM carries no header, so ffmpeg must be told the input sample rate.
 * The input rate now follows `sampleRate` instead of a hard-coded 16000, so
 * PCM recorded at another rate is no longer misread. With the default
 * argument the ffmpeg command line is unchanged.
 *
 * @param audioStream Stream of raw s16le mono PCM samples.
 * @param sampleRate Sample rate of the PCM input, also used for the output
 *   (default 16000).
 * @returns A PassThrough stream that ffmpeg's output is piped into.
 */
function convertPCMToOpus(audioStream, sampleRate = 16e3) {
  const webmStream = new PassThrough2();
  ffmpegToOpus(ffmpeg(audioStream), sampleRate)
    .inputFormat("s16le")
    .inputOptions(["-f s16le", `-ar ${sampleRate}`, "-ac 1"])
    .pipe(webmStream);
  return webmStream;
}
|
|
107
|
+
/**
 * Configure an ffmpeg command for mono Opus output in a WebM container.
 *
 * @param ffmpegCommand Command object to configure.
 * @param sampleRate Output sample rate (default 16000).
 * @returns The value returned by the final `.on("error", ...)` call of the
 *   chain, which callers go on to configure and pipe.
 */
function ffmpegToOpus(ffmpegCommand, sampleRate = 16e3) {
  const outputOptions = [
    "-application audio",
    "-ac 1",
    `-ar ${sampleRate}`,
    "-b:a 64k",
    "-f webm",
    "-map_metadata -1"
  ];
  const configured = ffmpegCommand
    .audioChannels(1)
    .audioFrequency(sampleRate)
    .audioCodec("libopus")
    .format("webm")
    .outputOptions(outputOptions);
  // Conversion failures are only reported on the console.
  return configured.on("error", (error) => {
    console.error("Error converting to Opus: ", error.message);
  });
}
|
|
119
|
+
|
|
120
|
+
// src/errors.ts
|
|
121
|
+
// Numeric error codes with a two-way mapping: name -> code and code -> name.
var MicdropErrorCode = /* @__PURE__ */ ((codes) => {
  const entries = [
    ["BadRequest", 4400],
    ["Unauthorized", 4401],
    ["NotFound", 4404]
  ];
  for (const [name, value] of entries) {
    codes[name] = value;
    codes[value] = name;
  }
  return codes;
})(MicdropErrorCode || {});
|
|
127
|
+
/**
 * Error carrying a numeric `code` alongside the usual message.
 * `handleError` passes that code and message to `socket.close`.
 */
var MicdropError = class extends Error {
  /**
   * @param code Numeric code stored on the instance (see MicdropErrorCode).
   * @param message Human-readable description.
   */
  constructor(code, message) {
    super(message);
    // Identify the error type in logs and stack traces instead of the
    // generic "Error" inherited from the base class.
    this.name = "MicdropError";
    this.code = code;
  }
};
|
|
133
|
+
/**
 * Close a socket in response to an error.
 *
 * A MicdropError closes the socket with its own code and message. Any other
 * value is printed with console.error and the socket is closed with code
 * 1011. In both cases `socket.terminate()` is called afterwards.
 *
 * @param socket Object exposing `close(code, reason)` and `terminate()`.
 * @param error The error (or other value) that was caught.
 */
function handleError(socket, error) {
  const isMicdropError = error instanceof MicdropError;
  if (!isMicdropError) {
    console.error(error);
    socket.close(1011);
  } else {
    socket.close(error.code, error.message);
  }
  socket.terminate();
}
|
|
142
|
+
|
|
143
|
+
// src/Logger.ts
|
|
144
|
+
/**
 * Console logger that prefixes every line with `[<name> <uptime>]`, where
 * uptime is the process uptime in seconds with three decimals.
 */
var Logger = class {
  /** @param name Label printed in the prefix of every line. */
  constructor(name) {
    this.name = name;
  }

  /** Print the prefix followed by the given values through console.log. */
  log(...message) {
    const uptime = process.uptime().toFixed(3);
    const prefix = `[${this.name} ${uptime}]`;
    console.log(prefix, ...message);
  }
};
|
|
153
|
+
|
|
154
|
+
// src/MicdropServer.ts
|
|
155
|
+
import { PassThrough as PassThrough3 } from "stream";
|
|
3
156
|
|
|
4
157
|
// src/types.ts
|
|
5
|
-
var
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
return
|
|
10
|
-
})(
|
|
11
|
-
var
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
})(CallServerCommands || {});
|
|
158
|
+
// Text commands compared against incoming socket messages by MicdropServer.
// Each key maps to the identical string value.
var MicdropClientCommands = /* @__PURE__ */ ((commands) => {
  for (const name of ["StartSpeaking", "StopSpeaking", "Mute"]) {
    commands[name] = name;
  }
  return commands;
})(MicdropClientCommands || {});
|
|
164
|
+
// Text commands sent over the socket by MicdropServer.
// Each key maps to the identical string value.
var MicdropServerCommands = /* @__PURE__ */ ((commands) => {
  const names = [
    "Message",
    "CancelLastAssistantMessage",
    "CancelLastUserMessage",
    "SkipAnswer",
    "EndCall"
  ];
  for (const name of names) {
    commands[name] = name;
  }
  return commands;
})(MicdropServerCommands || {});
|
|
20
172
|
|
|
21
|
-
// src/
|
|
22
|
-
var
|
|
173
|
+
// src/MicdropServer.ts
|
|
174
|
+
/**
 * Drives one call over a socket by wiring together the three components
 * found in `config`:
 *   - `config.stt`   receives the user's audio stream and emits "Transcript";
 *   - `config.agent` owns the conversation and produces the answer stream;
 *   - `config.tts`   turns the answer text stream into an audio stream.
 *
 * Optional config entries read here: `firstMessage`, `generateFirstMessage`
 * and `onEnd`. An optional `logger` property, when set on the instance,
 * receives log lines.
 */
var MicdropServer = class {
  /**
   * @param socket Object exposing `on(event, handler)` and `send(data)`.
   * @param config Call configuration (see class description).
   */
  constructor(socket, config) {
    this.socket = null;
    this.config = null;
    this.startTime = Date.now();

    // Socket "close" handler: tears everything down and reports the call.
    this.onClose = () => {
      if (!this.config) return;
      this.log("Connection closed");
      const duration = Math.round((Date.now() - this.startTime) / 1e3);
      this.config.agent.destroy();
      this.config.stt.destroy();
      this.config.tts.destroy();
      this.config.onEnd?.({
        // Remove system message
        conversation: this.config.agent.conversation.slice(1),
        duration
      });
      this.socket = null;
      this.config = null;
    };

    // Socket "message" handler. Buffers shorter than 15 bytes are decoded as
    // text commands (the longest one, "StartSpeaking", is 13 bytes); anything
    // else is treated as an audio chunk for the current user stream.
    this.onMessage = async (message) => {
      // This runs as an event listener, so nothing awaits the returned
      // promise: failures must be handled here or they surface as unhandled
      // rejections.
      try {
        if (message.byteLength === 0) return;
        if (!Buffer.isBuffer(message)) {
          this.log("Message is not a buffer");
          return;
        }
        if (message.byteLength < 15) {
          const cmd = message.toString();
          this.log(`Command: ${cmd}`);
          if (cmd === "StartSpeaking" /* StartSpeaking */) {
            await this.onStartSpeaking();
          } else if (cmd === "Mute" /* Mute */) {
            await this.onMute();
          } else if (cmd === "StopSpeaking" /* StopSpeaking */) {
            await this.onStopSpeaking();
          }
        } else if (this.currentUserStream) {
          this.log(`Received chunk (${message.byteLength} bytes)`);
          this.currentUserStream.write(message);
        }
      } catch (error) {
        console.error("[MicdropServer]", error);
      }
    };

    // STT "Transcript" handler: records the user message and answers right
    // away when the user is no longer speaking.
    this.onTranscript = async (transcript) => {
      if (!this.config) return;
      this.log(`User transcript: "${transcript}"`);
      this.config.agent.addUserMessage(transcript);
      if (!this.currentUserStream) {
        this.log("User stopped speaking, answering");
        this.answer();
      }
    };

    this.socket = socket;
    this.config = config;
    this.log(`Call started`);
    this.config.stt.on("Transcript", this.onTranscript);

    // Relay agent events to the client as text commands.
    this.config.agent.on(
      "Message",
      (message) => this.socket?.send(
        `${"Message" /* Message */} ${JSON.stringify(message)}`
      )
    );
    this.config.agent.on(
      "CancelLastUserMessage",
      () => this.socket?.send("CancelLastUserMessage" /* CancelLastUserMessage */)
    );
    this.config.agent.on(
      "CancelLastAssistantMessage",
      () => this.socket?.send("CancelLastAssistantMessage" /* CancelLastAssistantMessage */)
    );
    this.config.agent.on(
      "SkipAnswer",
      () => this.socket?.send("SkipAnswer" /* SkipAnswer */)
    );
    this.config.agent.on(
      "EndCall",
      () => this.socket?.send("EndCall" /* EndCall */)
    );

    this.sendFirstMessage();
    socket.on("close", this.onClose);
    socket.on("message", this.onMessage);
  }

  /** Forward the arguments to `this.logger.log` when a logger is set. */
  log(...message) {
    this.logger?.log(...message);
  }

  /** Cancel whatever the TTS and the agent are currently doing. */
  cancel() {
    this.config?.tts.cancel();
    this.config?.agent.cancel();
  }

  /** "Mute" command: close the user stream and cancel the pending answer. */
  async onMute() {
    this.currentUserStream?.end();
    this.currentUserStream = void 0;
    this.cancel();
  }

  /**
   * "StartSpeaking" command: replace the user stream with a fresh one, hand
   * it to the STT, and cancel the pending answer.
   */
  async onStartSpeaking() {
    if (!this.config) return;
    this.currentUserStream?.end();
    this.currentUserStream = new PassThrough3();
    this.config.stt.transcribe(this.currentUserStream);
    this.cancel();
  }

  /**
   * "StopSpeaking" command: close the user stream. When the conversation
   * already ends with a user message, answer immediately; otherwise the
   * answer is triggered later by `onTranscript`.
   */
  async onStopSpeaking() {
    this.currentUserStream?.end();
    this.currentUserStream = void 0;
    const conversation = this.config?.agent.conversation;
    // Optional chaining keeps an empty conversation from throwing.
    if (conversation && conversation[conversation.length - 1]?.role === "user") {
      this.log(
        "User stopped speaking and a transcript already exists, answering"
      );
      this.answer();
    }
  }

  /**
   * Speak `config.firstMessage` when provided, otherwise let the agent
   * generate the opening message when `config.generateFirstMessage` is set.
   * Failures are logged and reported to the client as "SkipAnswer".
   */
  async sendFirstMessage() {
    if (!this.config) return;
    try {
      if (this.config.firstMessage) {
        this.config.agent.addAssistantMessage(this.config.firstMessage);
        await this.speak(this.config.firstMessage);
      } else if (this.config.generateFirstMessage) {
        await this.answer();
      }
    } catch (error) {
      console.error("[MicdropServer]", error);
      this.socket?.send("SkipAnswer" /* SkipAnswer */);
    }
  }

  /**
   * Cancel any pending answer, ask the agent for a new one and speak it.
   * Failures are logged and reported to the client as "SkipAnswer".
   */
  async answer() {
    if (!this.config) return;
    try {
      // Inside the try block: this method is called without await from event
      // handlers, so it must never reject.
      this.cancel();
      const { stream } = this.config.agent.answer();
      await this.speak(stream);
    } catch (error) {
      console.error("[MicdropServer]", error);
      this.socket?.send("SkipAnswer" /* SkipAnswer */);
    }
  }

  /**
   * Run text-to-speech and send to client.
   *
   * @param message Either a string or a stream of text; a string is wrapped
   *   in a stream that is ended immediately.
   */
  async speak(message) {
    if (!this.socket || !this.config) return;
    let textStream;
    if (typeof message === "string") {
      const stream = new PassThrough3();
      stream.write(message);
      stream.end();
      textStream = stream;
    } else {
      textStream = message;
    }
    const audio = this.config.tts.speak(textStream);
    await this.sendAudio(audio);
  }

  /**
   * Forward every chunk of the audio stream to the socket.
   *
   * Only attaches listeners: the returned promise resolves as soon as they
   * are registered, not when the stream ends.
   */
  async sendAudio(audio) {
    if (!this.socket) return;
    if (!audio.readable) {
      this.log("Non readable audio, skipping", audio);
      return;
    }
    audio.on("data", (chunk) => {
      this.log(`Send audio chunk (${chunk.byteLength} bytes)`);
      this.socket?.send(chunk);
    });
    audio.on("error", (error) => {
      this.log("Error in audio stream", error);
    });
    audio.on("end", () => {
      this.log("Audio stream ended");
    });
  }
};
|
|
342
|
+
|
|
343
|
+
// src/stt/STT.ts
|
|
344
|
+
import EventEmitter2 from "eventemitter3";
|
|
345
|
+
// File extension used for each mime type that detectMimeType can return.
var MIME_TYPE_TO_EXTENSION = {
  "audio/wav": "wav",
  "audio/ogg": "ogg",
  "audio/mpeg": "mp3",
  "audio/webm": "webm",
  "audio/mp4": "mp4",
  "audio/flac": "flac"
};

/**
 * Base class for speech-to-text implementations.
 *
 * On its own it only sniffs the mime type of the incoming audio from the
 * first chunk; subclasses add the actual transcription.
 */
var STT = class extends EventEmitter2 {
  // Set stream of audio to transcribe
  transcribe(audioStream) {
    // Only the first chunk is inspected to detect the container format.
    audioStream.once("data", (firstChunk) => {
      this.mimeType = this.detectMimeType(firstChunk);
    });
  }

  /** Forward the arguments to `this.logger.log` when a logger is set. */
  log(...message) {
    const { logger } = this;
    if (logger != null) {
      logger.log(...message);
    }
  }

  /** Drop every event listener. */
  destroy() {
    this.log("Destroyed");
    this.removeAllListeners();
  }

  /** File extension matching the detected mime type, or "bin" when unknown. */
  get extension() {
    const known = this.mimeType ? MIME_TYPE_TO_EXTENSION[this.mimeType] : void 0;
    return known || "bin";
  }

  /**
   * Guess the audio mime type from the leading bytes of a chunk.
   *
   * @param chunk First chunk of audio data.
   * @returns One of the keys of MIME_TYPE_TO_EXTENSION; "audio/wav" is the
   *   fallback when no signature matches.
   * @throws Error when the chunk is missing or empty.
   */
  detectMimeType(chunk) {
    if (!chunk || chunk.byteLength === 0) {
      throw new Error("Unable to detect mime type (empty chunk)");
    }
    const bytes = new Uint8Array(chunk);
    // True when `expected` appears in the chunk starting at `offset`.
    const matchesAt = (offset, expected) =>
      expected.every((value, index) => bytes[offset + index] === value);

    // 1A 45 DF A3
    if (matchesAt(0, [26, 69, 223, 163])) {
      return "audio/webm";
    }
    // "OggS"
    if (matchesAt(0, [79, 103, 103, 83])) {
      return "audio/ogg";
    }
    // "RIFF" at offset 0 and "WAVE" at offset 8
    if (matchesAt(0, [82, 73, 70, 70]) && matchesAt(8, [87, 65, 86, 69])) {
      return "audio/wav";
    }
    // "ID3"
    if (matchesAt(0, [73, 68, 51])) {
      return "audio/mpeg";
    }
    // "ftyp" at offset 4
    if (matchesAt(4, [102, 116, 121, 112])) {
      return "audio/mp4";
    }
    // "fLaC"
    if (matchesAt(0, [102, 76, 97, 67])) {
      return "audio/flac";
    }
    this.log("Unable to detect mime type, using default", chunk);
    return "audio/wav";
  }
};
|
|
397
|
+
|
|
398
|
+
// src/stt/FileSTT.ts
|
|
399
|
+
/**
 * STT that buffers the whole audio stream, wraps it in a File and hands it
 * to `transcribeFile(file)`, which is not defined on this class and has to
 * be supplied by a subclass. The result is emitted as a "Transcript" event.
 */
var FileSTT = class extends STT {
  /**
   * Collect every chunk of `audioStream` and transcribe the result once the
   * stream ends. Nothing is emitted when the stream ends without data or
   * when the transcription fails.
   */
  transcribe(audioStream) {
    super.transcribe(audioStream);
    this.log("Converting stream to file...");
    const chunks = [];
    audioStream.on("data", (chunk) => {
      chunks.push(chunk);
    });
    audioStream.on("end", async () => {
      if (chunks.length === 0) return;
      // Nothing awaits this listener, so a failed transcription would
      // otherwise surface as an unhandled promise rejection.
      try {
        const audioBuffer = Buffer.concat(chunks);
        const file = new File([audioBuffer], `audio.${this.extension}`, {
          type: this.mimeType
        });
        this.log("Transcribing file...");
        const transcript = await this.transcribeFile(file);
        this.emit("Transcript", transcript);
      } catch (error) {
        console.error("[FileSTT]", error);
      }
    });
  }
};
|
|
225
419
|
|
|
226
|
-
// src/
|
|
227
|
-
var
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
constructor(code, message) {
|
|
235
|
-
super(message);
|
|
236
|
-
this.code = code;
|
|
420
|
+
// src/stt/MockSTT.ts
|
|
421
|
+
/**
 * FileSTT stand-in that ignores the audio and returns a numbered placeholder
 * transcript ("User Message 0", "User Message 1", ...).
 */
var MockSTT = class extends FileSTT {
  constructor(...args) {
    super(...args);
    // Counter used to number the generated transcripts.
    this.i = 0;
  }

  /** @returns The next placeholder transcript; `file` is not inspected. */
  async transcribeFile(file) {
    const index = this.i++;
    return `User Message ${index}`;
  }
};
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
430
|
+
|
|
431
|
+
// src/tts/MockTTS.ts
|
|
432
|
+
import * as fs from "fs";
|
|
433
|
+
import { PassThrough as PassThrough4 } from "stream";
|
|
434
|
+
|
|
435
|
+
// src/tts/TTS.ts
|
|
436
|
+
/**
 * Base class for text-to-speech implementations.
 *
 * `destroy()` calls `this.cancel()`, which is not defined on this class;
 * a concrete TTS has to provide it.
 */
var TTS = class {
  /** Forward the arguments to `this.logger.log` when a logger is set. */
  log(...message) {
    const { logger } = this;
    if (logger != null) {
      logger.log(...message);
    }
  }

  /** Log the teardown, then cancel any speech in progress. */
  destroy() {
    this.log("Destroyed");
    this.cancel();
  }
};
|
|
445
|
+
|
|
446
|
+
// src/tts/MockTTS.ts
|
|
447
|
+
/**
 * TTS stand-in that ignores the text and replays pre-recorded audio files.
 */
var MockTTS = class extends TTS {
  /** @param audioFilePaths Paths of the audio files to replay, in order. */
  constructor(audioFilePaths) {
    super();
    this.audioFilePaths = audioFilePaths;
  }

  /**
   * Start replaying the audio files once the first text chunk arrives.
   * Each file is written as one chunk after a 200 ms pause.
   *
   * @param textStream Stream of text; only its first "data" event is used.
   * @returns A PassThrough stream that receives the audio and is always
   *   ended, even when reading a file fails.
   */
  speak(textStream) {
    const audioStream = new PassThrough4();
    textStream.once("data", async () => {
      // Nothing awaits this listener: contain read failures here and always
      // end the stream so consumers are not left waiting.
      try {
        for (const filePath of this.audioFilePaths) {
          await new Promise((resolve) => setTimeout(resolve, 200));
          const audioBuffer = fs.readFileSync(filePath);
          this.log(`Loaded chunk (${audioBuffer.length} bytes)`);
          audioStream.write(audioBuffer);
        }
      } catch (error) {
        console.error("[MockTTS]", error);
      } finally {
        audioStream.end();
      }
    });
    return audioStream;
  }

  /** No-op: a replay that has started is not interrupted. */
  cancel() {
  }
};
|
|
248
468
|
|
|
249
469
|
// src/waitForParams.ts
|
|
250
470
|
async function waitForParams(socket, validate) {
|
|
251
471
|
return new Promise((resolve, reject) => {
|
|
252
472
|
const timeout = setTimeout(() => {
|
|
253
|
-
reject(new
|
|
473
|
+
reject(new MicdropError(4400 /* BadRequest */, "Missing params"));
|
|
254
474
|
}, 3e3);
|
|
255
475
|
const onParams = (payload) => {
|
|
256
476
|
clearTimeout(timeout);
|
|
@@ -259,18 +479,29 @@ async function waitForParams(socket, validate) {
|
|
|
259
479
|
const params = validate(JSON.parse(payload));
|
|
260
480
|
resolve(params);
|
|
261
481
|
} catch (error) {
|
|
262
|
-
reject(new
|
|
482
|
+
reject(new MicdropError(4400 /* BadRequest */, "Invalid params"));
|
|
263
483
|
}
|
|
264
484
|
};
|
|
265
485
|
socket.on("message", onParams);
|
|
266
486
|
});
|
|
267
487
|
}
|
|
268
488
|
export {
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
489
|
+
Agent,
|
|
490
|
+
FileSTT,
|
|
491
|
+
Logger,
|
|
492
|
+
MicdropClientCommands,
|
|
493
|
+
MicdropError,
|
|
494
|
+
MicdropErrorCode,
|
|
495
|
+
MicdropServer,
|
|
496
|
+
MicdropServerCommands,
|
|
497
|
+
MockAgent,
|
|
498
|
+
MockSTT,
|
|
499
|
+
MockTTS,
|
|
500
|
+
STT,
|
|
501
|
+
TTS,
|
|
502
|
+
convertPCMToOpus,
|
|
503
|
+
convertToOpus,
|
|
504
|
+
convertToPCM,
|
|
274
505
|
handleError,
|
|
275
506
|
waitForParams
|
|
276
507
|
};
|