@micdrop/server 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,245 @@
1
+ # @micdrop/server
2
+
3
+ A Node.js library for handling real-time voice conversations with WebSocket-based audio streaming.
4
+
5
+ For the browser implementation, see the [@micdrop/client](../client/README.md) package.
6
+
7
+ ## Features
8
+
9
+ - 🌐 WebSocket server for real-time audio streaming
10
+ - 🔊 Audio data handling and processing
11
+ - 💬 Conversation state management
12
+ - ⚡ Event-based architecture
13
+ - 🔄 Bi-directional communication
14
+ - 🛡️ Built-in error handling
15
+ - 🎙️ Speech-to-text and text-to-speech integration
16
+ - 🤖 AI conversation generation support
17
+ - 💾 Debug mode with optional audio saving
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ npm install @micdrop/server
23
+ # or
24
+ yarn add @micdrop/server
25
+ # or
26
+ pnpm add @micdrop/server
27
+ ```
28
+
29
+ ## Quick Start
30
+
31
+ ```typescript
32
+ import { WebSocketServer } from 'ws'
33
+ import { CallSocket, CallConfig } from '@micdrop/server'
34
+
35
+ // Create WebSocket server
36
+ const wss = new WebSocketServer({ port: 8080 })
37
+
38
+ // Define call configuration
39
+ const config: CallConfig = {
40
+ // Initial system prompt for the conversation
41
+ systemPrompt: 'You are a helpful assistant',
42
+
43
+ // Optional first message from assistant
44
+ firstMessage: 'Hello!',
45
+
46
+ // Function to generate assistant responses
47
+ async generateAnswer(conversation) {
48
+ // Implement your LLM or response generation logic
49
+ return 'Assistant response'
50
+ },
51
+
52
+ // Function to convert speech to text
53
+ async speech2Text(audioBlob, lastMessagePrompt) {
54
+ // Implement your STT logic
55
+ return 'Transcribed text'
56
+ },
57
+
58
+ // Function to convert text to speech
59
+ async text2Speech(text) {
60
+ // Implement your TTS logic
61
+ return new ArrayBuffer(0) // Audio data
62
+ },
63
+
64
+ // Optional callback when a message is added
65
+ onMessage(message) {
66
+ console.log('New message:', message)
67
+ },
68
+
69
+ // Optional callback when call ends
70
+ onEnd(summary) {
71
+ console.log('Call ended:', summary)
72
+ },
73
+ }
74
+
75
+ // Handle new connections
76
+ wss.on('connection', (ws) => {
77
+ // Create call handler with configuration
78
+ new CallSocket(ws, config)
79
+ })
80
+ ```
81
+
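+ The handlers above are placeholders. Below is a minimal sketch of concrete implementations, assuming the official `openai` SDK and an `OPENAI_API_KEY` environment variable; the model names, voice, and audio format are illustrative choices, not requirements of `@micdrop/server`.
+
+ ```typescript
+ import OpenAI, { toFile } from 'openai'
+ import { CallConfig } from '@micdrop/server'
+
+ const openai = new OpenAI() // reads OPENAI_API_KEY from the environment
+
+ const config: CallConfig = {
+   systemPrompt: 'You are a helpful assistant',
+   firstMessage: 'Hello!',
+
+   // LLM: generate the next assistant message from the conversation history
+   async generateAnswer(conversation) {
+     const completion = await openai.chat.completions.create({
+       model: 'gpt-4o-mini',
+       // ConversationMessage is structurally compatible with the SDK's message params
+       messages: conversation as OpenAI.Chat.Completions.ChatCompletionMessageParam[],
+     })
+     return completion.choices[0]?.message.content ?? ''
+   },
+
+   // STT: transcribe the recorded audio blob with Whisper
+   async speech2Text(audioBlob, lastMessagePrompt) {
+     const transcription = await openai.audio.transcriptions.create({
+       model: 'whisper-1',
+       file: await toFile(audioBlob, 'speech.ogg'),
+       prompt: lastMessagePrompt,
+     })
+     return transcription.text
+   },
+
+   // TTS: synthesize the assistant message (MP3 by default)
+   async text2Speech(text) {
+     const speech = await openai.audio.speech.create({
+       model: 'tts-1',
+       voice: 'alloy',
+       input: text,
+     })
+     return speech.arrayBuffer()
+   },
+ }
+ ```
+
+ Pass this `config` to `new CallSocket(ws, config)` exactly as in the Quick Start above.
+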
82
+ ## Documentation
83
+
84
+ The server package provides several core components:
85
+
86
+ - **CallSocket** - Main class that handles WebSocket connections, audio streaming, and conversation flow
87
+ - **CallConfig** - Configuration interface for customizing speech processing and conversation behavior
88
+ - **Types** - Common TypeScript types and interfaces for messages and commands
89
+ - **Error Handling** - Standardized error handling with specific error codes
90
+
91
+ ## API Reference
92
+
93
+ ### CallSocket
94
+
95
+ The main class for managing WebSocket connections and audio streaming.
96
+
97
+ ```typescript
98
+ class CallSocket {
99
+ constructor(socket: WebSocket, config: CallConfig)
100
+ }
101
+ ```
102
+
103
+ ### CallConfig
104
+
105
+ Configuration interface for customizing the call behavior.
106
+
107
+ ```typescript
108
+ interface CallConfig {
109
+ // Initial system prompt for the conversation
110
+ systemPrompt: string
111
+
112
+ // Optional first message from assistant
113
+ firstMessage?: string
114
+
115
+ // Generate assistant's response
116
+ generateAnswer(conversation: Conversation): Promise<string>
117
+
118
+ // Convert audio to text
119
+ speech2Text(blob: Blob, prompt?: string): Promise<string>
120
+
121
+ // Convert text to audio
122
+ text2Speech(text: string): Promise<ArrayBuffer>
123
+
124
+ // Optional callbacks
125
+ onMessage?(message: ConversationMessage): void
126
+ onEnd?(summary: CallSummary): void
127
+ }
128
+ ```
129
+
130
+ ### Message Types
131
+
132
+ ```typescript
133
+ interface ConversationMessage {
134
+ role: 'system' | 'user' | 'assistant'
135
+ content: string
136
+ }
137
+
138
+ type Conversation = ConversationMessage[]
139
+
140
+ interface CallSummary {
141
+ conversation: Conversation
142
+ duration: number
143
+ }
144
+ ```
145
+
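+ To show how these types fit together, here is a small sketch of the optional callbacks; persisting call summaries to a JSONL file is just an illustration, not something the package does for you. Note that `duration` is reported in seconds and the conversation in the summary excludes the system prompt.
+
+ ```typescript
+ import { appendFileSync } from 'fs'
+ import { CallConfig, CallSummary, ConversationMessage } from '@micdrop/server'
+
+ const callbacks: Pick<CallConfig, 'onMessage' | 'onEnd'> = {
+   // Called for every user/assistant message added to the conversation
+   onMessage(message: ConversationMessage) {
+     console.log(`[${message.role}] ${message.content}`)
+   },
+   // Called when the call ends, with the conversation and duration in seconds
+   onEnd(summary: CallSummary) {
+     console.log(`Call lasted ${summary.duration}s (${summary.conversation.length} messages)`)
+     appendFileSync('calls.jsonl', JSON.stringify(summary) + '\n')
+   },
+ }
+ ```
+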
146
+ ## WebSocket Protocol
147
+
148
+ The server implements a specific protocol for client-server communication:
149
+
150
+ ### Client Commands
151
+
152
+ ```typescript
153
+ enum CallClientCommands {
154
+ StartSpeaking = 'startSpeaking',
155
+ StopSpeaking = 'stopSpeaking',
156
+ Mute = 'mute',
157
+ }
158
+ ```
159
+
160
+ ### Server Commands
161
+
162
+ ```typescript
163
+ enum CallServerCommands {
164
+ UserMessage = 'userMessage',
165
+ AssistantMessage = 'assistantMessage',
166
+ CancelLastAssistantMessage = 'cancelLastAssistantMessage',
167
+ EndInterview = 'endInterview',
168
+ }
169
+ ```
170
+
171
+ ### Message Flow
172
+
173
+ 1. Client connects to WebSocket server
174
+ 2. Server sends initial assistant message (if configured)
175
+ 3. Client sends audio chunks when user speaks
176
+ 4. Server processes audio and responds with text/audio
177
+ 5. The process continues until the interview ends (see the client sketch below)
178
+
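+ As a rough illustration of this flow, here is a minimal Node.js client sketch using the same `ws` package. Judging by the bundled build, commands travel as short plain-text frames, user/assistant messages as text frames of the form `<command> <content>` (e.g. `assistantMessage Hello!`), and audio as binary frames. The `user-speech.ogg` file name is a placeholder; the audio must be in whatever format your `speech2Text` implementation accepts.
+
+ ```typescript
+ import { readFileSync } from 'fs'
+ import { WebSocket } from 'ws'
+
+ const audioChunk = readFileSync('./user-speech.ogg') // placeholder recording
+ const ws = new WebSocket('ws://localhost:8080')
+
+ ws.on('open', () => {
+   // Simulate one user turn: start speaking, send audio, stop speaking
+   ws.send('startSpeaking')
+   ws.send(audioChunk)
+   ws.send('stopSpeaking')
+ })
+
+ ws.on('message', (data, isBinary) => {
+   if (isBinary) {
+     // Binary frames carry the synthesized assistant audio (text2Speech output)
+     console.log(`Received assistant audio (${(data as Buffer).byteLength} bytes)`)
+     return
+   }
+   const text = data.toString()
+   if (text.startsWith('assistantMessage ')) {
+     console.log('Assistant:', text.slice('assistantMessage '.length))
+   } else if (text.startsWith('userMessage ')) {
+     console.log('Transcript:', text.slice('userMessage '.length))
+   } else if (text === 'endInterview') {
+     ws.close()
+   }
+ })
+ ```
+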
179
+ ## Error Handling
180
+
181
+ The server implements standardized error handling with specific codes:
182
+
183
+ ```typescript
184
+ enum CallErrorCode {
185
+ BadRequest = 4400,
186
+ Unauthorized = 4401,
187
+ NotFound = 4404,
188
+ }
189
+ ```
190
+
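+ Since `CallError` and `handleError` are exported, a connection handler can reject a call with one of these codes before creating the `CallSocket`. The token check below is purely hypothetical, and `config` stands for the `CallConfig` from the Quick Start example.
+
+ ```typescript
+ import { WebSocketServer } from 'ws'
+ import { CallConfig, CallError, CallErrorCode, CallSocket, handleError } from '@micdrop/server'
+
+ declare const config: CallConfig // the CallConfig from the Quick Start example
+
+ const wss = new WebSocketServer({ port: 8080 })
+
+ wss.on('connection', (ws, req) => {
+   try {
+     // Hypothetical auth check based on a query parameter
+     const url = new URL(req.url ?? '', 'http://localhost')
+     if (url.searchParams.get('token') !== process.env.CALL_TOKEN) {
+       throw new CallError(CallErrorCode.Unauthorized, 'Invalid token')
+     }
+     new CallSocket(ws, config)
+   } catch (error) {
+     // Closes the socket with the CallError code, or 1011 for unexpected errors
+     handleError(ws, error)
+   }
+ })
+ ```
+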
191
+ Common error scenarios:
192
+
193
+ - Invalid WebSocket messages
194
+ - Authentication failures
195
+ - Missing or invalid parameters
196
+ - Audio processing errors
197
+ - Connection timeouts
198
+
199
+ ## Integration Example
200
+
201
+ Here's an example using Fastify:
202
+
203
+ ```typescript
204
+ import fastify from 'fastify'
205
+ import fastifyWebsocket from '@fastify/websocket'
206
+ import { CallSocket, CallConfig } from '@micdrop/server'
207
+
208
+ const server = fastify()
209
+ server.register(fastifyWebsocket)
210
+
211
+ server.get('/call', { websocket: true }, (socket) => {
212
+ const config: CallConfig = {
213
+ systemPrompt: 'You are a helpful assistant',
214
+ // ... other config options
215
+ }
216
+ new CallSocket(socket, config)
217
+ })
218
+
219
+ server.listen({ port: 8080 })
220
+ ```
221
+
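+ The exported `waitForParams` helper pairs well with this setup: it waits up to 3 seconds for the client's first message, parses it as JSON, runs it through your validator, and rejects with a `CallError` (BadRequest) if the params are missing or invalid. The `lang` parameter below is purely illustrative; the client would send something like `{"lang":"en"}` right after connecting.
+
+ ```typescript
+ import fastify from 'fastify'
+ import fastifyWebsocket from '@fastify/websocket'
+ import { CallConfig, CallSocket, handleError, waitForParams } from '@micdrop/server'
+
+ interface CallParams {
+   lang: string // hypothetical per-call parameter
+ }
+
+ const server = fastify()
+ server.register(fastifyWebsocket)
+
+ server.get('/call', { websocket: true }, async (socket) => {
+   try {
+     // First message from the client must be valid JSON params
+     const params = await waitForParams<CallParams>(socket, (raw) => {
+       if (typeof raw?.lang !== 'string') throw new Error('Invalid lang')
+       return { lang: raw.lang }
+     })
+
+     const config: CallConfig = {
+       systemPrompt: `You are a helpful assistant. Answer in ${params.lang}.`,
+       // ... other config options (generateAnswer, speech2Text, text2Speech)
+     }
+     new CallSocket(socket, config)
+   } catch (error) {
+     handleError(socket, error)
+   }
+ })
+
+ server.listen({ port: 8080 })
+ ```
+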
222
+ ## Debug Mode
223
+
224
+ The server includes a debug mode that can:
225
+
226
+ - Log detailed timing information
227
+ - Save audio files for debugging (optional)
228
+ - Track conversation state
229
+ - Monitor WebSocket events
230
+
231
+ ## Browser Support
232
+
233
+ The server is designed to work with any WebSocket client, but is specifically tested with:
234
+
235
+ - Modern browsers supporting WebSocket API
236
+ - Node.js clients
237
+ - @micdrop/client package
238
+
239
+ ## License
240
+
241
+ MIT
242
+
243
+ ## Author
244
+
245
+ [Lonestone](https://www.lonestone.io) ([GitHub](https://github.com/lonestone))
@@ -0,0 +1,65 @@
1
+ import WebSocket$1, { WebSocket } from 'ws';
2
+
3
+ declare enum CallClientCommands {
4
+ StartSpeaking = "startSpeaking",
5
+ StopSpeaking = "stopSpeaking",
6
+ Mute = "mute"
7
+ }
8
+ declare enum CallServerCommands {
9
+ UserMessage = "userMessage",
10
+ AssistantMessage = "assistantMessage",
11
+ CancelLastAssistantMessage = "cancelLastAssistantMessage",
12
+ EndInterview = "endInterview"
13
+ }
14
+ interface CallConfig {
15
+ systemPrompt: string;
16
+ firstMessage?: string;
17
+ generateAnswer(conversation: Conversation): Promise<string>;
18
+ speech2Text(blob: Blob, prompt?: string): Promise<string>;
19
+ text2Speech(text: string): Promise<ArrayBuffer>;
20
+ onMessage?(message: ConversationMessage): void;
21
+ onEnd?(call: CallSummary): void;
22
+ }
23
+ interface CallSummary {
24
+ conversation: Conversation;
25
+ duration: number;
26
+ }
27
+ type Conversation = ConversationMessage[];
28
+ interface ConversationMessage {
29
+ role: 'system' | 'user' | 'assistant';
30
+ content: string;
31
+ }
32
+
33
+ declare const END_INTERVIEW = "END_INTERVIEW";
34
+ declare class CallSocket {
35
+ socket: WebSocket;
36
+ config: CallConfig;
37
+ private startTime;
38
+ private lastDebug;
39
+ private abortAnswer;
40
+ private isSpeaking;
41
+ private chunks;
42
+ private conversation;
43
+ constructor(socket: WebSocket, config: CallConfig);
44
+ private addMessage;
45
+ private onClose;
46
+ private onMessage;
47
+ private onStopSpeaking;
48
+ private answer;
49
+ private debug;
50
+ }
51
+
52
+ declare enum CallErrorCode {
53
+ BadRequest = 4400,
54
+ Unauthorized = 4401,
55
+ NotFound = 4404
56
+ }
57
+ declare class CallError extends Error {
58
+ code: number;
59
+ constructor(code: number, message: string);
60
+ }
61
+ declare function handleError(socket: WebSocket$1, error: unknown): void;
62
+
63
+ declare function waitForParams<CallParams>(socket: WebSocket, validate: (params: any) => CallParams): Promise<CallParams>;
64
+
65
+ export { CallClientCommands, type CallConfig, CallError, CallErrorCode, CallServerCommands, CallSocket, type CallSummary, type Conversation, type ConversationMessage, END_INTERVIEW, handleError, waitForParams };
@@ -0,0 +1,65 @@
1
+ import WebSocket$1, { WebSocket } from 'ws';
2
+
3
+ declare enum CallClientCommands {
4
+ StartSpeaking = "startSpeaking",
5
+ StopSpeaking = "stopSpeaking",
6
+ Mute = "mute"
7
+ }
8
+ declare enum CallServerCommands {
9
+ UserMessage = "userMessage",
10
+ AssistantMessage = "assistantMessage",
11
+ CancelLastAssistantMessage = "cancelLastAssistantMessage",
12
+ EndInterview = "endInterview"
13
+ }
14
+ interface CallConfig {
15
+ systemPrompt: string;
16
+ firstMessage?: string;
17
+ generateAnswer(conversation: Conversation): Promise<string>;
18
+ speech2Text(blob: Blob, prompt?: string): Promise<string>;
19
+ text2Speech(text: string): Promise<ArrayBuffer>;
20
+ onMessage?(message: ConversationMessage): void;
21
+ onEnd?(call: CallSummary): void;
22
+ }
23
+ interface CallSummary {
24
+ conversation: Conversation;
25
+ duration: number;
26
+ }
27
+ type Conversation = ConversationMessage[];
28
+ interface ConversationMessage {
29
+ role: 'system' | 'user' | 'assistant';
30
+ content: string;
31
+ }
32
+
33
+ declare const END_INTERVIEW = "END_INTERVIEW";
34
+ declare class CallSocket {
35
+ socket: WebSocket;
36
+ config: CallConfig;
37
+ private startTime;
38
+ private lastDebug;
39
+ private abortAnswer;
40
+ private isSpeaking;
41
+ private chunks;
42
+ private conversation;
43
+ constructor(socket: WebSocket, config: CallConfig);
44
+ private addMessage;
45
+ private onClose;
46
+ private onMessage;
47
+ private onStopSpeaking;
48
+ private answer;
49
+ private debug;
50
+ }
51
+
52
+ declare enum CallErrorCode {
53
+ BadRequest = 4400,
54
+ Unauthorized = 4401,
55
+ NotFound = 4404
56
+ }
57
+ declare class CallError extends Error {
58
+ code: number;
59
+ constructor(code: number, message: string);
60
+ }
61
+ declare function handleError(socket: WebSocket$1, error: unknown): void;
62
+
63
+ declare function waitForParams<CallParams>(socket: WebSocket, validate: (params: any) => CallParams): Promise<CallParams>;
64
+
65
+ export { CallClientCommands, type CallConfig, CallError, CallErrorCode, CallServerCommands, CallSocket, type CallSummary, type Conversation, type ConversationMessage, END_INTERVIEW, handleError, waitForParams };
package/dist/index.js ADDED
@@ -0,0 +1,263 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+
30
+ // index.ts
31
+ var index_exports = {};
32
+ __export(index_exports, {
33
+ CallClientCommands: () => CallClientCommands,
34
+ CallError: () => CallError,
35
+ CallErrorCode: () => CallErrorCode,
36
+ CallServerCommands: () => CallServerCommands,
37
+ CallSocket: () => CallSocket,
38
+ END_INTERVIEW: () => END_INTERVIEW,
39
+ handleError: () => handleError,
40
+ waitForParams: () => waitForParams
41
+ });
42
+ module.exports = __toCommonJS(index_exports);
43
+
44
+ // src/CallSocket.ts
45
+ var fs = __toESM(require("fs"));
46
+ var path = __toESM(require("path"));
47
+
48
+ // src/types.ts
49
+ var CallClientCommands = /* @__PURE__ */ ((CallClientCommands2) => {
50
+ CallClientCommands2["StartSpeaking"] = "startSpeaking";
51
+ CallClientCommands2["StopSpeaking"] = "stopSpeaking";
52
+ CallClientCommands2["Mute"] = "mute";
53
+ return CallClientCommands2;
54
+ })(CallClientCommands || {});
55
+ var CallServerCommands = /* @__PURE__ */ ((CallServerCommands2) => {
56
+ CallServerCommands2["UserMessage"] = "userMessage";
57
+ CallServerCommands2["AssistantMessage"] = "assistantMessage";
58
+ CallServerCommands2["CancelLastAssistantMessage"] = "cancelLastAssistantMessage";
59
+ CallServerCommands2["EndInterview"] = "endInterview";
60
+ return CallServerCommands2;
61
+ })(CallServerCommands || {});
62
+
63
+ // src/CallSocket.ts
64
+ var debugEnabled = false;
65
+ var debugSaveFile = false;
66
+ var disableTTS = false;
67
+ var END_INTERVIEW = "END_INTERVIEW";
68
+ var CallSocket = class {
69
+ constructor(socket, config) {
70
+ this.socket = socket;
71
+ this.config = config;
72
+ this.startTime = Date.now();
73
+ this.lastDebug = Date.now();
74
+ // An answer can be aborted if user is speaking
75
+ this.abortAnswer = false;
76
+ // When user is speaking, we're waiting to chunks or to stop
77
+ this.isSpeaking = false;
78
+ // Chunks of user speech since user started speaking
79
+ this.chunks = [];
80
+ this.conversation = [{ role: "system", content: config.systemPrompt }];
81
+ this.debug(`Call started`);
82
+ if (config.firstMessage) {
83
+ this.answer(config.firstMessage);
84
+ } else {
85
+ this.config.generateAnswer(this.conversation).then((answer) => this.answer(answer)).catch((error) => {
86
+ console.error("[WS]", error);
87
+ this.socket.close();
88
+ });
89
+ }
90
+ this.socket.on("close", this.onClose.bind(this));
91
+ this.socket.on("message", this.onMessage.bind(this));
92
+ }
93
+ addMessage(message) {
94
+ this.conversation.push(message);
95
+ this.socket.send(
96
+ `${message.role === "user" ? "userMessage" /* UserMessage */ : "assistantMessage" /* AssistantMessage */} ${message.content}`
97
+ );
98
+ this.config.onMessage?.(message);
99
+ }
100
+ onClose() {
101
+ this.debug("Connection closed");
102
+ this.abortAnswer = true;
103
+ const duration = Math.round((Date.now() - this.startTime) / 1e3);
104
+ this.config.onEnd?.({
105
+ conversation: this.conversation.slice(1),
106
+ // Remove system message
107
+ duration
108
+ });
109
+ this.socket = null;
110
+ this.config = null;
111
+ }
112
+ async onMessage(message) {
113
+ if (!Buffer.isBuffer(message)) {
114
+ console.warn(`[WS] Message is not a buffer`);
115
+ return;
116
+ }
117
+ if (message.byteLength < 15) {
118
+ const cmd = message.toString();
119
+ this.debug(`Command: ${cmd}`);
120
+ if (cmd === "startSpeaking" /* StartSpeaking */) {
121
+ this.isSpeaking = true;
122
+ this.abortAnswer = true;
123
+ } else if (cmd === "mute" /* Mute */) {
124
+ this.abortAnswer = true;
125
+ } else if (cmd === "stopSpeaking" /* StopSpeaking */) {
126
+ this.isSpeaking = false;
127
+ await this.onStopSpeaking();
128
+ }
129
+ } else if (Buffer.isBuffer(message) && this.isSpeaking) {
130
+ this.debug(`Received chunk (${message.byteLength} bytes)`);
131
+ this.chunks.push(message);
132
+ }
133
+ }
134
+ async onStopSpeaking() {
135
+ if (this.chunks.length === 0) return;
136
+ this.abortAnswer = false;
137
+ const blob = new Blob(this.chunks, { type: "audio/ogg" });
138
+ this.chunks.length = 0;
139
+ try {
140
+ if (debugSaveFile) {
141
+ const filePath = path.join(__dirname, "speech.ogg");
142
+ fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()));
143
+ return;
144
+ }
145
+ const transcript = await this.config.speech2Text(
146
+ blob,
147
+ this.conversation[this.conversation.length - 1]?.content
148
+ );
149
+ if (!transcript) {
150
+ this.debug("Ignoring empty transcript");
151
+ return;
152
+ }
153
+ this.debug("User transcript:", transcript);
154
+ this.addMessage({ role: "user", content: transcript });
155
+ if (this.abortAnswer) {
156
+ this.debug("Answer aborted, no answer generated");
157
+ return;
158
+ }
159
+ const answer = await this.config.generateAnswer(this.conversation);
160
+ if (this.abortAnswer) {
161
+ this.debug("Answer aborted, ignoring answer");
162
+ return;
163
+ }
164
+ await this.answer(answer);
165
+ } catch (error) {
166
+ console.error("[WS]", error);
167
+ this.socket.close();
168
+ }
169
+ }
170
+ async answer(message) {
171
+ let isEnd = false;
172
+ if (message.includes(END_INTERVIEW)) {
173
+ message = message.replace(END_INTERVIEW, "").trim();
174
+ isEnd = true;
175
+ }
176
+ if (message.length) {
177
+ this.debug("Assistant message:", message);
178
+ this.addMessage({ role: "assistant", content: message });
179
+ if (!disableTTS) {
180
+ const audio = await this.config.text2Speech(message);
181
+ if (this.abortAnswer) {
182
+ this.debug("Answer aborted, removing last assistant message");
183
+ const lastMessage = this.conversation[this.conversation.length - 1];
184
+ if (lastMessage?.role === "assistant") {
185
+ this.conversation.pop();
186
+ this.socket.send("cancelLastAssistantMessage" /* CancelLastAssistantMessage */);
187
+ }
188
+ return;
189
+ }
190
+ this.debug(`Send audio: (${audio.byteLength} bytes)`);
191
+ this.socket.send(audio);
192
+ }
193
+ }
194
+ if (isEnd) {
195
+ this.debug("Interview ended");
196
+ this.socket.send("endInterview" /* EndInterview */);
197
+ }
198
+ }
199
+ debug(...message) {
200
+ if (!debugEnabled) return;
201
+ const nowTime = Date.now();
202
+ console.log(
203
+ `[WS] [${nowTime - this.startTime} | ${nowTime - this.lastDebug}ms]`,
204
+ ...message
205
+ );
206
+ this.lastDebug = nowTime;
207
+ }
208
+ };
209
+
210
+ // src/errors.ts
211
+ var CallErrorCode = /* @__PURE__ */ ((CallErrorCode2) => {
212
+ CallErrorCode2[CallErrorCode2["BadRequest"] = 4400] = "BadRequest";
213
+ CallErrorCode2[CallErrorCode2["Unauthorized"] = 4401] = "Unauthorized";
214
+ CallErrorCode2[CallErrorCode2["NotFound"] = 4404] = "NotFound";
215
+ return CallErrorCode2;
216
+ })(CallErrorCode || {});
217
+ var CallError = class extends Error {
218
+ constructor(code, message) {
219
+ super(message);
220
+ this.code = code;
221
+ }
222
+ };
223
+ function handleError(socket, error) {
224
+ if (error instanceof CallError) {
225
+ socket.close(error.code, error.message);
226
+ } else {
227
+ console.error(error);
228
+ socket.close(1011);
229
+ }
230
+ socket.terminate();
231
+ }
232
+
233
+ // src/waitForParams.ts
234
+ async function waitForParams(socket, validate) {
235
+ return new Promise((resolve, reject) => {
236
+ const timeout = setTimeout(() => {
237
+ reject(new CallError(4400 /* BadRequest */, "Missing params"));
238
+ }, 3e3);
239
+ const onParams = (payload) => {
240
+ clearTimeout(timeout);
241
+ socket.off("message", onParams);
242
+ try {
243
+ const params = validate(JSON.parse(payload));
244
+ resolve(params);
245
+ } catch (error) {
246
+ reject(new CallError(4400 /* BadRequest */, "Invalid params"));
247
+ }
248
+ };
249
+ socket.on("message", onParams);
250
+ });
251
+ }
252
+ // Annotate the CommonJS export names for ESM import in node:
253
+ 0 && (module.exports = {
254
+ CallClientCommands,
255
+ CallError,
256
+ CallErrorCode,
257
+ CallServerCommands,
258
+ CallSocket,
259
+ END_INTERVIEW,
260
+ handleError,
261
+ waitForParams
262
+ });
263
+ //# sourceMappingURL=index.js.map
package/dist/index.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../index.ts","../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["export * from './src/CallSocket'\nexport * from './src/errors'\nexport * from './src/types'\nexport * from './src/waitForParams'\n","import * as fs from 'fs'\nimport * as path from 'path'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nconst debugEnabled = false\nconst debugSaveFile = false\nconst disableTTS = false\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(\n public socket: WebSocket,\n public config: CallConfig\n ) {\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.debug(`Call started`)\n\n // Assistant speaks first\n\n // LLM: Generate answer\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[WS]', error)\n this.socket.close()\n })\n }\n\n // Listen to events\n this.socket.on('close', this.onClose.bind(this))\n this.socket.on('message', this.onMessage.bind(this))\n }\n\n private addMessage(message: ConversationMessage) {\n this.conversation.push(message)\n this.socket.send(\n `${\n message.role === 'user'\n ? 
CallServerCommands.UserMessage\n : CallServerCommands.AssistantMessage\n } ${message.content}`\n )\n this.config.onMessage?.(message)\n }\n\n private onClose() {\n this.debug('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n // @ts-ignore\n this.socket = null\n // @ts-ignore\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[WS] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.debug(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.debug(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (debugSaveFile) {\n const filePath = path.join(__dirname, 'speech.ogg')\n fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()))\n return\n }\n\n // STT: Get transcript and send to client\n const transcript = await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.debug('Ignoring empty transcript')\n return\n }\n\n this.debug('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.debug('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.debug('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[WS]', error)\n this.socket.close()\n }\n }\n\n private async answer(message: string) {\n let isEnd = false\n\n // Detect end of interview\n if (message.includes(END_INTERVIEW)) {\n message = message.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (message.length) {\n // Send answer to client\n this.debug('Assistant message:', message)\n this.addMessage({ role: 'assistant', content: message })\n\n // TTS: Generate answer audio\n if (!disableTTS) {\n const audio = await this.config.text2Speech(message)\n if (this.abortAnswer) {\n // Remove last assistant message if aborted\n this.debug('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket.send(CallServerCommands.CancelLastAssistantMessage)\n }\n return\n }\n\n // Send audio to client\n 
this.debug(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n }\n }\n\n // End of call\n if (isEnd) {\n this.debug('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private debug(...message: any[]) {\n if (!debugEnabled) return\n const nowTime = Date.now()\n console.log(\n `[WS] [${nowTime - this.startTime} | ${nowTime - this.lastDebug}ms]`,\n ...message\n )\n this.lastDebug = nowTime\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n UserMessage = 'userMessage',\n AssistantMessage = 'assistantMessage',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n generateAnswer(conversation: Conversation): Promise<string>\n speech2Text(blob: Blob, prompt?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage {\n role: 'system' | 'user' | 'assistant'\n content: string\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n 
})\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAoB;AACpB,WAAsB;;;ACDf,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,iBAAc;AACd,EAAAA,oBAAA,sBAAmB;AACnB,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADKZ,IAAM,eAAe;AACrB,IAAM,gBAAgB;AACtB,IAAM,aAAa;AAEZ,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAgBtB,YACS,QACA,QACP;AAFO;AACA;AAjBT,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAS1B,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,MAAM,cAAc;AAKzB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,QAAQ,KAAK;AAC3B,aAAK,OAAO,MAAM;AAAA,MACpB,CAAC;AAAA,IACL;AAGA,SAAK,OAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC/C,SAAK,OAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EACrD;AAAA,EAEQ,WAAW,SAA8B;AAC/C,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO;AAAA,MACV,GACE,QAAQ,SAAS,oFAGnB,IAAI,QAAQ,OAAO;AAAA,IACrB;AACA,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEQ,UAAU;AAChB,SAAK,MAAM,mBAAmB;AAC9B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAID,SAAK,SAAS;AAEd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,8BAA8B;AAC3C;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,MAAM,YAAY,GAAG,EAAE;AAE5B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,MAAM,mBAAmB,QAAQ,UAAU,SAAS;AACzD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAE7B,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,eAAe;AACjB,cAAM,WAAgB,UAAK,WAAW,YAAY;AAClD,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE;AAAA,MACF;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,MAAM,2BAA2B;AACtC;AAAA,MACF;AAEA,WAAK,MAAM,oBAAoB,UAAU;AAGzC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,MAAM,qCAAqC;AAChD;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,MAAM,iCAAiC;AAC5C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,QAAQ,KAAK;AAC3B,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAEA,MAAc,OAAO,SAAiB;AACpC,QAAI,QAAQ;AAGZ,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,MAAM,sBAAsB,OAAO;AACxC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,QAAQ,CAAC;AAGvD,UAAI,CAAC,YAAY;AACf,cAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AACnD,YAAI,KAAK,aAAa;AAEpB,eAAK,MAAM,iDAAiD;AAC5D,gBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,cAAI,aAAa,SAAS,aAAa;AACrC,iBAAK,aAAa,IAAI;AACtB,iBAAK,OAAO,kEAAkD;AAAA,UAChE;AACA;AAAA,QACF;AAGA,aAAK,MAAM,gBAAgB,MAAM,UAAU,SAAS;AACpD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,MAAM,iBAAiB;AAC5B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,SAAS,SAAgB;AAC/B,QAAI,CAAC,aAAc;AACnB,UAAM,UAAU,KAAK,IAAI;AACzB,YAAQ;AAAA,MACN,SAAS,UAAU,KAAK,SAAS,MAAM,UAAU,KAAK,SAAS;AAAA,MAC/D,GAAG;AAAA,IACL;AACA,SAAK,YAAY;AAAA,EACnB;AACF;;;
AElOO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
package/dist/index.mjs ADDED
@@ -0,0 +1,219 @@
1
+ // src/CallSocket.ts
2
+ import * as fs from "fs";
3
+ import * as path from "path";
4
+
5
+ // src/types.ts
6
+ var CallClientCommands = /* @__PURE__ */ ((CallClientCommands2) => {
7
+ CallClientCommands2["StartSpeaking"] = "startSpeaking";
8
+ CallClientCommands2["StopSpeaking"] = "stopSpeaking";
9
+ CallClientCommands2["Mute"] = "mute";
10
+ return CallClientCommands2;
11
+ })(CallClientCommands || {});
12
+ var CallServerCommands = /* @__PURE__ */ ((CallServerCommands2) => {
13
+ CallServerCommands2["UserMessage"] = "userMessage";
14
+ CallServerCommands2["AssistantMessage"] = "assistantMessage";
15
+ CallServerCommands2["CancelLastAssistantMessage"] = "cancelLastAssistantMessage";
16
+ CallServerCommands2["EndInterview"] = "endInterview";
17
+ return CallServerCommands2;
18
+ })(CallServerCommands || {});
19
+
20
+ // src/CallSocket.ts
21
+ var debugEnabled = false;
22
+ var debugSaveFile = false;
23
+ var disableTTS = false;
24
+ var END_INTERVIEW = "END_INTERVIEW";
25
+ var CallSocket = class {
26
+ constructor(socket, config) {
27
+ this.socket = socket;
28
+ this.config = config;
29
+ this.startTime = Date.now();
30
+ this.lastDebug = Date.now();
31
+ // An answer can be aborted if user is speaking
32
+ this.abortAnswer = false;
33
+ // When user is speaking, we're waiting to chunks or to stop
34
+ this.isSpeaking = false;
35
+ // Chunks of user speech since user started speaking
36
+ this.chunks = [];
37
+ this.conversation = [{ role: "system", content: config.systemPrompt }];
38
+ this.debug(`Call started`);
39
+ if (config.firstMessage) {
40
+ this.answer(config.firstMessage);
41
+ } else {
42
+ this.config.generateAnswer(this.conversation).then((answer) => this.answer(answer)).catch((error) => {
43
+ console.error("[WS]", error);
44
+ this.socket.close();
45
+ });
46
+ }
47
+ this.socket.on("close", this.onClose.bind(this));
48
+ this.socket.on("message", this.onMessage.bind(this));
49
+ }
50
+ addMessage(message) {
51
+ this.conversation.push(message);
52
+ this.socket.send(
53
+ `${message.role === "user" ? "userMessage" /* UserMessage */ : "assistantMessage" /* AssistantMessage */} ${message.content}`
54
+ );
55
+ this.config.onMessage?.(message);
56
+ }
57
+ onClose() {
58
+ this.debug("Connection closed");
59
+ this.abortAnswer = true;
60
+ const duration = Math.round((Date.now() - this.startTime) / 1e3);
61
+ this.config.onEnd?.({
62
+ conversation: this.conversation.slice(1),
63
+ // Remove system message
64
+ duration
65
+ });
66
+ this.socket = null;
67
+ this.config = null;
68
+ }
69
+ async onMessage(message) {
70
+ if (!Buffer.isBuffer(message)) {
71
+ console.warn(`[WS] Message is not a buffer`);
72
+ return;
73
+ }
74
+ if (message.byteLength < 15) {
75
+ const cmd = message.toString();
76
+ this.debug(`Command: ${cmd}`);
77
+ if (cmd === "startSpeaking" /* StartSpeaking */) {
78
+ this.isSpeaking = true;
79
+ this.abortAnswer = true;
80
+ } else if (cmd === "mute" /* Mute */) {
81
+ this.abortAnswer = true;
82
+ } else if (cmd === "stopSpeaking" /* StopSpeaking */) {
83
+ this.isSpeaking = false;
84
+ await this.onStopSpeaking();
85
+ }
86
+ } else if (Buffer.isBuffer(message) && this.isSpeaking) {
87
+ this.debug(`Received chunk (${message.byteLength} bytes)`);
88
+ this.chunks.push(message);
89
+ }
90
+ }
91
+ async onStopSpeaking() {
92
+ if (this.chunks.length === 0) return;
93
+ this.abortAnswer = false;
94
+ const blob = new Blob(this.chunks, { type: "audio/ogg" });
95
+ this.chunks.length = 0;
96
+ try {
97
+ if (debugSaveFile) {
98
+ const filePath = path.join(__dirname, "speech.ogg");
99
+ fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()));
100
+ return;
101
+ }
102
+ const transcript = await this.config.speech2Text(
103
+ blob,
104
+ this.conversation[this.conversation.length - 1]?.content
105
+ );
106
+ if (!transcript) {
107
+ this.debug("Ignoring empty transcript");
108
+ return;
109
+ }
110
+ this.debug("User transcript:", transcript);
111
+ this.addMessage({ role: "user", content: transcript });
112
+ if (this.abortAnswer) {
113
+ this.debug("Answer aborted, no answer generated");
114
+ return;
115
+ }
116
+ const answer = await this.config.generateAnswer(this.conversation);
117
+ if (this.abortAnswer) {
118
+ this.debug("Answer aborted, ignoring answer");
119
+ return;
120
+ }
121
+ await this.answer(answer);
122
+ } catch (error) {
123
+ console.error("[WS]", error);
124
+ this.socket.close();
125
+ }
126
+ }
127
+ async answer(message) {
128
+ let isEnd = false;
129
+ if (message.includes(END_INTERVIEW)) {
130
+ message = message.replace(END_INTERVIEW, "").trim();
131
+ isEnd = true;
132
+ }
133
+ if (message.length) {
134
+ this.debug("Assistant message:", message);
135
+ this.addMessage({ role: "assistant", content: message });
136
+ if (!disableTTS) {
137
+ const audio = await this.config.text2Speech(message);
138
+ if (this.abortAnswer) {
139
+ this.debug("Answer aborted, removing last assistant message");
140
+ const lastMessage = this.conversation[this.conversation.length - 1];
141
+ if (lastMessage?.role === "assistant") {
142
+ this.conversation.pop();
143
+ this.socket.send("cancelLastAssistantMessage" /* CancelLastAssistantMessage */);
144
+ }
145
+ return;
146
+ }
147
+ this.debug(`Send audio: (${audio.byteLength} bytes)`);
148
+ this.socket.send(audio);
149
+ }
150
+ }
151
+ if (isEnd) {
152
+ this.debug("Interview ended");
153
+ this.socket.send("endInterview" /* EndInterview */);
154
+ }
155
+ }
156
+ debug(...message) {
157
+ if (!debugEnabled) return;
158
+ const nowTime = Date.now();
159
+ console.log(
160
+ `[WS] [${nowTime - this.startTime} | ${nowTime - this.lastDebug}ms]`,
161
+ ...message
162
+ );
163
+ this.lastDebug = nowTime;
164
+ }
165
+ };
166
+
167
+ // src/errors.ts
168
+ var CallErrorCode = /* @__PURE__ */ ((CallErrorCode2) => {
169
+ CallErrorCode2[CallErrorCode2["BadRequest"] = 4400] = "BadRequest";
170
+ CallErrorCode2[CallErrorCode2["Unauthorized"] = 4401] = "Unauthorized";
171
+ CallErrorCode2[CallErrorCode2["NotFound"] = 4404] = "NotFound";
172
+ return CallErrorCode2;
173
+ })(CallErrorCode || {});
174
+ var CallError = class extends Error {
175
+ constructor(code, message) {
176
+ super(message);
177
+ this.code = code;
178
+ }
179
+ };
180
+ function handleError(socket, error) {
181
+ if (error instanceof CallError) {
182
+ socket.close(error.code, error.message);
183
+ } else {
184
+ console.error(error);
185
+ socket.close(1011);
186
+ }
187
+ socket.terminate();
188
+ }
189
+
190
+ // src/waitForParams.ts
191
+ async function waitForParams(socket, validate) {
192
+ return new Promise((resolve, reject) => {
193
+ const timeout = setTimeout(() => {
194
+ reject(new CallError(4400 /* BadRequest */, "Missing params"));
195
+ }, 3e3);
196
+ const onParams = (payload) => {
197
+ clearTimeout(timeout);
198
+ socket.off("message", onParams);
199
+ try {
200
+ const params = validate(JSON.parse(payload));
201
+ resolve(params);
202
+ } catch (error) {
203
+ reject(new CallError(4400 /* BadRequest */, "Invalid params"));
204
+ }
205
+ };
206
+ socket.on("message", onParams);
207
+ });
208
+ }
209
+ export {
210
+ CallClientCommands,
211
+ CallError,
212
+ CallErrorCode,
213
+ CallServerCommands,
214
+ CallSocket,
215
+ END_INTERVIEW,
216
+ handleError,
217
+ waitForParams
218
+ };
219
+ //# sourceMappingURL=index.mjs.map
package/dist/index.mjs.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["import * as fs from 'fs'\nimport * as path from 'path'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nconst debugEnabled = false\nconst debugSaveFile = false\nconst disableTTS = false\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(\n public socket: WebSocket,\n public config: CallConfig\n ) {\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.debug(`Call started`)\n\n // Assistant speaks first\n\n // LLM: Generate answer\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[WS]', error)\n this.socket.close()\n })\n }\n\n // Listen to events\n this.socket.on('close', this.onClose.bind(this))\n this.socket.on('message', this.onMessage.bind(this))\n }\n\n private addMessage(message: ConversationMessage) {\n this.conversation.push(message)\n this.socket.send(\n `${\n message.role === 'user'\n ? CallServerCommands.UserMessage\n : CallServerCommands.AssistantMessage\n } ${message.content}`\n )\n this.config.onMessage?.(message)\n }\n\n private onClose() {\n this.debug('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n // @ts-ignore\n this.socket = null\n // @ts-ignore\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[WS] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.debug(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.debug(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (debugSaveFile) {\n const filePath = path.join(__dirname, 'speech.ogg')\n fs.writeFileSync(filePath, 
Buffer.from(await blob.arrayBuffer()))\n return\n }\n\n // STT: Get transcript and send to client\n const transcript = await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.debug('Ignoring empty transcript')\n return\n }\n\n this.debug('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.debug('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.debug('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[WS]', error)\n this.socket.close()\n }\n }\n\n private async answer(message: string) {\n let isEnd = false\n\n // Detect end of interview\n if (message.includes(END_INTERVIEW)) {\n message = message.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (message.length) {\n // Send answer to client\n this.debug('Assistant message:', message)\n this.addMessage({ role: 'assistant', content: message })\n\n // TTS: Generate answer audio\n if (!disableTTS) {\n const audio = await this.config.text2Speech(message)\n if (this.abortAnswer) {\n // Remove last assistant message if aborted\n this.debug('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket.send(CallServerCommands.CancelLastAssistantMessage)\n }\n return\n }\n\n // Send audio to client\n this.debug(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n }\n }\n\n // End of call\n if (isEnd) {\n this.debug('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private debug(...message: any[]) {\n if (!debugEnabled) return\n const nowTime = Date.now()\n console.log(\n `[WS] [${nowTime - this.startTime} | ${nowTime - this.lastDebug}ms]`,\n ...message\n )\n this.lastDebug = nowTime\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n UserMessage = 'userMessage',\n AssistantMessage = 'assistantMessage',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n generateAnswer(conversation: Conversation): Promise<string>\n speech2Text(blob: Blob, prompt?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage {\n role: 'system' | 'user' | 'assistant'\n content: string\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n 
socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";AAAA,YAAY,QAAQ;AACpB,YAAY,UAAU;;;ACDf,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,iBAAc;AACd,EAAAA,oBAAA,sBAAmB;AACnB,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADKZ,IAAM,eAAe;AACrB,IAAM,gBAAgB;AACtB,IAAM,aAAa;AAEZ,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAgBtB,YACS,QACA,QACP;AAFO;AACA;AAjBT,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAS1B,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,MAAM,cAAc;AAKzB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,QAAQ,KAAK;AAC3B,aAAK,OAAO,MAAM;AAAA,MACpB,CAAC;AAAA,IACL;AAGA,SAAK,OAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC/C,SAAK,OAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EACrD;AAAA,EAEQ,WAAW,SAA8B;AAC/C,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO;AAAA,MACV,GACE,QAAQ,SAAS,oFAGnB,IAAI,QAAQ,OAAO;AAAA,IACrB;AACA,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEQ,UAAU;AAChB,SAAK,MAAM,mBAAmB;AAC9B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAID,SAAK,SAAS;AAEd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,8BAA8B;AAC3C;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,MAAM,YAAY,GAAG,EAAE;AAE5B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,MAAM,mBAAmB,QAAQ,UAAU,SAAS;AACzD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAE7B,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,eAAe;AACjB,cAAM,WAAgB,UAAK,WAAW,YAAY;AAClD,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE;AAAA,MACF;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,MAAM,2BAA2B;AACtC;AAAA,MACF;AAEA,WAAK,MAAM,oBAAoB,UAAU;AAGzC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,MAAM,qCAAqC;AAChD;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,MAAM,iCAAiC;AAC5C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,QAAQ,KAAK;AAC3B,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAEA,MAAc,OAAO,SAAiB;AACpC,QAAI,QAAQ;AAGZ,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,Q
AAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,MAAM,sBAAsB,OAAO;AACxC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,QAAQ,CAAC;AAGvD,UAAI,CAAC,YAAY;AACf,cAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AACnD,YAAI,KAAK,aAAa;AAEpB,eAAK,MAAM,iDAAiD;AAC5D,gBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,cAAI,aAAa,SAAS,aAAa;AACrC,iBAAK,aAAa,IAAI;AACtB,iBAAK,OAAO,kEAAkD;AAAA,UAChE;AACA;AAAA,QACF;AAGA,aAAK,MAAM,gBAAgB,MAAM,UAAU,SAAS;AACpD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,MAAM,iBAAiB;AAC5B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,SAAS,SAAgB;AAC/B,QAAI,CAAC,aAAc;AACnB,UAAM,UAAU,KAAK,IAAI;AACzB,YAAQ;AAAA,MACN,SAAS,UAAU,KAAK,SAAS,MAAM,UAAU,KAAK,SAAS;AAAA,MAC/D,GAAG;AAAA,IACL;AACA,SAAK,YAAY;AAAA,EACnB;AACF;;;AElOO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
package/package.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "@micdrop/server",
3
+ "version": "1.0.0",
4
+ "description": "A lib for Node.js that helps to use the mic and speaker for voice conversation",
5
+ "author": "Lonestone",
6
+ "license": "MIT",
7
+ "main": "./dist/index.js",
8
+ "module": "./dist/index.mjs",
9
+ "types": "./dist/index.d.ts",
10
+ "scripts": {
11
+ "build": "tsup",
12
+ "dev": "tsup --watch",
13
+ "clean": "rm -rf dist",
14
+ "typecheck": "tsc --noEmit",
15
+ "prepublishOnly": "npm run build"
16
+ },
17
+ "files": [
18
+ "dist"
19
+ ],
20
+ "repository": {
21
+ "type": "git",
22
+ "url": "git+https://github.com/lonestone/micdrop.git",
23
+ "directory": "packages/server"
24
+ },
25
+ "publishConfig": {
26
+ "access": "public"
27
+ },
28
+ "keywords": [
29
+ "microphone",
30
+ "audio",
31
+ "browser",
32
+ "voice",
33
+ "conversation"
34
+ ],
35
+ "dependencies": {
36
+ "ws": "^8.18.0"
37
+ },
38
+ "devDependencies": {
39
+ "@types/node": "^20.0.0",
40
+ "@types/ws": "^8.5.14",
41
+ "eslint": "^8.0.0",
42
+ "tsup": "^8.0.0",
43
+ "typescript": "^5.0.0"
44
+ }
45
+ }