@micdrop/server 1.0.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,11 +7,11 @@ For browser implementation, see [@micdrop/client](../client/README.md) package.
7
7
  ## Features
8
8
 
9
9
  - 🌐 WebSocket server for real-time audio streaming
10
- - 🔊 Audio data handling and processing
10
+ - 🔊 Advanced audio processing:
11
+ - Streaming TTS support
12
+ - Efficient audio chunk delivery
13
+ - Interrupt handling
11
14
  - 💬 Conversation state management
12
- - ⚡ Event-based architecture
13
- - 🔄 Bi-directional communication
14
- - 🛡️ Built-in error handling
15
15
  - 🎙️ Speech-to-text and text-to-speech integration
16
16
  - 🤖 AI conversation generation support
17
17
  - 💾 Debug mode with optional audio saving
@@ -57,7 +57,10 @@ const config: CallConfig = {
57
57
  },
58
58
 
59
59
  // Function to convert text to speech
60
- async text2Speech(text) {
60
+ // Can return either a complete ArrayBuffer or a ReadableStream for streaming
61
+ async text2Speech(
62
+ text: string
63
+ ): Promise<ArrayBuffer | NodeJS.ReadableStream> {
61
64
  // Implement your TTS logic
62
65
  return new ArrayBuffer(0) // Audio data
63
66
  },
@@ -147,7 +150,8 @@ interface CallConfig {
147
150
  speech2Text(blob: Blob, prompt?: string): Promise<string>
148
151
 
149
152
  // Convert text to audio
150
- text2Speech(text: string): Promise<ArrayBuffer>
153
+ // Can return either a complete ArrayBuffer or a ReadableStream for streaming
154
+ text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>
151
155
 
152
156
  // Optional callbacks
153
157
  onMessage?(message: ConversationMessage): void
@@ -265,6 +269,8 @@ server.get('/call', { websocket: true }, (socket) => {
265
269
  server.listen({ port: 8080 })
266
270
  ```
267
271
 
272
+ See [@micdrop/demo-server](../demo-server/src/ai/index.ts) for a complete example using OpenAI and ElevenLabs.
273
+
268
274
  ## Debug Mode
269
275
 
270
276
  The server includes a debug mode that can:
package/dist/index.d.mts CHANGED
@@ -19,7 +19,7 @@ interface CallConfig {
19
19
  disableTTS?: boolean;
20
20
  generateAnswer(conversation: Conversation): Promise<string>;
21
21
  speech2Text(blob: Blob, prompt?: string): Promise<string>;
22
- text2Speech(text: string): Promise<ArrayBuffer>;
22
+ text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>;
23
23
  onMessage?(message: ConversationMessage): void;
24
24
  onEnd?(call: CallSummary): void;
25
25
  }
@@ -46,6 +46,7 @@ declare class CallSocket {
46
46
  constructor(socket: WebSocket, config: CallConfig);
47
47
  resetConversation(conversation: Conversation): void;
48
48
  private addMessage;
49
+ private sendAudio;
49
50
  private onClose;
50
51
  private onMessage;
51
52
  private onStopSpeaking;
package/dist/index.d.ts CHANGED
@@ -19,7 +19,7 @@ interface CallConfig {
19
19
  disableTTS?: boolean;
20
20
  generateAnswer(conversation: Conversation): Promise<string>;
21
21
  speech2Text(blob: Blob, prompt?: string): Promise<string>;
22
- text2Speech(text: string): Promise<ArrayBuffer>;
22
+ text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>;
23
23
  onMessage?(message: ConversationMessage): void;
24
24
  onEnd?(call: CallSummary): void;
25
25
  }
@@ -46,6 +46,7 @@ declare class CallSocket {
46
46
  constructor(socket: WebSocket, config: CallConfig);
47
47
  resetConversation(conversation: Conversation): void;
48
48
  private addMessage;
49
+ private sendAudio;
49
50
  private onClose;
50
51
  private onMessage;
51
52
  private onStopSpeaking;
package/dist/index.js CHANGED
@@ -43,7 +43,6 @@ module.exports = __toCommonJS(index_exports);
43
43
 
44
44
  // src/CallSocket.ts
45
45
  var fs = __toESM(require("fs"));
46
- var path = __toESM(require("path"));
47
46
 
48
47
  // src/types.ts
49
48
  var CallClientCommands = /* @__PURE__ */ ((CallClientCommands2) => {
@@ -99,6 +98,27 @@ var CallSocket = class {
99
98
  );
100
99
  this.config.onMessage?.(message);
101
100
  }
101
+ async sendAudio(audio, abort) {
102
+ if (this.abortAnswer) {
103
+ abort?.();
104
+ return;
105
+ }
106
+ if (Buffer.isBuffer(audio) || audio instanceof ArrayBuffer) {
107
+ this.log(`Send audio: (${audio.byteLength} bytes)`);
108
+ this.socket.send(audio);
109
+ } else if ("paused" in audio) {
110
+ for await (const chunk of audio) {
111
+ if (this.abortAnswer) {
112
+ abort?.();
113
+ return;
114
+ }
115
+ this.log(`Send audio chunk (${chunk.length} bytes)`);
116
+ this.socket.send(chunk);
117
+ }
118
+ } else {
119
+ this.log(`Unknown audio type: ${audio}`);
120
+ }
121
+ }
102
122
  onClose() {
103
123
  this.log("Connection closed");
104
124
  this.abortAnswer = true;
@@ -140,9 +160,9 @@ var CallSocket = class {
140
160
  this.chunks.length = 0;
141
161
  try {
142
162
  if (this.config.debugSaveSpeech) {
143
- const filePath = path.join(__dirname, "speech.ogg");
144
- fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()));
145
- return;
163
+ const filename = `speech-${Date.now()}.ogg`;
164
+ fs.writeFileSync(filename, Buffer.from(await blob.arrayBuffer()));
165
+ this.log(`Saved speech: ${filename}`);
146
166
  }
147
167
  const transcript = await this.config.speech2Text(
148
168
  blob,
@@ -181,17 +201,15 @@ var CallSocket = class {
181
201
  this.addMessage({ role: "assistant", content: message });
182
202
  if (!this.config.disableTTS) {
183
203
  const audio = await this.config.text2Speech(message);
184
- if (this.abortAnswer) {
204
+ const abort = () => {
185
205
  this.log("Answer aborted, removing last assistant message");
186
206
  const lastMessage = this.conversation[this.conversation.length - 1];
187
207
  if (lastMessage?.role === "assistant") {
188
208
  this.conversation.pop();
189
209
  this.socket.send("cancelLastAssistantMessage" /* CancelLastAssistantMessage */);
190
210
  }
191
- return;
192
- }
193
- this.log(`Send audio: (${audio.byteLength} bytes)`);
194
- this.socket.send(audio);
211
+ };
212
+ await this.sendAudio(audio, abort);
195
213
  }
196
214
  }
197
215
  if (isEnd) {
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["export * from './CallSocket'\nexport * from './errors'\nexport * from './types'\nexport * from './waitForParams'\n","import * as fs from 'fs'\nimport * as path from 'path'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(\n public socket: WebSocket,\n public config: CallConfig\n ) {\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.log(`Call started`)\n\n // Assistant speaks first\n\n // LLM: Generate answer\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[WS]', error)\n this.socket.close()\n })\n }\n\n // Listen to events\n this.socket.on('close', this.onClose.bind(this))\n this.socket.on('message', this.onMessage.bind(this))\n }\n\n // Reset conversation\n public resetConversation(conversation: Conversation) {\n this.log('Reset conversation')\n this.conversation = conversation\n }\n\n private addMessage(message: ConversationMessage) {\n this.conversation.push(message)\n this.socket.send(\n `${\n message.role === 'user'\n ? CallServerCommands.UserMessage\n : CallServerCommands.AssistantMessage\n } ${message.content}`\n )\n this.config.onMessage?.(message)\n }\n\n private onClose() {\n this.log('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n // @ts-ignore\n this.socket = null\n // @ts-ignore\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[WS] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.log(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.log(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (this.config.debugSaveSpeech) {\n const filePath = path.join(__dirname, 'speech.ogg')\n fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()))\n return\n }\n\n // STT: Get transcript and send to client\n const transcript = await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.log('Ignoring empty transcript')\n return\n }\n\n this.log('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.log('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.log('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[WS]', error)\n this.socket.close()\n }\n }\n\n // Add assistant message and send to client with audio (TTS)\n public async answer(message: string) {\n let isEnd = false\n\n // Detect end of interview\n if (message.includes(END_INTERVIEW)) {\n message = message.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (message.length) {\n // Send answer to client\n this.log('Assistant message:', message)\n this.addMessage({ role: 'assistant', content: message })\n\n // TTS: Generate answer audio\n if (!this.config.disableTTS) {\n const audio = await this.config.text2Speech(message)\n if (this.abortAnswer) {\n // Remove last assistant message if aborted\n this.log('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket.send(CallServerCommands.CancelLastAssistantMessage)\n }\n return\n }\n\n // Send audio to client\n this.log(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n }\n }\n\n // End of call\n if (isEnd) {\n this.log('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private log(...message: any[]) {\n if (!this.config.debugLog) return\n const now = Date.now()\n const delta = now - this.lastDebug\n this.lastDebug = now\n console.log(`[Debug +${delta}ms]`, ...message)\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n UserMessage = 'userMessage',\n AssistantMessage = 'assistantMessage',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n debugLog?: boolean\n debugSaveSpeech?: boolean\n disableTTS?: boolean\n generateAnswer(conversation: Conversation): Promise<string>\n speech2Text(blob: Blob, prompt?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage {\n role: 'system' | 'user' | 'assistant'\n content: string\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAoB;AACpB,WAAsB;;;ACDf,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,iBAAc;AACd,EAAAA,oBAAA,sBAAmB;AACnB,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADKL,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAgBtB,YACS,QACA,QACP;AAFO;AACA;AAjBT,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAS1B,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,IAAI,cAAc;AAKvB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,QAAQ,KAAK;AAC3B,aAAK,OAAO,MAAM;AAAA,MACpB,CAAC;AAAA,IACL;AAGA,SAAK,OAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC/C,SAAK,OAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EACrD;AAAA;AAAA,EAGO,kBAAkB,cAA4B;AACnD,SAAK,IAAI,oBAAoB;AAC7B,SAAK,eAAe;AAAA,EACtB;AAAA,EAEQ,WAAW,SAA8B;AAC/C,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO;AAAA,MACV,GACE,QAAQ,SAAS,oFAGnB,IAAI,QAAQ,OAAO;AAAA,IACrB;AACA,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEQ,UAAU;AAChB,SAAK,IAAI,mBAAmB;AAC5B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAID,SAAK,SAAS;AAEd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,8BAA8B;AAC3C;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,IAAI,YAAY,GAAG,EAAE;AAE1B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,IAAI,mBAAmB,QAAQ,UAAU,SAAS;AACvD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAE7B,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,KAAK,OAAO,iBAAiB;AAC/B,cAAM,WAAgB,UAAK,WAAW,YAAY;AAClD,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE;AAAA,MACF;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,IAAI,2BAA2B;AACpC;AAAA,MACF;AAEA,WAAK,IAAI,oBAAoB,UAAU;AAGvC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,qCAAqC;AAC9C;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,iCAAiC;AAC1C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,QAAQ,KAAK;AAC3B,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA;AAAA,EAGA,MAAa,OAAO,SAAiB;AACnC,QAAI,QAAQ;AAGZ,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,IAAI,sBAAsB,OAAO;AACtC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,QAAQ,CAAC;AAGvD,UAAI,CAAC,KAAK,OAAO,YAAY;AAC3B,cAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AACnD,YAAI,KAAK,aAAa;AAEpB,eAAK,IAAI,iDAAiD;AAC1D,gBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,cAAI,aAAa,SAAS,aAAa;AACrC,iBAAK,aAAa,IAAI;AACtB,iBAAK,OAAO,kEAAkD;AAAA,UAChE;AACA;AAAA,QACF;AAGA,aAAK,IAAI,gBAAgB,MAAM,UAAU,SAAS;AAClD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,IAAI,iBAAiB;AAC1B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,OAAO,SAAgB;AAC7B,QAAI,CAAC,KAAK,OAAO,SAAU;AAC3B,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,QAAQ,MAAM,KAAK;AACzB,SAAK,YAAY;AACjB,YAAQ,IAAI,WAAW,KAAK,OAAO,GAAG,OAAO;AAAA,EAC/C;AACF;;;AEnOO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
1
+ {"version":3,"sources":["../src/index.ts","../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["export * from './CallSocket'\nexport * from './errors'\nexport * from './types'\nexport * from './waitForParams'\n","import * as fs from 'fs'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(\n public socket: WebSocket,\n public config: CallConfig\n ) {\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.log(`Call started`)\n\n // Assistant speaks first\n\n // LLM: Generate answer\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[WS]', error)\n this.socket.close()\n })\n }\n\n // Listen to events\n this.socket.on('close', this.onClose.bind(this))\n this.socket.on('message', this.onMessage.bind(this))\n }\n\n // Reset conversation\n public resetConversation(conversation: Conversation) {\n this.log('Reset conversation')\n this.conversation = conversation\n }\n\n private addMessage(message: ConversationMessage) {\n this.conversation.push(message)\n this.socket.send(\n `${\n message.role === 'user'\n ? CallServerCommands.UserMessage\n : CallServerCommands.AssistantMessage\n } ${message.content}`\n )\n this.config.onMessage?.(message)\n }\n\n private async sendAudio(\n audio: ArrayBuffer | NodeJS.ReadableStream,\n abort?: () => void\n ) {\n if (this.abortAnswer) {\n abort?.()\n return\n }\n\n if (Buffer.isBuffer(audio) || audio instanceof ArrayBuffer) {\n // Whole audio as a single buffer\n this.log(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n } else if ('paused' in audio) {\n // Audio as a stream\n for await (const chunk of audio) {\n if (this.abortAnswer) {\n abort?.()\n return\n }\n this.log(`Send audio chunk (${chunk.length} bytes)`)\n this.socket.send(chunk)\n }\n } else {\n this.log(`Unknown audio type: ${audio}`)\n }\n }\n\n private onClose() {\n this.log('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n // @ts-ignore\n this.socket = null\n // @ts-ignore\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[WS] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.log(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.log(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (this.config.debugSaveSpeech) {\n const filename = `speech-${Date.now()}.ogg`\n fs.writeFileSync(filename, Buffer.from(await blob.arrayBuffer()))\n this.log(`Saved speech: ${filename}`)\n }\n\n // STT: Get transcript and send to client\n const transcript = await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.log('Ignoring empty transcript')\n return\n }\n\n this.log('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.log('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.log('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[WS]', error)\n this.socket.close()\n }\n }\n\n // Add assistant message and send to client with audio (TTS)\n public async answer(message: string) {\n let isEnd = false\n\n // Detect end of interview\n if (message.includes(END_INTERVIEW)) {\n message = message.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (message.length) {\n // Send answer to client\n this.log('Assistant message:', message)\n this.addMessage({ role: 'assistant', content: message })\n\n // TTS: Generate answer audio\n if (!this.config.disableTTS) {\n const audio = await this.config.text2Speech(message)\n\n // Remove last assistant message if aborted\n const abort = () => {\n this.log('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket.send(CallServerCommands.CancelLastAssistantMessage)\n }\n }\n\n // Send audio to client\n await this.sendAudio(audio, abort)\n }\n }\n\n // End of call\n if (isEnd) {\n this.log('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private log(...message: any[]) {\n if (!this.config.debugLog) return\n const now = Date.now()\n const delta = now - this.lastDebug\n this.lastDebug = now\n console.log(`[Debug +${delta}ms]`, ...message)\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n UserMessage = 'userMessage',\n AssistantMessage = 'assistantMessage',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n debugLog?: boolean\n debugSaveSpeech?: boolean\n disableTTS?: boolean\n generateAnswer(conversation: Conversation): Promise<string>\n speech2Text(blob: Blob, prompt?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage {\n role: 'system' | 'user' | 'assistant'\n content: string\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAoB;;;ACAb,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,iBAAc;AACd,EAAAA,oBAAA,sBAAmB;AACnB,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADIL,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAgBtB,YACS,QACA,QACP;AAFO;AACA;AAjBT,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAS1B,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,IAAI,cAAc;AAKvB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,QAAQ,KAAK;AAC3B,aAAK,OAAO,MAAM;AAAA,MACpB,CAAC;AAAA,IACL;AAGA,SAAK,OAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC/C,SAAK,OAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EACrD;AAAA;AAAA,EAGO,kBAAkB,cAA4B;AACnD,SAAK,IAAI,oBAAoB;AAC7B,SAAK,eAAe;AAAA,EACtB;AAAA,EAEQ,WAAW,SAA8B;AAC/C,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO;AAAA,MACV,GACE,QAAQ,SAAS,oFAGnB,IAAI,QAAQ,OAAO;AAAA,IACrB;AACA,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEA,MAAc,UACZ,OACA,OACA;AACA,QAAI,KAAK,aAAa;AACpB,cAAQ;AACR;AAAA,IACF;AAEA,QAAI,OAAO,SAAS,KAAK,KAAK,iBAAiB,aAAa;AAE1D,WAAK,IAAI,gBAAgB,MAAM,UAAU,SAAS;AAClD,WAAK,OAAO,KAAK,KAAK;AAAA,IACxB,WAAW,YAAY,OAAO;AAE5B,uBAAiB,SAAS,OAAO;AAC/B,YAAI,KAAK,aAAa;AACpB,kBAAQ;AACR;AAAA,QACF;AACA,aAAK,IAAI,qBAAqB,MAAM,MAAM,SAAS;AACnD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF,OAAO;AACL,WAAK,IAAI,uBAAuB,KAAK,EAAE;AAAA,IACzC;AAAA,EACF;AAAA,EAEQ,UAAU;AAChB,SAAK,IAAI,mBAAmB;AAC5B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAID,SAAK,SAAS;AAEd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,8BAA8B;AAC3C;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,IAAI,YAAY,GAAG,EAAE;AAE1B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,IAAI,mBAAmB,QAAQ,UAAU,SAAS;AACvD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAE7B,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,KAAK,OAAO,iBAAiB;AAC/B,cAAM,WAAW,UAAU,KAAK,IAAI,CAAC;AACrC,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE,aAAK,IAAI,iBAAiB,QAAQ,EAAE;AAAA,MACtC;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,IAAI,2BAA2B;AACpC;AAAA,MACF;AAEA,WAAK,IAAI,oBAAoB,UAAU;AAGvC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,qCAAqC;AAC9C;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,iCAAiC;AAC1C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,QAAQ,KAAK;AAC3B,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA;AAAA,EAGA,MAAa,OAAO,SAAiB;AACnC,QAAI,QAAQ;AAGZ,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,IAAI,sBAAsB,OAAO;AACtC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,QAAQ,CAAC;AAGvD,UAAI,CAAC,KAAK,OAAO,YAAY;AAC3B,cAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AAGnD,cAAM,QAAQ,MAAM;AAClB,eAAK,IAAI,iDAAiD;AAC1D,gBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,cAAI,aAAa,SAAS,aAAa;AACrC,iBAAK,aAAa,IAAI;AACtB,iBAAK,OAAO,kEAAkD;AAAA,UAChE;AAAA,QACF;AAGA,cAAM,KAAK,UAAU,OAAO,KAAK;AAAA,MACnC;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,IAAI,iBAAiB;AAC1B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,OAAO,SAAgB;AAC7B,QAAI,CAAC,KAAK,OAAO,SAAU;AAC3B,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,QAAQ,MAAM,KAAK;AACzB,SAAK,YAAY;AACjB,YAAQ,IAAI,WAAW,KAAK,OAAO,GAAG,OAAO;AAAA,EAC/C;AACF;;;AE7PO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
package/dist/index.mjs CHANGED
@@ -1,6 +1,5 @@
1
1
  // src/CallSocket.ts
2
2
  import * as fs from "fs";
3
- import * as path from "path";
4
3
 
5
4
  // src/types.ts
6
5
  var CallClientCommands = /* @__PURE__ */ ((CallClientCommands2) => {
@@ -56,6 +55,27 @@ var CallSocket = class {
56
55
  );
57
56
  this.config.onMessage?.(message);
58
57
  }
58
+ async sendAudio(audio, abort) {
59
+ if (this.abortAnswer) {
60
+ abort?.();
61
+ return;
62
+ }
63
+ if (Buffer.isBuffer(audio) || audio instanceof ArrayBuffer) {
64
+ this.log(`Send audio: (${audio.byteLength} bytes)`);
65
+ this.socket.send(audio);
66
+ } else if ("paused" in audio) {
67
+ for await (const chunk of audio) {
68
+ if (this.abortAnswer) {
69
+ abort?.();
70
+ return;
71
+ }
72
+ this.log(`Send audio chunk (${chunk.length} bytes)`);
73
+ this.socket.send(chunk);
74
+ }
75
+ } else {
76
+ this.log(`Unknown audio type: ${audio}`);
77
+ }
78
+ }
59
79
  onClose() {
60
80
  this.log("Connection closed");
61
81
  this.abortAnswer = true;
@@ -97,9 +117,9 @@ var CallSocket = class {
97
117
  this.chunks.length = 0;
98
118
  try {
99
119
  if (this.config.debugSaveSpeech) {
100
- const filePath = path.join(__dirname, "speech.ogg");
101
- fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()));
102
- return;
120
+ const filename = `speech-${Date.now()}.ogg`;
121
+ fs.writeFileSync(filename, Buffer.from(await blob.arrayBuffer()));
122
+ this.log(`Saved speech: ${filename}`);
103
123
  }
104
124
  const transcript = await this.config.speech2Text(
105
125
  blob,
@@ -138,17 +158,15 @@ var CallSocket = class {
138
158
  this.addMessage({ role: "assistant", content: message });
139
159
  if (!this.config.disableTTS) {
140
160
  const audio = await this.config.text2Speech(message);
141
- if (this.abortAnswer) {
161
+ const abort = () => {
142
162
  this.log("Answer aborted, removing last assistant message");
143
163
  const lastMessage = this.conversation[this.conversation.length - 1];
144
164
  if (lastMessage?.role === "assistant") {
145
165
  this.conversation.pop();
146
166
  this.socket.send("cancelLastAssistantMessage" /* CancelLastAssistantMessage */);
147
167
  }
148
- return;
149
- }
150
- this.log(`Send audio: (${audio.byteLength} bytes)`);
151
- this.socket.send(audio);
168
+ };
169
+ await this.sendAudio(audio, abort);
152
170
  }
153
171
  }
154
172
  if (isEnd) {
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["import * as fs from 'fs'\nimport * as path from 'path'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(\n public socket: WebSocket,\n public config: CallConfig\n ) {\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.log(`Call started`)\n\n // Assistant speaks first\n\n // LLM: Generate answer\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[WS]', error)\n this.socket.close()\n })\n }\n\n // Listen to events\n this.socket.on('close', this.onClose.bind(this))\n this.socket.on('message', this.onMessage.bind(this))\n }\n\n // Reset conversation\n public resetConversation(conversation: Conversation) {\n this.log('Reset conversation')\n this.conversation = conversation\n }\n\n private addMessage(message: ConversationMessage) {\n this.conversation.push(message)\n this.socket.send(\n `${\n message.role === 'user'\n ? CallServerCommands.UserMessage\n : CallServerCommands.AssistantMessage\n } ${message.content}`\n )\n this.config.onMessage?.(message)\n }\n\n private onClose() {\n this.log('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n // @ts-ignore\n this.socket = null\n // @ts-ignore\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[WS] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.log(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.log(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (this.config.debugSaveSpeech) {\n const filePath = path.join(__dirname, 'speech.ogg')\n fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()))\n return\n }\n\n // STT: Get transcript and send to client\n const transcript = await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.log('Ignoring empty transcript')\n return\n }\n\n this.log('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.log('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.log('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[WS]', error)\n this.socket.close()\n }\n }\n\n // Add assistant message and send to client with audio (TTS)\n public async answer(message: string) {\n let isEnd = false\n\n // Detect end of interview\n if (message.includes(END_INTERVIEW)) {\n message = message.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (message.length) {\n // Send answer to client\n this.log('Assistant message:', message)\n this.addMessage({ role: 'assistant', content: message })\n\n // TTS: Generate answer audio\n if (!this.config.disableTTS) {\n const audio = await this.config.text2Speech(message)\n if (this.abortAnswer) {\n // Remove last assistant message if aborted\n this.log('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket.send(CallServerCommands.CancelLastAssistantMessage)\n }\n return\n }\n\n // Send audio to client\n this.log(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n }\n }\n\n // End of call\n if (isEnd) {\n this.log('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private log(...message: any[]) {\n if (!this.config.debugLog) return\n const now = Date.now()\n const delta = now - this.lastDebug\n this.lastDebug = now\n console.log(`[Debug +${delta}ms]`, ...message)\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n UserMessage = 'userMessage',\n AssistantMessage = 'assistantMessage',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n debugLog?: boolean\n debugSaveSpeech?: boolean\n disableTTS?: boolean\n generateAnswer(conversation: Conversation): Promise<string>\n speech2Text(blob: Blob, prompt?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage {\n role: 'system' | 'user' | 'assistant'\n content: string\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";AAAA,YAAY,QAAQ;AACpB,YAAY,UAAU;;;ACDf,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,iBAAc;AACd,EAAAA,oBAAA,sBAAmB;AACnB,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADKL,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAgBtB,YACS,QACA,QACP;AAFO;AACA;AAjBT,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAS1B,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,IAAI,cAAc;AAKvB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,QAAQ,KAAK;AAC3B,aAAK,OAAO,MAAM;AAAA,MACpB,CAAC;AAAA,IACL;AAGA,SAAK,OAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC/C,SAAK,OAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EACrD;AAAA;AAAA,EAGO,kBAAkB,cAA4B;AACnD,SAAK,IAAI,oBAAoB;AAC7B,SAAK,eAAe;AAAA,EACtB;AAAA,EAEQ,WAAW,SAA8B;AAC/C,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO;AAAA,MACV,GACE,QAAQ,SAAS,oFAGnB,IAAI,QAAQ,OAAO;AAAA,IACrB;AACA,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEQ,UAAU;AAChB,SAAK,IAAI,mBAAmB;AAC5B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAID,SAAK,SAAS;AAEd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,8BAA8B;AAC3C;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,IAAI,YAAY,GAAG,EAAE;AAE1B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,IAAI,mBAAmB,QAAQ,UAAU,SAAS;AACvD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAE7B,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,KAAK,OAAO,iBAAiB;AAC/B,cAAM,WAAgB,UAAK,WAAW,YAAY;AAClD,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE;AAAA,MACF;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,IAAI,2BAA2B;AACpC;AAAA,MACF;AAEA,WAAK,IAAI,oBAAoB,UAAU;AAGvC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,qCAAqC;AAC9C;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,iCAAiC;AAC1C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,QAAQ,KAAK;AAC3B,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA;AAAA,EAGA,MAAa,OAAO,SAAiB;AACnC,QAAI,QAAQ;AAGZ,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,IAAI,sBAAsB,OAAO;AACtC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,QAAQ,CAAC;AAGvD,UAAI,CAAC,KAAK,OAAO,YAAY;AAC3B,cAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AACnD,YAAI,KAAK,aAAa;AAEpB,eAAK,IAAI,iDAAiD;AAC1D,gBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,cAAI,aAAa,SAAS,aAAa;AACrC,iBAAK,aAAa,IAAI;AACtB,iBAAK,OAAO,kEAAkD;AAAA,UAChE;AACA;AAAA,QACF;AAGA,aAAK,IAAI,gBAAgB,MAAM,UAAU,SAAS;AAClD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,IAAI,iBAAiB;AAC1B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,OAAO,SAAgB;AAC7B,QAAI,CAAC,KAAK,OAAO,SAAU;AAC3B,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,QAAQ,MAAM,KAAK;AACzB,SAAK,YAAY;AACjB,YAAQ,IAAI,WAAW,KAAK,OAAO,GAAG,OAAO;AAAA,EAC/C;AACF;;;AEnOO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
1
+ {"version":3,"sources":["../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["import * as fs from 'fs'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(\n public socket: WebSocket,\n public config: CallConfig\n ) {\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.log(`Call started`)\n\n // Assistant speaks first\n\n // LLM: Generate answer\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[WS]', error)\n this.socket.close()\n })\n }\n\n // Listen to events\n this.socket.on('close', this.onClose.bind(this))\n this.socket.on('message', this.onMessage.bind(this))\n }\n\n // Reset conversation\n public resetConversation(conversation: Conversation) {\n this.log('Reset conversation')\n this.conversation = conversation\n }\n\n private addMessage(message: ConversationMessage) {\n this.conversation.push(message)\n this.socket.send(\n `${\n message.role === 'user'\n ? CallServerCommands.UserMessage\n : CallServerCommands.AssistantMessage\n } ${message.content}`\n )\n this.config.onMessage?.(message)\n }\n\n private async sendAudio(\n audio: ArrayBuffer | NodeJS.ReadableStream,\n abort?: () => void\n ) {\n if (this.abortAnswer) {\n abort?.()\n return\n }\n\n if (Buffer.isBuffer(audio) || audio instanceof ArrayBuffer) {\n // Whole audio as a single buffer\n this.log(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n } else if ('paused' in audio) {\n // Audio as a stream\n for await (const chunk of audio) {\n if (this.abortAnswer) {\n abort?.()\n return\n }\n this.log(`Send audio chunk (${chunk.length} bytes)`)\n this.socket.send(chunk)\n }\n } else {\n this.log(`Unknown audio type: ${audio}`)\n }\n }\n\n private onClose() {\n this.log('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n // @ts-ignore\n this.socket = null\n // @ts-ignore\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[WS] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.log(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.log(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (this.config.debugSaveSpeech) {\n const filename = `speech-${Date.now()}.ogg`\n fs.writeFileSync(filename, Buffer.from(await blob.arrayBuffer()))\n this.log(`Saved speech: ${filename}`)\n }\n\n // STT: Get transcript and send to client\n const transcript = await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.log('Ignoring empty transcript')\n return\n }\n\n this.log('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.log('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.log('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[WS]', error)\n this.socket.close()\n }\n }\n\n // Add assistant message and send to client with audio (TTS)\n public async answer(message: string) {\n let isEnd = false\n\n // Detect end of interview\n if (message.includes(END_INTERVIEW)) {\n message = message.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (message.length) {\n // Send answer to client\n this.log('Assistant message:', message)\n this.addMessage({ role: 'assistant', content: message })\n\n // TTS: Generate answer audio\n if (!this.config.disableTTS) {\n const audio = await this.config.text2Speech(message)\n\n // Remove last assistant message if aborted\n const abort = () => {\n this.log('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket.send(CallServerCommands.CancelLastAssistantMessage)\n }\n }\n\n // Send audio to client\n await this.sendAudio(audio, abort)\n }\n }\n\n // End of call\n if (isEnd) {\n this.log('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private log(...message: any[]) {\n if (!this.config.debugLog) return\n const now = Date.now()\n const delta = now - this.lastDebug\n this.lastDebug = now\n console.log(`[Debug +${delta}ms]`, ...message)\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n UserMessage = 'userMessage',\n AssistantMessage = 'assistantMessage',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n debugLog?: boolean\n debugSaveSpeech?: boolean\n disableTTS?: boolean\n generateAnswer(conversation: Conversation): Promise<string>\n speech2Text(blob: Blob, prompt?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage {\n role: 'system' | 'user' | 'assistant'\n content: string\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";AAAA,YAAY,QAAQ;;;ACAb,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,iBAAc;AACd,EAAAA,oBAAA,sBAAmB;AACnB,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADIL,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAgBtB,YACS,QACA,QACP;AAFO;AACA;AAjBT,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAS1B,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,IAAI,cAAc;AAKvB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,QAAQ,KAAK;AAC3B,aAAK,OAAO,MAAM;AAAA,MACpB,CAAC;AAAA,IACL;AAGA,SAAK,OAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC/C,SAAK,OAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EACrD;AAAA;AAAA,EAGO,kBAAkB,cAA4B;AACnD,SAAK,IAAI,oBAAoB;AAC7B,SAAK,eAAe;AAAA,EACtB;AAAA,EAEQ,WAAW,SAA8B;AAC/C,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO;AAAA,MACV,GACE,QAAQ,SAAS,oFAGnB,IAAI,QAAQ,OAAO;AAAA,IACrB;AACA,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEA,MAAc,UACZ,OACA,OACA;AACA,QAAI,KAAK,aAAa;AACpB,cAAQ;AACR;AAAA,IACF;AAEA,QAAI,OAAO,SAAS,KAAK,KAAK,iBAAiB,aAAa;AAE1D,WAAK,IAAI,gBAAgB,MAAM,UAAU,SAAS;AAClD,WAAK,OAAO,KAAK,KAAK;AAAA,IACxB,WAAW,YAAY,OAAO;AAE5B,uBAAiB,SAAS,OAAO;AAC/B,YAAI,KAAK,aAAa;AACpB,kBAAQ;AACR;AAAA,QACF;AACA,aAAK,IAAI,qBAAqB,MAAM,MAAM,SAAS;AACnD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF,OAAO;AACL,WAAK,IAAI,uBAAuB,KAAK,EAAE;AAAA,IACzC;AAAA,EACF;AAAA,EAEQ,UAAU;AAChB,SAAK,IAAI,mBAAmB;AAC5B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAID,SAAK,SAAS;AAEd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,8BAA8B;AAC3C;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,IAAI,YAAY,GAAG,EAAE;AAE1B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,IAAI,mBAAmB,QAAQ,UAAU,SAAS;AACvD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAE7B,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,KAAK,OAAO,iBAAiB;AAC/B,cAAM,WAAW,UAAU,KAAK,IAAI,CAAC;AACrC,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE,aAAK,IAAI,iBAAiB,QAAQ,EAAE;AAAA,MACtC;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,IAAI,2BAA2B;AACpC;AAAA,MACF;AAEA,WAAK,IAAI,oBAAoB,UAAU;AAGvC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,qCAAqC;AAC9C;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,iCAAiC;AAC1C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,QAAQ,KAAK;AAC3B,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA;AAAA,EAGA,MAAa,OAAO,SAAiB;AACnC,QAAI,QAAQ;AAGZ,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,IAAI,sBAAsB,OAAO;AACtC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,QAAQ,CAAC;AAGvD,UAAI,CAAC,KAAK,OAAO,YAAY;AAC3B,cAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AAGnD,cAAM,QAAQ,MAAM;AAClB,eAAK,IAAI,iDAAiD;AAC1D,gBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,cAAI,aAAa,SAAS,aAAa;AACrC,iBAAK,aAAa,IAAI;AACtB,iBAAK,OAAO,kEAAkD;AAAA,UAChE;AAAA,QACF;AAGA,cAAM,KAAK,UAAU,OAAO,KAAK;AAAA,MACnC;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,IAAI,iBAAiB;AAC1B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,OAAO,SAAgB;AAC7B,QAAI,CAAC,KAAK,OAAO,SAAU;AAC3B,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,QAAQ,MAAM,KAAK;AACzB,SAAK,YAAY;AACjB,YAAQ,IAAI,WAAW,KAAK,OAAO,GAAG,OAAO;AAAA,EAC/C;AACF;;;AE7PO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@micdrop/server",
3
- "version": "1.0.3",
3
+ "version": "1.2.0",
4
4
  "description": "A lib for Node.js that helps to use the mic and speaker for voice conversation",
5
5
  "author": "Lonestone",
6
6
  "license": "MIT",