@micdrop/server 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -8,6 +8,7 @@ declare enum CallClientCommands {
8
8
  declare enum CallServerCommands {
9
9
  Message = "message",
10
10
  CancelLastAssistantMessage = "cancelLastAssistantMessage",
11
+ EnableSpeakerStreaming = "enableSpeakerStreaming",
11
12
  EndInterview = "endInterview"
12
13
  }
13
14
  interface CallConfig {
@@ -43,6 +44,7 @@ declare class CallSocket {
43
44
  private isSpeaking;
44
45
  private chunks;
45
46
  private conversation;
47
+ private speakerStreamingEnabled;
46
48
  constructor(socket: WebSocket, config: CallConfig);
47
49
  resetConversation(conversation: Conversation): void;
48
50
  private addMessage;
package/dist/index.d.ts CHANGED
@@ -8,6 +8,7 @@ declare enum CallClientCommands {
8
8
  declare enum CallServerCommands {
9
9
  Message = "message",
10
10
  CancelLastAssistantMessage = "cancelLastAssistantMessage",
11
+ EnableSpeakerStreaming = "enableSpeakerStreaming",
11
12
  EndInterview = "endInterview"
12
13
  }
13
14
  interface CallConfig {
@@ -43,6 +44,7 @@ declare class CallSocket {
43
44
  private isSpeaking;
44
45
  private chunks;
45
46
  private conversation;
47
+ private speakerStreamingEnabled;
46
48
  constructor(socket: WebSocket, config: CallConfig);
47
49
  resetConversation(conversation: Conversation): void;
48
50
  private addMessage;
package/dist/index.js CHANGED
@@ -54,6 +54,7 @@ var CallClientCommands = /* @__PURE__ */ ((CallClientCommands2) => {
54
54
  var CallServerCommands = /* @__PURE__ */ ((CallServerCommands2) => {
55
55
  CallServerCommands2["Message"] = "message";
56
56
  CallServerCommands2["CancelLastAssistantMessage"] = "cancelLastAssistantMessage";
57
+ CallServerCommands2["EnableSpeakerStreaming"] = "enableSpeakerStreaming";
57
58
  CallServerCommands2["EndInterview"] = "endInterview";
58
59
  return CallServerCommands2;
59
60
  })(CallServerCommands || {});
@@ -72,6 +73,8 @@ var CallSocket = class {
72
73
  this.isSpeaking = false;
73
74
  // Chunks of user speech since user started speaking
74
75
  this.chunks = [];
76
+ // Enable speaker streaming
77
+ this.speakerStreamingEnabled = false;
75
78
  this.socket = socket;
76
79
  this.config = config;
77
80
  this.conversation = [{ role: "system", content: config.systemPrompt }];
@@ -108,6 +111,10 @@ var CallSocket = class {
108
111
  this.log(`Send audio: (${audio.byteLength} bytes)`);
109
112
  this.socket.send(audio);
110
113
  } else if ("paused" in audio) {
114
+ if (!this.speakerStreamingEnabled) {
115
+ this.socket.send("enableSpeakerStreaming" /* EnableSpeakerStreaming */);
116
+ this.speakerStreamingEnabled = true;
117
+ }
111
118
  for await (const chunk of audio) {
112
119
  if (this.abortAnswer) {
113
120
  abort?.();
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["export * from './CallSocket'\nexport * from './errors'\nexport * from './types'\nexport * from './waitForParams'\n","import * as fs from 'fs'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n public socket: WebSocket | null = null\n public config: CallConfig | null = null\n\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(socket: WebSocket, config: CallConfig) {\n this.socket = socket\n this.config = config\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.log(`Call started`)\n\n // Assistant speaks first\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[CallSocket]', error)\n socket?.close()\n // TODO: Implement retry\n })\n }\n\n // Listen to events\n socket.on('close', this.onClose.bind(this))\n socket.on('message', this.onMessage.bind(this))\n }\n\n // Reset conversation\n public resetConversation(conversation: Conversation) {\n this.log('Reset conversation')\n this.conversation = conversation\n }\n\n private addMessage(message: ConversationMessage) {\n if (!this.socket || !this.config) return\n this.conversation.push(message)\n this.socket.send(`${CallServerCommands.Message} ${JSON.stringify(message)}`)\n this.config.onMessage?.(message)\n }\n\n private async sendAudio(\n audio: ArrayBuffer | NodeJS.ReadableStream,\n abort?: () => void\n ) {\n if (!this.socket) return\n if (this.abortAnswer) {\n abort?.()\n return\n }\n\n if (Buffer.isBuffer(audio) || audio instanceof ArrayBuffer) {\n // Whole audio as a single buffer\n this.log(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n } else if ('paused' in audio) {\n // Audio as a stream\n for await (const chunk of audio) {\n if (this.abortAnswer) {\n abort?.()\n return\n }\n this.log(`Send audio chunk (${chunk.length} bytes)`)\n this.socket.send(chunk)\n }\n } else {\n this.log(`Unknown audio type: ${audio}`)\n }\n }\n\n private onClose() {\n if (!this.config) return\n this.log('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n this.socket = null\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[CallSocket] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.log(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.log(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n if (!this.config) return\n\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (this.config.debugSaveSpeech) {\n const filename = `speech-${Date.now()}.ogg`\n fs.writeFileSync(filename, Buffer.from(await blob.arrayBuffer()))\n this.log(`Saved speech: ${filename}`)\n }\n\n // STT: Get transcript and send to client\n const transcript = await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.log('Ignoring empty transcript')\n return\n }\n\n this.log('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.log('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.log('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[CallSocket]', error)\n // TODO: Implement retry\n }\n }\n\n // Add assistant message and send to client with audio (TTS)\n public async answer(message: string | ConversationMessage) {\n if (!this.socket || !this.config) return\n let isEnd = false\n\n let content = typeof message === 'string' ? message : message.content\n const metadata = typeof message === 'string' ? undefined : message.metadata\n\n // Detect end of interview\n if (content.includes(END_INTERVIEW)) {\n content = content.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (content.length) {\n // Send answer to client\n this.log('Assistant message:', message)\n this.addMessage({ role: 'assistant', content, metadata })\n\n // TTS: Generate answer audio\n if (!this.config.disableTTS) {\n try {\n const audio = await this.config.text2Speech(content)\n\n // Remove last assistant message if aborted\n const abort = () => {\n this.log('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket?.send(CallServerCommands.CancelLastAssistantMessage)\n }\n }\n\n // Send audio to client\n await this.sendAudio(audio, abort)\n } catch (error) {\n console.error('[CallSocket]', error)\n // TODO: Implement retry\n }\n }\n }\n\n // End of call\n if (isEnd) {\n this.log('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private log(...message: any[]) {\n if (!this.config?.debugLog) return\n const now = Date.now()\n const delta = now - this.lastDebug\n this.lastDebug = now\n console.log(`[Debug +${delta}ms]`, ...message)\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n Message = 'message',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n debugLog?: boolean\n debugSaveSpeech?: boolean\n disableTTS?: boolean\n generateAnswer(\n conversation: Conversation\n ): Promise<string | ConversationMessage>\n speech2Text(audioBlob: Blob, prevMessage?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage<Data = any> {\n role: 'system' | 'user' | 'assistant'\n content: string\n metadata?: Data\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAoB;;;ACAb,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,aAAU;AACV,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAHL,SAAAA;AAAA,GAAA;;;ADIL,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAmBtB,YAAY,QAAmB,QAAoB;AAlBnD,SAAO,SAA2B;AAClC,SAAO,SAA4B;AAEnC,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAM1B,SAAK,SAAS;AACd,SAAK,SAAS;AACd,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,IAAI,cAAc;AAGvB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,gBAAgB,KAAK;AACnC,gBAAQ,MAAM;AAAA,MAEhB,CAAC;AAAA,IACL;AAGA,WAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC1C,WAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EAChD;AAAA;AAAA,EAGO,kBAAkB,cAA4B;AACnD,SAAK,IAAI,oBAAoB;AAC7B,SAAK,eAAe;AAAA,EACtB;AAAA,EAEQ,WAAW,SAA8B;AAC/C,QAAI,CAAC,KAAK,UAAU,CAAC,KAAK,OAAQ;AAClC,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO,KAAK,0BAA6B,IAAI,KAAK,UAAU,OAAO,CAAC,EAAE;AAC3E,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEA,MAAc,UACZ,OACA,OACA;AACA,QAAI,CAAC,KAAK,OAAQ;AAClB,QAAI,KAAK,aAAa;AACpB,cAAQ;AACR;AAAA,IACF;AAEA,QAAI,OAAO,SAAS,KAAK,KAAK,iBAAiB,aAAa;AAE1D,WAAK,IAAI,gBAAgB,MAAM,UAAU,SAAS;AAClD,WAAK,OAAO,KAAK,KAAK;AAAA,IACxB,WAAW,YAAY,OAAO;AAE5B,uBAAiB,SAAS,OAAO;AAC/B,YAAI,KAAK,aAAa;AACpB,kBAAQ;AACR;AAAA,QACF;AACA,aAAK,IAAI,qBAAqB,MAAM,MAAM,SAAS;AACnD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF,OAAO;AACL,WAAK,IAAI,uBAAuB,KAAK,EAAE;AAAA,IACzC;AAAA,EACF;AAAA,EAEQ,UAAU;AAChB,QAAI,CAAC,KAAK,OAAQ;AAClB,SAAK,IAAI,mBAAmB;AAC5B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAGD,SAAK,SAAS;AACd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,sCAAsC;AACnD;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,IAAI,YAAY,GAAG,EAAE;AAE1B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,IAAI,mBAAmB,QAAQ,UAAU,SAAS;AACvD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAC7B,QAAI,CAAC,KAAK,OAAQ;AAGlB,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,KAAK,OAAO,iBAAiB;AAC/B,cAAM,WAAW,UAAU,KAAK,IAAI,CAAC;AACrC,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE,aAAK,IAAI,iBAAiB,QAAQ,EAAE;AAAA,MACtC;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,IAAI,2BAA2B;AACpC;AAAA,MACF;AAEA,WAAK,IAAI,oBAAoB,UAAU;AAGvC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,qCAAqC;AAC9C;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,iCAAiC;AAC1C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,gBAAgB,KAAK;AAAA,IAErC;AAAA,EACF;AAAA;AAAA,EAGA,MAAa,OAAO,SAAuC;AACzD,QAAI,CAAC,KAAK,UAAU,CAAC,KAAK,OAAQ;AAClC,QAAI,QAAQ;AAEZ,QAAI,UAAU,OAAO,YAAY,WAAW,UAAU,QAAQ;AAC9D,UAAM,WAAW,OAAO,YAAY,WAAW,SAAY,QAAQ;AAGnE,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,IAAI,sBAAsB,OAAO;AACtC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,SAAS,CAAC;AAGxD,UAAI,CAAC,KAAK,OAAO,YAAY;AAC3B,YAAI;AACF,gBAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AAGnD,gBAAM,QAAQ,MAAM;AAClB,iBAAK,IAAI,iDAAiD;AAC1D,kBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,gBAAI,aAAa,SAAS,aAAa;AACrC,mBAAK,aAAa,IAAI;AACtB,mBAAK,QAAQ,kEAAkD;AAAA,YACjE;AAAA,UACF;AAGA,gBAAM,KAAK,UAAU,OAAO,KAAK;AAAA,QACnC,SAAS,OAAO;AACd,kBAAQ,MAAM,gBAAgB,KAAK;AAAA,QAErC;AAAA,MACF;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,IAAI,iBAAiB;AAC1B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,OAAO,SAAgB;AAC7B,QAAI,CAAC,KAAK,QAAQ,SAAU;AAC5B,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,QAAQ,MAAM,KAAK;AACzB,SAAK,YAAY;AACjB,YAAQ,IAAI,WAAW,KAAK,OAAO,GAAG,OAAO;AAAA,EAC/C;AACF;;;AEpQO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
1
+ {"version":3,"sources":["../src/index.ts","../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["export * from './CallSocket'\nexport * from './errors'\nexport * from './types'\nexport * from './waitForParams'\n","import * as fs from 'fs'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n public socket: WebSocket | null = null\n public config: CallConfig | null = null\n\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n // Enable speaker streaming\n private speakerStreamingEnabled = false\n\n constructor(socket: WebSocket, config: CallConfig) {\n this.socket = socket\n this.config = config\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.log(`Call started`)\n\n // Assistant speaks first\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[CallSocket]', error)\n socket?.close()\n // TODO: Implement retry\n })\n }\n\n // Listen to events\n socket.on('close', this.onClose.bind(this))\n socket.on('message', this.onMessage.bind(this))\n }\n\n // Reset conversation\n public resetConversation(conversation: Conversation) {\n this.log('Reset conversation')\n this.conversation = conversation\n }\n\n private addMessage(message: ConversationMessage) {\n if (!this.socket || !this.config) return\n this.conversation.push(message)\n this.socket.send(`${CallServerCommands.Message} ${JSON.stringify(message)}`)\n this.config.onMessage?.(message)\n }\n\n private async sendAudio(\n audio: ArrayBuffer | NodeJS.ReadableStream,\n abort?: () => void\n ) {\n if (!this.socket) return\n if (this.abortAnswer) {\n abort?.()\n return\n }\n\n if (Buffer.isBuffer(audio) || audio instanceof ArrayBuffer) {\n // Whole audio as a single buffer\n this.log(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n } else if ('paused' in audio) {\n // Enable speaker streaming if not already enabled\n if (!this.speakerStreamingEnabled) {\n this.socket.send(CallServerCommands.EnableSpeakerStreaming)\n this.speakerStreamingEnabled = true\n }\n\n // Audio as a stream\n for await (const chunk of audio) {\n if (this.abortAnswer) {\n abort?.()\n return\n }\n this.log(`Send audio chunk (${chunk.length} bytes)`)\n this.socket.send(chunk)\n }\n } else {\n this.log(`Unknown audio type: ${audio}`)\n }\n }\n\n private onClose() {\n if (!this.config) return\n this.log('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n this.socket = null\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[CallSocket] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.log(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.log(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n if (!this.config) return\n\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (this.config.debugSaveSpeech) {\n const filename = `speech-${Date.now()}.ogg`\n fs.writeFileSync(filename, Buffer.from(await blob.arrayBuffer()))\n this.log(`Saved speech: ${filename}`)\n }\n\n // STT: Get transcript and send to client\n const transcript = await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.log('Ignoring empty transcript')\n return\n }\n\n this.log('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.log('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.log('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[CallSocket]', error)\n // TODO: Implement retry\n }\n }\n\n // Add assistant message and send to client with audio (TTS)\n public async answer(message: string | ConversationMessage) {\n if (!this.socket || !this.config) return\n let isEnd = false\n\n let content = typeof message === 'string' ? message : message.content\n const metadata = typeof message === 'string' ? undefined : message.metadata\n\n // Detect end of interview\n if (content.includes(END_INTERVIEW)) {\n content = content.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (content.length) {\n // Send answer to client\n this.log('Assistant message:', message)\n this.addMessage({ role: 'assistant', content, metadata })\n\n // TTS: Generate answer audio\n if (!this.config.disableTTS) {\n try {\n const audio = await this.config.text2Speech(content)\n\n // Remove last assistant message if aborted\n const abort = () => {\n this.log('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket?.send(CallServerCommands.CancelLastAssistantMessage)\n }\n }\n\n // Send audio to client\n await this.sendAudio(audio, abort)\n } catch (error) {\n console.error('[CallSocket]', error)\n // TODO: Implement retry\n }\n }\n }\n\n // End of call\n if (isEnd) {\n this.log('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private log(...message: any[]) {\n if (!this.config?.debugLog) return\n const now = Date.now()\n const delta = now - this.lastDebug\n this.lastDebug = now\n console.log(`[Debug +${delta}ms]`, ...message)\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n Message = 'message',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EnableSpeakerStreaming = 'enableSpeakerStreaming',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n debugLog?: boolean\n debugSaveSpeech?: boolean\n disableTTS?: boolean\n generateAnswer(\n conversation: Conversation\n ): Promise<string | ConversationMessage>\n speech2Text(audioBlob: Blob, prevMessage?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage<Data = any> {\n role: 'system' | 'user' | 'assistant'\n content: string\n metadata?: Data\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAoB;;;ACAb,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,aAAU;AACV,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,4BAAyB;AACzB,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADIL,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAsBtB,YAAY,QAAmB,QAAoB;AArBnD,SAAO,SAA2B;AAClC,SAAO,SAA4B;AAEnC,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAM5B;AAAA,SAAQ,0BAA0B;AAGhC,SAAK,SAAS;AACd,SAAK,SAAS;AACd,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,IAAI,cAAc;AAGvB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,gBAAgB,KAAK;AACnC,gBAAQ,MAAM;AAAA,MAEhB,CAAC;AAAA,IACL;AAGA,WAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC1C,WAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EAChD;AAAA;AAAA,EAGO,kBAAkB,cAA4B;AACnD,SAAK,IAAI,oBAAoB;AAC7B,SAAK,eAAe;AAAA,EACtB;AAAA,EAEQ,WAAW,SAA8B;AAC/C,QAAI,CAAC,KAAK,UAAU,CAAC,KAAK,OAAQ;AAClC,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO,KAAK,0BAA6B,IAAI,KAAK,UAAU,OAAO,CAAC,EAAE;AAC3E,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEA,MAAc,UACZ,OACA,OACA;AACA,QAAI,CAAC,KAAK,OAAQ;AAClB,QAAI,KAAK,aAAa;AACpB,cAAQ;AACR;AAAA,IACF;AAEA,QAAI,OAAO,SAAS,KAAK,KAAK,iBAAiB,aAAa;AAE1D,WAAK,IAAI,gBAAgB,MAAM,UAAU,SAAS;AAClD,WAAK,OAAO,KAAK,KAAK;AAAA,IACxB,WAAW,YAAY,OAAO;AAE5B,UAAI,CAAC,KAAK,yBAAyB;AACjC,aAAK,OAAO,0DAA8C;AAC1D,aAAK,0BAA0B;AAAA,MACjC;AAGA,uBAAiB,SAAS,OAAO;AAC/B,YAAI,KAAK,aAAa;AACpB,kBAAQ;AACR;AAAA,QACF;AACA,aAAK,IAAI,qBAAqB,MAAM,MAAM,SAAS;AACnD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF,OAAO;AACL,WAAK,IAAI,uBAAuB,KAAK,EAAE;AAAA,IACzC;AAAA,EACF;AAAA,EAEQ,UAAU;AAChB,QAAI,CAAC,KAAK,OAAQ;AAClB,SAAK,IAAI,mBAAmB;AAC5B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAGD,SAAK,SAAS;AACd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,sCAAsC;AACnD;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,IAAI,YAAY,GAAG,EAAE;AAE1B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,IAAI,mBAAmB,QAAQ,UAAU,SAAS;AACvD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAC7B,QAAI,CAAC,KAAK,OAAQ;AAGlB,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,KAAK,OAAO,iBAAiB;AAC/B,cAAM,WAAW,UAAU,KAAK,IAAI,CAAC;AACrC,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE,aAAK,IAAI,iBAAiB,QAAQ,EAAE;AAAA,MACtC;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,IAAI,2BAA2B;AACpC;AAAA,MACF;AAEA,WAAK,IAAI,oBAAoB,UAAU;AAGvC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,qCAAqC;AAC9C;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,iCAAiC;AAC1C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,gBAAgB,KAAK;AAAA,IAErC;AAAA,EACF;AAAA;AAAA,EAGA,MAAa,OAAO,SAAuC;AACzD,QAAI,CAAC,KAAK,UAAU,CAAC,KAAK,OAAQ;AAClC,QAAI,QAAQ;AAEZ,QAAI,UAAU,OAAO,YAAY,WAAW,UAAU,QAAQ;AAC9D,UAAM,WAAW,OAAO,YAAY,WAAW,SAAY,QAAQ;AAGnE,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,IAAI,sBAAsB,OAAO;AACtC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,SAAS,CAAC;AAGxD,UAAI,CAAC,KAAK,OAAO,YAAY;AAC3B,YAAI;AACF,gBAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AAGnD,gBAAM,QAAQ,MAAM;AAClB,iBAAK,IAAI,iDAAiD;AAC1D,kBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,gBAAI,aAAa,SAAS,aAAa;AACrC,mBAAK,aAAa,IAAI;AACtB,mBAAK,QAAQ,kEAAkD;AAAA,YACjE;AAAA,UACF;AAGA,gBAAM,KAAK,UAAU,OAAO,KAAK;AAAA,QACnC,SAAS,OAAO;AACd,kBAAQ,MAAM,gBAAgB,KAAK;AAAA,QAErC;AAAA,MACF;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,IAAI,iBAAiB;AAC1B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,OAAO,SAAgB;AAC7B,QAAI,CAAC,KAAK,QAAQ,SAAU;AAC5B,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,QAAQ,MAAM,KAAK;AACzB,SAAK,YAAY;AACjB,YAAQ,IAAI,WAAW,KAAK,OAAO,GAAG,OAAO;AAAA,EAC/C;AACF;;;AE7QO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
package/dist/index.mjs CHANGED
@@ -11,6 +11,7 @@ var CallClientCommands = /* @__PURE__ */ ((CallClientCommands2) => {
11
11
  var CallServerCommands = /* @__PURE__ */ ((CallServerCommands2) => {
12
12
  CallServerCommands2["Message"] = "message";
13
13
  CallServerCommands2["CancelLastAssistantMessage"] = "cancelLastAssistantMessage";
14
+ CallServerCommands2["EnableSpeakerStreaming"] = "enableSpeakerStreaming";
14
15
  CallServerCommands2["EndInterview"] = "endInterview";
15
16
  return CallServerCommands2;
16
17
  })(CallServerCommands || {});
@@ -29,6 +30,8 @@ var CallSocket = class {
29
30
  this.isSpeaking = false;
30
31
  // Chunks of user speech since user started speaking
31
32
  this.chunks = [];
33
+ // Enable speaker streaming
34
+ this.speakerStreamingEnabled = false;
32
35
  this.socket = socket;
33
36
  this.config = config;
34
37
  this.conversation = [{ role: "system", content: config.systemPrompt }];
@@ -65,6 +68,10 @@ var CallSocket = class {
65
68
  this.log(`Send audio: (${audio.byteLength} bytes)`);
66
69
  this.socket.send(audio);
67
70
  } else if ("paused" in audio) {
71
+ if (!this.speakerStreamingEnabled) {
72
+ this.socket.send("enableSpeakerStreaming" /* EnableSpeakerStreaming */);
73
+ this.speakerStreamingEnabled = true;
74
+ }
68
75
  for await (const chunk of audio) {
69
76
  if (this.abortAnswer) {
70
77
  abort?.();
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["import * as fs from 'fs'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n public socket: WebSocket | null = null\n public config: CallConfig | null = null\n\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(socket: WebSocket, config: CallConfig) {\n this.socket = socket\n this.config = config\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.log(`Call started`)\n\n // Assistant speaks first\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[CallSocket]', error)\n socket?.close()\n // TODO: Implement retry\n })\n }\n\n // Listen to events\n socket.on('close', this.onClose.bind(this))\n socket.on('message', this.onMessage.bind(this))\n }\n\n // Reset conversation\n public resetConversation(conversation: Conversation) {\n this.log('Reset conversation')\n this.conversation = conversation\n }\n\n private addMessage(message: ConversationMessage) {\n if (!this.socket || !this.config) return\n this.conversation.push(message)\n this.socket.send(`${CallServerCommands.Message} ${JSON.stringify(message)}`)\n this.config.onMessage?.(message)\n }\n\n private async sendAudio(\n audio: ArrayBuffer | NodeJS.ReadableStream,\n abort?: () => void\n ) {\n if (!this.socket) return\n if (this.abortAnswer) {\n abort?.()\n return\n }\n\n if (Buffer.isBuffer(audio) || audio instanceof ArrayBuffer) {\n // Whole audio as a single buffer\n this.log(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n } else if ('paused' in audio) {\n // Audio as a stream\n for await (const chunk of audio) {\n if (this.abortAnswer) {\n abort?.()\n return\n }\n this.log(`Send audio chunk (${chunk.length} bytes)`)\n this.socket.send(chunk)\n }\n } else {\n this.log(`Unknown audio type: ${audio}`)\n }\n }\n\n private onClose() {\n if (!this.config) return\n this.log('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n this.socket = null\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[CallSocket] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.log(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.log(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n if (!this.config) return\n\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (this.config.debugSaveSpeech) {\n const filename = `speech-${Date.now()}.ogg`\n fs.writeFileSync(filename, Buffer.from(await blob.arrayBuffer()))\n this.log(`Saved speech: ${filename}`)\n }\n\n // STT: Get transcript and send to client\n const transcript = await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.log('Ignoring empty transcript')\n return\n }\n\n this.log('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.log('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.log('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[CallSocket]', error)\n // TODO: Implement retry\n }\n }\n\n // Add assistant message and send to client with audio (TTS)\n public async answer(message: string | ConversationMessage) {\n if (!this.socket || !this.config) return\n let isEnd = false\n\n let content = typeof message === 'string' ? message : message.content\n const metadata = typeof message === 'string' ? undefined : message.metadata\n\n // Detect end of interview\n if (content.includes(END_INTERVIEW)) {\n content = content.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (content.length) {\n // Send answer to client\n this.log('Assistant message:', message)\n this.addMessage({ role: 'assistant', content, metadata })\n\n // TTS: Generate answer audio\n if (!this.config.disableTTS) {\n try {\n const audio = await this.config.text2Speech(content)\n\n // Remove last assistant message if aborted\n const abort = () => {\n this.log('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket?.send(CallServerCommands.CancelLastAssistantMessage)\n }\n }\n\n // Send audio to client\n await this.sendAudio(audio, abort)\n } catch (error) {\n console.error('[CallSocket]', error)\n // TODO: Implement retry\n }\n }\n }\n\n // End of call\n if (isEnd) {\n this.log('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private log(...message: any[]) {\n if (!this.config?.debugLog) return\n const now = Date.now()\n const delta = now - this.lastDebug\n this.lastDebug = now\n console.log(`[Debug +${delta}ms]`, ...message)\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n Message = 'message',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n debugLog?: boolean\n debugSaveSpeech?: boolean\n disableTTS?: boolean\n generateAnswer(\n conversation: Conversation\n ): Promise<string | ConversationMessage>\n speech2Text(audioBlob: Blob, prevMessage?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage<Data = any> {\n role: 'system' | 'user' | 'assistant'\n content: string\n metadata?: Data\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";AAAA,YAAY,QAAQ;;;ACAb,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,aAAU;AACV,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAHL,SAAAA;AAAA,GAAA;;;ADIL,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAmBtB,YAAY,QAAmB,QAAoB;AAlBnD,SAAO,SAA2B;AAClC,SAAO,SAA4B;AAEnC,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAM1B,SAAK,SAAS;AACd,SAAK,SAAS;AACd,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,IAAI,cAAc;AAGvB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,gBAAgB,KAAK;AACnC,gBAAQ,MAAM;AAAA,MAEhB,CAAC;AAAA,IACL;AAGA,WAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC1C,WAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EAChD;AAAA;AAAA,EAGO,kBAAkB,cAA4B;AACnD,SAAK,IAAI,oBAAoB;AAC7B,SAAK,eAAe;AAAA,EACtB;AAAA,EAEQ,WAAW,SAA8B;AAC/C,QAAI,CAAC,KAAK,UAAU,CAAC,KAAK,OAAQ;AAClC,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO,KAAK,0BAA6B,IAAI,KAAK,UAAU,OAAO,CAAC,EAAE;AAC3E,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEA,MAAc,UACZ,OACA,OACA;AACA,QAAI,CAAC,KAAK,OAAQ;AAClB,QAAI,KAAK,aAAa;AACpB,cAAQ;AACR;AAAA,IACF;AAEA,QAAI,OAAO,SAAS,KAAK,KAAK,iBAAiB,aAAa;AAE1D,WAAK,IAAI,gBAAgB,MAAM,UAAU,SAAS;AAClD,WAAK,OAAO,KAAK,KAAK;AAAA,IACxB,WAAW,YAAY,OAAO;AAE5B,uBAAiB,SAAS,OAAO;AAC/B,YAAI,KAAK,aAAa;AACpB,kBAAQ;AACR;AAAA,QACF;AACA,aAAK,IAAI,qBAAqB,MAAM,MAAM,SAAS;AACnD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF,OAAO;AACL,WAAK,IAAI,uBAAuB,KAAK,EAAE;AAAA,IACzC;AAAA,EACF;AAAA,EAEQ,UAAU;AAChB,QAAI,CAAC,KAAK,OAAQ;AAClB,SAAK,IAAI,mBAAmB;AAC5B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAGD,SAAK,SAAS;AACd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,sCAAsC;AACnD;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,IAAI,YAAY,GAAG,EAAE;AAE1B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,IAAI,mBAAmB,QAAQ,UAAU,SAAS;AACvD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAC7B,QAAI,CAAC,KAAK,OAAQ;AAGlB,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,KAAK,OAAO,iBAAiB;AAC/B,cAAM,WAAW,UAAU,KAAK,IAAI,CAAC;AACrC,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE,aAAK,IAAI,iBAAiB,QAAQ,EAAE;AAAA,MACtC;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,IAAI,2BAA2B;AACpC;AAAA,MACF;AAEA,WAAK,IAAI,oBAAoB,UAAU;AAGvC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,qCAAqC;AAC9C;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,iCAAiC;AAC1C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,gBAAgB,KAAK;AAAA,IAErC;AAAA,EACF;AAAA;AAAA,EAGA,MAAa,OAAO,SAAuC;AACzD,QAAI,CAAC,KAAK,UAAU,CAAC,KAAK,OAAQ;AAClC,QAAI,QAAQ;AAEZ,QAAI,UAAU,OAAO,YAAY,WAAW,UAAU,QAAQ;AAC9D,UAAM,WAAW,OAAO,YAAY,WAAW,SAAY,QAAQ;AAGnE,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,IAAI,sBAAsB,OAAO;AACtC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,SAAS,CAAC;AAGxD,UAAI,CAAC,KAAK,OAAO,YAAY;AAC3B,YAAI;AACF,gBAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AAGnD,gBAAM,QAAQ,MAAM;AAClB,iBAAK,IAAI,iDAAiD;AAC1D,kBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,gBAAI,aAAa,SAAS,aAAa;AACrC,mBAAK,aAAa,IAAI;AACtB,mBAAK,QAAQ,kEAAkD;AAAA,YACjE;AAAA,UACF;AAGA,gBAAM,KAAK,UAAU,OAAO,KAAK;AAAA,QACnC,SAAS,OAAO;AACd,kBAAQ,MAAM,gBAAgB,KAAK;AAAA,QAErC;AAAA,MACF;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,IAAI,iBAAiB;AAC1B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,OAAO,SAAgB;AAC7B,QAAI,CAAC,KAAK,QAAQ,SAAU;AAC5B,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,QAAQ,MAAM,KAAK;AACzB,SAAK,YAAY;AACjB,YAAQ,IAAI,WAAW,KAAK,OAAO,GAAG,OAAO;AAAA,EAC/C;AACF;;;AEpQO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
1
+ {"version":3,"sources":["../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["import * as fs from 'fs'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n public socket: WebSocket | null = null\n public config: CallConfig | null = null\n\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n // Enable speaker streaming\n private speakerStreamingEnabled = false\n\n constructor(socket: WebSocket, config: CallConfig) {\n this.socket = socket\n this.config = config\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.log(`Call started`)\n\n // Assistant speaks first\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[CallSocket]', error)\n socket?.close()\n // TODO: Implement retry\n })\n }\n\n // Listen to events\n socket.on('close', this.onClose.bind(this))\n socket.on('message', this.onMessage.bind(this))\n }\n\n // Reset conversation\n public resetConversation(conversation: Conversation) {\n this.log('Reset conversation')\n this.conversation = conversation\n }\n\n private addMessage(message: ConversationMessage) {\n if (!this.socket || !this.config) return\n this.conversation.push(message)\n this.socket.send(`${CallServerCommands.Message} ${JSON.stringify(message)}`)\n this.config.onMessage?.(message)\n }\n\n private async sendAudio(\n audio: ArrayBuffer | NodeJS.ReadableStream,\n abort?: () => void\n ) {\n if (!this.socket) return\n if (this.abortAnswer) {\n abort?.()\n return\n }\n\n if (Buffer.isBuffer(audio) || audio instanceof ArrayBuffer) {\n // Whole audio as a single buffer\n this.log(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n } else if ('paused' in audio) {\n // Enable speaker streaming if not already enabled\n if (!this.speakerStreamingEnabled) {\n this.socket.send(CallServerCommands.EnableSpeakerStreaming)\n this.speakerStreamingEnabled = true\n }\n\n // Audio as a stream\n for await (const chunk of audio) {\n if (this.abortAnswer) {\n abort?.()\n return\n }\n this.log(`Send audio chunk (${chunk.length} bytes)`)\n this.socket.send(chunk)\n }\n } else {\n this.log(`Unknown audio type: ${audio}`)\n }\n }\n\n private onClose() {\n if (!this.config) return\n this.log('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n this.socket = null\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[CallSocket] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.log(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.log(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n if (!this.config) return\n\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (this.config.debugSaveSpeech) {\n const filename = `speech-${Date.now()}.ogg`\n fs.writeFileSync(filename, Buffer.from(await blob.arrayBuffer()))\n this.log(`Saved speech: ${filename}`)\n }\n\n // STT: Get transcript and send to client\n const transcript = await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.log('Ignoring empty transcript')\n return\n }\n\n this.log('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.log('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.log('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[CallSocket]', error)\n // TODO: Implement retry\n }\n }\n\n // Add assistant message and send to client with audio (TTS)\n public async answer(message: string | ConversationMessage) {\n if (!this.socket || !this.config) return\n let isEnd = false\n\n let content = typeof message === 'string' ? message : message.content\n const metadata = typeof message === 'string' ? undefined : message.metadata\n\n // Detect end of interview\n if (content.includes(END_INTERVIEW)) {\n content = content.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (content.length) {\n // Send answer to client\n this.log('Assistant message:', message)\n this.addMessage({ role: 'assistant', content, metadata })\n\n // TTS: Generate answer audio\n if (!this.config.disableTTS) {\n try {\n const audio = await this.config.text2Speech(content)\n\n // Remove last assistant message if aborted\n const abort = () => {\n this.log('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket?.send(CallServerCommands.CancelLastAssistantMessage)\n }\n }\n\n // Send audio to client\n await this.sendAudio(audio, abort)\n } catch (error) {\n console.error('[CallSocket]', error)\n // TODO: Implement retry\n }\n }\n }\n\n // End of call\n if (isEnd) {\n this.log('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private log(...message: any[]) {\n if (!this.config?.debugLog) return\n const now = Date.now()\n const delta = now - this.lastDebug\n this.lastDebug = now\n console.log(`[Debug +${delta}ms]`, ...message)\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n Message = 'message',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EnableSpeakerStreaming = 'enableSpeakerStreaming',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n debugLog?: boolean\n debugSaveSpeech?: boolean\n disableTTS?: boolean\n generateAnswer(\n conversation: Conversation\n ): Promise<string | ConversationMessage>\n speech2Text(audioBlob: Blob, prevMessage?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer | NodeJS.ReadableStream>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage<Data = any> {\n role: 'system' | 'user' | 'assistant'\n content: string\n metadata?: Data\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";AAAA,YAAY,QAAQ;;;ACAb,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,aAAU;AACV,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,4BAAyB;AACzB,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADIL,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAsBtB,YAAY,QAAmB,QAAoB;AArBnD,SAAO,SAA2B;AAClC,SAAO,SAA4B;AAEnC,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAM5B;AAAA,SAAQ,0BAA0B;AAGhC,SAAK,SAAS;AACd,SAAK,SAAS;AACd,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,IAAI,cAAc;AAGvB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,gBAAgB,KAAK;AACnC,gBAAQ,MAAM;AAAA,MAEhB,CAAC;AAAA,IACL;AAGA,WAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC1C,WAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EAChD;AAAA;AAAA,EAGO,kBAAkB,cAA4B;AACnD,SAAK,IAAI,oBAAoB;AAC7B,SAAK,eAAe;AAAA,EACtB;AAAA,EAEQ,WAAW,SAA8B;AAC/C,QAAI,CAAC,KAAK,UAAU,CAAC,KAAK,OAAQ;AAClC,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO,KAAK,0BAA6B,IAAI,KAAK,UAAU,OAAO,CAAC,EAAE;AAC3E,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEA,MAAc,UACZ,OACA,OACA;AACA,QAAI,CAAC,KAAK,OAAQ;AAClB,QAAI,KAAK,aAAa;AACpB,cAAQ;AACR;AAAA,IACF;AAEA,QAAI,OAAO,SAAS,KAAK,KAAK,iBAAiB,aAAa;AAE1D,WAAK,IAAI,gBAAgB,MAAM,UAAU,SAAS;AAClD,WAAK,OAAO,KAAK,KAAK;AAAA,IACxB,WAAW,YAAY,OAAO;AAE5B,UAAI,CAAC,KAAK,yBAAyB;AACjC,aAAK,OAAO,0DAA8C;AAC1D,aAAK,0BAA0B;AAAA,MACjC;AAGA,uBAAiB,SAAS,OAAO;AAC/B,YAAI,KAAK,aAAa;AACpB,kBAAQ;AACR;AAAA,QACF;AACA,aAAK,IAAI,qBAAqB,MAAM,MAAM,SAAS;AACnD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF,OAAO;AACL,WAAK,IAAI,uBAAuB,KAAK,EAAE;AAAA,IACzC;AAAA,EACF;AAAA,EAEQ,UAAU;AAChB,QAAI,CAAC,KAAK,OAAQ;AAClB,SAAK,IAAI,mBAAmB;AAC5B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAGD,SAAK,SAAS;AACd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,sCAAsC;AACnD;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,IAAI,YAAY,GAAG,EAAE;AAE1B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,IAAI,mBAAmB,QAAQ,UAAU,SAAS;AACvD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAC7B,QAAI,CAAC,KAAK,OAAQ;AAGlB,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,KAAK,OAAO,iBAAiB;AAC/B,cAAM,WAAW,UAAU,KAAK,IAAI,CAAC;AACrC,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE,aAAK,IAAI,iBAAiB,QAAQ,EAAE;AAAA,MACtC;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,IAAI,2BAA2B;AACpC;AAAA,MACF;AAEA,WAAK,IAAI,oBAAoB,UAAU;AAGvC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,qCAAqC;AAC9C;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,iCAAiC;AAC1C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,gBAAgB,KAAK;AAAA,IAErC;AAAA,EACF;AAAA;AAAA,EAGA,MAAa,OAAO,SAAuC;AACzD,QAAI,CAAC,KAAK,UAAU,CAAC,KAAK,OAAQ;AAClC,QAAI,QAAQ;AAEZ,QAAI,UAAU,OAAO,YAAY,WAAW,UAAU,QAAQ;AAC9D,UAAM,WAAW,OAAO,YAAY,WAAW,SAAY,QAAQ;AAGnE,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,IAAI,sBAAsB,OAAO;AACtC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,SAAS,CAAC;AAGxD,UAAI,CAAC,KAAK,OAAO,YAAY;AAC3B,YAAI;AACF,gBAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AAGnD,gBAAM,QAAQ,MAAM;AAClB,iBAAK,IAAI,iDAAiD;AAC1D,kBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,gBAAI,aAAa,SAAS,aAAa;AACrC,mBAAK,aAAa,IAAI;AACtB,mBAAK,QAAQ,kEAAkD;AAAA,YACjE;AAAA,UACF;AAGA,gBAAM,KAAK,UAAU,OAAO,KAAK;AAAA,QACnC,SAAS,OAAO;AACd,kBAAQ,MAAM,gBAAgB,KAAK;AAAA,QAErC;AAAA,MACF;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,IAAI,iBAAiB;AAC1B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,OAAO,SAAgB;AAC7B,QAAI,CAAC,KAAK,QAAQ,SAAU;AAC5B,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,QAAQ,MAAM,KAAK;AACzB,SAAK,YAAY;AACjB,YAAQ,IAAI,WAAW,KAAK,OAAO,GAAG,OAAO;AAAA,EAC/C;AACF;;;AE7QO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@micdrop/server",
3
- "version": "1.2.2",
3
+ "version": "1.3.0",
4
4
  "description": "A lib for Node.js that helps to use the mic and speaker for voice conversation",
5
5
  "author": "Lonestone",
6
6
  "license": "MIT",