@micdrop/server 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -1
- package/dist/index.d.mts +4 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.js +21 -26
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +20 -25
- package/dist/index.mjs.map +1 -1
- package/package.json +5 -6
package/README.md
CHANGED
|
@@ -41,6 +41,7 @@ const config: CallConfig = {
|
|
|
41
41
|
systemPrompt: 'You are a helpful assistant',
|
|
42
42
|
|
|
43
43
|
// Optional first message from assistant
|
|
44
|
+
// Omit to have the first message generated by generateAnswer
|
|
44
45
|
firstMessage: 'Hello!',
|
|
45
46
|
|
|
46
47
|
// Function to generate assistant responses
|
|
@@ -79,6 +80,18 @@ wss.on('connection', (ws) => {
|
|
|
79
80
|
})
|
|
80
81
|
```
|
|
81
82
|
|
|
83
|
+
## Demo
|
|
84
|
+
|
|
85
|
+
Check out the demo implementation in the [@micdrop/demo-server](../demo-server/README.md) package. It shows:
|
|
86
|
+
|
|
87
|
+
- Setting up a Fastify server with WebSocket support
|
|
88
|
+
- Configuring the CallSocket with custom handlers
|
|
89
|
+
- Basic authentication flow
|
|
90
|
+
- Example speech-to-text and text-to-speech implementations
|
|
91
|
+
- Error handling patterns
|
|
92
|
+
|
|
93
|
+
Here's a simplified version from the demo:
|
|
94
|
+
|
|
82
95
|
## Documentation
|
|
83
96
|
|
|
84
97
|
The server package provides several core components:
|
|
@@ -112,6 +125,15 @@ interface CallConfig {
|
|
|
112
125
|
// Optional first message from assistant
|
|
113
126
|
firstMessage?: string
|
|
114
127
|
|
|
128
|
+
// Enable debug logging with timestamps
|
|
129
|
+
debugLog?: boolean
|
|
130
|
+
|
|
131
|
+
// Save last speech audio file for debugging (speech.ogg); when enabled, the speech is only saved, not transcribed or answered
|
|
132
|
+
debugSaveSpeech?: boolean
|
|
133
|
+
|
|
134
|
+
// Disable text-to-speech conversion
|
|
135
|
+
disableTTS?: boolean
|
|
136
|
+
|
|
115
137
|
// Generate assistant's response
|
|
116
138
|
generateAnswer(conversation: Conversation): Promise<string>
|
|
117
139
|
|
|
@@ -176,6 +198,24 @@ enum CallServerCommands {
|
|
|
176
198
|
4. Server processes audio and responds with text/audio
|
|
177
199
|
5. Process continues until interview ends
|
|
178
200
|
|
|
201
|
+
## Ending the call
|
|
202
|
+
|
|
203
|
+
A call can end in two ways:
|
|
204
|
+
|
|
205
|
+
- When the client closes the websocket connection.
|
|
206
|
+
- When the generated answer contains the keyword "END_INTERVIEW".
|
|
207
|
+
|
|
208
|
+
You can prompt it like this:
|
|
209
|
+
|
|
210
|
+
```typescript
|
|
211
|
+
import { END_INTERVIEW } from '@micdrop/server'
|
|
212
|
+
|
|
213
|
+
const systemPrompt = `
|
|
214
|
+
You are a voice assistant interviewing the user.
|
|
215
|
+
To end the interview, briefly thank the user and say good bye, then say "${END_INTERVIEW}".
|
|
216
|
+
`
|
|
217
|
+
```
|
|
218
|
+
|
|
179
219
|
## Error Handling
|
|
180
220
|
|
|
181
221
|
The server implements standardized error handling with specific codes:
|
|
@@ -242,4 +282,6 @@ MIT
|
|
|
242
282
|
|
|
243
283
|
## Author
|
|
244
284
|
|
|
245
|
-
[
|
|
285
|
+
Originally developed for [Raconte.ai](https://www.raconte.ai)
|
|
286
|
+
|
|
287
|
+
by [Lonestone](https://www.lonestone.io) ([GitHub](https://github.com/lonestone))
|
package/dist/index.d.mts
CHANGED
|
@@ -14,6 +14,9 @@ declare enum CallServerCommands {
|
|
|
14
14
|
interface CallConfig {
|
|
15
15
|
systemPrompt: string;
|
|
16
16
|
firstMessage?: string;
|
|
17
|
+
debugLog?: boolean;
|
|
18
|
+
debugSaveSpeech?: boolean;
|
|
19
|
+
disableTTS?: boolean;
|
|
17
20
|
generateAnswer(conversation: Conversation): Promise<string>;
|
|
18
21
|
speech2Text(blob: Blob, prompt?: string): Promise<string>;
|
|
19
22
|
text2Speech(text: string): Promise<ArrayBuffer>;
|
|
@@ -46,7 +49,7 @@ declare class CallSocket {
|
|
|
46
49
|
private onMessage;
|
|
47
50
|
private onStopSpeaking;
|
|
48
51
|
private answer;
|
|
49
|
-
private
|
|
52
|
+
private log;
|
|
50
53
|
}
|
|
51
54
|
|
|
52
55
|
declare enum CallErrorCode {
|
package/dist/index.d.ts
CHANGED
|
@@ -14,6 +14,9 @@ declare enum CallServerCommands {
|
|
|
14
14
|
interface CallConfig {
|
|
15
15
|
systemPrompt: string;
|
|
16
16
|
firstMessage?: string;
|
|
17
|
+
debugLog?: boolean;
|
|
18
|
+
debugSaveSpeech?: boolean;
|
|
19
|
+
disableTTS?: boolean;
|
|
17
20
|
generateAnswer(conversation: Conversation): Promise<string>;
|
|
18
21
|
speech2Text(blob: Blob, prompt?: string): Promise<string>;
|
|
19
22
|
text2Speech(text: string): Promise<ArrayBuffer>;
|
|
@@ -46,7 +49,7 @@ declare class CallSocket {
|
|
|
46
49
|
private onMessage;
|
|
47
50
|
private onStopSpeaking;
|
|
48
51
|
private answer;
|
|
49
|
-
private
|
|
52
|
+
private log;
|
|
50
53
|
}
|
|
51
54
|
|
|
52
55
|
declare enum CallErrorCode {
|
package/dist/index.js
CHANGED
|
@@ -27,7 +27,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
27
27
|
));
|
|
28
28
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
29
|
|
|
30
|
-
// index.ts
|
|
30
|
+
// src/index.ts
|
|
31
31
|
var index_exports = {};
|
|
32
32
|
__export(index_exports, {
|
|
33
33
|
CallClientCommands: () => CallClientCommands,
|
|
@@ -61,9 +61,6 @@ var CallServerCommands = /* @__PURE__ */ ((CallServerCommands2) => {
|
|
|
61
61
|
})(CallServerCommands || {});
|
|
62
62
|
|
|
63
63
|
// src/CallSocket.ts
|
|
64
|
-
var debugEnabled = false;
|
|
65
|
-
var debugSaveFile = false;
|
|
66
|
-
var disableTTS = false;
|
|
67
64
|
var END_INTERVIEW = "END_INTERVIEW";
|
|
68
65
|
var CallSocket = class {
|
|
69
66
|
constructor(socket, config) {
|
|
@@ -78,7 +75,7 @@ var CallSocket = class {
|
|
|
78
75
|
// Chunks of user speech since user started speaking
|
|
79
76
|
this.chunks = [];
|
|
80
77
|
this.conversation = [{ role: "system", content: config.systemPrompt }];
|
|
81
|
-
this.
|
|
78
|
+
this.log(`Call started`);
|
|
82
79
|
if (config.firstMessage) {
|
|
83
80
|
this.answer(config.firstMessage);
|
|
84
81
|
} else {
|
|
@@ -98,7 +95,7 @@ var CallSocket = class {
|
|
|
98
95
|
this.config.onMessage?.(message);
|
|
99
96
|
}
|
|
100
97
|
onClose() {
|
|
101
|
-
this.
|
|
98
|
+
this.log("Connection closed");
|
|
102
99
|
this.abortAnswer = true;
|
|
103
100
|
const duration = Math.round((Date.now() - this.startTime) / 1e3);
|
|
104
101
|
this.config.onEnd?.({
|
|
@@ -116,7 +113,7 @@ var CallSocket = class {
|
|
|
116
113
|
}
|
|
117
114
|
if (message.byteLength < 15) {
|
|
118
115
|
const cmd = message.toString();
|
|
119
|
-
this.
|
|
116
|
+
this.log(`Command: ${cmd}`);
|
|
120
117
|
if (cmd === "startSpeaking" /* StartSpeaking */) {
|
|
121
118
|
this.isSpeaking = true;
|
|
122
119
|
this.abortAnswer = true;
|
|
@@ -127,7 +124,7 @@ var CallSocket = class {
|
|
|
127
124
|
await this.onStopSpeaking();
|
|
128
125
|
}
|
|
129
126
|
} else if (Buffer.isBuffer(message) && this.isSpeaking) {
|
|
130
|
-
this.
|
|
127
|
+
this.log(`Received chunk (${message.byteLength} bytes)`);
|
|
131
128
|
this.chunks.push(message);
|
|
132
129
|
}
|
|
133
130
|
}
|
|
@@ -137,7 +134,7 @@ var CallSocket = class {
|
|
|
137
134
|
const blob = new Blob(this.chunks, { type: "audio/ogg" });
|
|
138
135
|
this.chunks.length = 0;
|
|
139
136
|
try {
|
|
140
|
-
if (
|
|
137
|
+
if (this.config.debugSaveSpeech) {
|
|
141
138
|
const filePath = path.join(__dirname, "speech.ogg");
|
|
142
139
|
fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()));
|
|
143
140
|
return;
|
|
@@ -147,18 +144,18 @@ var CallSocket = class {
|
|
|
147
144
|
this.conversation[this.conversation.length - 1]?.content
|
|
148
145
|
);
|
|
149
146
|
if (!transcript) {
|
|
150
|
-
this.
|
|
147
|
+
this.log("Ignoring empty transcript");
|
|
151
148
|
return;
|
|
152
149
|
}
|
|
153
|
-
this.
|
|
150
|
+
this.log("User transcript:", transcript);
|
|
154
151
|
this.addMessage({ role: "user", content: transcript });
|
|
155
152
|
if (this.abortAnswer) {
|
|
156
|
-
this.
|
|
153
|
+
this.log("Answer aborted, no answer generated");
|
|
157
154
|
return;
|
|
158
155
|
}
|
|
159
156
|
const answer = await this.config.generateAnswer(this.conversation);
|
|
160
157
|
if (this.abortAnswer) {
|
|
161
|
-
this.
|
|
158
|
+
this.log("Answer aborted, ignoring answer");
|
|
162
159
|
return;
|
|
163
160
|
}
|
|
164
161
|
await this.answer(answer);
|
|
@@ -174,12 +171,12 @@ var CallSocket = class {
|
|
|
174
171
|
isEnd = true;
|
|
175
172
|
}
|
|
176
173
|
if (message.length) {
|
|
177
|
-
this.
|
|
174
|
+
this.log("Assistant message:", message);
|
|
178
175
|
this.addMessage({ role: "assistant", content: message });
|
|
179
|
-
if (!disableTTS) {
|
|
176
|
+
if (!this.config.disableTTS) {
|
|
180
177
|
const audio = await this.config.text2Speech(message);
|
|
181
178
|
if (this.abortAnswer) {
|
|
182
|
-
this.
|
|
179
|
+
this.log("Answer aborted, removing last assistant message");
|
|
183
180
|
const lastMessage = this.conversation[this.conversation.length - 1];
|
|
184
181
|
if (lastMessage?.role === "assistant") {
|
|
185
182
|
this.conversation.pop();
|
|
@@ -187,23 +184,21 @@ var CallSocket = class {
|
|
|
187
184
|
}
|
|
188
185
|
return;
|
|
189
186
|
}
|
|
190
|
-
this.
|
|
187
|
+
this.log(`Send audio: (${audio.byteLength} bytes)`);
|
|
191
188
|
this.socket.send(audio);
|
|
192
189
|
}
|
|
193
190
|
}
|
|
194
191
|
if (isEnd) {
|
|
195
|
-
this.
|
|
192
|
+
this.log("Interview ended");
|
|
196
193
|
this.socket.send("endInterview" /* EndInterview */);
|
|
197
194
|
}
|
|
198
195
|
}
|
|
199
|
-
|
|
200
|
-
if (!
|
|
201
|
-
const
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
);
|
|
206
|
-
this.lastDebug = nowTime;
|
|
196
|
+
log(...message) {
|
|
197
|
+
if (!this.config.debugLog) return;
|
|
198
|
+
const now = Date.now();
|
|
199
|
+
const delta = now - this.lastDebug;
|
|
200
|
+
this.lastDebug = now;
|
|
201
|
+
console.log(`[Debug +${delta}ms]`, ...message);
|
|
207
202
|
}
|
|
208
203
|
};
|
|
209
204
|
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../index.ts","../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["export * from './src/CallSocket'\nexport * from './src/errors'\nexport * from './src/types'\nexport * from './src/waitForParams'\n","import * as fs from 'fs'\nimport * as path from 'path'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nconst debugEnabled = false\nconst debugSaveFile = false\nconst disableTTS = false\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(\n public socket: WebSocket,\n public config: CallConfig\n ) {\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.debug(`Call started`)\n\n // Assistant speaks first\n\n // LLM: Generate answer\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[WS]', error)\n this.socket.close()\n })\n }\n\n // Listen to events\n this.socket.on('close', this.onClose.bind(this))\n this.socket.on('message', this.onMessage.bind(this))\n }\n\n private addMessage(message: ConversationMessage) {\n this.conversation.push(message)\n this.socket.send(\n `${\n message.role === 'user'\n ? 
CallServerCommands.UserMessage\n : CallServerCommands.AssistantMessage\n } ${message.content}`\n )\n this.config.onMessage?.(message)\n }\n\n private onClose() {\n this.debug('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n // @ts-ignore\n this.socket = null\n // @ts-ignore\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[WS] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.debug(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.debug(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (debugSaveFile) {\n const filePath = path.join(__dirname, 'speech.ogg')\n fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()))\n return\n }\n\n // STT: Get transcript and send to client\n const transcript = await 
this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.debug('Ignoring empty transcript')\n return\n }\n\n this.debug('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.debug('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.debug('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[WS]', error)\n this.socket.close()\n }\n }\n\n private async answer(message: string) {\n let isEnd = false\n\n // Detect end of interview\n if (message.includes(END_INTERVIEW)) {\n message = message.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (message.length) {\n // Send answer to client\n this.debug('Assistant message:', message)\n this.addMessage({ role: 'assistant', content: message })\n\n // TTS: Generate answer audio\n if (!disableTTS) {\n const audio = await this.config.text2Speech(message)\n if (this.abortAnswer) {\n // Remove last assistant message if aborted\n this.debug('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket.send(CallServerCommands.CancelLastAssistantMessage)\n }\n return\n }\n\n // Send audio to client\n this.debug(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n }\n }\n\n // End of call\n if (isEnd) {\n this.debug('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private debug(...message: any[]) {\n if (!debugEnabled) return\n const nowTime = Date.now()\n console.log(\n `[WS] [${nowTime - this.startTime} | ${nowTime - this.lastDebug}ms]`,\n ...message\n )\n this.lastDebug = 
nowTime\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n UserMessage = 'userMessage',\n AssistantMessage = 'assistantMessage',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n generateAnswer(conversation: Conversation): Promise<string>\n speech2Text(blob: Blob, prompt?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage {\n role: 'system' | 'user' | 'assistant'\n content: string\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // 
Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAoB;AACpB,WAAsB;;;ACDf,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,iBAAc;AACd,EAAAA,oBAAA,sBAAmB;AACnB,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADKZ,IAAM,eAAe;AACrB,IAAM,gBAAgB;AACtB,IAAM,aAAa;AAEZ,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAgBtB,YACS,QACA,QACP;AAFO;AACA;AAjBT,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAS1B,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,MAAM,cAAc;AAKzB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,QAAQ,KAAK;AAC3B,aAAK,OAAO,MAAM;AAAA,MACpB,CAAC;AAAA,IACL;AAGA,SAAK,OAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC/C,SAAK,OAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EACrD;AAAA,EAEQ,WAAW,SAA8B;AAC/C,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO;AAAA,MACV,GACE,QAAQ,SAAS,oFAGnB,IAAI,QAAQ,OAAO;AAAA,IACrB;AACA,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEQ,UAAU;AAChB,SAAK,MAAM,mBAAmB;AAC9B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAID,SAAK,SAAS;AAEd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,8BAA8B;AAC3C;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,MAAM,YAAY,GAAG,EAAE;AAE5B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAy
C;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,MAAM,mBAAmB,QAAQ,UAAU,SAAS;AACzD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAE7B,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,eAAe;AACjB,cAAM,WAAgB,UAAK,WAAW,YAAY;AAClD,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE;AAAA,MACF;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,MAAM,2BAA2B;AACtC;AAAA,MACF;AAEA,WAAK,MAAM,oBAAoB,UAAU;AAGzC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,MAAM,qCAAqC;AAChD;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,MAAM,iCAAiC;AAC5C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,QAAQ,KAAK;AAC3B,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAEA,MAAc,OAAO,SAAiB;AACpC,QAAI,QAAQ;AAGZ,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,MAAM,sBAAsB,OAAO;AACxC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,QAAQ,CAAC;AAGvD,UAAI,CAAC,YAAY;AACf,cAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AACnD,YAAI,KAAK,aAAa;AAEpB,eAAK,MAAM,iDAAiD;AAC5D,gBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,cAAI,aAAa,SAAS,aAAa;AACrC,iBAAK,aAAa,IAAI;AACtB,iBAAK,OAAO,kEAAkD;AAAA,UAChE;AACA;AAAA,QACF;AAGA,aAAK,MAAM,gBAAgB,MAAM,UAAU,SAAS;AACpD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,MAAM,iBAAiB;AAC5B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,SAAS,SAAgB;AAC/B,QAAI,CAAC,aAAc;AACnB,UAAM,UAAU,KAAK,IAAI;AACzB,YAAQ;AAAA,MACN,SAAS,UAAU,KAAK,SAAS,MAAM,UAAU,KAAK,SAAS;AAAA,MAC/D,GAAG;AAAA,IACL;AACA,SAAK,YAAY;AAAA,EACnB;AACF;;;AElOO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AA
Cb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["export * from './CallSocket'\nexport * from './errors'\nexport * from './types'\nexport * from './waitForParams'\n","import * as fs from 'fs'\nimport * as path from 'path'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(\n public socket: WebSocket,\n public config: CallConfig\n ) {\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.log(`Call started`)\n\n // Assistant speaks first\n\n // LLM: Generate answer\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[WS]', error)\n this.socket.close()\n })\n }\n\n // Listen to events\n this.socket.on('close', this.onClose.bind(this))\n this.socket.on('message', this.onMessage.bind(this))\n }\n\n private addMessage(message: ConversationMessage) {\n this.conversation.push(message)\n this.socket.send(\n `${\n message.role === 'user'\n ? 
CallServerCommands.UserMessage\n : CallServerCommands.AssistantMessage\n } ${message.content}`\n )\n this.config.onMessage?.(message)\n }\n\n private onClose() {\n this.log('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n // @ts-ignore\n this.socket = null\n // @ts-ignore\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[WS] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.log(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.log(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (this.config.debugSaveSpeech) {\n const filePath = path.join(__dirname, 'speech.ogg')\n fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()))\n return\n }\n\n // STT: Get transcript and send to client\n const transcript = 
await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.log('Ignoring empty transcript')\n return\n }\n\n this.log('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.log('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.log('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[WS]', error)\n this.socket.close()\n }\n }\n\n private async answer(message: string) {\n let isEnd = false\n\n // Detect end of interview\n if (message.includes(END_INTERVIEW)) {\n message = message.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (message.length) {\n // Send answer to client\n this.log('Assistant message:', message)\n this.addMessage({ role: 'assistant', content: message })\n\n // TTS: Generate answer audio\n if (!this.config.disableTTS) {\n const audio = await this.config.text2Speech(message)\n if (this.abortAnswer) {\n // Remove last assistant message if aborted\n this.log('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket.send(CallServerCommands.CancelLastAssistantMessage)\n }\n return\n }\n\n // Send audio to client\n this.log(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n }\n }\n\n // End of call\n if (isEnd) {\n this.log('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private log(...message: any[]) {\n if (!this.config.debugLog) return\n const now = Date.now()\n const delta = now - this.lastDebug\n this.lastDebug = now\n console.log(`[Debug +${delta}ms]`, ...message)\n 
}\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n UserMessage = 'userMessage',\n AssistantMessage = 'assistantMessage',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n debugLog?: boolean\n debugSaveSpeech?: boolean\n disableTTS?: boolean\n generateAnswer(conversation: Conversation): Promise<string>\n speech2Text(blob: Blob, prompt?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage {\n role: 'system' | 'user' | 'assistant'\n content: string\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n 
clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAoB;AACpB,WAAsB;;;ACDf,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,iBAAc;AACd,EAAAA,oBAAA,sBAAmB;AACnB,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADKL,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAgBtB,YACS,QACA,QACP;AAFO;AACA;AAjBT,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAS1B,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,IAAI,cAAc;AAKvB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,QAAQ,KAAK;AAC3B,aAAK,OAAO,MAAM;AAAA,MACpB,CAAC;AAAA,IACL;AAGA,SAAK,OAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC/C,SAAK,OAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EACrD;AAAA,EAEQ,WAAW,SAA8B;AAC/C,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO;AAAA,MACV,GACE,QAAQ,SAAS,oFAGnB,IAAI,QAAQ,OAAO;AAAA,IACrB;AACA,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEQ,UAAU;AAChB,SAAK,IAAI,mBAAmB;AAC5B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAID,SAAK,SAAS;AAEd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,8BAA8B;AAC3C;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,IAAI,YAAY,GAAG,EAAE;AAE1B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc
;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,IAAI,mBAAmB,QAAQ,UAAU,SAAS;AACvD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAE7B,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,KAAK,OAAO,iBAAiB;AAC/B,cAAM,WAAgB,UAAK,WAAW,YAAY;AAClD,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE;AAAA,MACF;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,IAAI,2BAA2B;AACpC;AAAA,MACF;AAEA,WAAK,IAAI,oBAAoB,UAAU;AAGvC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,qCAAqC;AAC9C;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,iCAAiC;AAC1C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,QAAQ,KAAK;AAC3B,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAEA,MAAc,OAAO,SAAiB;AACpC,QAAI,QAAQ;AAGZ,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,IAAI,sBAAsB,OAAO;AACtC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,QAAQ,CAAC;AAGvD,UAAI,CAAC,KAAK,OAAO,YAAY;AAC3B,cAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AACnD,YAAI,KAAK,aAAa;AAEpB,eAAK,IAAI,iDAAiD;AAC1D,gBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,cAAI,aAAa,SAAS,aAAa;AACrC,iBAAK,aAAa,IAAI;AACtB,iBAAK,OAAO,kEAAkD;AAAA,UAChE;AACA;AAAA,QACF;AAGA,aAAK,IAAI,gBAAgB,MAAM,UAAU,SAAS;AAClD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,IAAI,iBAAiB;AAC1B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,OAAO,SAAgB;AAC7B,QAAI,CAAC,KAAK,OAAO,SAAU;AAC3B,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,QAAQ,MAAM,KAAK;AACzB,SAAK,YAAY;AACjB,YAAQ,IAAI,WAAW,KAAK,OAAO,GAAG,OAAO;AAAA,EAC/C;AACF;;;AE5NO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGn
C,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
|
package/dist/index.mjs
CHANGED
|
@@ -18,9 +18,6 @@ var CallServerCommands = /* @__PURE__ */ ((CallServerCommands2) => {
|
|
|
18
18
|
})(CallServerCommands || {});
|
|
19
19
|
|
|
20
20
|
// src/CallSocket.ts
|
|
21
|
-
var debugEnabled = false;
|
|
22
|
-
var debugSaveFile = false;
|
|
23
|
-
var disableTTS = false;
|
|
24
21
|
var END_INTERVIEW = "END_INTERVIEW";
|
|
25
22
|
var CallSocket = class {
|
|
26
23
|
constructor(socket, config) {
|
|
@@ -35,7 +32,7 @@ var CallSocket = class {
|
|
|
35
32
|
// Chunks of user speech since user started speaking
|
|
36
33
|
this.chunks = [];
|
|
37
34
|
this.conversation = [{ role: "system", content: config.systemPrompt }];
|
|
38
|
-
this.
|
|
35
|
+
this.log(`Call started`);
|
|
39
36
|
if (config.firstMessage) {
|
|
40
37
|
this.answer(config.firstMessage);
|
|
41
38
|
} else {
|
|
@@ -55,7 +52,7 @@ var CallSocket = class {
|
|
|
55
52
|
this.config.onMessage?.(message);
|
|
56
53
|
}
|
|
57
54
|
onClose() {
|
|
58
|
-
this.
|
|
55
|
+
this.log("Connection closed");
|
|
59
56
|
this.abortAnswer = true;
|
|
60
57
|
const duration = Math.round((Date.now() - this.startTime) / 1e3);
|
|
61
58
|
this.config.onEnd?.({
|
|
@@ -73,7 +70,7 @@ var CallSocket = class {
|
|
|
73
70
|
}
|
|
74
71
|
if (message.byteLength < 15) {
|
|
75
72
|
const cmd = message.toString();
|
|
76
|
-
this.
|
|
73
|
+
this.log(`Command: ${cmd}`);
|
|
77
74
|
if (cmd === "startSpeaking" /* StartSpeaking */) {
|
|
78
75
|
this.isSpeaking = true;
|
|
79
76
|
this.abortAnswer = true;
|
|
@@ -84,7 +81,7 @@ var CallSocket = class {
|
|
|
84
81
|
await this.onStopSpeaking();
|
|
85
82
|
}
|
|
86
83
|
} else if (Buffer.isBuffer(message) && this.isSpeaking) {
|
|
87
|
-
this.
|
|
84
|
+
this.log(`Received chunk (${message.byteLength} bytes)`);
|
|
88
85
|
this.chunks.push(message);
|
|
89
86
|
}
|
|
90
87
|
}
|
|
@@ -94,7 +91,7 @@ var CallSocket = class {
|
|
|
94
91
|
const blob = new Blob(this.chunks, { type: "audio/ogg" });
|
|
95
92
|
this.chunks.length = 0;
|
|
96
93
|
try {
|
|
97
|
-
if (
|
|
94
|
+
if (this.config.debugSaveSpeech) {
|
|
98
95
|
const filePath = path.join(__dirname, "speech.ogg");
|
|
99
96
|
fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()));
|
|
100
97
|
return;
|
|
@@ -104,18 +101,18 @@ var CallSocket = class {
|
|
|
104
101
|
this.conversation[this.conversation.length - 1]?.content
|
|
105
102
|
);
|
|
106
103
|
if (!transcript) {
|
|
107
|
-
this.
|
|
104
|
+
this.log("Ignoring empty transcript");
|
|
108
105
|
return;
|
|
109
106
|
}
|
|
110
|
-
this.
|
|
107
|
+
this.log("User transcript:", transcript);
|
|
111
108
|
this.addMessage({ role: "user", content: transcript });
|
|
112
109
|
if (this.abortAnswer) {
|
|
113
|
-
this.
|
|
110
|
+
this.log("Answer aborted, no answer generated");
|
|
114
111
|
return;
|
|
115
112
|
}
|
|
116
113
|
const answer = await this.config.generateAnswer(this.conversation);
|
|
117
114
|
if (this.abortAnswer) {
|
|
118
|
-
this.
|
|
115
|
+
this.log("Answer aborted, ignoring answer");
|
|
119
116
|
return;
|
|
120
117
|
}
|
|
121
118
|
await this.answer(answer);
|
|
@@ -131,12 +128,12 @@ var CallSocket = class {
|
|
|
131
128
|
isEnd = true;
|
|
132
129
|
}
|
|
133
130
|
if (message.length) {
|
|
134
|
-
this.
|
|
131
|
+
this.log("Assistant message:", message);
|
|
135
132
|
this.addMessage({ role: "assistant", content: message });
|
|
136
|
-
if (!disableTTS) {
|
|
133
|
+
if (!this.config.disableTTS) {
|
|
137
134
|
const audio = await this.config.text2Speech(message);
|
|
138
135
|
if (this.abortAnswer) {
|
|
139
|
-
this.
|
|
136
|
+
this.log("Answer aborted, removing last assistant message");
|
|
140
137
|
const lastMessage = this.conversation[this.conversation.length - 1];
|
|
141
138
|
if (lastMessage?.role === "assistant") {
|
|
142
139
|
this.conversation.pop();
|
|
@@ -144,23 +141,21 @@ var CallSocket = class {
|
|
|
144
141
|
}
|
|
145
142
|
return;
|
|
146
143
|
}
|
|
147
|
-
this.
|
|
144
|
+
this.log(`Send audio: (${audio.byteLength} bytes)`);
|
|
148
145
|
this.socket.send(audio);
|
|
149
146
|
}
|
|
150
147
|
}
|
|
151
148
|
if (isEnd) {
|
|
152
|
-
this.
|
|
149
|
+
this.log("Interview ended");
|
|
153
150
|
this.socket.send("endInterview" /* EndInterview */);
|
|
154
151
|
}
|
|
155
152
|
}
|
|
156
|
-
|
|
157
|
-
if (!
|
|
158
|
-
const
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
);
|
|
163
|
-
this.lastDebug = nowTime;
|
|
153
|
+
log(...message) {
|
|
154
|
+
if (!this.config.debugLog) return;
|
|
155
|
+
const now = Date.now();
|
|
156
|
+
const delta = now - this.lastDebug;
|
|
157
|
+
this.lastDebug = now;
|
|
158
|
+
console.log(`[Debug +${delta}ms]`, ...message);
|
|
164
159
|
}
|
|
165
160
|
};
|
|
166
161
|
|
package/dist/index.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["import * as fs from 'fs'\nimport * as path from 'path'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nconst debugEnabled = false\nconst debugSaveFile = false\nconst disableTTS = false\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(\n public socket: WebSocket,\n public config: CallConfig\n ) {\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.debug(`Call started`)\n\n // Assistant speaks first\n\n // LLM: Generate answer\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[WS]', error)\n this.socket.close()\n })\n }\n\n // Listen to events\n this.socket.on('close', this.onClose.bind(this))\n this.socket.on('message', this.onMessage.bind(this))\n }\n\n private addMessage(message: ConversationMessage) {\n this.conversation.push(message)\n this.socket.send(\n `${\n message.role === 'user'\n ? 
CallServerCommands.UserMessage\n : CallServerCommands.AssistantMessage\n } ${message.content}`\n )\n this.config.onMessage?.(message)\n }\n\n private onClose() {\n this.debug('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n // @ts-ignore\n this.socket = null\n // @ts-ignore\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[WS] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.debug(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.debug(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (debugSaveFile) {\n const filePath = path.join(__dirname, 'speech.ogg')\n fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()))\n return\n }\n\n // STT: Get transcript and send to client\n const transcript = await 
this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.debug('Ignoring empty transcript')\n return\n }\n\n this.debug('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.debug('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.debug('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[WS]', error)\n this.socket.close()\n }\n }\n\n private async answer(message: string) {\n let isEnd = false\n\n // Detect end of interview\n if (message.includes(END_INTERVIEW)) {\n message = message.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (message.length) {\n // Send answer to client\n this.debug('Assistant message:', message)\n this.addMessage({ role: 'assistant', content: message })\n\n // TTS: Generate answer audio\n if (!disableTTS) {\n const audio = await this.config.text2Speech(message)\n if (this.abortAnswer) {\n // Remove last assistant message if aborted\n this.debug('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket.send(CallServerCommands.CancelLastAssistantMessage)\n }\n return\n }\n\n // Send audio to client\n this.debug(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n }\n }\n\n // End of call\n if (isEnd) {\n this.debug('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private debug(...message: any[]) {\n if (!debugEnabled) return\n const nowTime = Date.now()\n console.log(\n `[WS] [${nowTime - this.startTime} | ${nowTime - this.lastDebug}ms]`,\n ...message\n )\n this.lastDebug = 
nowTime\n }\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n UserMessage = 'userMessage',\n AssistantMessage = 'assistantMessage',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n generateAnswer(conversation: Conversation): Promise<string>\n speech2Text(blob: Blob, prompt?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage {\n role: 'system' | 'user' | 'assistant'\n content: string\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // 
Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";AAAA,YAAY,QAAQ;AACpB,YAAY,UAAU;;;ACDf,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,iBAAc;AACd,EAAAA,oBAAA,sBAAmB;AACnB,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADKZ,IAAM,eAAe;AACrB,IAAM,gBAAgB;AACtB,IAAM,aAAa;AAEZ,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAgBtB,YACS,QACA,QACP;AAFO;AACA;AAjBT,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAS1B,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,MAAM,cAAc;AAKzB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,QAAQ,KAAK;AAC3B,aAAK,OAAO,MAAM;AAAA,MACpB,CAAC;AAAA,IACL;AAGA,SAAK,OAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC/C,SAAK,OAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EACrD;AAAA,EAEQ,WAAW,SAA8B;AAC/C,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO;AAAA,MACV,GACE,QAAQ,SAAS,oFAGnB,IAAI,QAAQ,OAAO;AAAA,IACrB;AACA,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEQ,UAAU;AAChB,SAAK,MAAM,mBAAmB;AAC9B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAID,SAAK,SAAS;AAEd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,8BAA8B;AAC3C;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,MAAM,YAAY,GAAG,EAAE;AAE5B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,WAGS,OAAO,SAAS,OAAO,KAA
K,KAAK,YAAY;AACpD,WAAK,MAAM,mBAAmB,QAAQ,UAAU,SAAS;AACzD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAE7B,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,eAAe;AACjB,cAAM,WAAgB,UAAK,WAAW,YAAY;AAClD,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE;AAAA,MACF;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,MAAM,2BAA2B;AACtC;AAAA,MACF;AAEA,WAAK,MAAM,oBAAoB,UAAU;AAGzC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,MAAM,qCAAqC;AAChD;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,MAAM,iCAAiC;AAC5C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,QAAQ,KAAK;AAC3B,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAEA,MAAc,OAAO,SAAiB;AACpC,QAAI,QAAQ;AAGZ,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,MAAM,sBAAsB,OAAO;AACxC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,QAAQ,CAAC;AAGvD,UAAI,CAAC,YAAY;AACf,cAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AACnD,YAAI,KAAK,aAAa;AAEpB,eAAK,MAAM,iDAAiD;AAC5D,gBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,cAAI,aAAa,SAAS,aAAa;AACrC,iBAAK,aAAa,IAAI;AACtB,iBAAK,OAAO,kEAAkD;AAAA,UAChE;AACA;AAAA,QACF;AAGA,aAAK,MAAM,gBAAgB,MAAM,UAAU,SAAS;AACpD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,MAAM,iBAAiB;AAC5B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,SAAS,SAAgB;AAC/B,QAAI,CAAC,aAAc;AACnB,UAAM,UAAU,KAAK,IAAI;AACzB,YAAQ;AAAA,MACN,SAAS,UAAU,KAAK,SAAS,MAAM,UAAU,KAAK,SAAS;AAAA,MAC/D,GAAG;AAAA,IACL;AACA,SAAK,YAAY;AAAA,EACnB;AACF;;;AElOO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAmB,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B
,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
|
|
1
|
+
{"version":3,"sources":["../src/CallSocket.ts","../src/types.ts","../src/errors.ts","../src/waitForParams.ts"],"sourcesContent":["import * as fs from 'fs'\nimport * as path from 'path'\nimport { WebSocket } from 'ws'\nimport {\n CallClientCommands,\n CallConfig,\n CallServerCommands,\n Conversation,\n ConversationMessage,\n} from './types'\n\nexport const END_INTERVIEW = 'END_INTERVIEW'\n\nexport class CallSocket {\n private startTime = Date.now()\n private lastDebug = Date.now()\n\n // An answer can be aborted if user is speaking\n private abortAnswer = false\n\n // When user is speaking, we're waiting to chunks or to stop\n private isSpeaking = false\n\n // Chunks of user speech since user started speaking\n private chunks: Buffer[] = []\n\n // Conversation history\n private conversation: Conversation\n\n constructor(\n public socket: WebSocket,\n public config: CallConfig\n ) {\n this.conversation = [{ role: 'system', content: config.systemPrompt }]\n this.log(`Call started`)\n\n // Assistant speaks first\n\n // LLM: Generate answer\n if (config.firstMessage) {\n this.answer(config.firstMessage)\n } else {\n this.config\n .generateAnswer(this.conversation)\n .then((answer) => this.answer(answer))\n .catch((error) => {\n console.error('[WS]', error)\n this.socket.close()\n })\n }\n\n // Listen to events\n this.socket.on('close', this.onClose.bind(this))\n this.socket.on('message', this.onMessage.bind(this))\n }\n\n private addMessage(message: ConversationMessage) {\n this.conversation.push(message)\n this.socket.send(\n `${\n message.role === 'user'\n ? 
CallServerCommands.UserMessage\n : CallServerCommands.AssistantMessage\n } ${message.content}`\n )\n this.config.onMessage?.(message)\n }\n\n private onClose() {\n this.log('Connection closed')\n this.abortAnswer = true\n const duration = Math.round((Date.now() - this.startTime) / 1000)\n\n // End call callback\n this.config.onEnd?.({\n conversation: this.conversation.slice(1), // Remove system message\n duration,\n })\n\n // Unset params\n // @ts-ignore\n this.socket = null\n // @ts-ignore\n this.config = null\n }\n\n private async onMessage(message: Buffer) {\n if (!Buffer.isBuffer(message)) {\n console.warn(`[WS] Message is not a buffer`)\n return\n }\n\n // Commands\n if (message.byteLength < 15) {\n const cmd = message.toString()\n this.log(`Command: ${cmd}`)\n\n if (cmd === CallClientCommands.StartSpeaking) {\n // User started speaking\n this.isSpeaking = true\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.Mute) {\n // User muted the call\n // Abort answer if there is generation in progress\n this.abortAnswer = true\n } else if (cmd === CallClientCommands.StopSpeaking) {\n // User stopped speaking\n this.isSpeaking = false\n await this.onStopSpeaking()\n }\n }\n\n // Audio chunk\n else if (Buffer.isBuffer(message) && this.isSpeaking) {\n this.log(`Received chunk (${message.byteLength} bytes)`)\n this.chunks.push(message)\n }\n }\n\n private async onStopSpeaking() {\n // Do nothing if there is no chunk\n if (this.chunks.length === 0) return\n\n this.abortAnswer = false\n\n // Combine audio blob\n const blob = new Blob(this.chunks, { type: 'audio/ogg' })\n\n // Reset chunks for next user speech\n this.chunks.length = 0\n\n try {\n // Save file to disk\n if (this.config.debugSaveSpeech) {\n const filePath = path.join(__dirname, 'speech.ogg')\n fs.writeFileSync(filePath, Buffer.from(await blob.arrayBuffer()))\n return\n }\n\n // STT: Get transcript and send to client\n const transcript = 
await this.config.speech2Text(\n blob,\n this.conversation[this.conversation.length - 1]?.content\n )\n if (!transcript) {\n this.log('Ignoring empty transcript')\n return\n }\n\n this.log('User transcript:', transcript)\n\n // Send transcript to client\n this.addMessage({ role: 'user', content: transcript })\n\n if (this.abortAnswer) {\n this.log('Answer aborted, no answer generated')\n return\n }\n\n // LLM: Generate answer\n const answer = await this.config.generateAnswer(this.conversation)\n if (this.abortAnswer) {\n this.log('Answer aborted, ignoring answer')\n return\n }\n\n await this.answer(answer)\n } catch (error) {\n console.error('[WS]', error)\n this.socket.close()\n }\n }\n\n private async answer(message: string) {\n let isEnd = false\n\n // Detect end of interview\n if (message.includes(END_INTERVIEW)) {\n message = message.replace(END_INTERVIEW, '').trim()\n isEnd = true\n }\n\n if (message.length) {\n // Send answer to client\n this.log('Assistant message:', message)\n this.addMessage({ role: 'assistant', content: message })\n\n // TTS: Generate answer audio\n if (!this.config.disableTTS) {\n const audio = await this.config.text2Speech(message)\n if (this.abortAnswer) {\n // Remove last assistant message if aborted\n this.log('Answer aborted, removing last assistant message')\n const lastMessage = this.conversation[this.conversation.length - 1]\n if (lastMessage?.role === 'assistant') {\n this.conversation.pop()\n this.socket.send(CallServerCommands.CancelLastAssistantMessage)\n }\n return\n }\n\n // Send audio to client\n this.log(`Send audio: (${audio.byteLength} bytes)`)\n this.socket.send(audio)\n }\n }\n\n // End of call\n if (isEnd) {\n this.log('Interview ended')\n this.socket.send(CallServerCommands.EndInterview)\n }\n }\n\n private log(...message: any[]) {\n if (!this.config.debugLog) return\n const now = Date.now()\n const delta = now - this.lastDebug\n this.lastDebug = now\n console.log(`[Debug +${delta}ms]`, ...message)\n 
}\n}\n","export enum CallClientCommands {\n StartSpeaking = 'startSpeaking',\n StopSpeaking = 'stopSpeaking',\n Mute = 'mute',\n}\n\nexport enum CallServerCommands {\n UserMessage = 'userMessage',\n AssistantMessage = 'assistantMessage',\n CancelLastAssistantMessage = 'cancelLastAssistantMessage',\n EndInterview = 'endInterview',\n}\n\nexport interface CallConfig {\n systemPrompt: string\n firstMessage?: string\n debugLog?: boolean\n debugSaveSpeech?: boolean\n disableTTS?: boolean\n generateAnswer(conversation: Conversation): Promise<string>\n speech2Text(blob: Blob, prompt?: string): Promise<string>\n text2Speech(text: string): Promise<ArrayBuffer>\n onMessage?(message: ConversationMessage): void\n onEnd?(call: CallSummary): void\n}\n\nexport interface CallSummary {\n conversation: Conversation\n duration: number\n}\n\nexport type Conversation = ConversationMessage[]\n\nexport interface ConversationMessage {\n role: 'system' | 'user' | 'assistant'\n content: string\n}\n","import WebSocket from 'ws'\n\nexport enum CallErrorCode {\n BadRequest = 4400,\n Unauthorized = 4401,\n NotFound = 4404,\n}\n\nexport class CallError extends Error {\n code: number\n\n constructor(code: number, message: string) {\n super(message)\n this.code = code\n }\n}\n\nexport function handleError(socket: WebSocket, error: unknown) {\n if (error instanceof CallError) {\n socket.close(error.code, error.message)\n } else {\n console.error(error)\n socket.close(1011)\n }\n socket.terminate()\n}\n","import { WebSocket } from 'ws'\nimport { CallError, CallErrorCode } from './errors'\n\nexport async function waitForParams<CallParams>(\n socket: WebSocket,\n validate: (params: any) => CallParams\n): Promise<CallParams> {\n return new Promise<CallParams>((resolve, reject) => {\n // Handle timeout\n const timeout = setTimeout(() => {\n reject(new CallError(CallErrorCode.BadRequest, 'Missing params'))\n }, 3000)\n\n const onParams = (payload: string) => {\n // Clear timeout and listener\n 
clearTimeout(timeout)\n socket.off('message', onParams)\n\n try {\n // Parse JSON payload\n const params = validate(JSON.parse(payload))\n resolve(params)\n } catch (error) {\n reject(new CallError(CallErrorCode.BadRequest, 'Invalid params'))\n }\n }\n\n // Listen for params\n socket.on('message', onParams)\n })\n}\n"],"mappings":";AAAA,YAAY,QAAQ;AACpB,YAAY,UAAU;;;ACDf,IAAK,qBAAL,kBAAKA,wBAAL;AACL,EAAAA,oBAAA,mBAAgB;AAChB,EAAAA,oBAAA,kBAAe;AACf,EAAAA,oBAAA,UAAO;AAHG,SAAAA;AAAA,GAAA;AAML,IAAK,qBAAL,kBAAKC,wBAAL;AACL,EAAAA,oBAAA,iBAAc;AACd,EAAAA,oBAAA,sBAAmB;AACnB,EAAAA,oBAAA,gCAA6B;AAC7B,EAAAA,oBAAA,kBAAe;AAJL,SAAAA;AAAA,GAAA;;;ADKL,IAAM,gBAAgB;AAEtB,IAAM,aAAN,MAAiB;AAAA,EAgBtB,YACS,QACA,QACP;AAFO;AACA;AAjBT,SAAQ,YAAY,KAAK,IAAI;AAC7B,SAAQ,YAAY,KAAK,IAAI;AAG7B;AAAA,SAAQ,cAAc;AAGtB;AAAA,SAAQ,aAAa;AAGrB;AAAA,SAAQ,SAAmB,CAAC;AAS1B,SAAK,eAAe,CAAC,EAAE,MAAM,UAAU,SAAS,OAAO,aAAa,CAAC;AACrE,SAAK,IAAI,cAAc;AAKvB,QAAI,OAAO,cAAc;AACvB,WAAK,OAAO,OAAO,YAAY;AAAA,IACjC,OAAO;AACL,WAAK,OACF,eAAe,KAAK,YAAY,EAChC,KAAK,CAAC,WAAW,KAAK,OAAO,MAAM,CAAC,EACpC,MAAM,CAAC,UAAU;AAChB,gBAAQ,MAAM,QAAQ,KAAK;AAC3B,aAAK,OAAO,MAAM;AAAA,MACpB,CAAC;AAAA,IACL;AAGA,SAAK,OAAO,GAAG,SAAS,KAAK,QAAQ,KAAK,IAAI,CAAC;AAC/C,SAAK,OAAO,GAAG,WAAW,KAAK,UAAU,KAAK,IAAI,CAAC;AAAA,EACrD;AAAA,EAEQ,WAAW,SAA8B;AAC/C,SAAK,aAAa,KAAK,OAAO;AAC9B,SAAK,OAAO;AAAA,MACV,GACE,QAAQ,SAAS,oFAGnB,IAAI,QAAQ,OAAO;AAAA,IACrB;AACA,SAAK,OAAO,YAAY,OAAO;AAAA,EACjC;AAAA,EAEQ,UAAU;AAChB,SAAK,IAAI,mBAAmB;AAC5B,SAAK,cAAc;AACnB,UAAM,WAAW,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,aAAa,GAAI;AAGhE,SAAK,OAAO,QAAQ;AAAA,MAClB,cAAc,KAAK,aAAa,MAAM,CAAC;AAAA;AAAA,MACvC;AAAA,IACF,CAAC;AAID,SAAK,SAAS;AAEd,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAc,UAAU,SAAiB;AACvC,QAAI,CAAC,OAAO,SAAS,OAAO,GAAG;AAC7B,cAAQ,KAAK,8BAA8B;AAC3C;AAAA,IACF;AAGA,QAAI,QAAQ,aAAa,IAAI;AAC3B,YAAM,MAAM,QAAQ,SAAS;AAC7B,WAAK,IAAI,YAAY,GAAG,EAAE;AAE1B,UAAI,6CAA0C;AAE5C,aAAK,aAAa;AAElB,aAAK,cAAc;AAAA,MACrB,WAAW,2BAAiC;AAG1C,aAAK,cAAc;AAAA,MACrB,WAAW,2CAAyC;AAElD,aAAK,aAAa;AAClB,cAAM,KAAK,eAAe;AAAA,MAC5B;AAAA,IACF,W
AGS,OAAO,SAAS,OAAO,KAAK,KAAK,YAAY;AACpD,WAAK,IAAI,mBAAmB,QAAQ,UAAU,SAAS;AACvD,WAAK,OAAO,KAAK,OAAO;AAAA,IAC1B;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB;AAE7B,QAAI,KAAK,OAAO,WAAW,EAAG;AAE9B,SAAK,cAAc;AAGnB,UAAM,OAAO,IAAI,KAAK,KAAK,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGxD,SAAK,OAAO,SAAS;AAErB,QAAI;AAEF,UAAI,KAAK,OAAO,iBAAiB;AAC/B,cAAM,WAAgB,UAAK,WAAW,YAAY;AAClD,QAAG,iBAAc,UAAU,OAAO,KAAK,MAAM,KAAK,YAAY,CAAC,CAAC;AAChE;AAAA,MACF;AAGA,YAAM,aAAa,MAAM,KAAK,OAAO;AAAA,QACnC;AAAA,QACA,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC,GAAG;AAAA,MACnD;AACA,UAAI,CAAC,YAAY;AACf,aAAK,IAAI,2BAA2B;AACpC;AAAA,MACF;AAEA,WAAK,IAAI,oBAAoB,UAAU;AAGvC,WAAK,WAAW,EAAE,MAAM,QAAQ,SAAS,WAAW,CAAC;AAErD,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,qCAAqC;AAC9C;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,KAAK,OAAO,eAAe,KAAK,YAAY;AACjE,UAAI,KAAK,aAAa;AACpB,aAAK,IAAI,iCAAiC;AAC1C;AAAA,MACF;AAEA,YAAM,KAAK,OAAO,MAAM;AAAA,IAC1B,SAAS,OAAO;AACd,cAAQ,MAAM,QAAQ,KAAK;AAC3B,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAEA,MAAc,OAAO,SAAiB;AACpC,QAAI,QAAQ;AAGZ,QAAI,QAAQ,SAAS,aAAa,GAAG;AACnC,gBAAU,QAAQ,QAAQ,eAAe,EAAE,EAAE,KAAK;AAClD,cAAQ;AAAA,IACV;AAEA,QAAI,QAAQ,QAAQ;AAElB,WAAK,IAAI,sBAAsB,OAAO;AACtC,WAAK,WAAW,EAAE,MAAM,aAAa,SAAS,QAAQ,CAAC;AAGvD,UAAI,CAAC,KAAK,OAAO,YAAY;AAC3B,cAAM,QAAQ,MAAM,KAAK,OAAO,YAAY,OAAO;AACnD,YAAI,KAAK,aAAa;AAEpB,eAAK,IAAI,iDAAiD;AAC1D,gBAAM,cAAc,KAAK,aAAa,KAAK,aAAa,SAAS,CAAC;AAClE,cAAI,aAAa,SAAS,aAAa;AACrC,iBAAK,aAAa,IAAI;AACtB,iBAAK,OAAO,kEAAkD;AAAA,UAChE;AACA;AAAA,QACF;AAGA,aAAK,IAAI,gBAAgB,MAAM,UAAU,SAAS;AAClD,aAAK,OAAO,KAAK,KAAK;AAAA,MACxB;AAAA,IACF;AAGA,QAAI,OAAO;AACT,WAAK,IAAI,iBAAiB;AAC1B,WAAK,OAAO,sCAAoC;AAAA,IAClD;AAAA,EACF;AAAA,EAEQ,OAAO,SAAgB;AAC7B,QAAI,CAAC,KAAK,OAAO,SAAU;AAC3B,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,QAAQ,MAAM,KAAK;AACzB,SAAK,YAAY;AACjB,YAAQ,IAAI,WAAW,KAAK,OAAO,GAAG,OAAO;AAAA,EAC/C;AACF;;;AE5NO,IAAK,gBAAL,kBAAKC,mBAAL;AACL,EAAAA,8BAAA,gBAAa,QAAb;AACA,EAAAA,8BAAA,kBAAe,QAAf;AACA,EAAAA,8BAAA,cAAW,QAAX;AAHU,SAAAA;AAAA,GAAA;AAML,IAAM,YAAN,cAAwB,MAAM;AAAA,EAGnC,YAAY,MAAc,SAAiB;AACzC,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAEO,SAAS,YAAY,QAAm
B,OAAgB;AAC7D,MAAI,iBAAiB,WAAW;AAC9B,WAAO,MAAM,MAAM,MAAM,MAAM,OAAO;AAAA,EACxC,OAAO;AACL,YAAQ,MAAM,KAAK;AACnB,WAAO,MAAM,IAAI;AAAA,EACnB;AACA,SAAO,UAAU;AACnB;;;ACtBA,eAAsB,cACpB,QACA,UACqB;AACrB,SAAO,IAAI,QAAoB,CAAC,SAAS,WAAW;AAElD,UAAM,UAAU,WAAW,MAAM;AAC/B,aAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,IAClE,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,YAAoB;AAEpC,mBAAa,OAAO;AACpB,aAAO,IAAI,WAAW,QAAQ;AAE9B,UAAI;AAEF,cAAM,SAAS,SAAS,KAAK,MAAM,OAAO,CAAC;AAC3C,gBAAQ,MAAM;AAAA,MAChB,SAAS,OAAO;AACd,eAAO,IAAI,iCAAoC,gBAAgB,CAAC;AAAA,MAClE;AAAA,IACF;AAGA,WAAO,GAAG,WAAW,QAAQ;AAAA,EAC/B,CAAC;AACH;","names":["CallClientCommands","CallServerCommands","CallErrorCode"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@micdrop/server",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.2",
|
|
4
4
|
"description": "A lib for Node.js that helps to use the mic and speaker for voice conversation",
|
|
5
5
|
"author": "Lonestone",
|
|
6
6
|
"license": "MIT",
|
|
@@ -33,13 +33,12 @@
|
|
|
33
33
|
"conversation"
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
|
-
"ws": "^8.18.
|
|
36
|
+
"ws": "^8.18.1"
|
|
37
37
|
},
|
|
38
38
|
"devDependencies": {
|
|
39
|
-
"@types/node": "^
|
|
39
|
+
"@types/node": "^22.13.4",
|
|
40
40
|
"@types/ws": "^8.5.14",
|
|
41
|
-
"
|
|
42
|
-
"
|
|
43
|
-
"typescript": "^5.0.0"
|
|
41
|
+
"tsup": "^8.3.6",
|
|
42
|
+
"typescript": "^5.7.3"
|
|
44
43
|
}
|
|
45
44
|
}
|