@minded-ai/mindedjs 1.0.52 → 1.0.53-patch-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/agent.d.ts +17 -9
  2. package/dist/agent.d.ts.map +1 -1
  3. package/dist/agent.js +75 -5
  4. package/dist/agent.js.map +1 -1
  5. package/dist/cli/index.js +0 -0
  6. package/dist/index.d.ts +3 -1
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/nodes/addPromptNode.d.ts.map +1 -1
  10. package/dist/nodes/addPromptNode.js +1 -0
  11. package/dist/nodes/addPromptNode.js.map +1 -1
  12. package/dist/platform/config.d.ts +3 -0
  13. package/dist/platform/config.d.ts.map +1 -1
  14. package/dist/platform/config.js +18 -1
  15. package/dist/platform/config.js.map +1 -1
  16. package/dist/platform/mindedConnection.d.ts +4 -4
  17. package/dist/platform/mindedConnection.d.ts.map +1 -1
  18. package/dist/platform/mindedConnection.js +8 -5
  19. package/dist/platform/mindedConnection.js.map +1 -1
  20. package/dist/platform/mindedConnectionTypes.d.ts +55 -2
  21. package/dist/platform/mindedConnectionTypes.d.ts.map +1 -1
  22. package/dist/platform/mindedConnectionTypes.js +11 -2
  23. package/dist/platform/mindedConnectionTypes.js.map +1 -1
  24. package/dist/types/Agent.types.d.ts +17 -0
  25. package/dist/types/Agent.types.d.ts.map +1 -1
  26. package/dist/types/Agent.types.js.map +1 -1
  27. package/dist/types/Flows.types.d.ts +12 -3
  28. package/dist/types/Flows.types.d.ts.map +1 -1
  29. package/dist/types/Flows.types.js +2 -0
  30. package/dist/types/Flows.types.js.map +1 -1
  31. package/dist/types/Voice.types.d.ts +5 -0
  32. package/dist/types/Voice.types.d.ts.map +1 -0
  33. package/dist/types/Voice.types.js +3 -0
  34. package/dist/types/Voice.types.js.map +1 -0
  35. package/dist/voice/elevenLabsUtils.d.ts +70 -0
  36. package/dist/voice/elevenLabsUtils.d.ts.map +1 -0
  37. package/dist/voice/elevenLabsUtils.js +20 -0
  38. package/dist/voice/elevenLabsUtils.js.map +1 -0
  39. package/dist/voice/voiceSession.d.ts +47 -0
  40. package/dist/voice/voiceSession.d.ts.map +1 -0
  41. package/dist/voice/voiceSession.js +208 -0
  42. package/dist/voice/voiceSession.js.map +1 -0
  43. package/package.json +5 -2
  44. package/src/agent.ts +91 -15
  45. package/src/index.ts +4 -0
  46. package/src/nodes/addPromptNode.ts +1 -0
  47. package/src/platform/config.ts +21 -1
  48. package/src/platform/mindedConnection.ts +15 -9
  49. package/src/platform/mindedConnectionTypes.ts +66 -2
  50. package/src/types/Agent.types.ts +18 -0
  51. package/src/types/Flows.types.ts +11 -1
  52. package/src/types/Voice.types.ts +4 -0
  53. package/src/voice/elevenLabsUtils.ts +101 -0
  54. package/src/voice/voiceSession.ts +251 -0
@@ -0,0 +1,251 @@
1
+ import { WebSocket } from 'ws';
2
+ import { Agent } from '../agent';
3
+ import {
4
+ MindedConnectionSocketMessageType,
5
+ } from '../platform/mindedConnectionTypes';
6
+ import { getConfig } from '../platform/config';
7
+ import {
8
+ ElevenLabsWebSocketEvent,
9
+ getSignedUrl,
10
+ } from './elevenLabsUtils';
11
+ import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
12
+
13
/**
 * Manages a single ElevenLabs voice conversation for one agent session.
 *
 * `init()` requests a signed URL, opens a WebSocket to it and installs handlers that
 * forward provider events to the callbacks registered through `onAudio`, `onMessage`,
 * `onInterruption` and `onDisconnect`. When `getConfig().dashboardConnected` is truthy,
 * audio and interruption events are also emitted on the agent's Minded connection.
 */
export class VoiceSession {
  private readonly agent: Agent;
  private readonly sessionId: string;
  private readonly firstMessage: string;
  private readonly voiceId?: string;
  private elevenLabsSocket?: WebSocket;
  // Ensures disconnect handling runs once even though hangup() also triggers the socket's close event.
  private disconnectHandled = false;

  private onAudioCallback?: (data: string) => void;
  private onInterruptionCallback?: () => void;
  private onMessageCallback?: (text: string, message: BaseMessage) => void;
  private onDisconnectCallback?: () => void;

  /**
   * @param agent Agent that owns this session; its `voiceSessions`, `mindedConnection` and `compiledGraph` are used here.
   * @param sessionId Identifier of the session this voice conversation belongs to.
   * @param firstMessage Sent to the provider as the `first_message` override.
   * @param voiceId Optional voice id sent as the `tts.voice_id` override.
   */
  constructor({ agent, sessionId, firstMessage, voiceId }: { agent: Agent; sessionId: string; firstMessage: string; voiceId?: string }) {
    console.debug('Starting voice session', { sessionId, firstMessage, voiceId });
    this.agent = agent;
    this.sessionId = sessionId;
    this.firstMessage = firstMessage;
    this.voiceId = voiceId;
  }

  /**
   * Initialize the voice conversation connection.
   *
   * @throws Error when the ElevenLabs key or agent id is missing from the config.
   */
  public async init(): Promise<void> {
    const { elevenLabsKey, elevenLabsAgentId } = getConfig();

    if (!elevenLabsKey) {
      throw new Error('Missing ElevenLabs key - set ELEVEN_LABS_KEY env var');
    }

    if (!elevenLabsAgentId) {
      throw new Error('Missing ElevenLabs agent id - set ELEVEN_LABS_AGENT_ID env var');
    }

    const signedUrl = await getSignedUrl({
      agentId: elevenLabsAgentId,
      apiKey: elevenLabsKey,
    });

    this.elevenLabsSocket = new WebSocket(signedUrl);
    this.setupSocketHandlers();
  }

  /**
   * Send a raw message to the provider, waiting for the socket to open first.
   *
   * @throws Error when the socket was never created or does not open in time.
   */
  private async sendToElevenLabs(message: string): Promise<void> {
    if (!this.elevenLabsSocket) {
      throw new Error('Socket not initialized');
    }
    await this.waitForSocketOpen(this.elevenLabsSocket);
    this.elevenLabsSocket.send(message);
  }

  /**
   * Fire-and-forget variant of `sendToElevenLabs`: failures are logged instead of
   * surfacing as unhandled promise rejections. The returned promise never rejects.
   */
  private async sendSafely(message: string, description: string): Promise<void> {
    try {
      await this.sendToElevenLabs(message);
    } catch (error) {
      console.error(`[ElevenLabsVoice] Error sending ${description}`, error);
    }
  }

  /**
   * Resolve once the socket is OPEN; reject after roughly 10 seconds, or as soon as
   * the socket is observed CLOSING/CLOSED (it can never open again).
   */
  private async waitForSocketOpen(socket: WebSocket): Promise<void> {
    if (socket.readyState === WebSocket.OPEN) {
      return;
    }
    return new Promise<void>((resolve, reject) => {
      const timeoutMs = 10000;
      const intervalMs = 100;
      // Convert the timeout into a number of polls; comparing the poll count to
      // milliseconds directly would stretch the timeout by a factor of intervalMs.
      const maxTries = Math.ceil(timeoutMs / intervalMs);
      let tries = 0;
      const interval = setInterval(() => {
        if (socket.readyState === WebSocket.OPEN) {
          clearInterval(interval);
          resolve();
          return;
        }
        tries++;
        const closed = socket.readyState === WebSocket.CLOSING || socket.readyState === WebSocket.CLOSED;
        if (closed || tries >= maxTries) {
          clearInterval(interval);
          reject(new Error('Socket not open'));
        }
      }, intervalMs);
    });
  }

  /**
   * Run disconnect handling exactly once: notify the disconnect callback and remove
   * this session from the agent's `voiceSessions`.
   */
  private finalizeDisconnect(): void {
    if (this.disconnectHandled) {
      return;
    }
    this.disconnectHandled = true;
    this.onDisconnectCallback?.();
    this.agent.voiceSessions.delete(this.sessionId);
  }

  /** Install the open/close/error/message handlers on the provider socket. */
  private setupSocketHandlers(): void {
    const socket = this.elevenLabsSocket;
    if (!socket) {
      throw new Error('Socket not initialized');
    }

    socket.onopen = () => {
      console.debug('Connected to voice provider');
      const initiationData: ConversationInitiationClientData = {
        type: 'conversation_initiation_client_data',
        conversation_config_override: {
          agent: {
            first_message: this.firstMessage,
            language: 'en',
          },
          ...(this.voiceId ? { tts: { voice_id: this.voiceId } } : {}),
        },
      };
      const contextualUpdate = {
        type: 'contextual_update',
        text: JSON.stringify({ sessionId: this.sessionId, mindedToken: getConfig().token }),
      };

      // The initiation payload must go out before the contextual update.
      void this.sendSafely(JSON.stringify(initiationData), 'conversation initiation').then(() =>
        this.sendSafely(JSON.stringify(contextualUpdate), 'contextual update'),
      );
    };

    socket.onclose = () => {
      console.debug('Disconnected from voice provider');
      this.finalizeDisconnect();
    };

    socket.onerror = (err: unknown) => {
      console.error('[ElevenLabsVoice] WebSocket error', err);
    };

    socket.onmessage = (event: { data: { toString(): string } }) => {
      let data: ElevenLabsWebSocketEvent;
      try {
        data = JSON.parse(event.data.toString()) as ElevenLabsWebSocketEvent;
      } catch (error) {
        // A malformed frame must not take the handler down.
        console.error('[ElevenLabsVoice] Failed to parse message from ElevenLabs', error);
        return;
      }

      void this.handleProviderEvent(data).catch((error: unknown) => {
        console.error('[ElevenLabsVoice] Error handling message from ElevenLabs', error);
      });
    };
  }

  /** Dispatch one parsed provider event to the matching callback or side effect. */
  private async handleProviderEvent(data: ElevenLabsWebSocketEvent): Promise<void> {
    switch (data.type) {
      case 'ping': {
        // Read the fields up front so the delayed closure does not rely on narrowing of `data`.
        const { event_id: eventId, ping_ms: pingMs } = data.ping_event;
        setTimeout(() => {
          void this.sendSafely(JSON.stringify({ type: 'pong', event_id: eventId }), 'pong');
        }, pingMs);
        break;
      }
      case 'user_transcript': {
        const transcript = data.user_transcription_event.user_transcript;
        console.debug('User transcript received', transcript);
        this.onMessageCallback?.(transcript, new HumanMessage(transcript));
        break;
      }
      case 'agent_response': {
        const response = data.agent_response_event.agent_response;
        this.onMessageCallback?.(response, new AIMessage(response));
        break;
      }
      case 'interruption':
        console.debug('Interruption received');
        this.onInterruptionCallback?.();
        // Send interruption event to dashboard if connected
        if (getConfig().dashboardConnected) {
          try {
            this.agent.mindedConnection?.emit(MindedConnectionSocketMessageType.DASHBOARD_VOICE_INTERRUPTION, {
              type: MindedConnectionSocketMessageType.DASHBOARD_VOICE_INTERRUPTION,
              sessionId: this.sessionId,
              timestamp: Date.now(),
            });
          } catch (error) {
            console.error('[ElevenLabsVoice] Error sending interruption to dashboard', error);
          }
        }
        break;
      case 'audio':
        this.onAudioCallback?.(data.audio_event.audio_base_64);
        if (getConfig().dashboardConnected) {
          try {
            await this.agent.mindedConnection?.awaitEmit(MindedConnectionSocketMessageType.DASHBOARD_VOICE_AGENT_AUDIO, {
              sessionId: this.sessionId,
              audioData: data.audio_event.audio_base_64,
            });
          } catch (error) {
            console.error('[ElevenLabsVoice] Error sending audio to dashboard', error);
          }
        }
        break;
      case 'conversation_initiation_metadata':
        console.debug('ElevenLabs conversation initiation metadata', data);
        break;
      case 'agent_response_correction':
        try {
          console.debug('Agent response correction received', data.agent_response_correction_event);
          await this.updateAgentResponse(
            data.agent_response_correction_event.original_agent_response,
            data.agent_response_correction_event.corrected_agent_response,
          );
        } catch (error) {
          console.error('[ElevenLabsVoice] Error updating agent response', error);
        }
        break;
      default:
        console.debug('Received unknown message from ElevenLabs', data);
        break;
    }
  }

  /**
   * Replace the content of the stored AI message equal to `originalAgentResponse`
   * with `correctedAgentResponse` and write the messages back to the graph state.
   * Logs a warning when no matching AI message exists.
   */
  private async updateAgentResponse(originalAgentResponse: string, correctedAgentResponse: string): Promise<void> {
    const langraphConfig = this.agent.getLangraphConfig(this.sessionId);
    const graphState = await this.agent.compiledGraph.getState(langraphConfig);
    const agentMessage = graphState.values.messages.find(
      (message: BaseMessage) => message.content === originalAgentResponse && message instanceof AIMessage,
    );
    if (!agentMessage) {
      console.warn('Agent message not found for correction', originalAgentResponse);
      return;
    }

    agentMessage.content = correctedAgentResponse;
    // The task list can be empty; in that case no node name is passed
    // instead of dereferencing an undefined task.
    const currentNodeId = graphState.tasks[graphState.tasks.length - 1]?.name;
    await this.agent.compiledGraph.updateState(
      langraphConfig,
      {
        messages: graphState.values.messages,
      },
      currentNodeId,
    );
  }

  /**
   * End the conversation: run disconnect handling (once) and close the provider socket.
   */
  public hangup(): void {
    this.finalizeDisconnect();
    this.elevenLabsSocket?.close();
  }

  /**
   * Set callback for audio data in base64 format
   */
  onAudio(callback: (data: string) => void): void {
    this.onAudioCallback = callback;
  }

  /**
   * Set callback for user transcripts and agent responses; receives the raw text
   * and the same text wrapped in a HumanMessage or AIMessage respectively.
   */
  onMessage(callback: (text: string, message: BaseMessage) => void): void {
    this.onMessageCallback = callback;
  }

  /**
   * Set callback for interruption events
   */
  onInterruption(callback: () => void): void {
    this.onInterruptionCallback = callback;
  }

  /**
   * Set callback for disconnect events
   */
  onDisconnect(callback: () => void): void {
    this.onDisconnectCallback = callback;
  }

  /**
   * User audio in base64 format. Send failures are logged, not thrown.
   */
  sendAudio(audioData: string): void {
    void this.sendSafely(JSON.stringify({ user_audio_chunk: audioData }), 'user audio');
  }
}
236
+
237
+ // ----------------- Helper Types -----------------
238
+
239
/** Agent section of the conversation config override. */
interface ConversationAgentOverride {
  first_message: string;
  language: string;
}

/** Optional text-to-speech section of the conversation config override. */
interface ConversationTtsOverride {
  voice_id: string;
}

/** Payload sent to ElevenLabs to start a conversation. */
interface ConversationInitiationClientData {
  type: 'conversation_initiation_client_data';
  conversation_config_override: {
    agent: ConversationAgentOverride;
    tts?: ConversationTtsOverride;
  };
}