@minded-ai/mindedjs 1.0.53-alpha-latest → 1.0.53-patch-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41):
  1. package/dist/agent.d.ts +17 -9
  2. package/dist/agent.d.ts.map +1 -1
  3. package/dist/agent.js +75 -5
  4. package/dist/agent.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js.map +1 -1
  8. package/dist/nodes/addPromptNode.d.ts.map +1 -1
  9. package/dist/nodes/addPromptNode.js +1 -0
  10. package/dist/nodes/addPromptNode.js.map +1 -1
  11. package/dist/platform/config.d.ts +3 -0
  12. package/dist/platform/config.d.ts.map +1 -1
  13. package/dist/platform/config.js +18 -1
  14. package/dist/platform/config.js.map +1 -1
  15. package/dist/platform/mindedConnection.d.ts +4 -4
  16. package/dist/platform/mindedConnection.d.ts.map +1 -1
  17. package/dist/platform/mindedConnection.js +8 -5
  18. package/dist/platform/mindedConnection.js.map +1 -1
  19. package/dist/platform/mindedConnectionTypes.d.ts +55 -2
  20. package/dist/platform/mindedConnectionTypes.d.ts.map +1 -1
  21. package/dist/platform/mindedConnectionTypes.js +11 -2
  22. package/dist/platform/mindedConnectionTypes.js.map +1 -1
  23. package/dist/types/Agent.types.d.ts +17 -0
  24. package/dist/types/Agent.types.d.ts.map +1 -1
  25. package/dist/types/Agent.types.js.map +1 -1
  26. package/dist/types/Flows.types.d.ts +10 -2
  27. package/dist/types/Flows.types.d.ts.map +1 -1
  28. package/dist/types/Flows.types.js +1 -0
  29. package/dist/types/Flows.types.js.map +1 -1
  30. package/package.json +4 -1
  31. package/src/agent.ts +91 -15
  32. package/src/index.ts +4 -0
  33. package/src/nodes/addPromptNode.ts +1 -0
  34. package/src/platform/config.ts +21 -1
  35. package/src/platform/mindedConnection.ts +15 -9
  36. package/src/platform/mindedConnectionTypes.ts +66 -2
  37. package/src/types/Agent.types.ts +18 -0
  38. package/src/types/Flows.types.ts +10 -1
  39. package/src/types/Voice.types.ts +4 -0
  40. package/src/voice/elevenLabsUtils.ts +101 -0
  41. package/src/voice/voiceSession.ts +251 -0
@@ -0,0 +1,101 @@
1
/**
 * Shape of the request body used for the ElevenLabs LLM proxy.
 *
 * Only the fields declared here are typed; any additional fields the sender
 * may include are not modelled.
 */
export interface LLMRequestBody {
  /** Chat messages carried by the request, in the order they were supplied. */
  messages: {
    /** Speaker role for this message (kept as a free-form string). */
    role: string;
    /** Text content of the message. */
    content: string;
    /** Present only on messages that are tied to a tool call. */
    tool_call_id?: string;
  }[];
  /** Model identifier requested by the caller. */
  model: string;
  /** Upper bound on the number of generated tokens. */
  max_tokens: number;
  /** Whether the response is expected to be streamed. */
  stream: boolean;
  /** Sampling temperature. */
  temperature: number;
  /** Tool definitions are not used in this shape, so the field is always `null`. */
  tools: null;
}
14
+
15
+ // ----------------- ElevenLabs WebSocket event types -----------------
16
+
17
/** Common base for every event: each one carries a `type` discriminator. */
interface BaseEvent {
  type: string;
}

/** Transcription of what the user said. */
interface UserTranscriptEvent extends BaseEvent {
  type: 'user_transcript';
  user_transcription_event: { user_transcript: string };
}

/** Text of the agent's reply. */
interface AgentResponseEvent extends BaseEvent {
  type: 'agent_response';
  agent_response_event: { agent_response: string };
}

/** A chunk of agent audio, base64-encoded, tagged with its event id. */
interface AudioResponseEvent extends BaseEvent {
  type: 'audio';
  audio_event: { audio_base_64: string; event_id: number };
}

/** Signals that the agent was interrupted. */
interface InterruptionEvent extends BaseEvent {
  type: 'interruption';
  interruption_event: { event_id: number };
}

/** Keep-alive ping; carries the event id and a `ping_ms` value. */
interface PingEvent extends BaseEvent {
  type: 'ping';
  ping_event: { event_id: number; ping_ms: number };
}

/** Metadata sent when a conversation is initiated. */
interface ConversationInitiationMetadataEvent extends BaseEvent {
  type: 'conversation_initiation_metadata';
  conversation_initiation_metadata_event: { conversation_id: string };
}

/** Replaces a previously sent agent response with a corrected text. */
export interface AgentResponseCorrectionEvent extends BaseEvent {
  type: 'agent_response_correction';
  agent_response_correction_event: {
    original_agent_response: string;
    corrected_agent_response: string;
  };
}

/**
 * Discriminated union (on `type`) of all WebSocket events we care about.
 */
export type ElevenLabsWebSocketEvent =
  | UserTranscriptEvent
  | AgentResponseEvent
  | AudioResponseEvent
  | InterruptionEvent
  | PingEvent
  | ConversationInitiationMetadataEvent
  | AgentResponseCorrectionEvent;
82
+
83
+ // ----------------- ElevenLabs Utility Functions -----------------
84
+
85
/**
 * Fetch a signed conversation URL from the ElevenLabs API.
 *
 * @param agentId - ElevenLabs agent id; URL-encoded before being placed in the query string.
 * @param apiKey - ElevenLabs API key, sent in the `xi-api-key` header.
 * @returns The `signed_url` string from the response body.
 * @throws Error when the HTTP status is not OK, or when the response body
 *   does not contain a non-empty string `signed_url`.
 */
export const getSignedUrl = async ({ agentId, apiKey }: { agentId: string; apiKey: string }): Promise<string> => {
  // Encode the id so characters such as '&', '=' or spaces cannot alter the query string.
  const endpoint = `https://api.elevenlabs.io/v1/convai/conversation/get_signed_url?agent_id=${encodeURIComponent(agentId)}`;

  const response = await fetch(endpoint, {
    method: 'GET',
    headers: {
      'xi-api-key': apiKey,
    },
  });
  if (!response.ok) {
    throw new Error(`Failed to fetch signed url - status ${response.status}`);
  }

  // The body is external input: validate it instead of trusting its shape.
  const body: unknown = await response.json();
  const signedUrl = (body as { signed_url?: unknown } | null)?.signed_url;
  if (typeof signedUrl !== 'string' || signedUrl.length === 0) {
    throw new Error('Failed to fetch signed url - response did not contain signed_url');
  }
  return signedUrl;
};
@@ -0,0 +1,251 @@
1
+ import { WebSocket } from 'ws';
2
+ import { Agent } from '../agent';
3
+ import {
4
+ MindedConnectionSocketMessageType,
5
+ } from '../platform/mindedConnectionTypes';
6
+ import { getConfig } from '../platform/config';
7
+ import {
8
+ ElevenLabsWebSocketEvent,
9
+ getSignedUrl,
10
+ } from './elevenLabsUtils';
11
+ import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
12
+
13
+ /**
14
+ * Voice Conversation class for managing individual ElevenLabs voice conversations
15
+ */
16
+ export class VoiceSession {
17
+ private agent: Agent;
18
+ private sessionId: string;
19
+ private firstMessage: string;
20
+ private voiceId?: string;
21
+ private elevenLabsSocket?: WebSocket;
22
+
23
+ private onAudioCallback?: (data: string) => void;
24
+ private onInterruptionCallback?: () => void;
25
+ private onMessageCallback?: (text: string, message: BaseMessage) => void;
26
+ private onDisconnectCallback?: () => void;
27
+
28
+ constructor({ agent, sessionId, firstMessage, voiceId }: { agent: Agent; sessionId: string; firstMessage: string; voiceId?: string }) {
29
+ console.debug('Starting voice session', { sessionId, firstMessage, voiceId });
30
+ this.agent = agent;
31
+ this.sessionId = sessionId;
32
+ this.firstMessage = firstMessage;
33
+ this.voiceId = voiceId;
34
+ }
35
+
36
+ /**
37
+ * Initialize the voice conversation connection
38
+ */
39
+ public async init(): Promise<void> {
40
+ const { elevenLabsKey, elevenLabsAgentId } = getConfig();
41
+
42
+ if (!elevenLabsKey) {
43
+ throw new Error('Missing ElevenLabs key - set ELEVEN_LABS_KEY env var');
44
+ }
45
+
46
+ if (!elevenLabsAgentId) {
47
+ throw new Error('Missing ElevenLabs agent id - set ELEVEN_LABS_AGENT_ID env var');
48
+ }
49
+
50
+ const signedUrl = await getSignedUrl({
51
+ agentId: elevenLabsAgentId,
52
+ apiKey: elevenLabsKey
53
+ });
54
+
55
+ this.elevenLabsSocket = new WebSocket(signedUrl);
56
+ this.setupSocketHandlers();
57
+ }
58
+
59
+ private async sendToElevenLabs(message: string): Promise<void> {
60
+ if (!this.elevenLabsSocket) {
61
+ throw new Error('Socket not initialized');
62
+ }
63
+ await this.waitForSocketOpen(this.elevenLabsSocket);
64
+ this.elevenLabsSocket.send(message);
65
+ }
66
+
67
+ private async waitForSocketOpen(socket: WebSocket): Promise<void> {
68
+ if (socket.readyState === WebSocket.OPEN) {
69
+ return;
70
+ }
71
+ return new Promise((resolve, reject) => {
72
+ const timeoutMs = 10000;
73
+ const intervalMs = 100;
74
+ let tries = 0;
75
+ const interval = setInterval(() => {
76
+ if (socket.readyState === WebSocket.OPEN) {
77
+ clearInterval(interval);
78
+ resolve();
79
+ }
80
+ tries++;
81
+ if (tries >= timeoutMs) {
82
+ clearInterval(interval);
83
+ reject(new Error('Socket not open'));
84
+ }
85
+ }, intervalMs);
86
+ });
87
+ }
88
+
89
+ private setupSocketHandlers(): void {
90
+ const socket = this.elevenLabsSocket!; // non-null assertion once, we ensured it's assigned in init()
91
+
92
+ socket.onopen = () => {
93
+ console.debug('Connected to voice provider');
94
+ const initiationData: ConversationInitiationClientData = {
95
+ type: 'conversation_initiation_client_data',
96
+ conversation_config_override: {
97
+ agent: {
98
+ first_message: this.firstMessage,
99
+ language: 'en',
100
+ },
101
+ ...(this.voiceId ? { tts: { voice_id: this.voiceId } } : {}),
102
+ } as ConversationInitiationClientData['conversation_config_override'],
103
+ };
104
+
105
+ this.sendToElevenLabs(JSON.stringify(initiationData));
106
+ this.sendToElevenLabs(JSON.stringify({ type: 'contextual_update', text: JSON.stringify({ sessionId: this.sessionId, mindedToken: getConfig().token! }) }));
107
+ };
108
+
109
+ socket.onclose = () => {
110
+ console.debug('Disconnected from voice provider');
111
+ this.onDisconnectCallback?.();
112
+ this.agent.voiceSessions.delete(this.sessionId);
113
+ };
114
+
115
+ socket.onerror = (err: unknown) => {
116
+ console.error('[ElevenLabsVoice] WebSocket error', err);
117
+ };
118
+
119
+ socket.onmessage = async (event: any) => {
120
+ const data: ElevenLabsWebSocketEvent = JSON.parse(event.data.toString());
121
+
122
+ switch (data.type) {
123
+ case 'ping':
124
+ setTimeout(() => {
125
+ this.sendToElevenLabs(JSON.stringify({ type: 'pong', event_id: data.ping_event.event_id }));
126
+ }, data.ping_event.ping_ms);
127
+ break;
128
+ case 'user_transcript':
129
+ console.debug('User transcript received', data.user_transcription_event.user_transcript);
130
+ if (this.onMessageCallback) {
131
+ this.onMessageCallback(data.user_transcription_event.user_transcript, new HumanMessage(data.user_transcription_event.user_transcript));
132
+ }
133
+ break;
134
+ case 'agent_response':
135
+ if (this.onMessageCallback) {
136
+ this.onMessageCallback(data.agent_response_event.agent_response, new AIMessage(data.agent_response_event.agent_response));
137
+ }
138
+ break;
139
+ case 'interruption':
140
+ console.debug('Interruption received');
141
+ this.onInterruptionCallback?.();
142
+ // Send interruption event to dashboard if connected
143
+ if (getConfig().dashboardConnected) {
144
+ try {
145
+ this.agent.mindedConnection?.emit(MindedConnectionSocketMessageType.DASHBOARD_VOICE_INTERRUPTION, {
146
+ type: MindedConnectionSocketMessageType.DASHBOARD_VOICE_INTERRUPTION,
147
+ sessionId: this.sessionId,
148
+ timestamp: Date.now(),
149
+ });
150
+ } catch (error) {
151
+ console.error('[ElevenLabsVoice] Error sending interruption to dashboard', error);
152
+ }
153
+ }
154
+ break;
155
+ case 'audio':
156
+ if (this.onAudioCallback) {
157
+ this.onAudioCallback(data.audio_event.audio_base_64);
158
+ }
159
+ if (getConfig().dashboardConnected) {
160
+ try {
161
+ await this.agent.mindedConnection?.awaitEmit(MindedConnectionSocketMessageType.DASHBOARD_VOICE_AGENT_AUDIO, {
162
+ sessionId: this.sessionId,
163
+ audioData: data.audio_event.audio_base_64,
164
+ });
165
+ } catch (error) {
166
+ console.error('[ElevenLabsVoice] Error sending audio to dashboard', error);
167
+ }
168
+ }
169
+ break;
170
+ case 'conversation_initiation_metadata':
171
+ console.debug('ElevenLabs conversation initiation metadata', data);
172
+ break;
173
+ case 'agent_response_correction':
174
+ try {
175
+ console.debug('Agent response correction received', data.agent_response_correction_event);
176
+ await this.updateAgentResponse(data.agent_response_correction_event.original_agent_response, data.agent_response_correction_event.corrected_agent_response);
177
+ } catch (error) {
178
+ console.error('[ElevenLabsVoice] Error updating agent response', error);
179
+ }
180
+ break;
181
+ default:
182
+ console.debug('Received unknown message from ElevenLabs', data);
183
+ break;
184
+ }
185
+ };
186
+ }
187
+
188
+ private async updateAgentResponse(originalAgentResponse: string, correctedAgentResponse: string): Promise<void> {
189
+ const graphState = await this.agent.compiledGraph.getState(this.agent.getLangraphConfig(this.sessionId));
190
+ const agentMessage = graphState.values.messages.find((message: BaseMessage) => message.content === originalAgentResponse && message instanceof AIMessage);
191
+ if (agentMessage) {
192
+ agentMessage.content = correctedAgentResponse;
193
+ const currentNodeId = graphState.tasks[graphState.tasks.length - 1].name;
194
+ await this.agent.compiledGraph.updateState(this.agent.getLangraphConfig(this.sessionId), {
195
+ messages: graphState.values.messages,
196
+ }, currentNodeId);
197
+ }
198
+ else {
199
+ console.warn('Agent message not found for correction', originalAgentResponse);
200
+ }
201
+ }
202
+
203
+ public hangup(): void {
204
+ this.onDisconnectCallback?.();
205
+ this.elevenLabsSocket?.close();
206
+ }
207
+
208
+ /**
209
+ * Set callback for audio data in base64 format
210
+ */
211
+ onAudio(callback: (data: string) => void): void {
212
+ this.onAudioCallback = callback;
213
+ }
214
+
215
+ /**
216
+ * Set callback for interruption events
217
+ */
218
+ onInterruption(callback: () => void): void {
219
+ this.onInterruptionCallback = callback;
220
+ }
221
+
222
+ /**
223
+ * Set callback for disconnect events
224
+ */
225
+ onDisconnect(callback: () => void): void {
226
+ this.onDisconnectCallback = callback;
227
+ }
228
+
229
+ /**
230
+ * User audio in base64 format
231
+ */
232
+ sendAudio(audioData: string): void {
233
+ this.sendToElevenLabs(JSON.stringify({ user_audio_chunk: audioData }));
234
+ }
235
+ }
236
+
237
+ // ----------------- Helper Types -----------------
238
+
239
+ /** Shape of the message sent to ElevenLabs when starting a conversation */
240
+ interface ConversationInitiationClientData {
241
+ type: 'conversation_initiation_client_data';
242
+ conversation_config_override: {
243
+ agent: {
244
+ first_message: string;
245
+ language: string;
246
+ };
247
+ tts?: {
248
+ voice_id: string;
249
+ };
250
+ };
251
+ }