@hamsa-ai/voice-agents-sdk 0.4.0-beta.1 → 0.4.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,527 @@
1
+ /**
2
+ * LiveKitManager - Main orchestrator for voice agent communication
3
+ *
4
+ * This class serves as the primary interface for managing real-time voice communication
5
+ * with AI agents using LiveKit WebRTC infrastructure. It coordinates four specialized
6
+ * modules to provide a comprehensive voice agent SDK:
7
+ *
8
+ * - Connection Management: Handles room connections, participants, and network state
9
+ * - Analytics Engine: Processes WebRTC statistics and performance metrics
10
+ * - Audio Management: Manages audio tracks, volume control, and quality monitoring
11
+ * - Tool Registry: Handles RPC method registration and client-side tool execution
12
+ *
13
+ * Key features:
14
+ * - Real-time audio streaming with automatic quality adjustment
15
+ * - Comprehensive analytics and monitoring capabilities
16
+ * - Client-side tool integration for extended agent functionality
17
+ * - Automatic reconnection and error recovery
18
+ * - Event-driven architecture for reactive applications
19
+ *
20
+ * @example
21
+ * ```typescript
22
+ * const manager = new LiveKitManager(
23
+ * 'wss://livekit.example.com',
24
+ * 'access_token',
25
+ * [customTool1, customTool2]
26
+ * );
27
+ *
28
+ * manager.on('connected', () => console.log('Connected to voice agent'));
29
+ * manager.on('answerReceived', (text) => console.log('Agent said:', text));
30
+ *
31
+ * await manager.connect();
32
+ * ```
33
+ */
34
+ import { EventEmitter } from 'events';
35
+ import type { Room } from 'livekit-client';
36
+ import { LiveKitAnalytics } from './livekit-analytics';
37
+ import { LiveKitAudioManager } from './livekit-audio-manager';
38
+ import { LiveKitConnection } from './livekit-connection';
39
+ import { LiveKitToolRegistry } from './livekit-tool-registry';
40
+ import type { AudioLevelsResult, CallAnalyticsResult, ConnectionStatsResult, ParticipantData, PerformanceMetricsResult, Tool, TrackStatsResult } from './types';
41
+ export type { AudioLevelsResult, CallAnalyticsResult, ConnectionStatsResult, ParticipantData, PerformanceMetricsResult, TrackStatsData, TrackStatsResult, } from './types';
42
+ /**
43
+ * Main LiveKitManager class that orchestrates voice agent communication
44
+ *
45
+ * This class extends EventEmitter to provide a reactive interface for handling
46
+ * voice agent interactions, real-time analytics, and WebRTC connection management.
47
+ */
48
+ export default class LiveKitManager extends EventEmitter {
49
+ #private;
50
+ /** Connection module - manages LiveKit room connections and participants */
51
+ connection: LiveKitConnection;
52
+ /** Analytics module - processes WebRTC stats and performance metrics */
53
+ analytics: LiveKitAnalytics;
54
+ /** Audio module - manages audio tracks, volume, and quality */
55
+ audioManager: LiveKitAudioManager;
56
+ /** Tool registry - handles client-side tool registration and RPC calls */
57
+ toolRegistry: LiveKitToolRegistry;
58
+ /** LiveKit WebSocket URL for room connection */
59
+ lkUrl: string;
60
+ /** JWT access token for authentication */
61
+ accessToken: string;
62
+ /**
63
+ * Creates a new LiveKitManager instance
64
+ *
65
+ * @param lkUrl - LiveKit WebSocket URL (e.g., 'wss://your-livekit.example.com')
66
+ * @param accessToken - JWT token for room access authentication
67
+ * @param tools - Array of client-side tools that agents can call during conversations
68
+ *
69
+ * @example
70
+ * ```typescript
71
+ * const customTool = {
72
+ * function_name: "getUserData",
73
+ * description: "Retrieves user information",
74
+ * parameters: [{ name: "userId", type: "string", description: "User ID" }],
75
+ * required: ["userId"],
76
+ * fn: async (userId: string) => ({ name: "John", email: "john@example.com" })
77
+ * };
78
+ *
79
+ * const manager = new LiveKitManager(
80
+ * 'wss://livekit.example.com',
81
+ * 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...',
82
+ * [customTool]
83
+ * );
84
+ * ```
85
+ */
86
+ constructor(lkUrl: string, accessToken: string, tools?: Tool[]);
87
+ /**
88
+ * Establishes connection to the LiveKit room and initializes voice agent communication
89
+ *
90
+ * This method performs the following operations:
91
+ * - Validates connection state to prevent duplicate connections
92
+ * - Updates analytics tracking for connection attempts
93
+ * - Establishes WebRTC connection to the LiveKit room
94
+ * - Triggers module initialization once connected
95
+ *
96
+ * @throws {Error} Connection errors from LiveKit (network issues, authentication failures, etc.)
97
+ *
98
+ * @example
99
+ * ```typescript
100
+ * try {
101
+ * await manager.connect();
102
+ * console.log('Successfully connected to voice agent');
103
+ * } catch (error) {
104
+ * console.error('Failed to connect:', error.message);
105
+ * }
106
+ * ```
107
+ */
108
+ connect(): Promise<void>;
109
+ /**
110
+ * Terminates the connection to the LiveKit room and performs cleanup
111
+ *
112
+ * This method safely disconnects from the voice agent and ensures all resources
113
+ * are properly released, including audio tracks, analytics timers, and event listeners.
114
+ *
115
+ * @example
116
+ * ```typescript
117
+ * await manager.disconnect();
118
+ * console.log('Disconnected from voice agent');
119
+ * ```
120
+ */
121
+ disconnect(): Promise<void>;
122
+ /**
123
+ * Pauses the voice conversation, stopping audio transmission and reception
124
+ *
125
+ * This temporarily halts communication with the voice agent while maintaining
126
+ * the underlying connection. Audio playback is paused and microphone input
127
+ * is muted until resume() is called.
128
+ *
129
+ * @example
130
+ * ```typescript
131
+ * manager.pause();
132
+ * console.log('Conversation paused');
133
+ *
134
+ * // Resume later
135
+ * setTimeout(() => manager.resume(), 5000);
136
+ * ```
137
+ */
138
+ pause(): void;
139
+ /**
140
+ * Resumes a paused voice conversation
141
+ *
142
+ * Restores audio transmission and reception, allowing continued communication
143
+ * with the voice agent. This reverses the effects of pause().
144
+ *
145
+ * @example
146
+ * ```typescript
147
+ * manager.resume();
148
+ * console.log('Conversation resumed');
149
+ * ```
150
+ */
151
+ resume(): void;
152
+ /**
153
+ * Adjusts the volume level for audio playback from the voice agent
154
+ *
155
+ * @param volume - Volume level between 0.0 (muted) and 1.0 (full volume)
156
+ *
157
+ * @example
158
+ * ```typescript
159
+ * // Set to half volume
160
+ * manager.setVolume(0.5);
161
+ *
162
+ * // Mute completely
163
+ * manager.setVolume(0);
164
+ *
165
+ * // Full volume
166
+ * manager.setVolume(1.0);
167
+ * ```
168
+ */
169
+ setVolume(volume: number): void;
170
+ /**
171
+ * Gets the current LiveKit room instance
172
+ *
173
+ * @returns The LiveKit Room object if connected, null otherwise
174
+ *
175
+ * @example
176
+ * ```typescript
177
+ * const room = manager.room;
178
+ * if (room) {
179
+ * console.log('Connected to room:', room.name);
180
+ * console.log('Participants:', room.remoteParticipants.size);
181
+ * }
182
+ * ```
183
+ */
184
+ get room(): Room | null;
185
+ /**
186
+ * Checks if currently connected to the voice agent
187
+ *
188
+ * @returns True if connected to LiveKit room, false otherwise
189
+ *
190
+ * @example
191
+ * ```typescript
192
+ * if (manager.isConnected) {
193
+ * console.log('Ready for voice communication');
194
+ * } else {
195
+ * console.log('Not connected - call connect() first');
196
+ * }
197
+ * ```
198
+ */
199
+ get isConnected(): boolean;
200
+ /**
201
+ * Checks if the conversation is currently paused
202
+ *
203
+ * @returns True if paused, false if active or disconnected
204
+ *
205
+ * @example
206
+ * ```typescript
207
+ * if (manager.isPaused) {
208
+ * console.log('Conversation is paused');
209
+ * showResumeButton();
210
+ * }
211
+ * ```
212
+ */
213
+ get isPaused(): boolean;
214
+ /**
215
+ * Gets the current audio volume level
216
+ *
217
+ * @returns Current volume between 0.0 (muted) and 1.0 (full volume)
218
+ *
219
+ * @example
220
+ * ```typescript
221
+ * const currentVolume = manager.volume;
222
+ * updateVolumeSlider(currentVolume);
223
+ * ```
224
+ */
225
+ get volume(): number;
226
+ /**
227
+ * Gets the set of active HTML audio elements
228
+ *
229
+ * @returns Set of HTMLAudioElement instances currently playing agent audio
230
+ *
231
+ * @example
232
+ * ```typescript
233
+ * const audioElements = manager.audioElements;
234
+ * console.log(`Active audio elements: ${audioElements.size}`);
235
+ * ```
236
+ */
237
+ get audioElements(): Set<HTMLAudioElement>;
238
+ /**
239
+ * Gets the array of registered client-side tools
240
+ *
241
+ * @returns Array of Tool objects available for agent execution
242
+ *
243
+ * @example
244
+ * ```typescript
245
+ * const registeredTools = manager.tools;
246
+ * console.log(`Available tools: ${registeredTools.map(t => t.function_name).join(', ')}`);
247
+ * ```
248
+ */
249
+ get tools(): Tool[];
250
+ /**
251
+ * Gets raw call statistics from the analytics module
252
+ *
253
+ * @returns Internal call statistics object with WebRTC metrics
254
+ * @internal
255
+ */
256
+ get callStats(): import("./types").CallStats;
257
+ /**
258
+ * Gets raw connection metrics from the analytics module
259
+ *
260
+ * @returns Internal connection metrics object
261
+ * @internal
262
+ */
263
+ get connectionMetrics(): import("./types").ConnectionMetrics;
264
+ /**
265
+ * Gets raw audio metrics from the analytics module
266
+ *
267
+ * @returns Internal audio metrics object
268
+ * @internal
269
+ */
270
+ get audioMetrics(): import("./types").AudioMetrics;
271
+ /**
272
+ * Gets raw performance metrics from the analytics module
273
+ *
274
+ * @returns Internal performance metrics object
275
+ * @internal
276
+ */
277
+ get performanceMetrics(): import("./types").PerformanceMetrics;
278
+ /**
279
+ * Gets the analytics collection interval timer
280
+ *
281
+ * @returns NodeJS.Timeout for the analytics interval, or null if not collecting
282
+ * @internal
283
+ */
284
+ get analyticsInterval(): NodeJS.Timeout | null;
285
+ /**
286
+ * Gets the timestamp when the call started
287
+ *
288
+ * @returns Unix timestamp in milliseconds when call began, null if not started
289
+ *
290
+ * @example
291
+ * ```typescript
292
+ * const startTime = manager.callStartTime;
293
+ * if (startTime) {
294
+ * const duration = Date.now() - startTime;
295
+ * console.log(`Call duration: ${Math.floor(duration / 1000)}s`);
296
+ * }
297
+ * ```
298
+ */
299
+ get callStartTime(): number | null;
300
+ /**
301
+ * Gets the map of active participants in the room
302
+ *
303
+ * @returns Map of participant SIDs to ParticipantData objects
304
+ * @internal Use getParticipants() for structured participant data
305
+ */
306
+ get participants(): Map<string, ParticipantData>;
307
+ /**
308
+ * Gets the raw track statistics map
309
+ *
310
+ * @returns Map of track IDs to track data objects
311
+ * @internal Use getTrackStats() for structured track statistics
312
+ */
313
+ get trackStats(): Map<string, import("./types").TrackStatsData>;
314
+ /**
315
+ * Retrieves current network connection statistics and quality metrics
316
+ *
317
+ * @returns Object containing latency, packet loss, bandwidth, quality rating, and connection counts
318
+ *
319
+ * @example
320
+ * ```typescript
321
+ * const stats = manager.getConnectionStats();
322
+ * console.log(`Latency: ${stats.latency}ms`);
323
+ * console.log(`Packet loss: ${stats.packetLoss}%`);
324
+ * console.log(`Connection quality: ${stats.quality}`);
325
+ *
326
+ * if (stats.quality === 'poor') {
327
+ * showNetworkWarning();
328
+ * }
329
+ * ```
330
+ */
331
+ getConnectionStats(): ConnectionStatsResult;
332
+ /**
333
+ * Retrieves current audio levels and quality metrics for both user and agent
334
+ *
335
+ * @returns Object containing audio levels, speaking times, quality metrics, pause state, and volume
336
+ *
337
+ * @example
338
+ * ```typescript
339
+ * const audio = manager.getAudioLevels();
340
+ *
341
+ * // Update audio level indicators in UI
342
+ * updateMeterBar('user-audio', audio.userAudioLevel);
343
+ * updateMeterBar('agent-audio', audio.agentAudioLevel);
344
+ *
345
+ * // Show speaking time statistics
346
+ * console.log(`User spoke for ${audio.userSpeakingTime / 1000}s`);
347
+ * console.log(`Agent spoke for ${audio.agentSpeakingTime / 1000}s`);
348
+ * ```
349
+ */
350
+ getAudioLevels(): AudioLevelsResult & {
351
+ isPaused: boolean;
352
+ volume: number;
353
+ };
354
+ /**
355
+ * Retrieves current performance metrics including response times and call duration
356
+ *
357
+ * @returns Object containing response times, network latency, call duration, and connection timing
358
+ *
359
+ * @example
360
+ * ```typescript
361
+ * const perf = manager.getPerformanceMetrics();
362
+ *
363
+ * // Monitor response time for agent interactions
364
+ * if (perf.responseTime > 3000) {
365
+ * console.warn('High response time detected:', perf.responseTime + 'ms');
366
+ * }
367
+ *
368
+ * // Display call duration
369
+ * const minutes = Math.floor(perf.callDuration / 60000);
370
+ * const seconds = Math.floor((perf.callDuration % 60000) / 1000);
371
+ * console.log(`Call duration: ${minutes}:${seconds.toString().padStart(2, '0')}`);
372
+ * ```
373
+ */
374
+ getPerformanceMetrics(): PerformanceMetricsResult;
375
+ /**
376
+ * Retrieves structured information about all participants in the room
377
+ *
378
+ * @returns Array of ParticipantData objects with identity, connection info, and metadata
379
+ *
380
+ * @example
381
+ * ```typescript
382
+ * const participants = manager.getParticipants();
383
+ *
384
+ * participants.forEach(participant => {
385
+ * console.log(`Participant: ${participant.identity}`);
386
+ * console.log(`Connected at: ${new Date(participant.connectionTime)}`);
387
+ *
388
+ * if (participant.metadata) {
389
+ * console.log(`Metadata: ${participant.metadata}`);
390
+ * }
391
+ * });
392
+ *
393
+ * // Find the agent participant
394
+ * const agent = participants.find(p => p.identity.includes('agent'));
395
+ * ```
396
+ */
397
+ getParticipants(): ParticipantData[];
398
+ /**
399
+ * Retrieves current audio track statistics and stream information
400
+ *
401
+ * @returns Object containing track counts, audio element info, and detailed track data
402
+ *
403
+ * @example
404
+ * ```typescript
405
+ * const trackStats = manager.getTrackStats();
406
+ *
407
+ * console.log(`Active tracks: ${trackStats.activeTracks}/${trackStats.totalTracks}`);
408
+ * console.log(`Audio elements: ${trackStats.audioElements}`);
409
+ *
410
+ * // Inspect individual tracks
411
+ * trackStats.trackDetails.forEach(([trackId, data]) => {
412
+ * console.log(`Track ${trackId}: ${data.kind} from ${data.participant}`);
413
+ * });
414
+ * ```
415
+ */
416
+ getTrackStats(): TrackStatsResult;
417
+ /**
418
+ * Retrieves comprehensive analytics combining all metrics into a single snapshot
419
+ *
420
+ * This is the primary method for accessing complete call analytics, combining
421
+ * connection statistics, audio metrics, performance data, participant info,
422
+ * track statistics, and call metadata into a unified result.
423
+ *
424
+ * @returns Complete analytics object with all available metrics and metadata
425
+ *
426
+ * @example
427
+ * ```typescript
428
+ * const analytics = manager.getCallAnalytics();
429
+ *
430
+ * // Log comprehensive call summary
431
+ * console.log('=== Call Analytics ===');
432
+ * console.log(`Duration: ${analytics.performanceMetrics.callDuration}ms`);
433
+ * console.log(`Quality: ${analytics.connectionStats.quality}`);
434
+ * console.log(`Participants: ${analytics.participants.length}`);
435
+ * console.log(`Tracks: ${analytics.trackStats.activeTracks}`);
436
+ *
437
+ * // Send to analytics service
438
+ * analyticsService.recordCall({
439
+ * sessionId: generateSessionId(),
440
+ * timestamp: Date.now(),
441
+ * data: analytics
442
+ * });
443
+ *
444
+ * // Check for quality issues
445
+ * if (analytics.connectionStats.packetLoss > 5) {
446
+ * reportNetworkIssue(analytics.connectionStats);
447
+ * }
448
+ * ```
449
+ */
450
+ getCallAnalytics(): CallAnalyticsResult;
451
+ /**
452
+ * Registers client-side tools that voice agents can call during conversations
453
+ *
454
+ * This method updates the available tools and registers them as RPC methods
455
+ * with the LiveKit room for remote execution by voice agents.
456
+ *
457
+ * @param tools - Optional array of Tool objects to register. If not provided,
458
+ * uses tools from constructor or previously set tools.
459
+ *
460
+ * @example
461
+ * ```typescript
462
+ * const userDataTool = {
463
+ * function_name: "getUserProfile",
464
+ * description: "Retrieves user profile information",
465
+ * parameters: [
466
+ * { name: "userId", type: "string", description: "User ID to lookup" }
467
+ * ],
468
+ * required: ["userId"],
469
+ * fn: async (userId: string) => {
470
+ * const user = await userService.getProfile(userId);
471
+ * return { name: user.name, email: user.email, plan: user.subscription };
472
+ * }
473
+ * };
474
+ *
475
+ * const weatherTool = {
476
+ * function_name: "getCurrentWeather",
477
+ * description: "Gets current weather for a location",
478
+ * parameters: [
479
+ * { name: "location", type: "string", description: "City name" }
480
+ * ],
481
+ * required: ["location"],
482
+ * fn: async (location: string) => {
483
+ * return await weatherAPI.getCurrent(location);
484
+ * }
485
+ * };
486
+ *
487
+ * // Register new tools after connection
488
+ * manager.registerTools([userDataTool, weatherTool]);
489
+ *
490
+ * // Agent can now call these tools during conversation
491
+ * manager.on('answerReceived', (text) => {
492
+ * console.log('Agent response:', text);
493
+ * // Agent might say: "I found your profile! You're on the premium plan."
494
+ * });
495
+ * ```
496
+ */
497
+ registerTools(tools?: Tool[]): void;
498
+ /**
499
+ * Performs comprehensive cleanup of all modules and resources
500
+ *
501
+ * This method ensures all resources are properly released, including:
502
+ * - WebRTC connections and media streams
503
+ * - Audio elements and playback resources
504
+ * - Analytics timers and event listeners
505
+ * - Tool registry and RPC handlers
506
+ *
507
+ * Called automatically on disconnect, but can be called manually for
508
+ * explicit resource management in complex applications.
509
+ *
510
+ * @example
511
+ * ```typescript
512
+ * // Explicit cleanup when component unmounts
513
+ * useEffect(() => {
514
+ * return () => {
515
+ * manager.cleanup();
516
+ * };
517
+ * }, []);
518
+ *
519
+ * // Cleanup before reconnecting with different configuration
520
+ * await manager.disconnect();
521
+ * manager.cleanup();
522
+ *
523
+ * const newManager = new LiveKitManager(newUrl, newToken, newTools);
524
+ * ```
525
+ */
526
+ cleanup(): void;
527
+ }