@hamsa-ai/voice-agents-sdk 0.3.1 → 0.4.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Hamsa Voice Agents Web SDK
2
2
 
3
- Hamsa Voice Agents Web SDK is a JavaScript library for integrating voice agents from <https://dashboard.tryhamsa.com>. This SDK provides a seamless way to incorporate voice interactions into your web applications.
3
+ Hamsa Voice Agents Web SDK is a JavaScript library for integrating voice agents from <https://dashboard.tryhamsa.com>. This SDK provides a seamless way to incorporate voice interactions into your web applications with high-quality real-time audio communication.
4
4
 
5
5
  ## Installation
6
6
 
@@ -17,7 +17,7 @@ npm i @hamsa-ai/voice-agents-sdk
17
17
  First, import the package in your code:
18
18
 
19
19
  ```javascript
20
- import { HamsaVoiceAgent } from '@hamsa-ai/voice-agents-sdk'
20
+ import { HamsaVoiceAgent } from "@hamsa-ai/voice-agents-sdk";
21
21
  ```
22
22
 
23
23
  Initialize the SDK with your API key:
@@ -37,9 +37,11 @@ Include the script from a CDN:
37
37
  Then, you can initialize the agent like this:
38
38
 
39
39
  ```javascript
40
- const agent = new HamsaVoiceAgent('YOUR_API_KEY');
40
+ const agent = new HamsaVoiceAgent("YOUR_API_KEY");
41
41
 
42
- agent.on("callStarted", () => { console.log("Conversation has started!"); });
42
+ agent.on("callStarted", () => {
43
+ console.log("Conversation has started!");
44
+ });
43
45
 
44
46
  // Example: Start a call
45
47
  // agent.start({ agentId: 'YOUR_AGENT_ID' });
@@ -53,11 +55,19 @@ Start a conversation with an existing agent by calling the "start" function. You
53
55
 
54
56
  ```javascript
55
57
  agent.start({
56
- agentId: YOUR_AGENT_ID,
57
- params: {
58
- param1: "NAME",
59
- param2: "NAME2"
60
- }
58
+ agentId: YOUR_AGENT_ID,
59
+ params: {
60
+ param1: "NAME",
61
+ param2: "NAME2",
62
+ },
63
+ voiceEnablement: true,
64
+ userId: "user-123", // Optional user tracking
65
+ preferHeadphonesForIosDevices: true, // iOS audio optimization
66
+ connectionDelay: {
67
+ android: 3000, // 3 second delay for Android
68
+ ios: 0,
69
+ default: 0,
70
+ },
61
71
  });
62
72
  ```
63
73
 
@@ -85,6 +95,135 @@ To end a conversation, simply call the "end" function:
85
95
  agent.end();
86
96
  ```
87
97
 
98
+ ## Advanced Audio Controls
99
+
100
+ The SDK provides comprehensive audio control features for professional voice applications:
101
+
102
+ ### Volume Management
103
+
104
+ ```javascript
105
+ // Set agent voice volume (0.0 to 1.0)
106
+ agent.setVolume(0.8);
107
+
108
+ // Get current output volume
109
+ const currentVolume = agent.getOutputVolume();
110
+ console.log(`Volume: ${Math.round(currentVolume * 100)}%`);
111
+
112
+ // Get user microphone input level
113
+ const inputLevel = agent.getInputVolume();
114
+ if (inputLevel > 0.1) {
115
+ showUserSpeakingIndicator();
116
+ }
117
+ ```
118
+
119
+ ### Microphone Control
120
+
121
+ ```javascript
122
+ // Mute/unmute microphone
123
+ agent.setMicMuted(true); // Mute
124
+ agent.setMicMuted(false); // Unmute
125
+
126
+ // Check mute status
127
+ if (agent.isMicMuted()) {
128
+ showUnmutePrompt();
129
+ }
130
+
131
+ // Toggle microphone
132
+ const currentMuted = agent.isMicMuted();
133
+ agent.setMicMuted(!currentMuted);
134
+
135
+ // Listen for microphone events
136
+ agent.on('micMuted', () => {
137
+ document.getElementById('micButton').classList.add('muted');
138
+ });
139
+
140
+ agent.on('micUnmuted', () => {
141
+ document.getElementById('micButton').classList.remove('muted');
142
+ });
143
+ ```
144
+
145
+ ### Audio Visualization
146
+
147
+ Create real-time audio visualizers using frequency data:
148
+
149
+ ```javascript
150
+ // Input visualizer (user's microphone)
151
+ function createInputVisualizer() {
152
+ const canvas = document.getElementById('inputVisualizer');
153
+ const ctx = canvas.getContext('2d');
154
+
155
+ function draw() {
156
+ const frequencyData = agent.getInputByteFrequencyData();
157
+
158
+ ctx.clearRect(0, 0, canvas.width, canvas.height);
159
+ const barWidth = canvas.width / frequencyData.length;
160
+
161
+ for (let i = 0; i < frequencyData.length; i++) {
162
+ const barHeight = (frequencyData[i] / 255) * canvas.height;
163
+ ctx.fillStyle = `hsl(${i * 2}, 70%, 60%)`;
164
+ ctx.fillRect(i * barWidth, canvas.height - barHeight, barWidth, barHeight);
165
+ }
166
+
167
+ requestAnimationFrame(draw);
168
+ }
169
+
170
+ draw();
171
+ }
172
+
173
+ // Output visualizer (agent's voice)
174
+ function createOutputVisualizer() {
175
+ const canvas = document.getElementById('outputVisualizer');
176
+ const ctx = canvas.getContext('2d');
177
+
178
+ agent.on('speaking', () => {
179
+ function draw() {
180
+ const frequencyData = agent.getOutputByteFrequencyData();
181
+
182
+ if (frequencyData.length > 0) {
183
+ ctx.clearRect(0, 0, canvas.width, canvas.height);
184
+
185
+ // Draw voice characteristics
186
+ for (let i = 0; i < frequencyData.length; i++) {
187
+ const barHeight = (frequencyData[i] / 255) * canvas.height;
188
+ ctx.fillStyle = `hsl(${240 + i}, 70%, 60%)`;
189
+ ctx.fillRect(i * 2, canvas.height - barHeight, 2, barHeight);
190
+ }
191
+
192
+ requestAnimationFrame(draw);
193
+ }
194
+ }
195
+ draw();
196
+ });
197
+ }
198
+ ```
199
+
200
+
201
+ ## Advanced Configuration Options
202
+
203
+ ### Platform-Specific Optimizations
204
+
205
+ ```javascript
206
+ agent.start({
207
+ agentId: "your-agent-id",
208
+
209
+ // Optimize audio for iOS devices
210
+ preferHeadphonesForIosDevices: true,
211
+
212
+ // Platform-specific delays to prevent audio cutoff
213
+ connectionDelay: {
214
+ android: 3000, // Android needs longer delay for audio mode switching
215
+ ios: 500, // Shorter delay for iOS
216
+ default: 1000 // Default for other platforms
217
+ },
218
+
219
+ // Disable wake lock for battery optimization
220
+ disableWakeLock: false,
221
+
222
+ // User tracking
223
+ userId: "customer-12345"
224
+ });
225
+ ```
226
+
88
227
  ## Events
89
228
 
90
229
  During the conversation, the SDK emits events to update your application about the conversation status.
@@ -92,29 +231,470 @@ During the conversation, the SDK emits events to update your application about t
92
231
  ### Conversation Status Events
93
232
 
94
233
  ```javascript
95
- agent.on("callStarted", () => { console.log("Conversation has started!"); });
96
- agent.on("callEnded", () => { console.log("Conversation has ended!"); });
97
- agent.on("callPaused", () => { console.log("The conversation is paused"); });
98
- agent.on("callResumed", () => { console.log("Conversation has resumed"); });
234
+ agent.on("callStarted", () => {
235
+ console.log("Conversation has started!");
236
+ });
237
+ agent.on("callEnded", () => {
238
+ console.log("Conversation has ended!");
239
+ });
240
+ agent.on("callPaused", () => {
241
+ console.log("The conversation is paused");
242
+ });
243
+ agent.on("callResumed", () => {
244
+ console.log("Conversation has resumed");
245
+ });
99
246
  ```
100
247
 
101
248
  ### Agent Status Events
102
249
 
103
250
  ```javascript
104
- agent.on("speaking", () => { console.log("The agent is speaking"); });
105
- agent.on("listening", () => { console.log("The agent is listening"); });
251
+ agent.on("speaking", () => {
252
+ console.log("The agent is speaking");
253
+ });
254
+ agent.on("listening", () => {
255
+ console.log("The agent is listening");
256
+ });
106
257
  ```
107
258
 
108
259
  ### Conversation Script Events
109
260
 
110
261
  ```javascript
111
- agent.on("transcriptionReceived", (text) => { console.log("User speech transcription received", text); });
112
- agent.on("answerReceived", (text) => { console.log("Agent answer received", text); });
262
+ agent.on("transcriptionReceived", (text) => {
263
+ console.log("User speech transcription received", text);
264
+ });
265
+ agent.on("answerReceived", (text) => {
266
+ console.log("Agent answer received", text);
267
+ });
113
268
  ```
114
269
 
115
270
  ### Error Events
116
271
 
117
272
  ```javascript
118
- agent.on("closed", () => { console.log("Conversation was closed"); });
119
- agent.on("error", (e) => { console.log("Error was received", e); });
273
+ agent.on("closed", () => {
274
+ console.log("Conversation was closed");
275
+ });
276
+ agent.on("error", (e) => {
277
+ console.log("Error was received", e);
278
+ });
279
+ ```
280
+
281
+ ### Advanced Analytics Events
282
+
283
+ The SDK provides comprehensive analytics for monitoring call quality, performance, and custom agent events:
284
+
285
+ ```javascript
286
+ // Real-time connection quality updates
287
+ agent.on("connectionQualityChanged", ({ quality, participant, metrics }) => {
288
+ console.log(`Connection quality: ${quality}`, metrics);
289
+ });
290
+
291
+ // Periodic analytics updates (every second during calls)
292
+ agent.on("analyticsUpdated", (analytics) => {
293
+ console.log("Call analytics:", analytics);
294
+ // Contains: connectionStats, audioMetrics, performanceMetrics, etc.
295
+ });
296
+
297
+ // Participant events
298
+ agent.on("participantConnected", (participant) => {
299
+ console.log("Participant joined:", participant.identity);
300
+ });
301
+
302
+ agent.on("participantDisconnected", (participant) => {
303
+ console.log("Participant left:", participant.identity);
304
+ });
305
+
306
+ // Track subscription events (audio/video streams)
307
+ agent.on("trackSubscribed", ({ track, participant, trackStats }) => {
308
+ console.log("New track:", track.kind, "from", participant);
309
+ });
310
+
311
+ agent.on("trackUnsubscribed", ({ track, participant }) => {
312
+ console.log("Track ended:", track.kind, "from", participant);
313
+ });
314
+
315
+ // Connection state changes
316
+ agent.on("reconnecting", () => {
317
+ console.log("Attempting to reconnect...");
318
+ });
319
+
320
+ agent.on("reconnected", () => {
321
+ console.log("Successfully reconnected");
322
+ });
323
+
324
+ // Custom events from agents
325
+ agent.on("customEvent", (eventType, eventData, metadata) => {
326
+ console.log(`Custom event: ${eventType}`, eventData);
327
+ // Examples: flow_navigation, tool_execution, agent_state_change
328
+ });
120
329
  ```
330
+
331
+ ## Analytics & Monitoring
332
+
333
+ The SDK provides comprehensive real-time analytics for monitoring call quality, performance metrics, and custom agent events. Access analytics data through both synchronous methods and event-driven updates.
334
+
335
+ ### Analytics Architecture
336
+
337
+ The SDK uses a clean modular design with four specialized components:
338
+
339
+ - **Connection Management**: Handles room connections, participants, and network state
340
+ - **Analytics Engine**: Processes WebRTC statistics and performance metrics
341
+ - **Audio Management**: Manages audio tracks, volume control, and quality monitoring
342
+ - **Tool Registry**: Handles RPC method registration and client-side tool execution
343
+
344
+ Access analytics data through both synchronous methods and event-driven updates.
345
+
346
+ ### Synchronous Analytics Methods
347
+
348
+ Get real-time analytics data instantly for dashboards and monitoring:
349
+
350
+ ```javascript
351
+ // Connection quality and network statistics
352
+ const connectionStats = agent.getConnectionStats();
353
+ console.log(connectionStats);
354
+ /*
355
+ {
356
+ latency: 45, // Network latency in ms
357
+ packetLoss: 0.1, // Packet loss percentage
358
+ bandwidth: 128000, // Current bandwidth usage
359
+ quality: 'good', // Connection quality: excellent/good/poor/lost
360
+ jitter: 2, // Network jitter
361
+ connectionAttempts: 1, // Total connection attempts
362
+ reconnectionAttempts: 0, // Reconnection attempts
363
+ isConnected: true // Current connection status
364
+ }
365
+ */
366
+
367
+ // Audio levels and quality metrics
368
+ const audioLevels = agent.getAudioLevels();
369
+ console.log(audioLevels);
370
+ /*
371
+ {
372
+ userAudioLevel: 0.8, // Current user audio level
373
+ agentAudioLevel: 0.3, // Current agent audio level
374
+ userSpeakingTime: 30000, // User speaking duration (ms)
375
+ agentSpeakingTime: 20000, // Agent speaking duration (ms)
376
+ audioDropouts: 0, // Audio interruption count
377
+ echoCancellationActive: true, // Echo cancellation status
378
+ volume: 1.0, // Current volume setting
379
+ isPaused: false // Pause state
380
+ }
381
+ */
382
+
383
+ // Performance metrics
384
+ const performance = agent.getPerformanceMetrics();
385
+ console.log(performance);
386
+ /*
387
+ {
388
+ responseTime: 1200, // Total response time
389
+ networkLatency: 45, // Network round-trip time
390
+ callDuration: 60000, // Current call duration (ms)
391
+ connectionEstablishedTime: 250, // Time to establish connection
392
+ reconnectionCount: 0 // Number of reconnections
393
+ }
394
+ */
395
+
396
+ // Participant information
397
+ const participants = agent.getParticipants();
398
+ console.log(participants);
399
+ /*
400
+ [
401
+ {
402
+ identity: "agent",
403
+ sid: "participant-sid",
404
+ connectionTime: 1638360000000,
405
+ metadata: "agent-metadata"
406
+ }
407
+ ]
408
+ */
409
+
410
+ // Track statistics (audio/video streams)
411
+ const trackStats = agent.getTrackStats();
412
+ console.log(trackStats);
413
+ /*
414
+ {
415
+ totalTracks: 2,
416
+ activeTracks: 2,
417
+ audioElements: 1,
418
+ trackDetails: [
419
+ ["track-id", { trackId: "track-id", kind: "audio", participant: "agent" }]
420
+ ]
421
+ }
422
+ */
423
+
424
+ // Complete analytics snapshot
425
+ const analytics = agent.getCallAnalytics();
426
+ console.log(analytics);
427
+ /*
428
+ {
429
+ connectionStats: { latency: 45, packetLoss: 0.1, quality: 'good', ... },
430
+ audioMetrics: { userAudioLevel: 0.8, agentAudioLevel: 0.3, ... },
431
+ performanceMetrics: { callDuration: 60000, responseTime: 1200, ... },
432
+ participants: [{ identity: 'agent', sid: 'participant-sid', ... }],
433
+ trackStats: { totalTracks: 2, activeTracks: 2, ... },
434
+ callStats: { connectionAttempts: 1, packetsLost: 0, ... },
435
+ metadata: {
436
+ callStartTime: 1638360000000,
437
+ isConnected: true,
438
+ isPaused: false,
439
+ volume: 1.0
440
+ }
441
+ }
442
+ */
443
+ ```
444
+
445
+ ### Real-time Dashboard Example
446
+
447
+ Build live monitoring dashboards using the analytics data:
448
+
449
+ ```javascript
450
+ // Update dashboard every second
451
+ const updateDashboard = () => {
452
+ const stats = agent.getConnectionStats();
453
+ const audio = agent.getAudioLevels();
454
+ const performance = agent.getPerformanceMetrics();
455
+
456
+ // Update UI elements
457
+ document.getElementById("latency").textContent = `${stats.latency}ms`;
458
+ document.getElementById("quality").textContent = stats.quality;
459
+ document.getElementById("duration").textContent = `${Math.floor(
460
+ performance.callDuration / 1000
461
+ )}s`;
462
+ document.getElementById("user-audio").style.width = `${
463
+ audio.userAudioLevel * 100
464
+ }%`;
465
+ document.getElementById("agent-audio").style.width = `${
466
+ audio.agentAudioLevel * 100
467
+ }%`;
468
+ };
469
+
470
+ // Start dashboard updates when call begins
471
+ agent.on("callStarted", () => {
472
+ const dashboardInterval = setInterval(updateDashboard, 1000);
473
+
474
+ agent.on("callEnded", () => {
475
+ clearInterval(dashboardInterval);
476
+ });
477
+ });
478
+ ```
479
+
480
+ ### Custom Event Tracking
481
+
482
+ Track custom events from your voice agents:
483
+
484
+ ```javascript
485
+ agent.on("customEvent", (eventType, eventData, metadata) => {
486
+ switch (eventType) {
487
+ case "flow_navigation":
488
+ console.log("Agent navigated:", eventData.from, "->", eventData.to);
489
+ // Track conversation flow
490
+ break;
491
+
492
+ case "tool_execution":
493
+ console.log(
494
+ "Tool called:",
495
+ eventData.toolName,
496
+ "Result:",
497
+ eventData.success
498
+ );
499
+ // Monitor tool usage
500
+ break;
501
+
502
+ case "agent_state_change":
503
+ console.log("Agent state:", eventData.state);
504
+ // Track agent behavior
505
+ break;
506
+
507
+ case "user_intent_detected":
508
+ console.log(
509
+ "User intent:",
510
+ eventData.intent,
511
+ "Confidence:",
512
+ eventData.confidence
513
+ );
514
+ // Analyze user intent
515
+ break;
516
+
517
+ default:
518
+ console.log("Custom event:", eventType, eventData);
519
+ }
520
+ });
521
+ ```
522
+
523
+ ## Configuration Options
524
+
525
+ The SDK accepts optional configuration parameters:
526
+
527
+ ```javascript
528
+ const agent = new HamsaVoiceAgent("YOUR_API_KEY", {
529
+ API_URL: "https://api.tryhamsa.com", // API endpoint (default)
530
+ });
531
+ ```
532
+
533
+ ## Client-Side Tools
534
+
535
+ You can register client-side tools that the agent can call during conversations:
536
+
537
+ ```javascript
538
+ const tools = [
539
+ {
540
+ function_name: "getUserInfo",
541
+ description: "Get user information",
542
+ parameters: [
543
+ {
544
+ name: "userId",
545
+ type: "string",
546
+ description: "User ID to look up",
547
+ },
548
+ ],
549
+ required: ["userId"],
550
+ fn: async (userId) => {
551
+ // Your tool implementation
552
+ const userInfo = await fetchUserInfo(userId);
553
+ return userInfo;
554
+ },
555
+ },
556
+ ];
557
+
558
+ agent.start({
559
+ agentId: "YOUR_AGENT_ID",
560
+ tools: tools,
561
+ voiceEnablement: true,
562
+ });
563
+ ```
564
+
565
+ ## Migration from Previous Versions
566
+
567
+ If you're upgrading from a previous version, see the [Migration Guide](./MIGRATION_GUIDE.md) for detailed instructions. Connection details are now automatically managed and no longer need to be configured.
568
+
569
+ ## Browser Compatibility
570
+
571
+ This SDK supports modern browsers with WebRTC capabilities:
572
+
573
+ - Chrome 60+
574
+ - Firefox 60+
575
+ - Safari 12+
576
+ - Edge 79+
577
+
578
+ ## TypeScript Support
579
+
580
+ The SDK includes comprehensive TypeScript definitions with detailed analytics interfaces:
581
+
582
+ ```typescript
583
+ import {
584
+ HamsaVoiceAgent,
585
+ CallAnalyticsResult,
586
+ ParticipantData,
587
+ CustomEventMetadata,
588
+ } from "@hamsa-ai/voice-agents-sdk";
589
+
590
+ // All analytics methods return strongly typed data
591
+ const agent = new HamsaVoiceAgent("API_KEY");
592
+
593
+ // TypeScript will provide full autocomplete and type checking for all methods
594
+ const connectionStats = agent.getConnectionStats(); // ConnectionStatsResult | null
595
+ const audioLevels = agent.getAudioLevels(); // AudioLevelsResult | null
596
+ const performance = agent.getPerformanceMetrics(); // PerformanceMetricsResult | null
597
+ const participants = agent.getParticipants(); // ParticipantData[]
598
+ const trackStats = agent.getTrackStats(); // TrackStatsResult | null
599
+ const analytics = agent.getCallAnalytics(); // CallAnalyticsResult | null
600
+
601
+ // Advanced audio control methods
602
+ const outputVolume = agent.getOutputVolume(); // number
603
+ const inputVolume = agent.getInputVolume(); // number
604
+ const isMuted = agent.isMicMuted(); // boolean
605
+ const inputFreqData = agent.getInputByteFrequencyData(); // Uint8Array
606
+ const outputFreqData = agent.getOutputByteFrequencyData(); // Uint8Array
607
+
608
+ // Strongly typed start options with all advanced features
609
+ await agent.start({
610
+ agentId: "agent-id",
611
+ voiceEnablement: true,
612
+ userId: "user-123",
613
+ params: {
614
+ userName: "John Doe",
615
+ sessionId: "session-456"
616
+ },
617
+ preferHeadphonesForIosDevices: true,
618
+ connectionDelay: {
619
+ android: 3000,
620
+ ios: 500,
621
+ default: 1000
622
+ },
623
+ disableWakeLock: false
624
+ });
625
+
626
+ // Strongly typed event handlers
627
+ agent.on("analyticsUpdated", (analytics: CallAnalyticsResult) => {
628
+ console.log(analytics.connectionStats.latency); // number
629
+ console.log(analytics.audioMetrics.userAudioLevel); // number
630
+ console.log(analytics.performanceMetrics.callDuration); // number
631
+ console.log(analytics.participants.length); // number
632
+ });
633
+
634
+ // Audio control events
635
+ agent.on("micMuted", () => {
636
+ console.log("Microphone was muted");
637
+ });
638
+
639
+ agent.on("micUnmuted", () => {
640
+ console.log("Microphone was unmuted");
641
+ });
642
+
643
+ // Strongly typed custom events
644
+ agent.on(
645
+ "customEvent",
646
+ (eventType: string, eventData: any, metadata: CustomEventMetadata) => {
647
+ console.log(metadata.timestamp); // number
648
+ console.log(metadata.participant); // string
649
+ }
650
+ );
651
+
652
+ // Strongly typed participant events
653
+ agent.on("participantConnected", (participant: ParticipantData) => {
654
+ console.log(participant.identity); // string
655
+ console.log(participant.connectionTime); // number
656
+ });
657
+ ```
658
+
659
+ ## Use Cases
660
+
661
+ ### Real-time Call Quality Monitoring
662
+
663
+ ```javascript
664
+ agent.on("connectionQualityChanged", ({ quality, metrics }) => {
665
+ if (quality === "poor") {
666
+ showNetworkWarning();
667
+ logQualityIssue(metrics);
668
+ }
669
+ });
670
+ ```
671
+
672
+ ### Analytics Dashboard
673
+
674
+ ```javascript
675
+ const analytics = agent.getCallAnalytics();
676
+ sendToAnalytics({
677
+ callDuration: analytics.performanceMetrics.callDuration,
678
+ audioQuality: analytics.audioMetrics,
679
+ participantCount: analytics.participants.length,
680
+ performance: analytics.performanceMetrics,
681
+ });
682
+ ```
683
+
684
+ ### Conversation Flow Analysis
685
+
686
+ ```javascript
687
+ agent.on("customEvent", (eventType, data) => {
688
+ if (eventType === "flow_navigation") {
689
+ trackConversationFlow(data.from, data.to);
690
+ optimizeAgentResponses(data);
691
+ }
692
+ });
693
+ ```
694
+
695
+ ## Dependencies
696
+
697
+ - **livekit-client v2.15.4**: Real-time communication infrastructure
698
+ - **events v3.3.0**: EventEmitter for browser compatibility
699
+
700
+ The SDK uses LiveKit's native WebRTC capabilities for high-quality real-time audio communication and comprehensive analytics.