@hamsa-ai/voice-agents-sdk 0.5.1 → 0.5.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hamsa-ai/voice-agents-sdk",
3
- "version": "0.5.1",
3
+ "version": "0.5.3",
4
4
  "description": "Hamsa AI - Voice Agents JavaScript SDK",
5
5
  "main": "dist/index.cjs.js",
6
6
  "module": "dist/index.esm.js",
@@ -182,6 +182,8 @@ export declare class LiveKitAnalytics extends EventEmitter {
182
182
  private lastUserInputTime;
183
183
  /** Previous connection quality for jitter change detection (internal) */
184
184
  private previousConnectionQuality;
185
+ /** Debug logger instance for conditional logging */
186
+ private readonly logger;
185
187
  /**
186
188
  * Creates a new LiveKitAnalytics instance
187
189
  *
@@ -203,7 +205,7 @@ export declare class LiveKitAnalytics extends EventEmitter {
203
205
  * analytics.startAnalyticsCollection();
204
206
  * ```
205
207
  */
206
- constructor();
208
+ constructor(debug?: boolean);
207
209
  /**
208
210
  * Sets the LiveKit room reference for analytics data collection
209
211
  *
@@ -154,7 +154,7 @@
154
154
  * - Manages DOM element lifecycle to prevent memory leaks
155
155
  */
156
156
  import { EventEmitter } from 'events';
157
- import { type RemoteParticipant, type RemoteTrack, type RemoteTrackPublication, type Room } from 'livekit-client';
157
+ import { type LocalTrack, type LocalTrackPublication, type Participant, type RemoteParticipant, type RemoteTrack, type RemoteTrackPublication, type Room, Track, type TrackPublication } from 'livekit-client';
158
158
  import type { AudioCaptureOptions, TrackStatsData, TrackStatsResult } from './types';
159
159
  /**
160
160
  * LiveKitAudioManager class for comprehensive audio stream management
@@ -181,8 +181,14 @@ export declare class LiveKitAudioManager extends EventEmitter {
181
181
  private audioCaptureOptions;
182
182
  private readonly recorders;
183
183
  private readonly processors;
184
+ private readonly sourceNodes;
185
+ /** Map of track IDs to cloned MediaStreamTracks for capture */
186
+ private readonly clonedTracks;
184
187
  /** Map of track IDs to their capture state */
185
188
  private readonly trackCaptureMap;
189
+ /** Debug logger instance for conditional logging */
190
+ private readonly logger;
191
+ constructor(debug?: boolean);
186
192
  /**
187
193
  * Provides the LiveKit Room to the audio manager for microphone control.
188
194
  */
@@ -459,6 +465,13 @@ export declare class LiveKitAudioManager extends EventEmitter {
459
465
  * ```
460
466
  */
461
467
  handleTrackSubscribed(track: RemoteTrack, publication: RemoteTrackPublication, participant: RemoteParticipant): void;
468
+ /**
469
+ * Processes local audio track publications
470
+ * @param track - The local audio track
471
+ * @param publication - Local track publication metadata
472
+ * @param participant - The local participant who published the track
473
+ */
474
+ handleLocalTrackPublished(track: LocalTrack, publication: LocalTrackPublication, participant: Participant): void;
462
475
  /**
463
476
  * Processes audio track unsubscription and cleanup
464
477
  *
@@ -495,7 +508,14 @@ export declare class LiveKitAudioManager extends EventEmitter {
495
508
  * });
496
509
  * ```
497
510
  */
498
- handleTrackUnsubscribed(track: RemoteTrack, publication: RemoteTrackPublication, participant: RemoteParticipant): void;
511
+ handleTrackUnsubscribed(track: Track, publication: TrackPublication, participant: Participant): void;
512
+ /**
513
+ * Processes local audio track unpublications
514
+ * @param track - The local audio track
515
+ * @param publication - Local track publication metadata
516
+ * @param participant - The local participant who unpublished the track
517
+ */
518
+ handleLocalTrackUnsubscribed(track: LocalTrack, publication: LocalTrackPublication, participant: Participant): void;
499
519
  /**
500
520
  * Pauses playback of all active audio streams
501
521
  *
@@ -780,6 +800,11 @@ export declare class LiveKitAudioManager extends EventEmitter {
780
800
  * ```
781
801
  */
782
802
  enableAudioCapture(options: AudioCaptureOptions): void;
803
+ /**
804
+ * Internal state for AudioWorklet registration to prevent race conditions
805
+ * @private
806
+ */
807
+ private workletReady;
783
808
  /**
784
809
  * Disables audio capture and cleans up all capture resources
785
810
  *
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Shared types and interfaces for LiveKit modules
3
3
  */
4
- import type { ConnectionQuality, RemoteTrack, RemoteTrackPublication } from 'livekit-client';
4
+ import type { ConnectionQuality, Track, TrackPublication } from 'livekit-client';
5
5
  /**
6
6
  * Agent state as defined by LiveKit
7
7
  * Represents the current state of the voice agent
@@ -62,7 +62,7 @@ export type TrackStatsData = {
62
62
  /** Unix timestamp when this track was subscribed to */
63
63
  subscriptionTime: number;
64
64
  /** LiveKit track publication object containing track details */
65
- publication: RemoteTrackPublication;
65
+ publication: TrackPublication;
66
66
  /** Track source (microphone, screen_share, etc.) */
67
67
  source?: string;
68
68
  /** Whether the track is currently muted */
@@ -270,9 +270,9 @@ export type ConnectionQualityData = {
270
270
  */
271
271
  export type TrackSubscriptionData = {
272
272
  /** The LiveKit track object that was subscribed to */
273
- track: RemoteTrack;
273
+ track: Track;
274
274
  /** The track publication containing metadata */
275
- publication: RemoteTrackPublication;
275
+ publication: TrackPublication;
276
276
  /** Identity of the participant who owns this track */
277
277
  participant: string;
278
278
  /** Optional statistics about this track subscription */
@@ -284,9 +284,9 @@ export type TrackSubscriptionData = {
284
284
  */
285
285
  export type TrackUnsubscriptionData = {
286
286
  /** The LiveKit track object that was unsubscribed from */
287
- track: RemoteTrack;
287
+ track: Track;
288
288
  /** The track publication that was removed */
289
- publication: RemoteTrackPublication;
289
+ publication: TrackPublication;
290
290
  /** Identity of the participant who owned this track */
291
291
  participant: string;
292
292
  };
@@ -336,6 +336,8 @@ export type AudioCaptureMetadata = {
336
336
  timestamp: number;
337
337
  /** Track ID associated with this audio */
338
338
  trackId: string;
339
+ /** Source of this specific track (e.g. 'microphone', 'screen_share') */
340
+ trackSource?: string;
339
341
  /** Audio format of this chunk */
340
342
  format: AudioCaptureFormat;
341
343
  /** Sample rate in Hz (for PCM formats) */
@@ -353,6 +355,9 @@ export type AudioCaptureCallback = (audioData: ArrayBuffer | Float32Array | Int1
353
355
  export type AudioCaptureOptions = {
354
356
  /** Source of audio to capture (default: 'agent') */
355
357
  source?: AudioCaptureSource;
358
+ /** Specific track source to capture (default: 'microphone').
359
+ * Set to 'all' to capture everything including screen share audio. */
360
+ trackSourceFilter?: 'microphone' | 'screen_share' | 'all';
356
361
  /** Audio format to deliver (default: 'opus-webm') */
357
362
  format?: AudioCaptureFormat;
358
363
  /** Chunk size in milliseconds for encoded formats (default: 100ms) */
@@ -403,3 +408,8 @@ export type LiveKitAgentMetadata = {
403
408
  voiceAgentId: string;
404
409
  apiKey: string;
405
410
  };
411
+ /**
412
+ * Valid DTMF (Dual-Tone Multi-Frequency) digits that can be sent during a call.
413
+ * Includes digits 0-9, asterisk (*), and pound (#) characters.
414
+ */
415
+ export type DTMFDigit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '*' | '#';
package/types/main.d.ts CHANGED
@@ -2,9 +2,9 @@ import { EventEmitter } from 'events';
2
2
  import type { ConnectionState, LocalTrack, LocalTrackPublication, Participant, RemoteParticipant, RemoteTrack, Room } from 'livekit-client';
3
3
  import LiveKitManager, { type AgentState, type AudioLevelsResult, type CallAnalyticsResult, type ConnectionStatsResult, type ParticipantData, type PerformanceMetricsResult, type TrackStatsResult } from './classes/livekit-manager';
4
4
  import ScreenWakeLock from './classes/screen-wake-lock';
5
- import type { AudioCaptureCallback, AudioCaptureOptions, ConnectionQualityData, TrackSubscriptionData, TrackUnsubscriptionData } from './classes/types';
5
+ import type { AudioCaptureCallback, AudioCaptureOptions, ConnectionQualityData, DTMFDigit, TrackSubscriptionData, TrackUnsubscriptionData } from './classes/types';
6
6
  export type { AgentState } from './classes/livekit-manager';
7
- export type { AudioCaptureCallback, AudioCaptureFormat, AudioCaptureMetadata, AudioCaptureOptions, AudioCaptureSource, } from './classes/types';
7
+ export type { AudioCaptureCallback, AudioCaptureFormat, AudioCaptureMetadata, AudioCaptureOptions, AudioCaptureSource, DTMFDigit, } from './classes/types';
8
8
  /**
9
9
  * Custom error class that includes both human-readable message and machine-readable messageKey
10
10
  * for internationalization and programmatic error handling
@@ -190,6 +190,8 @@ type HamsaVoiceAgentEvents = {
190
190
  listening: () => void;
191
191
  /** Emitted when agent state changes (idle, initializing, listening, thinking, speaking) */
192
192
  agentStateChanged: (state: AgentState) => void;
193
+ /** Emitted when a DTMF digit is successfully sent */
194
+ dtmfSent: (digit: DTMFDigit) => void;
193
195
  /** Emitted when an error occurs */
194
196
  error: (error: Error | HamsaApiError) => void;
195
197
  /** Emitted when a remote track is subscribed */
@@ -550,6 +552,73 @@ declare class HamsaVoiceAgent extends EventEmitter {
550
552
  * ```
551
553
  */
552
554
  sendContextualUpdate(context: string): void;
555
+ /**
556
+ * Sends a DTMF (Dual-Tone Multi-Frequency) digit to the voice agent
557
+ *
558
+ * Simulates pressing a key on a phone keypad during the call. This enables
559
+ * browser-based call testing with DTMF input simulation, allowing users to
560
+ * test IVR flows and DTMF transitions without making actual phone calls.
561
+ *
562
+ * The DTMF digit is sent through the LiveKit data channel to the server,
563
+ * which processes it as a DTMF input event that can trigger DTMF transitions
564
+ * in the agent flow.
565
+ *
566
+ * @param digit - A single DTMF digit: '0'-'9', '*', or '#'
567
+ * @throws {Error} If called when not connected (no active call)
568
+ * @throws {Error} If the digit is not a valid DTMF character
569
+ * @fires dtmfSent When a DTMF digit is successfully sent to the agent
570
+ *
571
+ * @example Basic usage
572
+ * ```typescript
573
+ * const agent = new HamsaVoiceAgent(apiKey, config);
574
+ * await agent.start({ agentId, params });
575
+ *
576
+ * // Listen for DTMF send confirmations
577
+ * agent.on('dtmfSent', (digit) => {
578
+ * console.log(`Sent DTMF digit: ${digit}`);
579
+ * highlightKeypadButton(digit);
580
+ * });
581
+ *
582
+ * // Later, when user presses a key on the UI keypad:
583
+ * agent.sendDTMF('1'); // Simulates pressing "1"
584
+ * agent.sendDTMF('*'); // Simulates pressing "*"
585
+ * agent.sendDTMF('#'); // Simulates pressing "#"
586
+ * ```
587
+ *
588
+ * @example With UI keypad
589
+ * ```typescript
590
+ * // Create keypad buttons
591
+ * const digits = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '*', '0', '#'];
592
+ *
593
+ * digits.forEach(digit => {
594
+ * const button = document.createElement('button');
595
+ * button.textContent = digit;
596
+ * button.onclick = () => {
597
+ * try {
598
+ * agent.sendDTMF(digit);
599
+ * playKeyTone(digit); // Optional: play local tone feedback
600
+ * } catch (error) {
601
+ * console.error('Failed to send DTMF:', error.message);
602
+ * }
603
+ * };
604
+ * keypadContainer.appendChild(button);
605
+ * });
606
+ * ```
607
+ *
608
+ * @example Error handling
609
+ * ```typescript
610
+ * try {
611
+ * agent.sendDTMF('1');
612
+ * } catch (error) {
613
+ * if (error.message.includes('not connected')) {
614
+ * showConnectionError();
615
+ * } else if (error.message.includes('Invalid DTMF')) {
616
+ * showInvalidInputError();
617
+ * }
618
+ * }
619
+ * ```
620
+ */
621
+ sendDTMF(digit: DTMFDigit): void;
553
622
  /**
554
623
  * Gets frequency data from the user's microphone input
555
624
  *