@hamsa-ai/voice-agents-sdk 0.5.8 → 0.6.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hamsa-ai/voice-agents-sdk",
3
- "version": "0.5.8",
3
+ "version": "0.6.0-beta.0",
4
4
  "description": "Hamsa AI - Voice Agents JavaScript SDK",
5
5
  "main": "dist/index.cjs.js",
6
6
  "module": "dist/index.esm.js",
@@ -56,7 +56,7 @@
56
56
  "dependencies": {
57
57
  "events": "^3.3.0",
58
58
  "jwt-decode": "^4.0.0",
59
- "livekit-client": "^2.18.0"
59
+ "livekit-client": "^2.20.0"
60
60
  },
61
61
  "repository": {
62
62
  "type": "git",
@@ -199,6 +199,8 @@ export declare class LiveKitToolRegistry extends EventEmitter {
199
199
  private readonly registeredMethods;
200
200
  /** Debug logger instance for conditional logging */
201
201
  private readonly logger;
202
+ /** Monotonic counter for synthesizing message ids when a segment lacks one */
203
+ private fallbackMessageIdCounter;
202
204
  /**
203
205
  * Creates a new LiveKitToolRegistry instance
204
206
  *
@@ -463,9 +465,17 @@ export declare class LiveKitToolRegistry extends EventEmitter {
463
465
  * ```
464
466
  */
465
467
  handleTranscriptionReceived(transcriptions: Array<{
468
+ id?: string;
466
469
  text?: string;
467
470
  final?: boolean;
468
471
  }>, participantIdentity?: string): void;
472
+ /**
473
+ * Emits a structured, streaming-aware `messageReceived` event.
474
+ *
475
+ * Synthesizes a stable id when the source segment lacks one so consumers can
476
+ * still key the message, and stamps the observation time.
477
+ */
478
+ private emitMessageReceived;
469
479
  /**
470
480
  * Returns the count of currently registered tools
471
481
  *
package/types/classes/types.d.ts CHANGED
@@ -7,6 +7,31 @@ import type { ConnectionQuality, Track, TrackPublication } from 'livekit-client'
7
7
  * Represents the current state of the voice agent
8
8
  */
9
9
  export type AgentState = 'idle' | 'initializing' | 'listening' | 'thinking' | 'speaking';
10
+ /** Identifies who authored a conversation message. */
11
+ export type MessageRole = 'user' | 'agent';
12
+ /**
13
+ * A structured conversation message surfaced to chat UIs.
14
+ *
15
+ * Emitted via the `messageReceived` event for both agent replies and user
16
+ * transcriptions. Unlike the plain-string `answerReceived`/`transcriptionReceived`
17
+ * events, this carries the metadata a streaming chat UI needs:
18
+ * - `id` is stable across a message's streaming updates, so the UI can update the
19
+ * same bubble in place instead of appending a new one for each partial.
20
+ * - `isFinal` distinguishes a streaming partial (`false`) from the completed
21
+ * message (`true`).
22
+ */
23
+ export type ReceivedMessage = {
24
+ /** Stable id for this message; identical across streaming updates of the same message. */
25
+ id: string;
26
+ /** Who authored the message. */
27
+ role: MessageRole;
28
+ /** The message text. For streaming segments this is the full text received so far. */
29
+ text: string;
30
+ /** True for the completed message, false for an in-progress streaming partial. */
31
+ isFinal: boolean;
32
+ /** Unix epoch milliseconds when the SDK observed this update. */
33
+ timestamp: number;
34
+ };
10
35
  /**
11
36
  * Function signature for client-side tools that can be executed by the agent.
12
37
  * Tools can be synchronous or asynchronous and accept variable arguments.
package/types/main.d.ts CHANGED
@@ -2,11 +2,11 @@ import { EventEmitter } from 'events';
2
2
  import type { ConnectionState, LocalTrack, LocalTrackPublication, Participant, RemoteParticipant, RemoteTrack, Room } from 'livekit-client';
3
3
  import LiveKitManager, { type AgentState, type AudioLevelsResult, type CallAnalyticsResult, type ConnectionStatsResult, type ParticipantData, type PerformanceMetricsResult, type TrackStatsResult } from './classes/livekit-manager';
4
4
  import ScreenWakeLock from './classes/screen-wake-lock';
5
- import type { AudioCaptureCallback, AudioCaptureOptions, ConnectionQualityData, DTMFDigit, TrackSubscriptionData, TrackUnsubscriptionData } from './classes/types';
5
+ import type { AudioCaptureCallback, AudioCaptureOptions, ConnectionQualityData, DTMFDigit, ReceivedMessage, TrackSubscriptionData, TrackUnsubscriptionData } from './classes/types';
6
6
  export type { RpcInvocationData } from 'livekit-client';
7
7
  export { RpcError } from 'livekit-client';
8
8
  export type { AgentState } from './classes/livekit-manager';
9
- export type { AudioCaptureCallback, AudioCaptureFormat, AudioCaptureMetadata, AudioCaptureOptions, AudioCaptureSource, DTMFDigit, } from './classes/types';
9
+ export type { AudioCaptureCallback, AudioCaptureFormat, AudioCaptureMetadata, AudioCaptureOptions, AudioCaptureSource, DTMFDigit, MessageRole, ReceivedMessage, } from './classes/types';
10
10
  /**
11
11
  * Custom error class that includes both human-readable message and machine-readable messageKey
12
12
  * for internationalization and programmatic error handling
@@ -63,6 +63,12 @@ type StartOptions = {
63
63
  params?: Record<string, unknown>;
64
64
  /** Whether to enable voice interactions. If false, agent runs in text-only mode */
65
65
  voiceEnablement?: boolean;
66
+ /**
67
+ * Whether the conversation runs in chat-only mode (no audio media).
68
+ * When true, the SDK requests a chat-only session from the backend via the
69
+ * participant-token and conversation-init endpoints.
70
+ */
71
+ isChatOnly?: boolean;
66
72
  /** Array of client-side tools that the agent can call during conversations */
67
73
  tools?: Tool[];
68
74
  /** Optional user identifier for tracking and analytics */
@@ -205,6 +211,13 @@ type HamsaVoiceAgentEvents = {
205
211
  transcriptionReceived: (text: string) => void;
206
212
  /** Emitted when agent response is received */
207
213
  answerReceived: (text: string) => void;
214
+ /**
215
+ * Emitted for every conversation message (agent reply or user transcription)
216
+ * with structured, streaming-aware metadata (id, role, isFinal, timestamp).
217
+ * Use this to drive a chat UI; prefer it over the plain-string
218
+ * `answerReceived`/`transcriptionReceived` events when rendering message bubbles.
219
+ */
220
+ messageReceived: (message: ReceivedMessage) => void;
208
221
  /** Emitted when agent starts speaking */
209
222
  speaking: () => void;
210
223
  /** Emitted when agent is listening */
@@ -213,6 +226,8 @@ type HamsaVoiceAgentEvents = {
213
226
  agentStateChanged: (state: AgentState) => void;
214
227
  /** Emitted when a DTMF digit is successfully sent */
215
228
  dtmfSent: (digit: DTMFDigit) => void;
229
+ /** Emitted when a chat message is successfully sent to the agent */
230
+ messageSent: (text: string) => void;
216
231
  /** Emitted when an error occurs */
217
232
  error: (error: Error | HamsaApiError) => void;
218
233
  /** Emitted when a remote track is subscribed */
@@ -647,6 +662,37 @@ declare class HamsaVoiceAgent extends EventEmitter {
647
662
  * ```
648
663
  */
649
664
  sendDTMF(digit: DTMFDigit): void;
665
+ /**
666
+ * Sends a text chat message to the agent
667
+ *
668
+ * Publishes the user's typed message to the agent over LiveKit's text-stream
669
+ * channel ({@link LIVEKIT_CHAT_TOPIC}). Use this to drive a text/chat UI,
670
+ * typically alongside a chat-only session started with `start({ isChatOnly: true })`.
671
+ *
672
+ * The agent's reply arrives asynchronously through the `answerReceived` event (plain text) or the structured `messageReceived` event.
673
+ * This method does not return the reply.
674
+ *
675
+ * @param text - The message to send. Must be a non-empty string.
676
+ * @throws {Error} If called when not connected (no active session)
677
+ * @throws {Error} If `text` is empty or not a string
678
+ * @fires messageSent When the message is successfully sent to the agent
679
+ *
680
+ * @example
681
+ * ```typescript
682
+ * await agent.start({ agentId: 'support_agent', isChatOnly: true });
683
+ *
684
+ * // Render the agent's replies
685
+ * agent.on('answerReceived', (reply) => appendToChat('agent', reply));
686
+ *
687
+ * // Send the user's typed message
688
+ * sendButton.onclick = async () => {
689
+ * const text = input.value;
690
+ * appendToChat('user', text);
691
+ * await agent.sendMessage(text);
692
+ * };
693
+ * ```
694
+ */
695
+ sendMessage(text: string): Promise<void>;
650
696
  /**
651
697
  * Gets frequency data from the user's microphone input
652
698
  *
@@ -890,7 +936,7 @@ declare class HamsaVoiceAgent extends EventEmitter {
890
936
  * await agent.start({ agentId: 'my_agent', voiceEnablement: true });
891
937
  * ```
892
938
  */
893
- start({ agentId, params, voiceEnablement, tools, userId: _userId, preferHeadphonesForIosDevices: _preferHeadphonesForIosDevices, connectionDelay: _connectionDelay, disableWakeLock: _disableWakeLock, onAudioData, captureAudio, avatarContainerSelector, }: StartOptions): Promise<void>;
939
+ start({ agentId, params, voiceEnablement, isChatOnly, tools, userId: _userId, preferHeadphonesForIosDevices: _preferHeadphonesForIosDevices, connectionDelay: _connectionDelay, disableWakeLock: _disableWakeLock, onAudioData, captureAudio, avatarContainerSelector, }: StartOptions): Promise<void>;
894
940
  /**
895
941
  * Terminates the current voice agent conversation
896
942
  *