@hamsa-ai/voice-agents-sdk 0.5.8 → 0.6.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs.js +1 -1
- package/dist/index.cjs.js.map +1 -1
- package/dist/index.esm.js +1 -1
- package/dist/index.esm.js.map +1 -1
- package/dist/index.umd.js +1 -1
- package/dist/index.umd.js.map +1 -1
- package/package.json +2 -2
- package/types/classes/livekit-connection.d.ts +4 -1
- package/types/classes/livekit-manager.d.ts +2 -1
- package/types/classes/livekit-tool-registry.d.ts +10 -0
- package/types/classes/types.d.ts +25 -0
- package/types/main.d.ts +49 -3
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hamsa-ai/voice-agents-sdk",
|
|
3
|
-
"version": "0.5.8",
|
|
3
|
+
"version": "0.6.0-beta.1",
|
|
4
4
|
"description": "Hamsa AI - Voice Agents JavaScript SDK",
|
|
5
5
|
"main": "dist/index.cjs.js",
|
|
6
6
|
"module": "dist/index.esm.js",
|
|
@@ -56,7 +56,7 @@
|
|
|
56
56
|
"dependencies": {
|
|
57
57
|
"events": "^3.3.0",
|
|
58
58
|
"jwt-decode": "^4.0.0",
|
|
59
|
-
"livekit-client": "^2.
|
|
59
|
+
"livekit-client": "^2.20.0"
|
|
60
60
|
},
|
|
61
61
|
"repository": {
|
|
62
62
|
"type": "git",
|
|
package/types/classes/livekit-connection.d.ts
CHANGED
|
@@ -190,6 +190,8 @@ export declare class LiveKitConnection extends EventEmitter {
|
|
|
190
190
|
private hasEmittedConnected;
|
|
191
191
|
/** Debug logger instance for conditional logging */
|
|
192
192
|
private readonly logger;
|
|
193
|
+
/** When true, the session is text/chat-only and the microphone is never enabled */
|
|
194
|
+
private readonly isChatOnly;
|
|
193
195
|
/**
|
|
194
196
|
* Creates a new LiveKitConnection instance
|
|
195
197
|
*
|
|
@@ -200,6 +202,7 @@ export declare class LiveKitConnection extends EventEmitter {
|
|
|
200
202
|
* @param lkUrl - LiveKit WebSocket URL (e.g., 'wss://livekit.example.com')
|
|
201
203
|
* @param accessToken - JWT token for room authentication and authorization
|
|
202
204
|
* @param debug - Enable debug logging (defaults to false)
|
|
205
|
+
* @param isChatOnly - When true, the microphone is never enabled (text/chat-only session)
|
|
203
206
|
*
|
|
204
207
|
* @example
|
|
205
208
|
* ```typescript
|
|
@@ -213,7 +216,7 @@ export declare class LiveKitConnection extends EventEmitter {
|
|
|
213
216
|
* await connection.connect();
|
|
214
217
|
* ```
|
|
215
218
|
*/
|
|
216
|
-
constructor(lkUrl: string, accessToken: string, debug?: boolean);
|
|
219
|
+
constructor(lkUrl: string, accessToken: string, debug?: boolean, isChatOnly?: boolean);
|
|
217
220
|
/**
|
|
218
221
|
* Provides access to the underlying LiveKit room instance
|
|
219
222
|
*
|
|
package/types/classes/livekit-manager.d.ts
CHANGED
|
@@ -90,9 +90,10 @@ export default class LiveKitManager extends EventEmitter {
|
|
|
90
90
|
* );
|
|
91
91
|
* ```
|
|
92
92
|
*/
|
|
93
|
-
constructor(lkUrl: string, accessToken: string, tools?: Tool[], { debug, avatarContainerSelector, }?: {
|
|
93
|
+
constructor(lkUrl: string, accessToken: string, tools?: Tool[], { debug, avatarContainerSelector, isChatOnly, }?: {
|
|
94
94
|
debug?: boolean;
|
|
95
95
|
avatarContainerSelector?: string;
|
|
96
|
+
isChatOnly?: boolean;
|
|
96
97
|
});
|
|
97
98
|
/**
|
|
98
99
|
* Establishes connection to the LiveKit room and initializes voice agent communication
|
|
package/types/classes/livekit-tool-registry.d.ts
CHANGED
|
@@ -199,6 +199,8 @@ export declare class LiveKitToolRegistry extends EventEmitter {
|
|
|
199
199
|
private readonly registeredMethods;
|
|
200
200
|
/** Debug logger instance for conditional logging */
|
|
201
201
|
private readonly logger;
|
|
202
|
+
/** Monotonic counter for synthesizing message ids when a segment lacks one */
|
|
203
|
+
private fallbackMessageIdCounter;
|
|
202
204
|
/**
|
|
203
205
|
* Creates a new LiveKitToolRegistry instance
|
|
204
206
|
*
|
|
@@ -463,9 +465,17 @@ export declare class LiveKitToolRegistry extends EventEmitter {
|
|
|
463
465
|
* ```
|
|
464
466
|
*/
|
|
465
467
|
handleTranscriptionReceived(transcriptions: Array<{
|
|
468
|
+
id?: string;
|
|
466
469
|
text?: string;
|
|
467
470
|
final?: boolean;
|
|
468
471
|
}>, participantIdentity?: string): void;
|
|
472
|
+
/**
|
|
473
|
+
* Emits a structured, streaming-aware `messageReceived` event.
|
|
474
|
+
*
|
|
475
|
+
* Synthesizes a stable id when the source segment lacks one so consumers can
|
|
476
|
+
* still key the message, and stamps the observation time.
|
|
477
|
+
*/
|
|
478
|
+
private emitMessageReceived;
|
|
469
479
|
/**
|
|
470
480
|
* Returns the count of currently registered tools
|
|
471
481
|
*
|
package/types/classes/types.d.ts
CHANGED
|
@@ -7,6 +7,31 @@ import type { ConnectionQuality, Track, TrackPublication } from 'livekit-client'
|
|
|
7
7
|
* Represents the current state of the voice agent
|
|
8
8
|
*/
|
|
9
9
|
export type AgentState = 'idle' | 'initializing' | 'listening' | 'thinking' | 'speaking';
|
|
10
|
+
/** Identifies who authored a conversation message. */
|
|
11
|
+
export type MessageRole = 'user' | 'agent';
|
|
12
|
+
/**
|
|
13
|
+
* A structured conversation message surfaced to chat UIs.
|
|
14
|
+
*
|
|
15
|
+
* Emitted via the `messageReceived` event for both agent replies and user
|
|
16
|
+
* transcriptions. Unlike the plain-string `answerReceived`/`transcriptionReceived`
|
|
17
|
+
* events, this carries the metadata a streaming chat UI needs:
|
|
18
|
+
* - `id` is stable across a message's streaming updates, so the UI can update the
|
|
19
|
+
* same bubble in place instead of appending a new one for each partial.
|
|
20
|
+
* - `isFinal` distinguishes a streaming partial (`false`) from the completed
|
|
21
|
+
* message (`true`).
|
|
22
|
+
*/
|
|
23
|
+
export type ReceivedMessage = {
|
|
24
|
+
/** Stable id for this message; identical across streaming updates of the same message. */
|
|
25
|
+
id: string;
|
|
26
|
+
/** Who authored the message. */
|
|
27
|
+
role: MessageRole;
|
|
28
|
+
/** The message text. For streaming segments this is the full text received so far. */
|
|
29
|
+
text: string;
|
|
30
|
+
/** True for the completed message, false for an in-progress streaming partial. */
|
|
31
|
+
isFinal: boolean;
|
|
32
|
+
/** Unix epoch milliseconds when the SDK observed this update. */
|
|
33
|
+
timestamp: number;
|
|
34
|
+
};
|
|
10
35
|
/**
|
|
11
36
|
* Function signature for client-side tools that can be executed by the agent.
|
|
12
37
|
* Tools can be synchronous or asynchronous and accept variable arguments.
|
package/types/main.d.ts
CHANGED
|
@@ -2,11 +2,11 @@ import { EventEmitter } from 'events';
|
|
|
2
2
|
import type { ConnectionState, LocalTrack, LocalTrackPublication, Participant, RemoteParticipant, RemoteTrack, Room } from 'livekit-client';
|
|
3
3
|
import LiveKitManager, { type AgentState, type AudioLevelsResult, type CallAnalyticsResult, type ConnectionStatsResult, type ParticipantData, type PerformanceMetricsResult, type TrackStatsResult } from './classes/livekit-manager';
|
|
4
4
|
import ScreenWakeLock from './classes/screen-wake-lock';
|
|
5
|
-
import type { AudioCaptureCallback, AudioCaptureOptions, ConnectionQualityData, DTMFDigit, TrackSubscriptionData, TrackUnsubscriptionData } from './classes/types';
|
|
5
|
+
import type { AudioCaptureCallback, AudioCaptureOptions, ConnectionQualityData, DTMFDigit, ReceivedMessage, TrackSubscriptionData, TrackUnsubscriptionData } from './classes/types';
|
|
6
6
|
export type { RpcInvocationData } from 'livekit-client';
|
|
7
7
|
export { RpcError } from 'livekit-client';
|
|
8
8
|
export type { AgentState } from './classes/livekit-manager';
|
|
9
|
-
export type { AudioCaptureCallback, AudioCaptureFormat, AudioCaptureMetadata, AudioCaptureOptions, AudioCaptureSource, DTMFDigit, } from './classes/types';
|
|
9
|
+
export type { AudioCaptureCallback, AudioCaptureFormat, AudioCaptureMetadata, AudioCaptureOptions, AudioCaptureSource, DTMFDigit, MessageRole, ReceivedMessage, } from './classes/types';
|
|
10
10
|
/**
|
|
11
11
|
* Custom error class that includes both human-readable message and machine-readable messageKey
|
|
12
12
|
* for internationalization and programmatic error handling
|
|
@@ -63,6 +63,12 @@ type StartOptions = {
|
|
|
63
63
|
params?: Record<string, unknown>;
|
|
64
64
|
/** Whether to enable voice interactions. If false, agent runs in text-only mode */
|
|
65
65
|
voiceEnablement?: boolean;
|
|
66
|
+
/**
|
|
67
|
+
* Whether the conversation runs in chat-only mode (no audio media).
|
|
68
|
+
* When true, the SDK requests a chat-only session from the backend via the
|
|
69
|
+
* participant-token and conversation-init endpoints.
|
|
70
|
+
*/
|
|
71
|
+
isChatOnly?: boolean;
|
|
66
72
|
/** Array of client-side tools that the agent can call during conversations */
|
|
67
73
|
tools?: Tool[];
|
|
68
74
|
/** Optional user identifier for tracking and analytics */
|
|
@@ -205,6 +211,13 @@ type HamsaVoiceAgentEvents = {
|
|
|
205
211
|
transcriptionReceived: (text: string) => void;
|
|
206
212
|
/** Emitted when agent response is received */
|
|
207
213
|
answerReceived: (text: string) => void;
|
|
214
|
+
/**
|
|
215
|
+
* Emitted for every conversation message (agent reply or user transcription)
|
|
216
|
+
* with structured, streaming-aware metadata (id, role, isFinal, timestamp).
|
|
217
|
+
* Use this to drive a chat UI; prefer it over the plain-string
|
|
218
|
+
* `answerReceived`/`transcriptionReceived` events when rendering message bubbles.
|
|
219
|
+
*/
|
|
220
|
+
messageReceived: (message: ReceivedMessage) => void;
|
|
208
221
|
/** Emitted when agent starts speaking */
|
|
209
222
|
speaking: () => void;
|
|
210
223
|
/** Emitted when agent is listening */
|
|
@@ -213,6 +226,8 @@ type HamsaVoiceAgentEvents = {
|
|
|
213
226
|
agentStateChanged: (state: AgentState) => void;
|
|
214
227
|
/** Emitted when a DTMF digit is successfully sent */
|
|
215
228
|
dtmfSent: (digit: DTMFDigit) => void;
|
|
229
|
+
/** Emitted when a chat message is successfully sent to the agent */
|
|
230
|
+
messageSent: (text: string) => void;
|
|
216
231
|
/** Emitted when an error occurs */
|
|
217
232
|
error: (error: Error | HamsaApiError) => void;
|
|
218
233
|
/** Emitted when a remote track is subscribed */
|
|
@@ -647,6 +662,37 @@ declare class HamsaVoiceAgent extends EventEmitter {
|
|
|
647
662
|
* ```
|
|
648
663
|
*/
|
|
649
664
|
sendDTMF(digit: DTMFDigit): void;
|
|
665
|
+
/**
|
|
666
|
+
* Sends a text chat message to the agent
|
|
667
|
+
*
|
|
668
|
+
* Publishes the user's typed message to the agent over LiveKit's text-stream
|
|
669
|
+
* channel ({@link LIVEKIT_CHAT_TOPIC}). Use this to drive a text/chat UI,
|
|
670
|
+
* typically alongside a chat-only session started with `start({ isChatOnly: true })`.
|
|
671
|
+
*
|
|
672
|
+
* The agent's reply arrives asynchronously through the `answerReceived` event.
|
|
673
|
+
* This method does not return the reply.
|
|
674
|
+
*
|
|
675
|
+
* @param text - The message to send. Must be a non-empty string.
|
|
676
|
+
* @throws {Error} If called when not connected (no active session)
|
|
677
|
+
* @throws {Error} If `text` is empty or not a string
|
|
678
|
+
* @fires messageSent When the message is successfully sent to the agent
|
|
679
|
+
*
|
|
680
|
+
* @example
|
|
681
|
+
* ```typescript
|
|
682
|
+
* await agent.start({ agentId: 'support_agent', isChatOnly: true });
|
|
683
|
+
*
|
|
684
|
+
* // Render the agent's replies
|
|
685
|
+
* agent.on('answerReceived', (reply) => appendToChat('agent', reply));
|
|
686
|
+
*
|
|
687
|
+
* // Send the user's typed message
|
|
688
|
+
* sendButton.onclick = async () => {
|
|
689
|
+
* const text = input.value;
|
|
690
|
+
* appendToChat('user', text);
|
|
691
|
+
* await agent.sendMessage(text);
|
|
692
|
+
* };
|
|
693
|
+
* ```
|
|
694
|
+
*/
|
|
695
|
+
sendMessage(text: string): Promise<void>;
|
|
650
696
|
/**
|
|
651
697
|
* Gets frequency data from the user's microphone input
|
|
652
698
|
*
|
|
@@ -890,7 +936,7 @@ declare class HamsaVoiceAgent extends EventEmitter {
|
|
|
890
936
|
* await agent.start({ agentId: 'my_agent', voiceEnablement: true });
|
|
891
937
|
* ```
|
|
892
938
|
*/
|
|
893
|
-
start({ agentId, params, voiceEnablement, tools, userId: _userId, preferHeadphonesForIosDevices: _preferHeadphonesForIosDevices, connectionDelay: _connectionDelay, disableWakeLock: _disableWakeLock, onAudioData, captureAudio, avatarContainerSelector, }: StartOptions): Promise<void>;
|
|
939
|
+
start({ agentId, params, voiceEnablement, isChatOnly, tools, userId: _userId, preferHeadphonesForIosDevices: _preferHeadphonesForIosDevices, connectionDelay: _connectionDelay, disableWakeLock: _disableWakeLock, onAudioData, captureAudio, avatarContainerSelector, }: StartOptions): Promise<void>;
|
|
894
940
|
/**
|
|
895
941
|
* Terminates the current voice agent conversation
|
|
896
942
|
*
|