@hamsa-ai/voice-agents-sdk 0.3.0 → 4.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -19
- package/dist/index.cjs.js +1 -2
- package/dist/index.esm.js +1 -2
- package/dist/index.umd.js +1 -2
- package/package.json +10 -2
- package/types/classes/livekit_manager.d.ts +52 -0
- package/types/main.d.ts +6 -9
- package/dist/index.cjs.js.LICENSE.txt +0 -8
- package/dist/index.esm.js.LICENSE.txt +0 -8
- package/dist/index.umd.js.LICENSE.txt +0 -8
- package/types/classes/audio-player-processor.worklet.d.ts +0 -8
- package/types/classes/audio-processor.worklet.d.ts +0 -4
- package/types/classes/audio_player.d.ts +0 -67
- package/types/classes/audio_recorder.d.ts +0 -37
- package/types/classes/websocket_manager.d.ts +0 -115
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hamsa-ai/voice-agents-sdk",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "4.0.0-beta.1",
|
|
4
4
|
"description": "Hamsa AI - Voice Agents JavaScript SDK",
|
|
5
5
|
"main": "dist/index.cjs.js",
|
|
6
6
|
"module": "dist/index.esm.js",
|
|
@@ -24,16 +24,23 @@
|
|
|
24
24
|
"build:types": "tsc",
|
|
25
25
|
"build:webpack": "webpack --config webpack.config.js",
|
|
26
26
|
"build": "npm run clean && npm run build:types && npm run build:webpack",
|
|
27
|
+
"test": "jest",
|
|
28
|
+
"test:watch": "jest --watch",
|
|
27
29
|
"prepare": "npm run build"
|
|
28
30
|
},
|
|
29
31
|
"author": "Hamsa AI Inc.",
|
|
30
32
|
"license": "MIT",
|
|
31
33
|
"devDependencies": {
|
|
34
|
+
"@babel/core": "^7.23.0",
|
|
35
|
+
"@babel/preset-env": "^7.23.0",
|
|
32
36
|
"audio-worklet-loader": "^1.1.0",
|
|
37
|
+
"babel-jest": "^29.7.0",
|
|
33
38
|
"babel-loader": "^9.1.3",
|
|
34
39
|
"buffer": "^6.0.3",
|
|
35
40
|
"css-loader": "^7.1.2",
|
|
36
41
|
"file-loader": "^6.2.0",
|
|
42
|
+
"jest": "^29.7.0",
|
|
43
|
+
"jest-environment-jsdom": "^29.7.0",
|
|
37
44
|
"rimraf": "^6.0.1",
|
|
38
45
|
"style-loader": "^4.0.0",
|
|
39
46
|
"typescript": "^5.5.4",
|
|
@@ -42,7 +49,8 @@
|
|
|
42
49
|
"webpack-node-externals": "^3.0.0"
|
|
43
50
|
},
|
|
44
51
|
"dependencies": {
|
|
45
|
-
"events": "^3.3.0"
|
|
52
|
+
"events": "^3.3.0",
|
|
53
|
+
"livekit-client": "^2.15.4"
|
|
46
54
|
},
|
|
47
55
|
"repository": {
|
|
48
56
|
"type": "git",
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LiveKitManager encapsulates all direct interactions with the livekit-client SDK
|
|
3
|
+
* and replaces the WebSocketManager functionality.
|
|
4
|
+
*/
|
|
5
|
+
export default class LiveKitManager extends EventEmitter<[never]> {
|
|
6
|
+
/**
|
|
7
|
+
* Creates a new LiveKitManager instance.
|
|
8
|
+
* @param {string} lkUrl - The LiveKit WebSocket URL
|
|
9
|
+
* @param {string} accessToken - The LiveKit access token
|
|
10
|
+
* @param {Array} tools - Array of tools/functions to be used
|
|
11
|
+
*/
|
|
12
|
+
constructor(lkUrl: string, accessToken: string, tools?: any[]);
|
|
13
|
+
lkUrl: string;
|
|
14
|
+
accessToken: string;
|
|
15
|
+
tools: any[];
|
|
16
|
+
room: Room;
|
|
17
|
+
isConnected: boolean;
|
|
18
|
+
volume: number;
|
|
19
|
+
audioElements: Set<any>;
|
|
20
|
+
isPaused: boolean;
|
|
21
|
+
/**
|
|
22
|
+
* Connects to the LiveKit room.
|
|
23
|
+
* @returns {Promise<void>}
|
|
24
|
+
*/
|
|
25
|
+
connect(): Promise<void>;
|
|
26
|
+
/**
|
|
27
|
+
* Disconnects from the LiveKit room.
|
|
28
|
+
* @returns {Promise<void>}
|
|
29
|
+
*/
|
|
30
|
+
disconnect(): Promise<void>;
|
|
31
|
+
/**
|
|
32
|
+
* Pauses the call by muting local microphone and pausing remote audio.
|
|
33
|
+
*/
|
|
34
|
+
pause(): void;
|
|
35
|
+
/**
|
|
36
|
+
* Resumes the call by unmuting local microphone and resuming remote audio.
|
|
37
|
+
*/
|
|
38
|
+
resume(): void;
|
|
39
|
+
/**
|
|
40
|
+
* Sets the volume for all audio elements.
|
|
41
|
+
* @param {number} volume - Volume level between 0.0 and 1.0
|
|
42
|
+
*/
|
|
43
|
+
setVolume(volume: number): void;
|
|
44
|
+
/**
|
|
45
|
+
* Registers client-side tools as RPC methods.
|
|
46
|
+
* @param {Array} tools - Array of tool objects
|
|
47
|
+
*/
|
|
48
|
+
registerTools(tools: any[]): void;
|
|
49
|
+
#private;
|
|
50
|
+
}
|
|
51
|
+
import { EventEmitter } from 'events';
|
|
52
|
+
import { Room } from 'livekit-client';
|
package/types/main.d.ts
CHANGED
|
@@ -6,17 +6,14 @@ export class HamsaVoiceAgent extends EventEmitter<[never]> {
|
|
|
6
6
|
* @param {string} apiKey - API key.
|
|
7
7
|
* @param {object} [config] - Optional config.
|
|
8
8
|
* @param {string} [config.API_URL="https://api.tryhamsa.com"] - API URL.
|
|
9
|
-
* @param {string} [config.WS_URL="wss://bots.tryhamsa.com/stream"] - WebSocket URL.
|
|
10
9
|
*/
|
|
11
|
-
constructor(apiKey: string, { API_URL, WS_URL }?: {
|
|
10
|
+
constructor(apiKey: string, { API_URL, }?: {
|
|
12
11
|
API_URL?: string;
|
|
13
|
-
WS_URL?: string;
|
|
14
12
|
});
|
|
15
|
-
|
|
13
|
+
liveKitManager: LiveKitManager;
|
|
16
14
|
apiKey: string;
|
|
17
15
|
API_URL: string;
|
|
18
|
-
|
|
19
|
-
jobId: string;
|
|
16
|
+
jobId: any;
|
|
20
17
|
wakeLockManager: ScreenWakeLock;
|
|
21
18
|
/**
|
|
22
19
|
* Sets the volume for the audio playback.
|
|
@@ -27,7 +24,7 @@ export class HamsaVoiceAgent extends EventEmitter<[never]> {
|
|
|
27
24
|
* Starts a new voice agent call.
|
|
28
25
|
* @param {object} options - Configuration options for the call.
|
|
29
26
|
*/
|
|
30
|
-
start({ agentId, params, voiceEnablement, tools }: object): Promise<void>;
|
|
27
|
+
start({ agentId, params, voiceEnablement, tools, }: object): Promise<void>;
|
|
31
28
|
/**
|
|
32
29
|
* Ends the current voice agent call.
|
|
33
30
|
*/
|
|
@@ -51,6 +48,6 @@ export class HamsaVoiceAgent extends EventEmitter<[never]> {
|
|
|
51
48
|
getJobDetails(maxRetries?: number, initialRetryInterval?: number, backoffFactor?: number): Promise<any>;
|
|
52
49
|
#private;
|
|
53
50
|
}
|
|
54
|
-
import { EventEmitter } from 'events';
|
|
55
|
-
import WebSocketManager from './classes/websocket_manager';
|
|
51
|
+
import { EventEmitter } from "events";
|
|
52
|
+
import LiveKitManager from "./classes/livekit_manager";
|
|
56
53
|
import ScreenWakeLock from "./classes/screen_wake_lock";
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
export default class AudioPlayer {
|
|
2
|
-
/**
|
|
3
|
-
* Constructs a new AudioPlayer instance.
|
|
4
|
-
* @param {WebSocket} ws - The WebSocket connection.
|
|
5
|
-
* @param {function} onSpeaking - Callback when speaking starts.
|
|
6
|
-
* @param {function} onListening - Callback when listening starts.
|
|
7
|
-
* @param {function} onStreamReady - Callback when MediaStream is ready.
|
|
8
|
-
*/
|
|
9
|
-
constructor(ws: WebSocket, onSpeaking: Function, onListening: Function, onStreamReady: Function);
|
|
10
|
-
audioContext: any;
|
|
11
|
-
ws: WebSocket;
|
|
12
|
-
isPaused: boolean;
|
|
13
|
-
onSpeakingCB: Function;
|
|
14
|
-
onListeningCB: Function;
|
|
15
|
-
isPlaying: boolean;
|
|
16
|
-
gainNode: any;
|
|
17
|
-
mediaStreamDestination: any;
|
|
18
|
-
onStreamReady: Function;
|
|
19
|
-
/**
|
|
20
|
-
* Initializes the AudioWorklet and sets up the processor.
|
|
21
|
-
*/
|
|
22
|
-
initAudioWorklet(): Promise<void>;
|
|
23
|
-
processor: AudioWorkletNode;
|
|
24
|
-
/**
|
|
25
|
-
* Enqueues audio data to be played.
|
|
26
|
-
* @param {string} base64Data - Base64 encoded PCM16 audio data.
|
|
27
|
-
*/
|
|
28
|
-
enqueueAudio(base64Data: string): void;
|
|
29
|
-
/**
|
|
30
|
-
* Pauses audio playback.
|
|
31
|
-
*/
|
|
32
|
-
pause(): void;
|
|
33
|
-
/**
|
|
34
|
-
* Resumes audio playback.
|
|
35
|
-
*/
|
|
36
|
-
resume(): void;
|
|
37
|
-
/**
|
|
38
|
-
* Stops audio playback and clears the buffer.
|
|
39
|
-
*/
|
|
40
|
-
stopAndClear(): void;
|
|
41
|
-
/**
|
|
42
|
-
* Adds a mark to the audio stream.
|
|
43
|
-
* @param {string} markName - Name of the mark.
|
|
44
|
-
*/
|
|
45
|
-
addMark(markName: string): void;
|
|
46
|
-
/**
|
|
47
|
-
* Converts PCM16 data to Float32.
|
|
48
|
-
* @param {Uint8Array} pcm16Array - PCM16 audio data.
|
|
49
|
-
* @returns {Float32Array} Float32 audio samples.
|
|
50
|
-
*/
|
|
51
|
-
pcm16ToFloat32(pcm16Array: Uint8Array): Float32Array;
|
|
52
|
-
/**
|
|
53
|
-
* Updates the playing state and triggers callbacks.
|
|
54
|
-
* @param {boolean} isPlaying - Indicates whether audio is playing.
|
|
55
|
-
*/
|
|
56
|
-
updatePlayingState(isPlaying: boolean): void;
|
|
57
|
-
/**
|
|
58
|
-
* Sets the volume of the audio playback.
|
|
59
|
-
* @param {number} volume - Volume level between 0.0 and 1.0.
|
|
60
|
-
*/
|
|
61
|
-
setVolume(volume: number): void;
|
|
62
|
-
/**
|
|
63
|
-
* Returns the MediaStream capturing the audio being played.
|
|
64
|
-
* @returns {MediaStream}
|
|
65
|
-
*/
|
|
66
|
-
getMediaStream(): MediaStream;
|
|
67
|
-
}
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
export default class AudioRecorder {
|
|
2
|
-
/**
|
|
3
|
-
* Constructs a new AudioRecorder instance.
|
|
4
|
-
* @param {function} onStreamReady - Callback invoked when MediaStream is ready.
|
|
5
|
-
*/
|
|
6
|
-
constructor(onStreamReady: Function);
|
|
7
|
-
audioContext: any;
|
|
8
|
-
mediaStreamSource: any;
|
|
9
|
-
audioWorkletNode: AudioWorkletNode;
|
|
10
|
-
mediaStream: MediaStream;
|
|
11
|
-
isPaused: boolean;
|
|
12
|
-
mediaStreamDestination: any;
|
|
13
|
-
onStreamReady: Function;
|
|
14
|
-
/**
|
|
15
|
-
* Starts streaming audio by capturing from the microphone and sending it over WebSocket.
|
|
16
|
-
* @param {WebSocket} ws - The WebSocket connection.
|
|
17
|
-
* @returns {Promise<void>}
|
|
18
|
-
*/
|
|
19
|
-
startStreaming(ws: WebSocket): Promise<void>;
|
|
20
|
-
/**
|
|
21
|
-
* Pauses audio streaming.
|
|
22
|
-
*/
|
|
23
|
-
pause(): void;
|
|
24
|
-
/**
|
|
25
|
-
* Resumes audio streaming.
|
|
26
|
-
*/
|
|
27
|
-
resume(): void;
|
|
28
|
-
/**
|
|
29
|
-
* Stops audio streaming and releases resources.
|
|
30
|
-
*/
|
|
31
|
-
stop(): void;
|
|
32
|
-
/**
|
|
33
|
-
* Returns the MediaStream capturing the local audio being recorded.
|
|
34
|
-
* @returns {MediaStream | null}
|
|
35
|
-
*/
|
|
36
|
-
getMediaStream(): MediaStream | null;
|
|
37
|
-
}
|
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
export default class WebSocketManager {
|
|
2
|
-
/**
|
|
3
|
-
* Constructs a new WebSocketManager instance.
|
|
4
|
-
* @param {string} url - The WebSocket URL.
|
|
5
|
-
* @param {string} conversationId - The conversation ID.
|
|
6
|
-
* @param {function} onError - Callback for error events.
|
|
7
|
-
* @param {function} onStart - Callback when the WebSocket starts.
|
|
8
|
-
* @param {function} onTranscriptionReceived - Callback for received transcriptions.
|
|
9
|
-
* @param {function} onAnswerReceived - Callback for received answers.
|
|
10
|
-
* @param {function} onSpeaking - Callback when speaking starts.
|
|
11
|
-
* @param {function} onListening - Callback when listening starts.
|
|
12
|
-
* @param {function} onClosed - Callback when the WebSocket is closed.
|
|
13
|
-
* @param {boolean} voiceEnablement - Flag to enable voice features.
|
|
14
|
-
* @param {Array} tools - Array of tools/functions to be used.
|
|
15
|
-
* @param {string} apiKey - API key for authentication.
|
|
16
|
-
* @param {function} onRemoteStreamAvailable - Callback when remote MediaStream is available.
|
|
17
|
-
* @param {function} onLocalStreamAvailable - Callback when local MediaStream is available.
|
|
18
|
-
* @param {function} onInfo - Callback for info events.
|
|
19
|
-
*/
|
|
20
|
-
constructor(url: string, conversationId: string, onError: Function, onStart: Function, onTranscriptionReceived: Function, onAnswerReceived: Function, onSpeaking: Function, onListening: Function, onClosed: Function, voiceEnablement: boolean, tools: any[], apiKey: string, onRemoteStreamAvailable: Function, onLocalStreamAvailable: Function, onInfo: Function);
|
|
21
|
-
url: string;
|
|
22
|
-
ws: WebSocket;
|
|
23
|
-
isConnected: boolean;
|
|
24
|
-
audioPlayer: AudioPlayer;
|
|
25
|
-
audioRecorder: AudioRecorder;
|
|
26
|
-
last_transcription_date: Date;
|
|
27
|
-
last_voice_byte_date: Date;
|
|
28
|
-
is_media: boolean;
|
|
29
|
-
onErrorCB: Function;
|
|
30
|
-
onStartCB: Function;
|
|
31
|
-
onTranscriptionReceivedCB: Function;
|
|
32
|
-
onAnswerReceivedCB: Function;
|
|
33
|
-
onSpeakingCB: Function;
|
|
34
|
-
onListeningCB: Function;
|
|
35
|
-
onClosedCB: Function;
|
|
36
|
-
voiceEnablement: boolean;
|
|
37
|
-
tools: any[];
|
|
38
|
-
apiKey: string;
|
|
39
|
-
onRemoteStreamAvailable: Function;
|
|
40
|
-
onLocalStreamAvailable: Function;
|
|
41
|
-
onInfoCB: Function;
|
|
42
|
-
/**
|
|
43
|
-
* Sets the volume for AudioPlayer.
|
|
44
|
-
* @param {number} volume - Volume level between 0.0 and 1.0.
|
|
45
|
-
*/
|
|
46
|
-
setVolume(volume: number): void;
|
|
47
|
-
/**
|
|
48
|
-
* Initializes and starts the WebSocket connection, AudioPlayer, and AudioRecorder.
|
|
49
|
-
*/
|
|
50
|
-
startCall(): void;
|
|
51
|
-
/**
|
|
52
|
-
* Handles the WebSocket 'open' event.
|
|
53
|
-
*/
|
|
54
|
-
onOpen(): void;
|
|
55
|
-
/**
|
|
56
|
-
* Handles incoming WebSocket messages.
|
|
57
|
-
* @param {MessageEvent} event - The message event.
|
|
58
|
-
*/
|
|
59
|
-
onMessage(event: MessageEvent): void;
|
|
60
|
-
/**
|
|
61
|
-
* Handles the WebSocket 'close' event.
|
|
62
|
-
* @param {CloseEvent} event - The close event.
|
|
63
|
-
*/
|
|
64
|
-
onClose(event: CloseEvent): void;
|
|
65
|
-
/**
|
|
66
|
-
* Handles the WebSocket 'error' event.
|
|
67
|
-
* @param {Event} error - The error event.
|
|
68
|
-
*/
|
|
69
|
-
onError(error: Event): void;
|
|
70
|
-
/**
|
|
71
|
-
* Ends the WebSocket call, stops AudioPlayer and AudioRecorder.
|
|
72
|
-
*/
|
|
73
|
-
endCall(): void;
|
|
74
|
-
/**
|
|
75
|
-
* Pauses the WebSocket call by pausing AudioPlayer and AudioRecorder.
|
|
76
|
-
*/
|
|
77
|
-
pauseCall(): void;
|
|
78
|
-
/**
|
|
79
|
-
* Resumes the WebSocket call by resuming AudioPlayer and AudioRecorder.
|
|
80
|
-
*/
|
|
81
|
-
resumeCall(): void;
|
|
82
|
-
/**
|
|
83
|
-
* Executes the provided tool functions, supporting both synchronous and asynchronous calls.
|
|
84
|
-
*
|
|
85
|
-
* This function iterates over the given array of tool objects. For each tool of type
|
|
86
|
-
* "function", it attempts to locate the corresponding function by its name in the tools list.
|
|
87
|
-
* The function arguments are parsed from a JSON string and passed to the function.
|
|
88
|
-
* If the function executes successfully, its response is captured; otherwise, an error
|
|
89
|
-
* message is returned. The results are returned in the same order as the input array.
|
|
90
|
-
*
|
|
91
|
-
* @param {Array} tools_array - An array of tool objects to execute. Each object should
|
|
92
|
-
* have the following structure:
|
|
93
|
-
* {
|
|
94
|
-
* id: <unique identifier>,
|
|
95
|
-
* type: "function",
|
|
96
|
-
* function: {
|
|
97
|
-
* name: <string>, // The function's name.
|
|
98
|
-
* arguments: <string> // A JSON string representing the function arguments.
|
|
99
|
-
* }
|
|
100
|
-
* }
|
|
101
|
-
* @returns {Promise<Array>} A promise that resolves to an array of results. Each result object has
|
|
102
|
-
* the structure:
|
|
103
|
-
* {
|
|
104
|
-
* id: <tool id>,
|
|
105
|
-
* function: {
|
|
106
|
-
* name: <function name>,
|
|
107
|
-
* response: <result of the function call or an error message>
|
|
108
|
-
* }
|
|
109
|
-
* }
|
|
110
|
-
*/
|
|
111
|
-
run_tools(tools_array: any[]): Promise<any[]>;
|
|
112
|
-
#private;
|
|
113
|
-
}
|
|
114
|
-
import AudioPlayer from './audio_player';
|
|
115
|
-
import AudioRecorder from './audio_recorder';
|