sarvam-conv-ai-sdk 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +265 -147
- package/dist/package.json +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,207 +1,325 @@
|
|
|
1
|
-
# Sarvam
|
|
1
|
+
# Sarvam Conv AI SDK
|
|
2
2
|
|
|
3
|
-
TypeScript SDK for building real-time voice-to-voice conversational AI applications in
|
|
3
|
+
TypeScript SDK for building real-time voice-to-voice and text-based conversational AI applications in browser and Node.js environments.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- Real-time voice-to-voice conversations in the browser
|
|
8
|
+
- Text-based chat with streaming responses
|
|
9
|
+
- Automatic microphone capture and speaker playback
|
|
10
|
+
- Multi-language support (10 Indian languages + English)
|
|
11
|
+
- WebSocket-based real-time communication
|
|
12
|
+
- Browser and Node.js compatible
|
|
4
13
|
|
|
5
14
|
## Installation
|
|
6
15
|
|
|
7
16
|
```bash
|
|
8
|
-
npm install
|
|
17
|
+
npm install sarvam-conv-ai-sdk
|
|
9
18
|
```
|
|
10
19
|
|
|
11
|
-
For
|
|
20
|
+
For Node.js, also install the `ws` peer dependency:
|
|
21
|
+
|
|
12
22
|
```bash
|
|
13
|
-
npm install
|
|
14
|
-
npm install --save-dev @types/react @types/react-dom
|
|
23
|
+
npm install sarvam-conv-ai-sdk ws
|
|
15
24
|
```
|
|
16
25
|
|
|
17
26
|
## Quick Start
|
|
18
27
|
|
|
19
|
-
|
|
28
|
+
### Voice-to-Voice Conversation (Browser)
|
|
20
29
|
|
|
21
|
-
```
|
|
22
|
-
import React, {
|
|
30
|
+
```typescript
|
|
31
|
+
import React, { useRef, useState } from 'react';
|
|
23
32
|
import {
|
|
24
|
-
|
|
33
|
+
ConversationAgent,
|
|
25
34
|
BrowserAudioInterface,
|
|
26
|
-
InteractionConfig,
|
|
27
35
|
InteractionType,
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
} from '@sarvam/conv-ai-sdk';
|
|
31
|
-
|
|
32
|
-
interface VoiceChatProps {
|
|
33
|
-
apiKey: string;
|
|
34
|
-
appId: string;
|
|
35
|
-
orgId: string;
|
|
36
|
-
workspaceId: string;
|
|
37
|
-
userId?: string;
|
|
38
|
-
}
|
|
36
|
+
ServerTextMsgType,
|
|
37
|
+
} from 'sarvam-conv-ai-sdk';
|
|
39
38
|
|
|
40
|
-
|
|
41
|
-
apiKey,
|
|
42
|
-
appId,
|
|
43
|
-
orgId,
|
|
44
|
-
workspaceId,
|
|
45
|
-
userId = 'react_user',
|
|
46
|
-
}) => {
|
|
39
|
+
function VoiceChat() {
|
|
47
40
|
const [isConnected, setIsConnected] = useState(false);
|
|
48
|
-
const [
|
|
49
|
-
const
|
|
50
|
-
const agentRef = useRef<AsyncSamvaadAgent | null>(null);
|
|
51
|
-
|
|
52
|
-
const handleText = async (msg: ServerTextChunkMsg) => {
|
|
53
|
-
setTranscript(prev => prev + msg.text);
|
|
54
|
-
};
|
|
41
|
+
const [transcript, setTranscript] = useState('');
|
|
42
|
+
const agentRef = useRef<ConversationAgent | null>(null);
|
|
55
43
|
|
|
56
44
|
const startConversation = async () => {
|
|
57
|
-
if (isConnecting || isConnected) return;
|
|
58
|
-
|
|
59
|
-
setIsConnecting(true);
|
|
60
|
-
|
|
61
45
|
try {
|
|
62
|
-
const config: InteractionConfig = {
|
|
63
|
-
user_identifier_type: UserIdentifierType.CUSTOM,
|
|
64
|
-
user_identifier: userId,
|
|
65
|
-
org_id: orgId,
|
|
66
|
-
workspace_id: workspaceId,
|
|
67
|
-
app_id: appId,
|
|
68
|
-
interaction_type: InteractionType.CALL,
|
|
69
|
-
sample_rate: 16000,
|
|
70
|
-
};
|
|
71
|
-
|
|
72
46
|
const audioInterface = new BrowserAudioInterface();
|
|
73
47
|
|
|
74
|
-
const agent = new
|
|
75
|
-
apiKey,
|
|
76
|
-
|
|
77
|
-
|
|
48
|
+
const agent = new ConversationAgent({
|
|
49
|
+
apiKey: 'your_api_key',
|
|
50
|
+
platform: 'browser',
|
|
51
|
+
config: {
|
|
52
|
+
user_identifier_type: 'custom',
|
|
53
|
+
user_identifier: 'user123',
|
|
54
|
+
org_id: 'your_org_id',
|
|
55
|
+
workspace_id: 'your_workspace_id',
|
|
56
|
+
app_id: 'your_app_id',
|
|
57
|
+
interaction_type: InteractionType.CALL,
|
|
58
|
+
sample_rate: 16000,
|
|
59
|
+
},
|
|
78
60
|
audioInterface,
|
|
61
|
+
textCallback: async (msg: ServerTextMsgType) => {
|
|
62
|
+
setTranscript(prev => prev + msg.text);
|
|
63
|
+
},
|
|
64
|
+
startCallback: async () => {
|
|
65
|
+
setIsConnected(true);
|
|
66
|
+
},
|
|
67
|
+
endCallback: async () => {
|
|
68
|
+
setIsConnected(false);
|
|
69
|
+
},
|
|
79
70
|
});
|
|
80
71
|
|
|
81
72
|
agentRef.current = agent;
|
|
82
|
-
|
|
83
73
|
await agent.start();
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if (connected) {
|
|
87
|
-
setIsConnected(true);
|
|
88
|
-
setIsConnecting(false);
|
|
89
|
-
|
|
90
|
-
agent.waitForDisconnect().then(() => {
|
|
91
|
-
setIsConnected(false);
|
|
92
|
-
});
|
|
93
|
-
} else {
|
|
94
|
-
setIsConnecting(false);
|
|
95
|
-
alert('Failed to connect. Please check your credentials.');
|
|
96
|
-
}
|
|
74
|
+
await agent.waitForConnect(10);
|
|
97
75
|
} catch (error) {
|
|
98
|
-
|
|
99
|
-
console.error('Error starting conversation:', error);
|
|
100
|
-
alert(`Error: ${error instanceof Error ? error.message : String(error)}`);
|
|
76
|
+
console.error('Error:', error);
|
|
101
77
|
}
|
|
102
78
|
};
|
|
103
79
|
|
|
104
80
|
const stopConversation = async () => {
|
|
105
81
|
if (agentRef.current) {
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
agentRef.current = null;
|
|
109
|
-
setIsConnected(false);
|
|
110
|
-
setTranscript('');
|
|
111
|
-
} catch (error) {
|
|
112
|
-
console.error('Error stopping conversation:', error);
|
|
113
|
-
}
|
|
82
|
+
await agentRef.current.stop();
|
|
83
|
+
agentRef.current = null;
|
|
114
84
|
}
|
|
115
85
|
};
|
|
116
86
|
|
|
117
|
-
useEffect(() => {
|
|
118
|
-
return () => {
|
|
119
|
-
if (agentRef.current) {
|
|
120
|
-
agentRef.current.stop().catch(console.error);
|
|
121
|
-
}
|
|
122
|
-
};
|
|
123
|
-
}, []);
|
|
124
|
-
|
|
125
87
|
return (
|
|
126
|
-
<div
|
|
88
|
+
<div>
|
|
127
89
|
<h2>Voice Chat</h2>
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
Start Voice Chat
|
|
133
|
-
</button>
|
|
134
|
-
)}
|
|
135
|
-
|
|
136
|
-
{isConnecting && <div>Connecting...</div>}
|
|
137
|
-
|
|
138
|
-
{isConnected && (
|
|
139
|
-
<button onClick={stopConversation}>
|
|
140
|
-
Stop Voice Chat
|
|
141
|
-
</button>
|
|
142
|
-
)}
|
|
143
|
-
</div>
|
|
144
|
-
|
|
145
|
-
{isConnected && (
|
|
146
|
-
<div>
|
|
147
|
-
<strong>Agent Response:</strong>
|
|
148
|
-
<div>{transcript || 'Listening...'}</div>
|
|
149
|
-
</div>
|
|
90
|
+
{!isConnected ? (
|
|
91
|
+
<button onClick={startConversation}>Start Voice Chat</button>
|
|
92
|
+
) : (
|
|
93
|
+
<button onClick={stopConversation}>Stop Voice Chat</button>
|
|
150
94
|
)}
|
|
95
|
+
<div>Transcript: {transcript}</div>
|
|
151
96
|
</div>
|
|
152
97
|
);
|
|
153
|
-
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
export default VoiceChat;
|
|
154
101
|
```
|
|
155
102
|
|
|
156
|
-
|
|
103
|
+
### Text-Based Conversation (Node.js)
|
|
157
104
|
|
|
158
|
-
|
|
105
|
+
```javascript
|
|
106
|
+
const { ConversationAgent, InteractionType } = require('sarvam-conv-ai-sdk');
|
|
107
|
+
|
|
108
|
+
async function main() {
|
|
109
|
+
const agent = new ConversationAgent({
|
|
110
|
+
apiKey: 'your_api_key',
|
|
111
|
+
config: {
|
|
112
|
+
org_id: 'your_org_id',
|
|
113
|
+
workspace_id: 'your_workspace_id',
|
|
114
|
+
app_id: 'your_app_id',
|
|
115
|
+
user_identifier: 'user@example.com',
|
|
116
|
+
user_identifier_type: 'email',
|
|
117
|
+
interaction_type: InteractionType.TEXT,
|
|
118
|
+
sample_rate: 16000,
|
|
119
|
+
},
|
|
120
|
+
textCallback: async (msg) => {
|
|
121
|
+
console.log('Agent:', msg.text);
|
|
122
|
+
},
|
|
123
|
+
startCallback: async () => {
|
|
124
|
+
console.log('Conversation started!');
|
|
125
|
+
},
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
await agent.start();
|
|
129
|
+
const connected = await agent.waitForConnect(10);
|
|
130
|
+
|
|
131
|
+
if (connected) {
|
|
132
|
+
await agent.sendText('Hello, how are you?');
|
|
133
|
+
await agent.waitForDisconnect();
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
main().catch(console.error);
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## API Reference
|
|
141
|
+
|
|
142
|
+
### ConversationAgent
|
|
143
|
+
|
|
144
|
+
The main class for managing conversational AI sessions.
|
|
145
|
+
|
|
146
|
+
#### Constructor Parameters
|
|
159
147
|
|
|
160
148
|
| Parameter | Type | Required | Description |
|
|
161
149
|
| --- | --- | --- | --- |
|
|
162
150
|
| apiKey | string | Yes | API key for authentication |
|
|
163
151
|
| config | InteractionConfig | Yes | Interaction configuration |
|
|
164
|
-
|
|
|
165
|
-
|
|
|
166
|
-
|
|
|
167
|
-
| audioCallback | (msg: ServerAudioChunkMsg) => Promise
|
|
152
|
+
| platform | 'browser' \| 'node' | No | Platform type (auto-detected) |
|
|
153
|
+
| audioInterface | AsyncAudioInterface | No | Audio interface for voice interactions |
|
|
154
|
+
| textCallback | (msg: ServerTextMsgType) => Promise\<void\> | No | Receives streaming text chunks |
|
|
155
|
+
| audioCallback | (msg: ServerAudioChunkMsg) => Promise\<void\> | No | Receives audio chunks |
|
|
156
|
+
| eventCallback | (event: ServerEventBase) => Promise\<void\> | No | Receives events |
|
|
157
|
+
| startCallback | () => Promise\<void\> | No | Called when conversation starts |
|
|
158
|
+
| endCallback | () => Promise\<void\> | No | Called when conversation ends |
|
|
159
|
+
| baseUrl | string | No | Override base URL |
|
|
160
|
+
| proxy | boolean | No | Enable proxy mode |
|
|
161
|
+
|
|
162
|
+
#### Methods
|
|
163
|
+
|
|
164
|
+
- `async start()` - Start the conversation session
|
|
165
|
+
- `async stop()` - Stop the conversation and cleanup
|
|
166
|
+
- `async waitForConnect(timeout?)` - Wait for the connection, with an optional timeout in seconds (resolves to `true` once connected, `false` on timeout)
|
|
167
|
+
- `async waitForDisconnect()` - Wait until disconnected
|
|
168
|
+
- `isConnected()` - Check connection status
|
|
169
|
+
- `getInteractionId()` - Get current interaction ID
|
|
170
|
+
- `async sendAudio(audioData)` - Send raw audio (voice mode only)
|
|
171
|
+
- `async sendText(text)` - Send text message (text mode only)
|
|
172
|
+
- `getAgentType()` - Get agent type ('voice' or 'text')
|
|
168
173
|
|
|
169
174
|
### InteractionConfig
|
|
170
175
|
|
|
171
|
-
Required
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
176
|
+
#### Required Fields
|
|
177
|
+
|
|
178
|
+
| Field | Type | Description |
|
|
179
|
+
| --- | --- | --- |
|
|
180
|
+
| user_identifier_type | string | One of: 'custom', 'email', 'phone_number', 'unknown' |
|
|
181
|
+
| user_identifier | string | User identifier value |
|
|
182
|
+
| org_id | string | Your organization ID |
|
|
183
|
+
| workspace_id | string | Your workspace ID |
|
|
184
|
+
| app_id | string | The target application ID |
|
|
185
|
+
| interaction_type | InteractionType | InteractionType.CALL or InteractionType.TEXT |
|
|
186
|
+
| sample_rate | number | Audio sample rate: 8000 or 16000 |
|
|
187
|
+
|
|
188
|
+
#### Optional Fields
|
|
189
|
+
|
|
190
|
+
| Field | Type | Description |
|
|
191
|
+
| --- | --- | --- |
|
|
192
|
+
| version | number | App version (uses latest if not provided) |
|
|
193
|
+
| agent_variables | Record\<string, any\> | Key-value pairs for agent context |
|
|
194
|
+
| initial_language_name | SarvamToolLanguageName | Starting language |
|
|
195
|
+
| initial_state_name | string | Starting state name |
|
|
196
|
+
| initial_bot_message | string | First message from agent |
|
|
197
|
+
|
|
198
|
+
### BrowserAudioInterface
|
|
199
|
+
|
|
200
|
+
Handles microphone capture and speaker playback in browser environments.
|
|
201
|
+
|
|
202
|
+
```typescript
|
|
203
|
+
import { BrowserAudioInterface } from 'sarvam-conv-ai-sdk';
|
|
204
|
+
|
|
205
|
+
const audioInterface = new BrowserAudioInterface();
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
**Features:**
|
|
209
|
+
- Automatic microphone access and audio capture
|
|
210
|
+
- Real-time audio streaming at 16kHz
|
|
211
|
+
- Automatic speaker playback
|
|
212
|
+
- Handles user interruptions
|
|
213
|
+
|
|
214
|
+
**Requirements:**
|
|
215
|
+
- HTTPS connection (required for microphone access)
|
|
216
|
+
- Modern browser with WebAudio API support
|
|
217
|
+
- User permission for microphone access
|
|
218
|
+
|
|
219
|
+
## Event Handling
|
|
220
|
+
|
|
221
|
+
### Text Callback
|
|
222
|
+
|
|
223
|
+
Receives streaming text chunks from the agent:
|
|
224
|
+
|
|
225
|
+
```typescript
|
|
226
|
+
textCallback: async (msg: ServerTextMsgType) => {
|
|
227
|
+
console.log('Agent says:', msg.text);
|
|
228
|
+
}
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### Event Callback
|
|
232
|
+
|
|
233
|
+
Receives various events during conversation:
|
|
234
|
+
|
|
235
|
+
```typescript
|
|
236
|
+
eventCallback: async (event: ServerEventBase) => {
|
|
237
|
+
switch (event.type) {
|
|
238
|
+
case 'server.action.interaction_connected':
|
|
239
|
+
console.log('Connected');
|
|
240
|
+
break;
|
|
241
|
+
case 'server.event.user_interrupt':
|
|
242
|
+
console.log('User interrupted');
|
|
243
|
+
break;
|
|
244
|
+
case 'server.action.interaction_end':
|
|
245
|
+
console.log('Conversation ended');
|
|
246
|
+
break;
|
|
247
|
+
case 'server.event.user_speech_start':
|
|
248
|
+
console.log('User started speaking');
|
|
249
|
+
break;
|
|
250
|
+
case 'server.event.user_speech_end':
|
|
251
|
+
console.log('User stopped speaking');
|
|
252
|
+
break;
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
## Supported Languages
|
|
258
|
+
|
|
259
|
+
The SDK supports 10 Indian languages plus English:
|
|
260
|
+
|
|
261
|
+
```typescript
|
|
262
|
+
import { SarvamToolLanguageName } from 'sarvam-conv-ai-sdk';
|
|
263
|
+
|
|
264
|
+
// Available: BENGALI, GUJARATI, KANNADA, MALAYALAM, TAMIL,
|
|
265
|
+
// TELUGU, PUNJABI, ODIA, MARATHI, HINDI, ENGLISH
|
|
266
|
+
|
|
267
|
+
const config = {
|
|
268
|
+
initial_language_name: SarvamToolLanguageName.HINDI,
|
|
269
|
+
};
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## Best Practices
|
|
273
|
+
|
|
274
|
+
**Resource Cleanup:** Always clean up resources when the component unmounts
|
|
275
|
+
|
|
276
|
+
```typescript
|
|
277
|
+
useEffect(() => {
|
|
278
|
+
return () => agentRef.current?.stop().catch(console.error);
|
|
279
|
+
}, []);
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
**Connection Timeout:** Specify a timeout when waiting for the connection
|
|
283
|
+
|
|
284
|
+
```typescript
|
|
285
|
+
const connected = await agent.waitForConnect(10); // 10 seconds
|
|
286
|
+
if (!connected) console.error('Connection timeout');
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
**Error Handling:** Wrap agent operations in try-catch blocks
|
|
290
|
+
|
|
291
|
+
```typescript
|
|
292
|
+
try {
|
|
293
|
+
await agent.start();
|
|
294
|
+
await agent.waitForConnect(10);
|
|
295
|
+
} catch (error) {
|
|
296
|
+
console.error('Error:', error);
|
|
297
|
+
await agent.stop();
|
|
298
|
+
}
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
**Secure API Keys:** Use environment variables or a backend proxy
|
|
302
|
+
|
|
303
|
+
```typescript
|
|
304
|
+
// Use environment variables
|
|
305
|
+
const apiKey = import.meta.env.VITE_SARVAM_API_KEY;
|
|
306
|
+
|
|
307
|
+
// Or use backend proxy
|
|
308
|
+
const agent = new ConversationAgent({ proxy: true, baseUrl: '/api/proxy/' });
|
|
309
|
+
```
|
|
194
310
|
|
|
195
311
|
## Examples
|
|
196
312
|
|
|
197
|
-
|
|
198
|
-
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
313
|
+
- **Web Example** - See `examples/web` for a complete React + TypeScript application
|
|
314
|
+
- **Node.js Example** - See `examples/nodejs/simple-text-chat.js` for a command-line text chat
|
|
315
|
+
|
|
316
|
+
## Troubleshooting
|
|
317
|
+
|
|
318
|
+
**Microphone Not Working:** Ensure the page is served over HTTPS, check browser permissions, and verify the microphone is not in use by another app
|
|
319
|
+
|
|
320
|
+
**Connection Timeout:** Check network connectivity, verify the API key is valid, and ensure the app_id exists and has a committed version
|
|
203
321
|
|
|
204
|
-
|
|
322
|
+
**Audio Quality Issues:** Verify the sample rate matches the configuration (8000 or 16000) and ensure the audio format is LINEAR16 (16-bit PCM mono)
|
|
205
323
|
|
|
206
324
|
## License
|
|
207
325
|
|
package/dist/package.json
CHANGED