@unith-ai/core-client 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +429 -0
- package/package.json +1 -1
package/README.md
ADDED
@@ -0,0 +1,429 @@
# Unith Core Client TypeScript SDK

An SDK for building complex digital human experiences with JavaScript/TypeScript.

## Installation

Install the package in your project with the package manager of your choice:
```shell
npm install @unith-ai/core-client
# or
yarn add @unith-ai/core-client
# or
pnpm install @unith-ai/core-client
```

## Usage

This library is designed for use in plain JavaScript applications or as a foundation for framework-specific implementations. Before integrating it directly, check whether a dedicated library already exists for your framework; that said, it is compatible with any project built on JavaScript.

### Initialize Digital Human

First, initialize the Conversation instance:
```js
const conversation = await Conversation.startDigitalHuman(options);
```

This will establish a WebSocket connection and initialize the digital human avatar with audio/video streaming capabilities.

#### Session Configuration

The options passed to `startDigitalHuman` specify how the session is established:
```js
const conversation = await Conversation.startDigitalHuman({
  orgId: "your-org-id",
  headId: "your-head-id",
  username: "anonymous",
  password: "Password1",
  environment: "production", // or "development"
  element: document.getElementById("video-container"), // HTML element for video output
  apiKey: "your-api-key",
  mode: "default",
  frameRate: 30,
  streamType: "jpg", // or "vp8"
  quality: "high",
  crop: false,
  showIdle: false,
  language: "en",
  allowWakeLock: true,
  fadeTransitionsType: VideoTransitionType.NONE,
});
```

#### Required Parameters

- **orgId** - Your organization ID
- **headId** - The digital human head ID to use
- **element** - HTML element where the video will be rendered
- **username** - Authentication username (default: "anonymous")
- **password** - Authentication password (default: "Password1")

#### Optional Parameters

- **environment** - API environment ("production" or "development", default: "production")
- **apiKey** - API key for authentication (default: "")
- **mode** - Conversation mode (default: "default")
- **frameRate** - Video frame rate (default: 30)
- **streamType** - Video stream format ("jpg" or "vp8", default: "jpg")
- **quality** - Video quality ("high", "medium", or "low", default: "high")
- **crop** - Whether to crop the video (default: false)
- **showIdle** - Whether to show idle video when not speaking (default: false)
- **language** - Language code for the conversation (default: browser language)
- **allowWakeLock** - Prevent the screen from sleeping during the conversation (default: true)
- **fadeTransitionsType** - Video transition type (default: VideoTransitionType.NONE)

#### Optional Callbacks

Register callbacks to handle various events:
```js
const conversation = await Conversation.startDigitalHuman({
  // ... required options
  onConnect: ({ userId, headInfo, microphoneAccess }) => {
    console.log("Connected:", userId);
  },
  onDisconnect: (details) => {
    console.log("Disconnected:", details.reason);
  },
  onStatusChange: ({ status }) => {
    console.log("Status changed:", status); // "connecting", "connected", "disconnecting", "disconnected"
  },
  onText: (event) => {
    console.log("User message:", event.text);
  },
  onResponse: (event) => {
    console.log("AI response:", event.text);
  },
  onJoin: (event) => {
    console.log("Joined conversation:", event);
  },
  onStreaming: (event) => {
    console.log("Streaming event:", event.type); // "audio_frame", "video_frame", "metadata", "cache", "error"
  },
  onMuteStatusChange: ({ isMuted }) => {
    console.log("Mute status:", isMuted);
  },
  onSpeakingStart: () => {
    console.log("Digital human started speaking");
  },
  onSpeakingEnd: () => {
    console.log("Digital human stopped speaking");
  },
  onStoppingEnd: () => {
    console.log("Response stopped");
  },
  onTimeout: () => {
    console.log("Session timed out");
  },
  onTimeoutWarning: () => {
    console.log("Session will timeout soon");
  },
  onKeepSession: ({ granted }) => {
    console.log("Keep session request:", granted);
  },
  onError: ({ message, endConversation, type }) => {
    console.error("Error:", message);
    // type: "toast" or "modal"
    // endConversation: true if the session should be restarted
  },
});
```

#### Event Types

- **onConnect** - Called when the WebSocket connection is established
- **onDisconnect** - Called when the connection is closed
- **onStatusChange** - Called when the connection status changes
- **onText** - Called when a user text message is received
- **onResponse** - Called when the AI generates a response
- **onJoin** - Called when successfully joining the conversation
- **onStreaming** - Called for audio/video frame events
- **onMuteStatusChange** - Called when the mute status changes
- **onSpeakingStart** - Called when the digital human starts speaking
- **onSpeakingEnd** - Called when the digital human finishes speaking
- **onStoppingEnd** - Called when a response is manually stopped
- **onTimeout** - Called when the session times out due to inactivity
- **onTimeoutWarning** - Called before the session times out
- **onKeepSession** - Called when a keep-alive request is processed
- **onError** - Called when an error occurs

### Getting Background Video

Retrieve the idle background video URL for use in welcome screens or widget mode:
```js
const videoUrl = await Conversation.getBackgroundVideo({
  orgId: "your-org-id",
  headId: "your-head-id",
  environment: "production",
});
```

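For example, the returned URL can be attached to a plain, muted, looping `<video>` element to show the idle head on a welcome screen before the session starts. This is just a sketch; the `welcome-video` element is an assumed placeholder in your own markup:
```js
// Sketch: show the idle background video on a welcome screen.
// Assumes your page contains <video id="welcome-video" muted loop playsinline></video>.
const videoUrl = await Conversation.getBackgroundVideo({
  orgId: "your-org-id",
  headId: "your-head-id",
  environment: "production",
});

const welcomeVideo = document.getElementById("welcome-video");
welcomeVideo.src = videoUrl;
await welcomeVideo.play(); // muted autoplay is generally allowed by browsers
```
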
### Instance Methods

#### startSession()

Start the conversation session and begin audio playback:
```js
await conversation.startSession();
```

This method should be called after a user interaction so that the audio context is properly initialized, especially on mobile browsers.

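A minimal sketch of that pattern, assuming a hypothetical `start-button` element in your markup and the `conversation` instance created above:
```js
// Start audio playback only after an explicit user gesture (required by
// autoplay policies on most mobile browsers).
document.getElementById("start-button").addEventListener("click", async () => {
  await conversation.startSession();
});
```
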
#### sendMessage(message)

Send a text message to the digital human:
```js
conversation.sendMessage({
  id: 1,
  timestamp: new Date().toISOString(),
  speaker: "user",
  text: "Hello, how are you?",
  isSent: false,
  user_id: "user-123",
  username: "John Doe",
  event: EventType.TEXT,
  visible: true,
});
```

#### keepSession(message)

Send a keep-alive message to prevent session timeout:
```js
conversation.keepSession({
  id: 1,
  timestamp: new Date().toISOString(),
  speaker: "user",
  text: "",
  isSent: false,
  user_id: "user-123",
  username: "John Doe",
  event: EventType.KEEP_SESSION,
  visible: true,
});
```

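A natural place to call this is from the `onTimeoutWarning` callback. The sketch below assumes the `conversation` instance is kept in a variable that is in scope when the warning fires, and uses `window.confirm` as a stand-in for your own "stay connected?" UI:
```js
// Sketch: pass this as the onTimeoutWarning option to startDigitalHuman.
const handleTimeoutWarning = () => {
  if (!window.confirm("Still there? The session will time out soon.")) return;
  conversation.keepSession({
    id: Date.now(),
    timestamp: new Date().toISOString(),
    speaker: "user",
    text: "",
    isSent: false,
    user_id: conversation.getUserId(),
    username: "User",
    event: EventType.KEEP_SESSION,
    visible: true,
  });
};
```
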
#### stopCurrentResponse()

Stop the current response from the digital human:
```js
conversation.stopCurrentResponse();
```

This clears both the audio and video queues and returns the digital human to the idle state.

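One place this is useful is a simple barge-in flow: stop whatever the digital human is currently saying before sending the user's next message. A sketch, assuming the `conversation` instance and a message object shaped as in the `sendMessage(message)` example above:
```js
// Sketch: interrupt the current answer before asking a new question.
function askQuestion(message) {
  conversation.stopCurrentResponse(); // clear pending audio/video
  conversation.sendMessage(message);  // message built as shown for sendMessage(message)
}
```
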
#### toggleMuteStatus()

Toggle the mute status of the audio output:
```js
const volume = await conversation.toggleMuteStatus();
console.log("New volume:", volume); // 0 for muted, 1 for unmuted
```

#### getUserId()

Get the current user's ID:
```js
const userId = conversation.getUserId();
```

#### endSession()

End the conversation session and clean up resources:
```js
await conversation.endSession();
```

This closes the WebSocket connection, releases the wake lock, and destroys audio/video outputs.

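To avoid leaking connections when the user navigates away, one reasonable pattern is to end the session on `pagehide`. A sketch, assuming `conversation` is in scope:
```js
// Sketch: release the connection and media resources when the page is left.
window.addEventListener("pagehide", () => {
  conversation?.endSession();
});
```
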
#### initializeMicrophone()

Initialize the microphone for speech recognition (ASR):
```js
const asrToken = await conversation.initializeMicrophone(
  (result) => {
    console.log("Speech result:", result);
  },
  (error) => {
    console.error("Microphone error:", error);
  },
  (status) => {
    console.log("Microphone status:", status);
  }
);
```

### Message Structure

Messages sent to and from the digital human follow this structure:
```typescript
interface Message {
  id: number;
  timestamp: string; // ISO format
  speaker: "user" | "backend";
  text: string;
  isSent: boolean;
  user_id: string;
  username: string;
  event: EventType.TEXT | EventType.KEEP_SESSION;
  visible: boolean;
  session_id?: string; // Auto-generated
}
```

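Since only `text`, `id`, and `timestamp` usually change between outgoing messages, a small helper can reduce repetition when calling `sendMessage`. This is a convenience sketch, not part of the SDK; the `buildTextMessage` name and the running counter are assumptions, and `EventType` is imported from the package:
```js
// Sketch: build a user text message with the fields documented above.
let nextMessageId = 0;

function buildTextMessage(conversation, text) {
  return {
    id: nextMessageId++,
    timestamp: new Date().toISOString(),
    speaker: "user",
    text,
    isSent: false,
    user_id: conversation.getUserId(),
    username: "User",
    event: EventType.TEXT,
    visible: true,
  };
}

// Usage:
conversation.sendMessage(buildTextMessage(conversation, "Hello!"));
```
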
### Event Types

The SDK defines the following event types:
```typescript
enum EventType {
  TEXT = "text",
  RESPONSE = "response",
  JOIN = "join",
  STREAMING = "streaming",
  BINARY = "binary",
  TIMEOUT_WARNING = "timeout_warning",
  TIME_OUT = "timeout",
  KEEP_SESSION = "keep_session",
}
```

### Streaming Event Types

Streaming events can have the following types:

- **audio_frame** - Audio data for playback
- **video_frame** - Video frame data
- **metadata** - Stream control metadata (start/end)
- **cache** - Cached video response
- **error** - Streaming error occurred

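If you need to react to individual streaming events (for example to drive a loading indicator), you can branch on `event.type` inside the `onStreaming` callback. Only the `type` field is documented here, so this sketch deliberately uses nothing else:
```js
// Sketch: a handler you could pass as the onStreaming option.
const handleStreaming = (event) => {
  switch (event.type) {
    case "metadata":
      console.log("Stream control event (start/end)");
      break;
    case "audio_frame":
    case "video_frame":
      // Frames arrive continuously while the digital human is speaking.
      break;
    case "cache":
      console.log("Cached video response received");
      break;
    case "error":
      console.error("A streaming error occurred");
      break;
  }
};
```
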
### Error Handling

Always handle errors appropriately:
```js
try {
  const conversation = await Conversation.startDigitalHuman({
    orgId: "your-org-id",
    headId: "your-head-id",
    element: videoElement,
    onError: ({ message, endConversation, type }) => {
      if (type === "toast") {
        // Show toast notification
        showToast(message);
        if (endConversation) {
          // Restart the session
          restartSession();
        }
      } else if (type === "modal") {
        // Show modal dialog
        showModal(message);
      }
    },
  });
} catch (error) {
  console.error("Failed to start digital human:", error);
}
```

### Common Error Types

The SDK handles several error scenarios:

- **resource_exhausted** - Server at capacity
- **deadline_exceeded** - Request timeout
- **inactivity_timeout** - Session inactive for too long
- **connection** - WebSocket connection failed

## Framework Integration Examples

### React/Preact Example
```jsx
import { useEffect, useRef, useState } from "react";
import { Conversation, EventType } from "@unith-ai/core-client";

function DigitalHuman() {
  const videoRef = useRef(null);
  const conversationRef = useRef(null);
  const [status, setStatus] = useState("disconnected");
  const [messages, setMessages] = useState([]);

  useEffect(() => {
    const startConversation = async () => {
      try {
        const conversation = await Conversation.startDigitalHuman({
          orgId: "your-org-id",
          headId: "your-head-id",
          element: videoRef.current,
          onStatusChange: ({ status }) => setStatus(status),
          onText: (event) => {
            setMessages((prev) => [...prev, event]);
          },
          onResponse: (event) => {
            setMessages((prev) => [...prev, event]);
          },
        });

        conversationRef.current = conversation;
        await conversation.startSession();
      } catch (error) {
        console.error("Failed to start:", error);
      }
    };

    startConversation();

    return () => {
      conversationRef.current?.endSession();
    };
  }, []);

  const sendMessage = async (text) => {
    if (!conversationRef.current) return;

    await conversationRef.current.sendMessage({
      id: messages.length,
      timestamp: new Date().toISOString(),
      speaker: "user",
      text,
      isSent: false,
      user_id: conversationRef.current.getUserId(),
      username: "User",
      event: EventType.TEXT,
      visible: true,
    });
  };

  return (
    <div>
      <div ref={videoRef} style={{ width: "100%", height: "600px" }} />
      <div>Status: {status}</div>
      <button onClick={() => sendMessage("Hello!")}>Send Message</button>
    </div>
  );
}
```

## TypeScript Support

Full TypeScript types are included:
```typescript
import {
  Conversation,
  EventType,
  Status,
  HeadType,
  VideoTransitionType,
  type ConversationOptions,
  type Message,
  type IncomingSocketEvent,
} from "@unith-ai/core-client";
```

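With the exported types, outgoing messages can be checked at compile time. A small sketch using the `Message` shape documented in the Message Structure section (the field values are placeholders):
```typescript
import { EventType, type Message } from "@unith-ai/core-client";

// Sketch: a typed outgoing message; the compiler enforces the documented Message shape.
const greeting: Message = {
  id: 1,
  timestamp: new Date().toISOString(),
  speaker: "user",
  text: "Hello!",
  isSent: false,
  user_id: "user-123",
  username: "User",
  event: EventType.TEXT,
  visible: true,
};
```

Passing `greeting` to `conversation.sendMessage` then type-checks against the same interface.
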
## Development

Please refer to the README.md file in the root of this repository.

## Contributing

Please create an issue first to discuss proposed changes. Any contributions are welcome!

Remember, if merged, your code will be used as part of an MIT-licensed project. By submitting a pull request, you consent to your code being integrated into this library.