@spatialwalk/avatarkit 1.0.0-beta.21 → 1.0.0-beta.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -2
- package/README.md +53 -190
- package/dist/{StreamingAudioPlayer-DEXcuhRW.js → StreamingAudioPlayer-PkzxBP93.js} +61 -42
- package/dist/StreamingAudioPlayer-PkzxBP93.js.map +1 -0
- package/dist/audio/AnimationPlayer.d.ts +11 -0
- package/dist/audio/AnimationPlayer.d.ts.map +1 -1
- package/dist/audio/StreamingAudioPlayer.d.ts +13 -0
- package/dist/audio/StreamingAudioPlayer.d.ts.map +1 -1
- package/dist/core/AvatarController.d.ts +21 -22
- package/dist/core/AvatarController.d.ts.map +1 -1
- package/dist/core/AvatarKit.d.ts.map +1 -1
- package/dist/core/AvatarView.d.ts +0 -6
- package/dist/core/AvatarView.d.ts.map +1 -1
- package/dist/{index-ChKhyUK4.js → index-DYf1u8L7.js} +475 -429
- package/dist/index-DYf1u8L7.js.map +1 -0
- package/dist/index.js +11 -11
- package/dist/types/index.d.ts +10 -19
- package/dist/types/index.d.ts.map +1 -1
- package/dist/vanilla/vite.config.d.ts.map +1 -1
- package/package.json +1 -1
- package/dist/StreamingAudioPlayer-DEXcuhRW.js.map +0 -1
- package/dist/index-ChKhyUK4.js.map +0 -1
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.0.0-beta.23] - 2025-01-26
|
|
9
|
+
|
|
10
|
+
### 🔧 API Changes
|
|
11
|
+
- **Breaking Change** - The `playback()` method is no longer supported and has been removed from the public API
|
|
12
|
+
|
|
13
|
+
## [1.0.0-beta.22] - 2025-01-26
|
|
14
|
+
|
|
15
|
+
### 🔧 API Changes
|
|
16
|
+
- **State Callback Renamed** - `onAvatarState` has been renamed to `onConversationState` for better clarity
|
|
17
|
+
- The callback now uses `ConversationState` enum with states: `idle` and `playing`
|
|
18
|
+
- **Environment Enum Updated** - `Environment.us` has been renamed to `Environment.intl` for better internationalization support
|
|
19
|
+
- All references to `Environment.us` should be updated to `Environment.intl`
|
|
20
|
+
- Remote config endpoints now use `intl` instead of `us`
|
|
21
|
+
|
|
22
|
+
### ✨ New Features
|
|
23
|
+
- **Volume Control** - Added volume control API for audio playback
|
|
24
|
+
- `setVolume(volume: number)` - Set audio volume (0.0 to 1.0)
|
|
25
|
+
- `getVolume(): number` - Get current audio volume
|
|
26
|
+
- Volume control only affects the avatar's audio player, not system volume
|
|
27
|
+
- Volume changes take effect immediately, including for currently playing audio
|
|
28
|
+
|
|
8
29
|
## [1.0.0-beta.21] - 2025-01-25
|
|
9
30
|
|
|
10
31
|
### ✨ New Features
|
|
@@ -221,7 +242,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
221
242
|
## [1.0.0-beta.5] - 2025-11-14
|
|
222
243
|
|
|
223
244
|
### 🐛 Bug Fixes
|
|
224
|
-
- Fixed missing `
|
|
245
|
+
- Fixed missing `DrivingServiceMode` enum export in published package
|
|
225
246
|
|
|
226
247
|
---
|
|
227
248
|
|
|
@@ -286,7 +307,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
286
307
|
// New API
|
|
287
308
|
new AvatarView(avatar, {
|
|
288
309
|
container: container,
|
|
289
|
-
playbackMode:
|
|
310
|
+
playbackMode: DrivingServiceMode.sdk // or DrivingServiceMode.host
|
|
290
311
|
})
|
|
291
312
|
```
|
|
292
313
|
|
package/README.md
CHANGED
|
@@ -28,11 +28,11 @@ import {
|
|
|
28
28
|
AvatarManager,
|
|
29
29
|
AvatarView,
|
|
30
30
|
Configuration,
|
|
31
|
-
Environment
|
|
31
|
+
Environment,
|
|
32
|
+
DrivingServiceMode
|
|
32
33
|
} from '@spatialwalk/avatarkit'
|
|
33
34
|
|
|
34
35
|
// 1. Initialize SDK
|
|
35
|
-
import { DrivingServiceMode } from '@spatialwalk/avatarkit'
|
|
36
36
|
|
|
37
37
|
const configuration: Configuration = {
|
|
38
38
|
environment: Environment.test,
|
|
@@ -62,19 +62,15 @@ const avatarView = new AvatarView(avatar, container)
|
|
|
62
62
|
// 4. Start real-time communication (SDK mode only)
|
|
63
63
|
await avatarView.avatarController.start()
|
|
64
64
|
|
|
65
|
-
// 5. Send audio data (SDK mode)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
const audioUint8 = new Uint8Array(1024) // Example: 16kHz PCM16 audio data (512 samples = 1024 bytes)
|
|
69
|
-
const audioData = audioUint8.slice().buffer // Simplified conversion, works for ArrayBuffer and SharedArrayBuffer
|
|
70
|
-
avatarView.avatarController.send(audioData, false) // Send audio data, will automatically start playing after accumulating enough data
|
|
65
|
+
// 5. Send audio data (SDK mode, must be 16kHz mono PCM16 format)
|
|
66
|
+
const audioData = new ArrayBuffer(1024) // Example: 16kHz PCM16 audio data
|
|
67
|
+
avatarView.avatarController.send(audioData, false) // Send audio data
|
|
71
68
|
avatarView.avatarController.send(audioData, true) // end=true marks the end of current conversation round
|
|
72
69
|
```
|
|
73
70
|
|
|
74
71
|
### Host Mode Example
|
|
75
72
|
|
|
76
73
|
```typescript
|
|
77
|
-
import { AvatarPlaybackMode } from '@spatialwalk/avatarkit'
|
|
78
74
|
|
|
79
75
|
// 1-3. Same as SDK mode (initialize SDK, load character)
|
|
80
76
|
|
|
@@ -83,22 +79,9 @@ const container = document.getElementById('avatar-container')
|
|
|
83
79
|
const avatarView = new AvatarView(avatar, container)
|
|
84
80
|
|
|
85
81
|
// 4. Host Mode Workflow:
|
|
86
|
-
//
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
// Option A: Playback existing audio and animation data (replay mode)
|
|
91
|
-
const initialAudioChunks = [{ data: audioData1, isLast: false }, { data: audioData2, isLast: false }]
|
|
92
|
-
const initialKeyframes = animationData1 // Animation keyframes from your service
|
|
93
|
-
// Step 1: Send audio first to get conversationId
|
|
94
|
-
const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
|
|
95
|
-
|
|
96
|
-
// Option B: Stream new audio and animation data (start a new session directly)
|
|
97
|
-
// Step 1: Send audio data first to get conversationId (automatically generates conversationId if starting new session)
|
|
98
|
-
const currentConversationId = avatarView.avatarController.yieldAudioData(audioData3, false)
|
|
99
|
-
// Step 2: Use the conversationId to send animation data (mismatched conversationId will be discarded)
|
|
100
|
-
avatarView.avatarController.yieldFramesData(animationData2, currentConversationId || conversationId)
|
|
101
|
-
// Note: To start playback, you need to call playback() with the accumulated data, or ensure enough audio data is sent
|
|
82
|
+
// Send audio data first to get conversationId, then use it to send animation data
|
|
83
|
+
const conversationId = avatarView.avatarController.yieldAudioData(audioData, false)
|
|
84
|
+
avatarView.avatarController.yieldFramesData(animationData, conversationId)
|
|
102
85
|
```
|
|
103
86
|
|
|
104
87
|
### Complete Examples
|
|
@@ -186,15 +169,9 @@ RenderSystem → WebGPU/WebGL → Canvas rendering
|
|
|
186
169
|
```
|
|
187
170
|
External data source (audio + animation)
|
|
188
171
|
↓
|
|
189
|
-
Step 1: Send audio data FIRST to get conversationId
|
|
190
|
-
↓
|
|
191
|
-
AvatarController.playback(initialAudio, initialKeyframes) // Returns conversationId
|
|
192
|
-
OR
|
|
193
172
|
AvatarController.yieldAudioData(audioChunk) // Returns conversationId
|
|
194
173
|
↓
|
|
195
|
-
|
|
196
|
-
↓
|
|
197
|
-
AvatarController.yieldFramesData(keyframes, conversationId) // Requires conversationId
|
|
174
|
+
AvatarController.yieldFramesData(keyframes, conversationId)
|
|
198
175
|
↓
|
|
199
176
|
AvatarController → AnimationPlayer (synchronized playback)
|
|
200
177
|
↓
|
|
@@ -205,10 +182,6 @@ AvatarController (playback loop) → AvatarView.renderRealtimeFrame()
|
|
|
205
182
|
RenderSystem → WebGPU/WebGL → Canvas rendering
|
|
206
183
|
```
|
|
207
184
|
|
|
208
|
-
**Note:**
|
|
209
|
-
- In SDK mode, users provide audio data, SDK handles network communication and animation data reception
|
|
210
|
-
- In Host mode, users provide both audio and animation data, SDK handles synchronized playback only
|
|
211
|
-
|
|
212
185
|
### Audio Format Requirements
|
|
213
186
|
|
|
214
187
|
**⚠️ Important:** The SDK requires audio data to be in **16kHz mono PCM16** format:
|
|
@@ -288,21 +261,28 @@ manager.clearCache()
|
|
|
288
261
|
|
|
289
262
|
3D rendering view (rendering layer), responsible for 3D rendering only. Internally automatically creates and manages `AvatarController`.
|
|
290
263
|
|
|
291
|
-
|
|
264
|
+
```typescript
|
|
265
|
+
constructor(avatar: Avatar, container: HTMLElement)
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
**Parameters:**
|
|
269
|
+
- `avatar`: Avatar instance
|
|
270
|
+
- `container`: Canvas container element (required)
|
|
271
|
+
- Canvas automatically uses the full size of the container (width and height)
|
|
272
|
+
- Canvas aspect ratio adapts to the container size - set the container size to control the aspect ratio
|
|
273
|
+
- Canvas is automatically added to the container
|
|
274
|
+
- SDK automatically handles resize events via ResizeObserver
|
|
275
|
+
|
|
276
|
+
**Playback Mode:**
|
|
277
|
+
- The playback mode is determined by `drivingServiceMode` in `AvatarKit.initialize()` configuration
|
|
292
278
|
- The playback mode is fixed when creating `AvatarView` and persists throughout its lifecycle
|
|
293
279
|
- Cannot be changed after creation
|
|
294
280
|
|
|
295
281
|
```typescript
|
|
296
|
-
import { AvatarPlaybackMode } from '@spatialwalk/avatarkit'
|
|
297
|
-
|
|
298
282
|
// Create view (Canvas is automatically added to container)
|
|
299
|
-
// Create view (playback mode is determined by drivingServiceMode in AvatarKit configuration)
|
|
300
283
|
const container = document.getElementById('avatar-container')
|
|
301
284
|
const avatarView = new AvatarView(avatar, container)
|
|
302
285
|
|
|
303
|
-
// Get playback mode
|
|
304
|
-
const mode = avatarView.playbackMode // 'network' | 'external'
|
|
305
|
-
|
|
306
286
|
// Wait for first frame to render
|
|
307
287
|
await avatarView.ready // Promise that resolves when the first frame is rendered
|
|
308
288
|
|
|
@@ -324,10 +304,8 @@ const newAvatar = await avatarManager.load('new-character-id')
|
|
|
324
304
|
// Create new AvatarView
|
|
325
305
|
currentAvatarView = new AvatarView(newAvatar, container)
|
|
326
306
|
|
|
327
|
-
// SDK mode: start connection
|
|
328
|
-
if (currentAvatarView.playbackMode === AvatarPlaybackMode.network) {
|
|
307
|
+
// SDK mode: start connection (will throw error if not in SDK mode)
|
|
329
308
|
await currentAvatarView.controller.start()
|
|
330
|
-
}
|
|
331
309
|
```
|
|
332
310
|
|
|
333
311
|
### AvatarController
|
|
@@ -342,14 +320,9 @@ Audio/animation playback controller (playback layer), manages synchronized playb
|
|
|
342
320
|
// Start WebSocket service
|
|
343
321
|
await avatarView.avatarController.start()
|
|
344
322
|
|
|
345
|
-
// Send audio data
|
|
323
|
+
// Send audio data (must be 16kHz mono PCM16 format)
|
|
346
324
|
const conversationId = avatarView.avatarController.send(audioData: ArrayBuffer, end: boolean)
|
|
347
|
-
// Returns: conversationId - Conversation ID for this conversation session
|
|
348
|
-
// audioData: Audio data (ArrayBuffer format, must be 16kHz mono PCM16)
|
|
349
|
-
// - Sample rate: 16kHz (16000 Hz) - backend requirement
|
|
350
|
-
// - Format: PCM16 (16-bit signed integer, little-endian)
|
|
351
|
-
// - Channels: Mono (single channel)
|
|
352
|
-
// - Example: 1 second = 16000 samples × 2 bytes = 32000 bytes
|
|
325
|
+
// Returns: conversationId - Conversation ID for this conversation session
|
|
353
326
|
// end: false (default) - Continue sending audio data for current conversation
|
|
354
327
|
// end: true - Mark the end of current conversation round. After end=true, sending new audio data will interrupt any ongoing playback from the previous conversation round
|
|
355
328
|
|
|
@@ -360,25 +333,17 @@ avatarView.avatarController.close()
|
|
|
360
333
|
#### Host Mode Methods
|
|
361
334
|
|
|
362
335
|
```typescript
|
|
363
|
-
//
|
|
364
|
-
const conversationId = await avatarView.avatarController.playback(
|
|
365
|
-
initialAudioChunks?: Array<{ data: Uint8Array, isLast: boolean }>, // Existing audio chunks (16kHz mono PCM16)
|
|
366
|
-
initialKeyframes?: any[] // Existing animation keyframes (obtained from your service)
|
|
367
|
-
)
|
|
368
|
-
// Returns: conversationId - New conversation ID for this conversation session
|
|
369
|
-
|
|
370
|
-
// Stream audio chunks (can be called directly to start a new session, or after playback() to add more data)
|
|
336
|
+
// Stream audio chunks (must be 16kHz mono PCM16 format)
|
|
371
337
|
const conversationId = avatarView.avatarController.yieldAudioData(
|
|
372
338
|
data: Uint8Array, // Audio chunk data
|
|
373
339
|
isLast: boolean = false // Whether this is the last chunk
|
|
374
340
|
)
|
|
375
341
|
// Returns: conversationId - Conversation ID for this audio session
|
|
376
|
-
// Note: If no conversationId exists, a new one will be automatically generated
|
|
377
342
|
|
|
378
343
|
// Stream animation keyframes (requires conversationId from audio data)
|
|
379
344
|
avatarView.avatarController.yieldFramesData(
|
|
380
345
|
keyframes: any[], // Animation keyframes (obtained from your service)
|
|
381
|
-
conversationId: string
|
|
346
|
+
conversationId: string // Conversation ID (required)
|
|
382
347
|
)
|
|
383
348
|
```
|
|
384
349
|
|
|
@@ -386,36 +351,14 @@ avatarView.avatarController.yieldFramesData(
|
|
|
386
351
|
|
|
387
352
|
**SDK Mode:**
|
|
388
353
|
- `send()` returns a conversationId to distinguish each conversation round
|
|
389
|
-
- `end=true` marks the end of a conversation round
|
|
354
|
+
- `end=true` marks the end of a conversation round
|
|
390
355
|
|
|
391
356
|
**Host Mode:**
|
|
392
|
-
|
|
393
|
-
1. **First send audio data** to get a conversationId (used to distinguish each conversation round):
|
|
394
|
-
- `playback()` returns a conversationId when playback existing audio and animation data (replay mode)
|
|
395
|
-
- `yieldAudioData()` returns a conversationId for streaming new audio data
|
|
396
|
-
2. **Then use that conversationId** to send animation data:
|
|
357
|
+
- `yieldAudioData()` returns a conversationId (automatically generated when starting a new session)
|
|
397
358
|
- `yieldFramesData()` requires a valid conversationId parameter
|
|
398
359
|
- Animation data with mismatched conversationId will be **discarded**
|
|
399
360
|
- Use `getCurrentConversationId()` to retrieve the current active conversationId
|
|
400
361
|
|
|
401
|
-
**Example Flow (Host Mode):**
|
|
402
|
-
```typescript
|
|
403
|
-
// Option A: Playback existing complete data (replay mode)
|
|
404
|
-
const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
|
|
405
|
-
|
|
406
|
-
// Option B: Start streaming new data directly
|
|
407
|
-
// Step 1: Send audio data first to get conversationId (automatically generates if starting new session)
|
|
408
|
-
const conversationId = avatarView.avatarController.yieldAudioData(audioChunk, false)
|
|
409
|
-
// Step 2: Use the conversationId to send animation data
|
|
410
|
-
avatarView.avatarController.yieldFramesData(keyframes, conversationId)
|
|
411
|
-
// Note: To start playback with Option B, call playback() with accumulated data or ensure enough audio is sent
|
|
412
|
-
```
|
|
413
|
-
|
|
414
|
-
**Why conversationId is required:**
|
|
415
|
-
- Ensures audio and animation data belong to the same conversation session
|
|
416
|
-
- Prevents data from different sessions from being mixed
|
|
417
|
-
- Automatically discards mismatched animation data for data integrity
|
|
418
|
-
|
|
419
362
|
#### Common Methods (Both Modes)
|
|
420
363
|
|
|
421
364
|
```typescript
|
|
@@ -435,18 +378,21 @@ avatarView.avatarController.clear()
|
|
|
435
378
|
const conversationId = avatarView.avatarController.getCurrentConversationId()
|
|
436
379
|
// Returns: Current conversationId for the active audio session, or null if no active session
|
|
437
380
|
|
|
381
|
+
// Volume control (affects only avatar audio player, not system volume)
|
|
382
|
+
avatarView.avatarController.setVolume(0.5) // Set volume to 50% (0.0 to 1.0)
|
|
383
|
+
const currentVolume = avatarView.avatarController.getVolume() // Get current volume (0.0 to 1.0)
|
|
384
|
+
|
|
438
385
|
// Set event callbacks
|
|
439
386
|
avatarView.avatarController.onConnectionState = (state: ConnectionState) => {} // SDK mode only
|
|
440
|
-
avatarView.avatarController.
|
|
387
|
+
avatarView.avatarController.onConversationState = (state: ConversationState) => {}
|
|
441
388
|
avatarView.avatarController.onError = (error: Error) => {}
|
|
442
389
|
```
|
|
443
390
|
|
|
444
391
|
**Important Notes:**
|
|
445
392
|
- `start()` and `close()` are only available in SDK mode
|
|
446
|
-
- `
|
|
447
|
-
- `pause()`, `resume()`, `interrupt()`, `clear()`, and `
|
|
393
|
+
- `yieldAudioData()` and `yieldFramesData()` are only available in Host mode
|
|
394
|
+
- `pause()`, `resume()`, `interrupt()`, `clear()`, `getCurrentConversationId()`, `setVolume()`, and `getVolume()` are available in both modes
|
|
448
395
|
- The playback mode is determined when creating `AvatarView` and cannot be changed
|
|
449
|
-
- **Conversation ID**: In Host mode, always send audio data first to obtain a conversationId, then use that conversationId when sending animation data. Animation data with mismatched conversationId will be discarded. Use `getCurrentConversationId()` to retrieve the current active conversationId.
|
|
450
396
|
|
|
451
397
|
## 🔧 Configuration
|
|
452
398
|
|
|
@@ -460,7 +406,7 @@ interface Configuration {
|
|
|
460
406
|
```
|
|
461
407
|
|
|
462
408
|
**Description:**
|
|
463
|
-
- `environment`: Specifies the environment (cn/
|
|
409
|
+
- `environment`: Specifies the environment (cn/intl/test), SDK will automatically use the corresponding API address and WebSocket address based on the environment
|
|
464
410
|
- `drivingServiceMode`: Specifies the driving service mode
|
|
465
411
|
- `DrivingServiceMode.sdk` (default): SDK mode - SDK handles WebSocket communication automatically
|
|
466
412
|
- `DrivingServiceMode.host`: Host mode - Host application provides audio and animation data
|
|
@@ -469,34 +415,11 @@ interface Configuration {
|
|
|
469
415
|
```typescript
|
|
470
416
|
enum Environment {
|
|
471
417
|
cn = 'cn', // China region
|
|
472
|
-
|
|
418
|
+
intl = 'intl', // International region
|
|
473
419
|
test = 'test' // Test environment
|
|
474
420
|
}
|
|
475
421
|
```
|
|
476
422
|
|
|
477
|
-
### AvatarView Constructor
|
|
478
|
-
|
|
479
|
-
```typescript
|
|
480
|
-
constructor(avatar: Avatar, container: HTMLElement)
|
|
481
|
-
```
|
|
482
|
-
|
|
483
|
-
**Parameters:**
|
|
484
|
-
- `avatar`: Avatar 实例
|
|
485
|
-
- `container`: Canvas 容器元素(必选)
|
|
486
|
-
- Canvas 自动使用容器的完整尺寸(宽度和高度)
|
|
487
|
-
- Canvas 宽高比适应容器尺寸 - 设置容器尺寸以控制宽高比
|
|
488
|
-
- Canvas 会自动添加到容器中
|
|
489
|
-
|
|
490
|
-
**Note:** 播放模式由 `AvatarKit.initialize()` 配置中的 `drivingServiceMode` 决定,而不是在构造函数参数中
|
|
491
|
-
- SDK automatically handles resize events via ResizeObserver
|
|
492
|
-
|
|
493
|
-
```typescript
|
|
494
|
-
enum AvatarPlaybackMode {
|
|
495
|
-
network = 'network', // SDK mode: SDK handles WebSocket communication
|
|
496
|
-
external = 'external' // Host mode: Host provides data, SDK handles playback
|
|
497
|
-
}
|
|
498
|
-
```
|
|
499
|
-
|
|
500
423
|
### CameraConfig
|
|
501
424
|
|
|
502
425
|
```typescript
|
|
@@ -524,17 +447,23 @@ enum ConnectionState {
|
|
|
524
447
|
}
|
|
525
448
|
```
|
|
526
449
|
|
|
527
|
-
###
|
|
450
|
+
### ConversationState
|
|
528
451
|
|
|
529
452
|
```typescript
|
|
530
|
-
enum
|
|
531
|
-
idle = 'idle', //
|
|
532
|
-
|
|
533
|
-
playing = 'playing', // Playing
|
|
534
|
-
paused = 'paused' // Paused (can be resumed)
|
|
453
|
+
enum ConversationState {
|
|
454
|
+
idle = 'idle', // Idle (breathing) state
|
|
455
|
+
playing = 'playing' // Playing state
|
|
535
456
|
}
|
|
536
457
|
```
|
|
537
458
|
|
|
459
|
+
**State descriptions:**
|
|
460
|
+
- `idle`: The avatar is in the idle (breathing) state, waiting for a conversation to start
|
|
461
|
+
- `playing`: The avatar is playing conversation content (including during transition animations)
|
|
462
|
+
|
|
463
|
+
**Note:** During transition animations, the target state is reported in advance:
|
|
464
|
+
- When transitioning from `idle` to `playing`, the `playing` state is reported immediately
|
|
465
|
+
- When transitioning from `playing` to `idle`, the `idle` state is reported immediately
|
|
466
|
+
|
|
538
467
|
## 🎨 Rendering System
|
|
539
468
|
|
|
540
469
|
The SDK supports two rendering backends:
|
|
@@ -601,13 +530,8 @@ const container = document.getElementById('avatar-container')
|
|
|
601
530
|
const avatarView = new AvatarView(avatar, container)
|
|
602
531
|
|
|
603
532
|
// Use
|
|
604
|
-
const
|
|
605
|
-
|
|
606
|
-
const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
|
|
607
|
-
// Step 2: Stream additional audio (returns conversationId)
|
|
608
|
-
const currentConversationId = avatarView.avatarController.yieldAudioData(audioChunk, false)
|
|
609
|
-
// Step 3: Use conversationId to send animation data (mismatched conversationId will be discarded)
|
|
610
|
-
avatarView.avatarController.yieldFramesData(keyframes, currentConversationId || conversationId)
|
|
533
|
+
const conversationId = avatarView.avatarController.yieldAudioData(audioChunk, false)
|
|
534
|
+
avatarView.avatarController.yieldFramesData(keyframes, conversationId)
|
|
611
535
|
|
|
612
536
|
// Cleanup
|
|
613
537
|
avatarView.avatarController.clear() // Clear all data and resources
|
|
@@ -626,67 +550,6 @@ avatarView.dispose() // Automatically cleans up all resources
|
|
|
626
550
|
- Supports dynamic loading/unloading of character and animation resources
|
|
627
551
|
- Provides memory usage monitoring interface
|
|
628
552
|
|
|
629
|
-
### Audio Data Sending
|
|
630
|
-
|
|
631
|
-
#### SDK Mode
|
|
632
|
-
|
|
633
|
-
The `send()` method receives audio data in `ArrayBuffer` format:
|
|
634
|
-
|
|
635
|
-
**Audio Format Requirements:**
|
|
636
|
-
- **Sample Rate**: 16kHz (16000 Hz) - **Backend requirement, must be exactly 16kHz**
|
|
637
|
-
- **Format**: PCM16 (16-bit signed integer, little-endian)
|
|
638
|
-
- **Channels**: Mono (single channel)
|
|
639
|
-
- **Data Size**: Each sample is 2 bytes, so 1 second of audio = 16000 samples × 2 bytes = 32000 bytes
|
|
640
|
-
|
|
641
|
-
**Usage:**
|
|
642
|
-
- `audioData`: Audio data (ArrayBuffer format, must be 16kHz mono PCM16)
|
|
643
|
-
- `end=false` (default) - Continue sending audio data for current conversation
|
|
644
|
-
- `end=true` - Mark the end of current conversation round. After `end=true`, sending new audio data will interrupt any ongoing playback from the previous conversation round
|
|
645
|
-
- **Important**: No need to wait for `end=true` to start playing, it will automatically start playing after accumulating enough audio data
|
|
646
|
-
|
|
647
|
-
#### Host Mode
|
|
648
|
-
|
|
649
|
-
The `playback()` method is used to playback existing audio and animation data (replay mode), generating a new conversationId and interrupting any existing conversation.
|
|
650
|
-
|
|
651
|
-
**Two ways to start a session in Host mode:**
|
|
652
|
-
1. **Use `playback()`** - For replaying existing complete audio and animation data
|
|
653
|
-
2. **Use `yieldAudioData()` directly** - For streaming new audio data (automatically generates conversationId if needed)
|
|
654
|
-
|
|
655
|
-
Then use `yieldAudioData()` to stream additional audio:
|
|
656
|
-
|
|
657
|
-
**Audio Format Requirements:**
|
|
658
|
-
- Same as SDK mode: 16kHz mono PCM16 format
|
|
659
|
-
- Audio data should be provided as `Uint8Array` in chunks with `isLast` flag
|
|
660
|
-
|
|
661
|
-
**Usage:**
|
|
662
|
-
```typescript
|
|
663
|
-
// Playback existing audio and animation data (starts a new conversation)
|
|
664
|
-
// Note: Audio and animation data should be obtained from your backend service
|
|
665
|
-
const initialAudioChunks = [
|
|
666
|
-
{ data: audioData1, isLast: false },
|
|
667
|
-
{ data: audioData2, isLast: false }
|
|
668
|
-
]
|
|
669
|
-
const conversationId = await avatarController.playback(initialAudioChunks, initialKeyframes)
|
|
670
|
-
// Returns: conversationId - New conversation ID for this conversation session
|
|
671
|
-
|
|
672
|
-
// Stream additional audio chunks
|
|
673
|
-
const conversationId = avatarController.yieldAudioData(audioChunk, isLast)
|
|
674
|
-
// Returns: conversationId - Conversation ID for this audio session
|
|
675
|
-
```
|
|
676
|
-
|
|
677
|
-
**⚠️ Conversation ID Workflow:**
|
|
678
|
-
1. **Start a session** → Choose one of two ways:
|
|
679
|
-
- **Option A**: Use `playback(initialAudioChunks, initialKeyframes)` to replay existing complete data
|
|
680
|
-
- **Option B**: Use `yieldAudioData(audioChunk)` directly to start streaming (automatically generates conversationId)
|
|
681
|
-
2. **Get conversationId** → Both methods return a conversationId
|
|
682
|
-
3. **Send animation with conversationId** → Use the conversationId from step 1 in `yieldFramesData()`
|
|
683
|
-
4. **Data matching** → Only animation data with matching conversationId will be accepted
|
|
684
|
-
|
|
685
|
-
**Resampling (Both Modes):**
|
|
686
|
-
- If your audio source is at a different sample rate (e.g., 24kHz, 48kHz), you **must** resample it to 16kHz before sending
|
|
687
|
-
- For high-quality resampling, use Web Audio API's `OfflineAudioContext` with anti-aliasing filtering
|
|
688
|
-
- See example projects (`vanilla`, `react`, `vue`) for complete resampling implementation
|
|
689
|
-
|
|
690
553
|
## 🌐 Browser Compatibility
|
|
691
554
|
|
|
692
555
|
- **Chrome/Edge** 90+ (WebGPU recommended)
|
|
@@ -1,38 +1,42 @@
|
|
|
1
1
|
var C = Object.defineProperty;
|
|
2
2
|
var g = (h, t, e) => t in h ? C(h, t, { enumerable: !0, configurable: !0, writable: !0, value: e }) : h[t] = e;
|
|
3
|
-
var
|
|
4
|
-
import { A as m, e as f, a as c, l as
|
|
3
|
+
var s = (h, t, e) => g(h, typeof t != "symbol" ? t + "" : t, e);
|
|
4
|
+
import { A as m, e as f, a as c, l as n } from "./index-DYf1u8L7.js";
|
|
5
5
|
class y {
|
|
6
6
|
constructor(t) {
|
|
7
7
|
// AudioContext is managed internally
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
8
|
+
s(this, "audioContext", null);
|
|
9
|
+
s(this, "sampleRate");
|
|
10
|
+
s(this, "channelCount");
|
|
11
|
+
s(this, "debug");
|
|
12
12
|
// Session-level state
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
s(this, "sessionId");
|
|
14
|
+
s(this, "sessionStartTime", 0);
|
|
15
15
|
// AudioContext time when session started
|
|
16
|
-
|
|
16
|
+
s(this, "pausedTimeOffset", 0);
|
|
17
17
|
// Accumulated paused time
|
|
18
|
-
|
|
18
|
+
s(this, "pausedAt", 0);
|
|
19
19
|
// Time when paused
|
|
20
|
-
|
|
20
|
+
s(this, "pausedAudioContextTime", 0);
|
|
21
21
|
// audioContext.currentTime when paused (for resume calculation)
|
|
22
|
-
|
|
22
|
+
s(this, "scheduledTime", 0);
|
|
23
23
|
// Next chunk schedule time in AudioContext time
|
|
24
24
|
// Playback state
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
s(this, "isPlaying", !1);
|
|
26
|
+
s(this, "isPaused", !1);
|
|
27
|
+
s(this, "autoStartEnabled", !0);
|
|
28
28
|
// Control whether to auto-start when buffer is ready
|
|
29
29
|
// Audio buffer queue
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
s(this, "audioChunks", []);
|
|
31
|
+
s(this, "scheduledChunks", 0);
|
|
32
32
|
// Number of chunks already scheduled
|
|
33
|
-
|
|
33
|
+
s(this, "activeSources", /* @__PURE__ */ new Set());
|
|
34
|
+
// Volume control
|
|
35
|
+
s(this, "gainNode", null);
|
|
36
|
+
s(this, "volume", 1);
|
|
37
|
+
// Default volume 1.0 (0.0 - 1.0)
|
|
34
38
|
// Event callbacks
|
|
35
|
-
|
|
39
|
+
s(this, "onEndedCallback");
|
|
36
40
|
this.sessionId = `session_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`, this.sampleRate = (t == null ? void 0 : t.sampleRate) ?? m.audio.sampleRate, this.channelCount = (t == null ? void 0 : t.channelCount) ?? 1, this.debug = (t == null ? void 0 : t.debug) ?? !1;
|
|
37
41
|
}
|
|
38
42
|
/**
|
|
@@ -43,7 +47,7 @@ class y {
|
|
|
43
47
|
try {
|
|
44
48
|
this.audioContext = new AudioContext({
|
|
45
49
|
sampleRate: this.sampleRate
|
|
46
|
-
}), this.audioContext.state === "suspended" && await this.audioContext.resume(), this.log("AudioContext initialized", {
|
|
50
|
+
}), this.gainNode = this.audioContext.createGain(), this.gainNode.gain.value = this.volume, this.gainNode.connect(this.audioContext.destination), this.audioContext.state === "suspended" && await this.audioContext.resume(), this.log("AudioContext initialized", {
|
|
47
51
|
sessionId: this.sessionId,
|
|
48
52
|
sampleRate: this.audioContext.sampleRate,
|
|
49
53
|
state: this.audioContext.state
|
|
@@ -53,7 +57,7 @@ class y {
|
|
|
53
57
|
throw c.logEvent("activeAudioSessionFailed", "warning", {
|
|
54
58
|
sessionId: this.sessionId,
|
|
55
59
|
reason: e
|
|
56
|
-
}),
|
|
60
|
+
}), n.error("Failed to initialize AudioContext:", e), t instanceof Error ? t : new Error(e);
|
|
57
61
|
}
|
|
58
62
|
}
|
|
59
63
|
/**
|
|
@@ -61,7 +65,7 @@ class y {
|
|
|
61
65
|
*/
|
|
62
66
|
addChunk(t, e = !1) {
|
|
63
67
|
if (!this.audioContext) {
|
|
64
|
-
|
|
68
|
+
n.error("AudioContext not initialized");
|
|
65
69
|
return;
|
|
66
70
|
}
|
|
67
71
|
this.audioChunks.push({ data: t, isLast: e }), this.log(`Added chunk ${this.audioChunks.length}`, {
|
|
@@ -132,16 +136,16 @@ class y {
|
|
|
132
136
|
}
|
|
133
137
|
const r = e.data, o = e.isLast, a = this.pcmToAudioBuffer(r);
|
|
134
138
|
if (!a) {
|
|
135
|
-
|
|
139
|
+
n.error("Failed to create AudioBuffer from PCM data"), c.logEvent("character_player", "error", {
|
|
136
140
|
sessionId: this.sessionId,
|
|
137
141
|
event: "audio_buffer_creation_failed"
|
|
138
142
|
});
|
|
139
143
|
return;
|
|
140
144
|
}
|
|
141
145
|
try {
|
|
142
|
-
const
|
|
143
|
-
|
|
144
|
-
this.activeSources.delete(
|
|
146
|
+
const i = this.audioContext.createBufferSource();
|
|
147
|
+
i.buffer = a, i.connect(this.gainNode), i.start(this.scheduledTime), this.activeSources.add(i), i.onended = () => {
|
|
148
|
+
this.activeSources.delete(i), o && this.activeSources.size === 0 && (this.log("Last audio chunk ended, marking playback as ended"), this.markEnded());
|
|
145
149
|
}, this.scheduledTime += a.duration, this.scheduledChunks++, this.log(`[StreamingAudioPlayer] Scheduled chunk ${t + 1}/${this.audioChunks.length}`, {
|
|
146
150
|
startTime: this.scheduledTime - a.duration,
|
|
147
151
|
duration: a.duration,
|
|
@@ -149,11 +153,11 @@ class y {
|
|
|
149
153
|
isLast: o,
|
|
150
154
|
activeSources: this.activeSources.size
|
|
151
155
|
});
|
|
152
|
-
} catch (
|
|
153
|
-
|
|
156
|
+
} catch (i) {
|
|
157
|
+
n.errorWithError("Failed to schedule audio chunk:", i), c.logEvent("character_player", "error", {
|
|
154
158
|
sessionId: this.sessionId,
|
|
155
159
|
event: "schedule_chunk_failed",
|
|
156
|
-
reason:
|
|
160
|
+
reason: i instanceof Error ? i.message : String(i)
|
|
157
161
|
});
|
|
158
162
|
}
|
|
159
163
|
}
|
|
@@ -165,25 +169,25 @@ class y {
|
|
|
165
169
|
if (!this.audioContext)
|
|
166
170
|
return null;
|
|
167
171
|
if (t.length === 0) {
|
|
168
|
-
const l = Math.floor(this.sampleRate * 0.01),
|
|
172
|
+
const l = Math.floor(this.sampleRate * 0.01), u = this.audioContext.createBuffer(
|
|
169
173
|
this.channelCount,
|
|
170
174
|
l,
|
|
171
175
|
this.sampleRate
|
|
172
176
|
);
|
|
173
177
|
for (let d = 0; d < this.channelCount; d++)
|
|
174
|
-
|
|
175
|
-
return
|
|
178
|
+
u.getChannelData(d).fill(0);
|
|
179
|
+
return u;
|
|
176
180
|
}
|
|
177
181
|
const e = new Uint8Array(t), r = new Int16Array(e.buffer, 0, e.length / 2), o = r.length / this.channelCount, a = this.audioContext.createBuffer(
|
|
178
182
|
this.channelCount,
|
|
179
183
|
o,
|
|
180
184
|
this.sampleRate
|
|
181
185
|
);
|
|
182
|
-
for (let
|
|
183
|
-
const l = a.getChannelData(
|
|
184
|
-
for (let
|
|
185
|
-
const d =
|
|
186
|
-
l[
|
|
186
|
+
for (let i = 0; i < this.channelCount; i++) {
|
|
187
|
+
const l = a.getChannelData(i);
|
|
188
|
+
for (let u = 0; u < o; u++) {
|
|
189
|
+
const d = u * this.channelCount + i;
|
|
190
|
+
l[u] = r[d] / 32768;
|
|
187
191
|
}
|
|
188
192
|
}
|
|
189
193
|
return a;
|
|
@@ -204,7 +208,7 @@ class y {
|
|
|
204
208
|
*/
|
|
205
209
|
pause() {
|
|
206
210
|
!this.isPlaying || this.isPaused || !this.audioContext || (this.pausedAt = this.getCurrentTime(), this.pausedAudioContextTime = this.audioContext.currentTime, this.isPaused = !0, this.audioContext.state === "running" && this.audioContext.suspend().catch((t) => {
|
|
207
|
-
|
|
211
|
+
n.errorWithError("Failed to suspend AudioContext:", t), this.isPaused = !1;
|
|
208
212
|
}), this.log("Playback paused", {
|
|
209
213
|
pausedAt: this.pausedAt,
|
|
210
214
|
pausedAudioContextTime: this.pausedAudioContextTime,
|
|
@@ -221,7 +225,7 @@ class y {
|
|
|
221
225
|
try {
|
|
222
226
|
await this.audioContext.resume();
|
|
223
227
|
} catch (e) {
|
|
224
|
-
throw
|
|
228
|
+
throw n.errorWithError("Failed to resume AudioContext:", e), e;
|
|
225
229
|
}
|
|
226
230
|
const t = this.audioContext.currentTime;
|
|
227
231
|
this.sessionStartTime = this.pausedAudioContextTime - this.pausedAt - this.pausedTimeOffset, this.isPaused = !1, this.scheduledChunks < this.audioChunks.length && this.scheduleAllChunks(), this.log("Playback resumed", {
|
|
@@ -307,7 +311,7 @@ class y {
|
|
|
307
311
|
* Dispose and cleanup
|
|
308
312
|
*/
|
|
309
313
|
dispose() {
|
|
310
|
-
this.stop(), this.audioContext && (this.audioContext.close(), this.audioContext = null), this.audioChunks = [], this.scheduledChunks = 0, this.sessionStartTime = 0, this.pausedTimeOffset = 0, this.pausedAt = 0, this.pausedAudioContextTime = 0, this.scheduledTime = 0, this.onEndedCallback = void 0, this.log("StreamingAudioPlayer disposed");
|
|
314
|
+
this.stop(), this.audioContext && (this.audioContext.close(), this.audioContext = null, this.gainNode = null), this.audioChunks = [], this.scheduledChunks = 0, this.sessionStartTime = 0, this.pausedTimeOffset = 0, this.pausedAt = 0, this.pausedAudioContextTime = 0, this.scheduledTime = 0, this.onEndedCallback = void 0, this.log("StreamingAudioPlayer disposed");
|
|
311
315
|
}
|
|
312
316
|
/**
|
|
313
317
|
* Flush buffered audio
|
|
@@ -321,14 +325,29 @@ class y {
|
|
|
321
325
|
}
|
|
322
326
|
this.scheduledChunks < this.audioChunks.length && this.audioChunks.splice(this.scheduledChunks), this.log("Flushed (soft)", { remainingScheduled: this.scheduledChunks });
|
|
323
327
|
}
|
|
328
|
+
/**
|
|
329
|
+
* Set the volume (0.0 - 1.0)
|
|
330
|
+
* Note: this only controls the volume of the avatar's audio player and does not affect the system volume
|
|
331
|
+
* @param volume Volume value in the range 0.0 to 1.0 (0.0 is muted, 1.0 is maximum volume)
|
|
332
|
+
*/
|
|
333
|
+
setVolume(t) {
|
|
334
|
+
(t < 0 || t > 1) && (n.warn(`[StreamingAudioPlayer] Volume out of range: ${t}, clamping to [0, 1]`), t = Math.max(0, Math.min(1, t))), this.volume = t, this.gainNode && (this.gainNode.gain.value = t);
|
|
335
|
+
}
|
|
336
|
+
/**
|
|
337
|
+
* Get the current volume
|
|
338
|
+
* @returns The current volume value (0.0 - 1.0)
|
|
339
|
+
*/
|
|
340
|
+
getVolume() {
|
|
341
|
+
return this.volume;
|
|
342
|
+
}
|
|
324
343
|
/**
|
|
325
344
|
* Debug logging
|
|
326
345
|
*/
|
|
327
346
|
log(t, e) {
|
|
328
|
-
this.debug &&
|
|
347
|
+
this.debug && n.log(`[StreamingAudioPlayer] ${t}`, e || "");
|
|
329
348
|
}
|
|
330
349
|
}
|
|
331
350
|
export {
|
|
332
351
|
y as StreamingAudioPlayer
|
|
333
352
|
};
|
|
334
|
-
//# sourceMappingURL=StreamingAudioPlayer-
|
|
353
|
+
//# sourceMappingURL=StreamingAudioPlayer-PkzxBP93.js.map
|