@spatialwalk/avatarkit 1.0.0-beta.16 → 1.0.0-beta.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,54 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.0.0-beta.18] - 2025-11-25
9
+
10
+ ### 🔧 API Changes
11
+ - **Renamed `reqId` to `conversationId`** - Updated terminology for better clarity
12
+ - All methods and parameters that used `reqId` now use `conversationId`
13
+ - `getCurrentReqId()` → `getCurrentConversationId()`
14
+ - `generateReqId()` → `generateConversationId()`
15
+ - Updated all event logs and documentation to use `conversationId`
16
+ - Note: Protobuf protocol still uses `reqId` field name internally, but SDK API uses `conversationId`
17
+
18
+ ### 📚 Documentation
19
+ - Enhanced Host mode documentation to clearly emphasize the workflow: send audio data first to get conversationId, then use that conversationId to send animation data
20
+ - Updated Host Mode Example and Host Mode Flow sections with clearer step-by-step instructions
21
+
22
+ ## [1.0.0-beta.17] - 2025-11-24
23
+
24
+ ### ✨ New Features
25
+ - **Audio-Only Fallback Mechanism** - SDK now includes automatic fallback to audio-only playback when animation data is unavailable
26
+ - SDK mode: Automatically enters audio-only mode when server returns an error
27
+ - Host mode: Automatically enters audio-only mode when empty animation data is provided
28
+ - Once in audio-only mode, subsequent animation data for that session is ignored
29
+ - Fallback mode is interruptible, just like normal playback mode
30
+
31
+ ### 🔧 API Changes
32
+ - **Playback Mode Configuration** - Moved playback mode configuration from `AvatarView` constructor to `AvatarKit.initialize()`
33
+ - Playback mode is now determined by `drivingServiceMode` in `AvatarKit.initialize()` configuration
34
+ - `AvatarView` constructor now only requires `avatar` and `container` parameters
35
+ - Removed `AvatarViewOptions` interface
36
+ - `container` parameter is now required (no longer optional)
37
+ - **Method Renames** - Renamed methods in `AvatarController` for Host mode to better reflect their purpose
38
+ - `play()` → `playback()`: Renamed to better reflect that the method is used for playback of existing data (replay mode)
39
+ - Old API: `avatarController.play(initialAudioChunks, initialKeyframes)`
40
+ - New API: `avatarController.playback(initialAudioChunks, initialKeyframes)`
41
+ - `sendAudioChunk()` → `yieldAudioData()`: Renamed to better reflect that the method yields/streams audio data
42
+ - Old API: `avatarController.sendAudioChunk(data, isLast)`
43
+ - New API: `avatarController.yieldAudioData(data, isLast)`
44
+ - `sendKeyframes()` → `yieldFramesData()`: Renamed to better reflect that the method yields/streams animation keyframes
45
+ - Old API: `avatarController.sendKeyframes(keyframes, reqId)`
46
+ - New API: `avatarController.yieldFramesData(keyframes, conversationId)`
47
+
48
+ ### 🔧 Improvements
49
+ - Extended transition animation duration from 200ms to 400ms for smoother end-of-playback transitions
50
+
51
+ ### 📚 Documentation
52
+ - Updated README.md to use "SDK mode" and "Host mode" terminology instead of "Network mode" and "External data mode"
53
+ - Added fallback mechanism documentation
54
+ - Updated API reference to reflect new constructor signature
55
+
8
56
  ## [1.0.0-beta.16] - 2025-11-21
9
57
 
10
58
  ### ✨ New Features
@@ -174,7 +222,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
174
222
  - **External Data Mode**:
175
223
  - External components fully control audio and animation data acquisition
176
224
  - SDK only responsible for synchronized playback of externally provided data
177
- - Use `controller.play()`, `controller.sendAudioChunk()` and `controller.sendKeyframes()` methods
225
+ - Use `controller.playback()`, `controller.yieldAudioData()` and `controller.yieldFramesData()` methods
178
226
 
179
227
  ### ✨ New Features
180
228
 
package/README.md CHANGED
@@ -32,8 +32,13 @@ import {
32
32
  } from '@spatialwalk/avatarkit'
33
33
 
34
34
  // 1. Initialize SDK
35
+ import { DrivingServiceMode } from '@spatialwalk/avatarkit'
36
+
35
37
  const configuration: Configuration = {
36
38
  environment: Environment.test,
39
+ drivingServiceMode: DrivingServiceMode.sdk, // Optional, 'sdk' is default
40
+ // - DrivingServiceMode.sdk: SDK mode - SDK handles WebSocket communication
41
+ // - DrivingServiceMode.host: Host mode - Host app provides audio and animation data
37
42
  }
38
43
 
39
44
  await AvatarKit.initialize('your-app-id', configuration)
@@ -42,55 +47,57 @@ await AvatarKit.initialize('your-app-id', configuration)
42
47
  // AvatarKit.setSessionToken('your-session-token')
43
48
 
44
49
  // 2. Load character
45
- const avatarManager = new AvatarManager()
50
+ const avatarManager = AvatarManager.shared
46
51
  const avatar = await avatarManager.load('character-id', (progress) => {
47
52
  console.log(`Loading progress: ${progress.progress}%`)
48
53
  })
49
54
 
50
55
  // 3. Create view (automatically creates Canvas and AvatarController)
51
- // Network mode (default)
56
+ // The playback mode is determined by drivingServiceMode in AvatarKit configuration
57
+ // - DrivingServiceMode.sdk: SDK mode - SDK handles WebSocket communication
58
+ // - DrivingServiceMode.host: Host mode - Host app provides audio and animation data
52
59
  const container = document.getElementById('avatar-container')
53
- const avatarView = new AvatarView(avatar, {
54
- container: container,
55
- playbackMode: 'network' // Optional, 'network' is default
56
- })
60
+ const avatarView = new AvatarView(avatar, container)
57
61
 
58
- // 4. Start real-time communication (network mode only)
62
+ // 4. Start real-time communication (SDK mode only)
59
63
  await avatarView.avatarController.start()
60
64
 
61
- // 5. Send audio data (network mode)
65
+ // 5. Send audio data (SDK mode)
62
66
  // ⚠️ Important: Audio must be 16kHz mono PCM16 format
63
67
  // If audio is Uint8Array, you can use slice().buffer to convert to ArrayBuffer
64
68
  const audioUint8 = new Uint8Array(1024) // Example: 16kHz PCM16 audio data (512 samples = 1024 bytes)
65
69
  const audioData = audioUint8.slice().buffer // Simplified conversion, works for ArrayBuffer and SharedArrayBuffer
66
70
  avatarView.avatarController.send(audioData, false) // Send audio data, will automatically start playing after accumulating enough data
67
- avatarView.avatarController.send(audioData, true) // end=true means immediately return animation data, no longer accumulating
71
+ avatarView.avatarController.send(audioData, true) // end=true marks the end of current conversation round
68
72
  ```
69
73
 
70
- ### External Data Mode Example
74
+ ### Host Mode Example
71
75
 
72
76
  ```typescript
73
77
  import { AvatarPlaybackMode } from '@spatialwalk/avatarkit'
74
78
 
75
- // 1-3. Same as network mode (initialize SDK, load character)
79
+ // 1-3. Same as SDK mode (initialize SDK, load character)
76
80
 
77
- // 3. Create view with external data mode
81
+ // 3. Create view with Host mode
78
82
  const container = document.getElementById('avatar-container')
79
- const avatarView = new AvatarView(avatar, {
80
- container: container,
81
- playbackMode: AvatarPlaybackMode.external
82
- })
83
+ const avatarView = new AvatarView(avatar, container)
83
84
 
84
- // 4. Start playback with initial data (obtained from your service)
85
- // Note: Audio and animation data should be obtained from your backend service
85
+ // 4. Host Mode Workflow:
86
+ // ⚠️ IMPORTANT: In Host mode, you MUST send audio data FIRST to get a conversationId,
87
+ // then use that conversationId to send animation data.
88
+ // Animation data with mismatched conversationId will be discarded.
89
+
90
+ // Option A: Play back existing audio and animation data (replay mode)
86
91
  const initialAudioChunks = [{ data: audioData1, isLast: false }, { data: audioData2, isLast: false }]
87
92
  const initialKeyframes = animationData1 // Animation keyframes from your service
88
-
89
- await avatarView.avatarController.play(initialAudioChunks, initialKeyframes)
90
-
91
- // 5. Stream additional data as needed
92
- avatarView.avatarController.sendAudioChunk(audioData3, false)
93
- avatarView.avatarController.sendKeyframes(animationData2)
93
+ // Step 1: Send audio first to get conversationId
94
+ const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
95
+
96
+ // Option B: Stream new audio and animation data
97
+ // Step 1: Send audio data first to get conversationId
98
+ const currentConversationId = avatarView.avatarController.yieldAudioData(audioData3, false)
99
+ // Step 2: Use the conversationId to send animation data (mismatched conversationId will be discarded)
100
+ avatarView.avatarController.yieldFramesData(animationData2, currentConversationId || conversationId)
94
101
  ```
95
102
 
96
103
  ### Complete Examples
@@ -100,8 +107,8 @@ Check the example code in the GitHub repository for complete usage flows for bot
100
107
  **Example Project:** [AvatarKit-Web-Demo](https://github.com/spatialwalk/AvatarKit-Web-Demo)
101
108
 
102
109
  This repository contains complete examples for Vanilla JS, Vue 3, and React, demonstrating:
103
- - Network mode: Real-time audio input with automatic animation data reception
104
- - External data mode: Custom data sources with manual audio/animation data management
110
+ - SDK mode: Real-time audio input with automatic animation data reception
111
+ - Host mode: Custom data sources with manual audio/animation data management
105
112
 
106
113
  ## 🏗️ Architecture Overview
107
114
 
@@ -111,7 +118,7 @@ The SDK uses a three-layer architecture for clear separation of concerns:
111
118
 
112
119
  1. **Rendering Layer (AvatarView)** - Responsible for 3D rendering only
113
120
  2. **Playback Layer (AvatarController)** - Manages audio/animation synchronization and playback
114
- 3. **Network Layer** - Handles WebSocket communication (only in network mode, internal implementation)
121
+ 3. **Network Layer** - Handles WebSocket communication (only in SDK mode, internal implementation)
115
122
 
116
123
  ### Core Components
117
124
 
@@ -122,23 +129,36 @@ The SDK uses a three-layer architecture for clear separation of concerns:
122
129
 
123
130
  ### Playback Modes
124
131
 
125
- The SDK supports two playback modes, configured when creating `AvatarView`:
132
+ The SDK supports two playback modes, configured in `AvatarKit.initialize()`:
126
133
 
127
- #### 1. Network Mode (Default)
134
+ #### 1. SDK Mode (Default)
135
+ - Configured via `drivingServiceMode: DrivingServiceMode.sdk` in `AvatarKit.initialize()`
128
136
  - SDK handles WebSocket communication automatically
129
137
  - Send audio data via `AvatarController.send()`
130
138
  - SDK receives animation data from backend and synchronizes playback
131
139
  - Best for: Real-time audio input scenarios
132
140
 
133
- #### 2. External Data Mode
134
- - External components manage their own network/data fetching
135
- - External components provide both audio and animation data
141
+ #### 2. Host Mode
142
+ - Configured via `drivingServiceMode: DrivingServiceMode.host` in `AvatarKit.initialize()`
143
+ - Host application manages its own network/data fetching
144
+ - Host application provides both audio and animation data
136
145
  - SDK only handles synchronized playback
137
146
  - Best for: Custom data sources, pre-recorded content, or custom network implementations
138
147
 
148
+ **Note:** The playback mode is determined by `drivingServiceMode` in `AvatarKit.initialize()` configuration.
149
+
150
+ ### Fallback Mechanism
151
+
152
+ The SDK includes a fallback mechanism to ensure audio playback continues even when animation data is unavailable:
153
+
154
+ - **SDK Mode**: If the server returns an error or fails to provide animation data, the SDK automatically enters audio-only mode and continues playing audio independently
155
+ - **Host Mode**: If empty animation data is provided (empty array or undefined), the SDK automatically enters audio-only mode
156
+ - Once in audio-only mode, any subsequent animation data for that session will be ignored, and only audio will continue playing
157
+ - The fallback mode is interruptible, just like normal playback mode
158
+
139
159
  ### Data Flow
140
160
 
141
- #### Network Mode Flow
161
+ #### SDK Mode Flow
142
162
 
143
163
  ```
144
164
  User audio input (16kHz mono PCM16)
@@ -158,15 +178,20 @@ AvatarController (playback loop) → AvatarView.renderRealtimeFrame()
158
178
  RenderSystem → WebGPU/WebGL → Canvas rendering
159
179
  ```
160
180
 
161
- #### External Data Mode Flow
181
+ #### Host Mode Flow
162
182
 
163
183
  ```
164
184
  External data source (audio + animation)
165
185
 
166
- AvatarController.play(initialAudio, initialKeyframes) // Start playback
186
+ Step 1: Send audio data FIRST to get conversationId
187
+
188
+ AvatarController.playback(initialAudio, initialKeyframes) // Returns conversationId
189
+ OR
190
+ AvatarController.yieldAudioData(audioChunk) // Returns conversationId
191
+
192
+ Step 2: Use conversationId to send animation data
167
193
 
168
- AvatarController.sendAudioChunk() // Stream additional audio
169
- AvatarController.sendKeyframes() // Stream additional animation
194
+ AvatarController.yieldFramesData(keyframes, conversationId) // Requires conversationId
170
195
 
171
196
  AvatarController → AnimationPlayer (synchronized playback)
172
197
 
@@ -178,8 +203,8 @@ RenderSystem → WebGPU/WebGL → Canvas rendering
178
203
  ```
179
204
 
180
205
  **Note:**
181
- - In network mode, users provide audio data, SDK handles network communication and animation data reception
182
- - In external data mode, users provide both audio and animation data, SDK handles synchronized playback only
206
+ - In SDK mode, users provide audio data, SDK handles network communication and animation data reception
207
+ - In Host mode, users provide both audio and animation data, SDK handles synchronized playback only
183
208
 
184
209
  ### Audio Format Requirements
185
210
 
@@ -240,10 +265,11 @@ AvatarKit.cleanup()
240
265
 
241
266
  ### AvatarManager
242
267
 
243
- Character resource manager, responsible for downloading, caching, and loading character data.
268
+ Character resource manager, responsible for downloading, caching, and loading character data. Use the singleton instance via `AvatarManager.shared`.
244
269
 
245
270
  ```typescript
246
- const manager = new AvatarManager()
271
+ // Get singleton instance
272
+ const manager = AvatarManager.shared
247
273
 
248
274
  // Load character
249
275
  const avatar = await manager.load(
@@ -267,22 +293,16 @@ manager.clearCache()
267
293
  import { AvatarPlaybackMode } from '@spatialwalk/avatarkit'
268
294
 
269
295
  // Create view (Canvas is automatically added to container)
270
- // Network mode (default)
296
+ // Create view (playback mode is determined by drivingServiceMode in AvatarKit configuration)
271
297
  const container = document.getElementById('avatar-container')
272
- const avatarView = new AvatarView(avatar: Avatar, {
273
- container: container,
274
- playbackMode: AvatarPlaybackMode.network // Optional, default is 'network'
275
- })
276
-
277
- // External data mode
278
- const avatarView = new AvatarView(avatar: Avatar, {
279
- container: container,
280
- playbackMode: AvatarPlaybackMode.external
281
- })
298
+ const avatarView = new AvatarView(avatar, container)
282
299
 
283
300
  // Get playback mode
284
301
  const mode = avatarView.playbackMode // 'network' | 'external'
285
302
 
303
+ // Wait for first frame to render
304
+ await avatarView.ready // Promise that resolves when the first frame is rendered
305
+
286
306
  // Cleanup resources (must be called before switching characters)
287
307
  avatarView.dispose()
288
308
  ```
@@ -299,12 +319,9 @@ if (currentAvatarView) {
299
319
  const newAvatar = await avatarManager.load('new-character-id')
300
320
 
301
321
  // Create new AvatarView
302
- currentAvatarView = new AvatarView(newAvatar, {
303
- container: container,
304
- playbackMode: AvatarPlaybackMode.network
305
- })
322
+ currentAvatarView = new AvatarView(newAvatar, container)
306
323
 
307
- // Network mode: start connection
324
+ // SDK mode: start connection
308
325
  if (currentAvatarView.playbackMode === AvatarPlaybackMode.network) {
309
326
  await currentAvatarView.controller.start()
310
327
  }
@@ -312,51 +329,87 @@ if (currentAvatarView.playbackMode === AvatarPlaybackMode.network) {
312
329
 
313
330
  ### AvatarController
314
331
 
315
- Audio/animation playback controller (playback layer), manages synchronized playback of audio and animation. Automatically handles WebSocket communication in network mode.
332
+ Audio/animation playback controller (playback layer), manages synchronized playback of audio and animation. Automatically handles WebSocket communication in SDK mode.
316
333
 
317
334
  **Two Usage Patterns:**
318
335
 
319
- #### Network Mode Methods
336
+ #### SDK Mode Methods
320
337
 
321
338
  ```typescript
322
339
  // Start WebSocket service
323
340
  await avatarView.avatarController.start()
324
341
 
325
- // Send audio data (SDK handles receiving animation data automatically)
326
- avatarView.avatarController.send(audioData: ArrayBuffer, end: boolean)
342
+ // Send audio data
343
+ const conversationId = avatarView.avatarController.send(audioData: ArrayBuffer, end: boolean)
344
+ // Returns: conversationId - Conversation ID for this conversation session (used to distinguish each conversation round)
327
345
  // audioData: Audio data (ArrayBuffer format, must be 16kHz mono PCM16)
328
346
  // - Sample rate: 16kHz (16000 Hz) - backend requirement
329
347
  // - Format: PCM16 (16-bit signed integer, little-endian)
330
348
  // - Channels: Mono (single channel)
331
349
  // - Example: 1 second = 16000 samples × 2 bytes = 32000 bytes
332
- // end: false (default) - Normal audio data sending, server will accumulate audio data, automatically returns animation data and starts synchronized playback of animation and audio after accumulating enough data
333
- // end: true - Immediately return animation data, no longer accumulating, used for ending current conversation or scenarios requiring immediate response
350
+ // end: false (default) - Continue sending audio data for current conversation
351
+ // end: true - Mark the end of current conversation round. After end=true, sending new audio data will interrupt any ongoing playback from the previous conversation round
334
352
 
335
353
  // Close WebSocket service
336
354
  avatarView.avatarController.close()
337
355
  ```
338
356
 
339
- #### External Data Mode Methods
357
+ #### Host Mode Methods
340
358
 
341
359
  ```typescript
342
- // Start playback with initial audio and animation data
343
- await avatarView.avatarController.play(
344
- initialAudioChunks?: Array<{ data: Uint8Array, isLast: boolean }>, // Initial audio chunks (16kHz mono PCM16)
345
- initialKeyframes?: any[] // Initial animation keyframes (obtained from your service)
360
+ // Play back existing audio and animation data (starts a new conversation)
361
+ const conversationId = await avatarView.avatarController.playback(
362
+ initialAudioChunks?: Array<{ data: Uint8Array, isLast: boolean }>, // Existing audio chunks (16kHz mono PCM16)
363
+ initialKeyframes?: any[] // Existing animation keyframes (obtained from your service)
346
364
  )
365
+ // Returns: conversationId - New conversation ID for this conversation session
347
366
 
348
- // Stream additional audio chunks (after play() is called)
349
- avatarView.avatarController.sendAudioChunk(
367
+ // Stream additional audio chunks (after playback() is called)
368
+ const conversationId = avatarView.avatarController.yieldAudioData(
350
369
  data: Uint8Array, // Audio chunk data
351
370
  isLast: boolean = false // Whether this is the last chunk
352
371
  )
372
+ // Returns: conversationId - Conversation ID for this audio session
353
373
 
354
- // Stream additional animation keyframes (after play() is called)
355
- avatarView.avatarController.sendKeyframes(
356
- keyframes: any[] // Additional animation keyframes (obtained from your service)
374
+ // Stream additional animation keyframes (after playback() is called)
375
+ avatarView.avatarController.yieldFramesData(
376
+ keyframes: any[], // Additional animation keyframes (obtained from your service)
377
+ conversationId: string // Conversation ID (required). Use getCurrentConversationId() or yieldAudioData() to get conversationId.
357
378
  )
358
379
  ```
359
380
 
381
+ **⚠️ Important: Conversation ID (conversationId) Management**
382
+
383
+ **SDK Mode:**
384
+ - `send()` returns a conversationId to distinguish each conversation round
385
+ - `end=true` marks the end of a conversation round. After `end=true`, sending new audio data will interrupt any ongoing playback from the previous conversation round
386
+
387
+ **Host Mode:**
388
+ For each conversation session, you **must**:
389
+ 1. **First send audio data** to get a conversationId (used to distinguish each conversation round):
390
+ - `playback()` returns a conversationId when playing back existing audio and animation data (replay mode)
391
+ - `yieldAudioData()` returns a conversationId for streaming new audio data
392
+ 2. **Then use that conversationId** to send animation data:
393
+ - `yieldFramesData()` requires a valid conversationId parameter
394
+ - Animation data with mismatched conversationId will be **discarded**
395
+ - Use `getCurrentConversationId()` to retrieve the current active conversationId
396
+
397
+ **Example Flow (Host Mode):**
398
+ ```typescript
399
+ // Step 1: Play back existing data first to get conversationId (or stream new audio)
400
+ const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
401
+ // or stream new audio data
402
+ const conversationId = avatarView.avatarController.yieldAudioData(audioChunk, false)
403
+
404
+ // Step 2: Use the conversationId to send animation data
405
+ avatarView.avatarController.yieldFramesData(keyframes, conversationId)
406
+ ```
407
+
408
+ **Why conversationId is required:**
409
+ - Ensures audio and animation data belong to the same conversation session
410
+ - Prevents data from different sessions from being mixed
411
+ - Automatically discards mismatched animation data for data integrity
412
+
360
413
  #### Common Methods (Both Modes)
361
414
 
362
415
  ```typescript
@@ -372,17 +425,22 @@ avatarView.avatarController.interrupt()
372
425
  // Clear all data and resources
373
426
  avatarView.avatarController.clear()
374
427
 
428
+ // Get current conversation ID (for Host mode)
429
+ const conversationId = avatarView.avatarController.getCurrentConversationId()
430
+ // Returns: Current conversationId for the active audio session, or null if no active session
431
+
375
432
  // Set event callbacks
376
- avatarView.avatarController.onConnectionState = (state: ConnectionState) => {} // Network mode only
433
+ avatarView.avatarController.onConnectionState = (state: ConnectionState) => {} // SDK mode only
377
434
  avatarView.avatarController.onAvatarState = (state: AvatarState) => {}
378
435
  avatarView.avatarController.onError = (error: Error) => {}
379
436
  ```
380
437
 
381
438
  **Important Notes:**
382
- - `start()` and `close()` are only available in network mode
383
- - `play()`, `sendAudioChunk()`, and `sendKeyframes()` are only available in external data mode
384
- - `pause()`, `resume()`, `interrupt()`, and `clear()` are available in both modes
439
+ - `start()` and `close()` are only available in SDK mode
440
+ - `playback()`, `yieldAudioData()`, and `yieldFramesData()` are only available in Host mode
441
+ - `pause()`, `resume()`, `interrupt()`, `clear()`, and `getCurrentConversationId()` are available in both modes
385
442
  - The playback mode is determined by `drivingServiceMode` in `AvatarKit.initialize()` and cannot be changed
443
+ - **Conversation ID**: In Host mode, always send audio data first to obtain a conversationId, then use that conversationId when sending animation data. Animation data with mismatched conversationId will be discarded. Use `getCurrentConversationId()` to retrieve the current active conversationId.
386
444
 
387
445
  ## 🔧 Configuration
388
446
 
@@ -391,11 +449,15 @@ avatarView.avatarController.onError = (error: Error) => {}
391
449
  ```typescript
392
450
  interface Configuration {
393
451
  environment: Environment
452
+ drivingServiceMode?: DrivingServiceMode // Optional, default is 'sdk' (SDK mode)
394
453
  }
395
454
  ```
396
455
 
397
456
  **Description:**
398
457
  - `environment`: Specifies the environment (cn/us/test), SDK will automatically use the corresponding API address and WebSocket address based on the environment
458
+ - `drivingServiceMode`: Specifies the driving service mode
459
+ - `DrivingServiceMode.sdk` (default): SDK mode - SDK handles WebSocket communication automatically
460
+ - `DrivingServiceMode.host`: Host mode - Host application provides audio and animation data
399
461
  - `sessionToken`: Set separately via `AvatarKit.setSessionToken()`, not in Configuration
400
462
 
401
463
  ```typescript
@@ -406,28 +468,26 @@ enum Environment {
406
468
  }
407
469
  ```
408
470
 
409
- ### AvatarViewOptions
471
+ ### AvatarView Constructor
410
472
 
411
473
  ```typescript
412
- interface AvatarViewOptions {
413
- playbackMode?: AvatarPlaybackMode // Playback mode, default is 'network'
414
- container?: HTMLElement // Canvas container element
415
- }
474
+ constructor(avatar: Avatar, container: HTMLElement)
416
475
  ```
417
476
 
418
- **Description:**
419
- - `playbackMode`: Specifies the playback mode (`'network'` or `'external'`), default is `'network'`
420
- - `'network'`: SDK handles WebSocket communication, send audio via `send()`
421
- - `'external'`: External components provide audio and animation data, SDK handles synchronized playback
422
- - `container`: Optional container element for Canvas, if not provided, Canvas will be created but not added to DOM
423
- - Canvas automatically uses the container's full dimensions (width and height)
424
- - Canvas aspect ratio adapts to container size - set container dimensions to control the aspect ratio
477
+ **Parameters:**
478
+ - `avatar`: Avatar instance
479
+ - `container`: Canvas container element (required)
480
+ - Canvas automatically uses the container's full dimensions (width and height)
481
+ - Canvas aspect ratio adapts to the container size - set container dimensions to control the aspect ratio
482
+ - Canvas is automatically added to the container
483
+
484
+ **Note:** The playback mode is determined by `drivingServiceMode` in the `AvatarKit.initialize()` configuration, not by a constructor parameter
425
485
  - SDK automatically handles resize events via ResizeObserver
426
486
 
427
487
  ```typescript
428
488
  enum AvatarPlaybackMode {
429
- network = 'network', // Network mode: SDK handles WebSocket communication
430
- external = 'external' // External data mode: External provides data, SDK handles playback
489
+ network = 'network', // SDK mode: SDK handles WebSocket communication
490
+ external = 'external' // Host mode: Host provides data, SDK handles playback
431
491
  }
432
492
  ```
433
493
 
@@ -511,15 +571,12 @@ avatarView.avatarController.onError = (error: Error) => {
511
571
 
512
572
  ### Lifecycle Management
513
573
 
514
- #### Network Mode Lifecycle
574
+ #### SDK Mode Lifecycle
515
575
 
516
576
  ```typescript
517
577
  // Initialize
518
578
  const container = document.getElementById('avatar-container')
519
- const avatarView = new AvatarView(avatar, {
520
- container: container,
521
- playbackMode: AvatarPlaybackMode.network
522
- })
579
+ const avatarView = new AvatarView(avatar, container)
523
580
  await avatarView.avatarController.start()
524
581
 
525
582
  // Use
@@ -530,21 +587,21 @@ avatarView.avatarController.close()
530
587
  avatarView.dispose() // Automatically cleans up all resources
531
588
  ```
532
589
 
533
- #### External Data Mode Lifecycle
590
+ #### Host Mode Lifecycle
534
591
 
535
592
  ```typescript
536
593
  // Initialize
537
594
  const container = document.getElementById('avatar-container')
538
- const avatarView = new AvatarView(avatar, {
539
- container: container,
540
- playbackMode: AvatarPlaybackMode.external
541
- })
595
+ const avatarView = new AvatarView(avatar, container)
542
596
 
543
597
  // Use
544
598
  const initialAudioChunks = [{ data: audioData1, isLast: false }]
545
- await avatarView.avatarController.play(initialAudioChunks, initialKeyframes)
546
- avatarView.avatarController.sendAudioChunk(audioChunk, false)
547
- avatarView.avatarController.sendKeyframes(keyframes)
599
+ // Step 1: Send audio first to get conversationId
600
+ const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
601
+ // Step 2: Stream additional audio (returns conversationId)
602
+ const currentConversationId = avatarView.avatarController.yieldAudioData(audioChunk, false)
603
+ // Step 3: Use conversationId to send animation data (mismatched conversationId will be discarded)
604
+ avatarView.avatarController.yieldFramesData(keyframes, currentConversationId || conversationId)
548
605
 
549
606
  // Cleanup
550
607
  avatarView.avatarController.clear() // Clear all data and resources
@@ -554,8 +611,8 @@ avatarView.dispose() // Automatically cleans up all resources
554
611
  **⚠️ Important Notes:**
555
612
  - When disposing AvatarView instances, must call `dispose()` to properly clean up resources
556
613
  - Not properly cleaning up may cause resource leaks and rendering errors
557
- - In network mode, call `close()` before `dispose()` to properly close WebSocket connections
558
- - In external data mode, call `clear()` before `dispose()` to clear all playback data
614
+ - In SDK mode, call `close()` before `dispose()` to properly close WebSocket connections
615
+ - In Host mode, call `clear()` before `dispose()` to clear all playback data
559
616
 
560
617
  ### Memory Optimization
561
618
 
@@ -565,7 +622,7 @@ avatarView.dispose() // Automatically cleans up all resources
565
622
 
566
623
  ### Audio Data Sending
567
624
 
568
- #### Network Mode
625
+ #### SDK Mode
569
626
 
570
627
  The `send()` method receives audio data in `ArrayBuffer` format:
571
628
 
@@ -577,32 +634,39 @@ The `send()` method receives audio data in `ArrayBuffer` format:
577
634
 
578
635
  **Usage:**
579
636
  - `audioData`: Audio data (ArrayBuffer format, must be 16kHz mono PCM16)
580
- - `end=false` (default) - Normal audio data sending, server will accumulate audio data, automatically returns animation data and starts synchronized playback of animation and audio after accumulating enough data
581
- - `end=true` - Immediately return animation data, no longer accumulating, used for ending current conversation or scenarios requiring immediate response
637
+ - `end=false` (default) - Continue sending audio data for current conversation
638
+ - `end=true` - Mark the end of current conversation round. After `end=true`, sending new audio data will interrupt any ongoing playback from the previous conversation round
582
639
  - **Important**: No need to wait for `end=true` to start playing, it will automatically start playing after accumulating enough audio data
583
640
 
584
- #### External Data Mode
641
+ #### Host Mode
585
642
 
586
- The `play()` method starts playback with initial data, then use `sendAudioChunk()` to stream additional audio:
643
+ The `playback()` method is used to play back existing audio and animation data (replay mode), generating a new conversationId and interrupting any existing conversation. Then use `yieldAudioData()` to stream additional audio:
587
644
 
588
645
  **Audio Format Requirements:**
589
- - Same as network mode: 16kHz mono PCM16 format
646
+ - Same as SDK mode: 16kHz mono PCM16 format
590
647
  - Audio data should be provided as `Uint8Array` in chunks with `isLast` flag
591
648
 
592
649
  **Usage:**
593
650
  ```typescript
594
- // Start playback with initial audio and animation data
651
+ // Play back existing audio and animation data (starts a new conversation)
595
652
  // Note: Audio and animation data should be obtained from your backend service
596
653
  const initialAudioChunks = [
597
654
  { data: audioData1, isLast: false },
598
655
  { data: audioData2, isLast: false }
599
656
  ]
600
- await avatarController.play(initialAudioChunks, initialKeyframes)
657
+ const conversationId = await avatarController.playback(initialAudioChunks, initialKeyframes)
658
+ // Returns: conversationId - New conversation ID for this conversation session
601
659
 
602
660
  // Stream additional audio chunks
603
- avatarController.sendAudioChunk(audioChunk, isLast)
661
+ const conversationId = avatarController.yieldAudioData(audioChunk, isLast)
662
+ // Returns: conversationId - Conversation ID for this audio session
604
663
  ```
605
664
 
665
+ **⚠️ Conversation ID Workflow:**
666
+ 1. **Play back existing data or send audio first** → Get conversationId from `playback()` (for existing data) or `yieldAudioData()` (for streaming)
667
+ 2. **Send animation with conversationId** → Use the conversationId from step 1 in `yieldFramesData()`
668
+ 3. **Data matching** → Only animation data with matching conversationId will be accepted
669
+
606
670
  **Resampling (Both Modes):**
607
671
  - If your audio source is at a different sample rate (e.g., 24kHz, 48kHz), you **must** resample it to 16kHz before sending
608
672
  - For high-quality resampling, use Web Audio API's `OfflineAudioContext` with anti-aliasing filtering
@@ -1,7 +1,7 @@
1
1
  var C = Object.defineProperty;
2
2
  var g = (h, t, e) => t in h ? C(h, t, { enumerable: !0, configurable: !0, writable: !0, value: e }) : h[t] = e;
3
3
  var i = (h, t, e) => g(h, typeof t != "symbol" ? t + "" : t, e);
4
- import { A as m, e as f, a as c, l as u } from "./index-Dsokgngg.js";
4
+ import { A as m, e as f, a as c, l as u } from "./index-CuR_S9Ng.js";
5
5
  class y {
6
6
  constructor(t) {
7
7
  // AudioContext is managed internally
@@ -331,4 +331,4 @@ class y {
331
331
  export {
332
332
  y as StreamingAudioPlayer
333
333
  };
334
- //# sourceMappingURL=StreamingAudioPlayer-COgQTrz3.js.map
334
+ //# sourceMappingURL=StreamingAudioPlayer-D5P7mU8B.js.map