@spatialwalk/avatarkit 1.0.0-beta.17 → 1.0.0-beta.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,11 +5,25 @@ All notable changes to this project will be documented in this file.
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+ ## [1.0.0-beta.18] - 2025-01-25
+
+ ### 🔧 API Changes
+ - **Renamed `reqId` to `conversationId`** - Updated terminology for better clarity (see the migration sketch after this hunk)
+   - All methods and parameters that used `reqId` now use `conversationId`
+   - `getCurrentReqId()` → `getCurrentConversationId()`
+   - `generateReqId()` → `generateConversationId()`
+   - Updated all event logs and documentation to use `conversationId`
+   - Note: the protobuf protocol still uses the `reqId` field name internally, but the SDK API uses `conversationId`
+
+ ### 📚 Documentation
+ - Enhanced the Host mode documentation to emphasize the workflow: send audio data first to get a `conversationId`, then use that `conversationId` to send animation data
+ - Updated the Host Mode Example and Host Mode Flow sections with clearer step-by-step instructions
+
  ## [1.0.0-beta.17] - 2025-01-24
 
  ### ✨ New Features
  - **Audio-Only Fallback Mechanism** - SDK now includes automatic fallback to audio-only playback when animation data is unavailable
-   - Network mode: Automatically enters audio-only mode when server returns an error
+   - SDK mode: Automatically enters audio-only mode when the server returns an error
    - Host mode: Automatically enters audio-only mode when empty animation data is provided
    - Once in audio-only mode, subsequent animation data for that session is ignored
    - Fallback mode is interruptible, just like normal playback mode
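For readers migrating across this release, the rename is a one-for-one substitution on the SDK API surface (the wire protocol keeps `reqId`). A minimal before/after sketch, assuming an `avatarView` set up as in the README quick start:

```typescript
// 1.0.0-beta.17 and earlier:
// const reqId = avatarView.avatarController.getCurrentReqId()

// 1.0.0-beta.18:
const conversationId = avatarView.avatarController.getCurrentConversationId()

// The same one-for-one rename applies to generateReqId() → generateConversationId()
// and to every parameter previously named reqId.
```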
@@ -20,6 +34,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  - `AvatarView` constructor now only requires `avatar` and `container` parameters
  - Removed `AvatarViewOptions` interface
  - `container` parameter is now required (no longer optional)
+ - **Method Renames** - Renamed `AvatarController` methods for Host mode to better reflect their purpose (a migration sketch follows this hunk)
+   - `play()` → `playback()`: the method plays back existing data (replay mode)
+     - Old API: `avatarController.play(initialAudioChunks, initialKeyframes)`
+     - New API: `avatarController.playback(initialAudioChunks, initialKeyframes)`
+   - `sendAudioChunk()` → `yieldAudioData()`: the method yields/streams audio data
+     - Old API: `avatarController.sendAudioChunk(data, isLast)`
+     - New API: `avatarController.yieldAudioData(data, isLast)`
+   - `sendKeyframes()` → `yieldFramesData()`: the method yields/streams animation keyframes
+     - Old API: `avatarController.sendKeyframes(keyframes, reqId)`
+     - New API: `avatarController.yieldFramesData(keyframes, conversationId)`
 
  ### 🔧 Improvements
  - Extended transition animation duration from 200ms to 400ms for smoother end-of-playback transitions
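Taken together with the beta.18 parameter rename, a Host-mode call site migrates roughly as follows. This is a sketch: `avatarController`, `initialAudioChunks`, `initialKeyframes`, `data`, `isLast`, and `keyframes` are the placeholder names used in the changelog entries above.

```typescript
// Before (pre-beta.17):
// avatarController.play(initialAudioChunks, initialKeyframes)
// avatarController.sendAudioChunk(data, isLast)
// avatarController.sendKeyframes(keyframes, reqId)

// After (beta.17 method names, beta.18 parameter naming):
const conversationId = await avatarController.playback(initialAudioChunks, initialKeyframes)
avatarController.yieldAudioData(data, isLast)
avatarController.yieldFramesData(keyframes, conversationId)
```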
package/README.md CHANGED
@@ -37,8 +37,8 @@ import { DrivingServiceMode } from '@spatialwalk/avatarkit'
  const configuration: Configuration = {
    environment: Environment.test,
    drivingServiceMode: DrivingServiceMode.sdk, // Optional, 'sdk' is default
-   // - DrivingServiceMode.sdk: SDK mode (network mode) - SDK handles WebSocket communication
-   // - DrivingServiceMode.host: Host mode (external data mode) - Host app provides audio and animation data
+   // - DrivingServiceMode.sdk: SDK mode - SDK handles WebSocket communication
+   // - DrivingServiceMode.host: Host mode - Host app provides audio and animation data
  }
 
  await AvatarKit.initialize('your-app-id', configuration)
@@ -47,59 +47,57 @@ await AvatarKit.initialize('your-app-id', configuration)
  // AvatarKit.setSessionToken('your-session-token')
 
  // 2. Load character
- const avatarManager = new AvatarManager()
+ const avatarManager = AvatarManager.shared
  const avatar = await avatarManager.load('character-id', (progress) => {
    console.log(`Loading progress: ${progress.progress}%`)
  })
 
  // 3. Create view (automatically creates Canvas and AvatarController)
  // The playback mode is determined by drivingServiceMode in the AvatarKit configuration
- // - DrivingServiceMode.sdk: SDK mode (network mode) - SDK handles WebSocket communication
- // - DrivingServiceMode.host: Host mode (external data mode) - Host app provides audio and animation data
+ // - DrivingServiceMode.sdk: SDK mode - SDK handles WebSocket communication
+ // - DrivingServiceMode.host: Host mode - Host app provides audio and animation data
  const container = document.getElementById('avatar-container')
  const avatarView = new AvatarView(avatar, container)
 
- // 4. Start real-time communication (network mode only)
+ // 4. Start real-time communication (SDK mode only)
  await avatarView.avatarController.start()
 
- // 5. Send audio data (network mode)
+ // 5. Send audio data (SDK mode)
  // ⚠️ Important: Audio must be 16kHz mono PCM16 format
  // If audio is a Uint8Array, you can use slice().buffer to convert it to an ArrayBuffer
  const audioUint8 = new Uint8Array(1024) // Example: 16kHz PCM16 audio data (512 samples = 1024 bytes)
  const audioData = audioUint8.slice().buffer // Simplified conversion, works for ArrayBuffer and SharedArrayBuffer
  avatarView.avatarController.send(audioData, false) // Send audio data; playback starts automatically once enough data accumulates
- avatarView.avatarController.send(audioData, true) // end=true means immediately return animation data, no longer accumulating
+ avatarView.avatarController.send(audioData, true) // end=true marks the end of the current conversation round
  ```
 
- ### External Data Mode Example
+ ### Host Mode Example
 
  ```typescript
  import { AvatarPlaybackMode } from '@spatialwalk/avatarkit'
 
- // 1-3. Same as network mode (initialize SDK, load character)
+ // 1-3. Same as SDK mode (initialize SDK, load character)
 
- // 3. Create view with external data mode
+ // 3. Create view with Host mode
  const container = document.getElementById('avatar-container')
  const avatarView = new AvatarView(avatar, container)
 
- // 4. Start playback with initial data (obtained from your service)
- // Note: Audio and animation data should be obtained from your backend service
- const initialAudioChunks = [{ data: audioData1, isLast: false }, { data: audioData2, isLast: false }]
- const initialKeyframes = animationData1 // Animation keyframes from your service
+ // 4. Host Mode Workflow:
+ // ⚠️ IMPORTANT: In Host mode, you MUST send audio data FIRST to get a conversationId,
+ // then use that conversationId to send animation data.
+ // Animation data with a mismatched conversationId will be discarded.
 
- // 4. Start playback with initial data (obtained from your service)
- // Note: Audio and animation data should be obtained from your backend service
+ // Option A: Play back existing audio and animation data (replay mode)
  const initialAudioChunks = [{ data: audioData1, isLast: false }, { data: audioData2, isLast: false }]
  const initialKeyframes = animationData1 // Animation keyframes from your service
-
- // Step 1: Send audio first to get reqId (required for session management)
- const reqId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
-
- // 5. Stream additional data as needed
- // Important: Always send audio first to get reqId, then use that reqId for animation data
- const currentReqId = avatarView.avatarController.yieldAudioData(audioData3, false)
- // Step 2: Use the reqId to send animation data (mismatched reqId will be discarded)
- avatarView.avatarController.yieldFramesData(animationData2, currentReqId || reqId)
+ // Step 1: Send audio first to get a conversationId
+ const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
+
+ // Option B: Stream new audio and animation data
+ // Step 1: Send audio data first to get a conversationId
+ const currentConversationId = avatarView.avatarController.yieldAudioData(audioData3, false)
+ // Step 2: Use the conversationId to send animation data (a mismatched conversationId will be discarded)
+ avatarView.avatarController.yieldFramesData(animationData2, currentConversationId || conversationId)
  ```
 
  ### Complete Examples
@@ -109,8 +107,8 @@ Check the example code in the GitHub repository for complete usage flows for bot
  **Example Project:** [AvatarKit-Web-Demo](https://github.com/spatialwalk/AvatarKit-Web-Demo)
 
  This repository contains complete examples for Vanilla JS, Vue 3, and React, demonstrating:
- - Network mode: Real-time audio input with automatic animation data reception
- - External data mode: Custom data sources with manual audio/animation data management
+ - SDK mode: Real-time audio input with automatic animation data reception
+ - Host mode: Custom data sources with manual audio/animation data management
 
  ## 🏗️ Architecture Overview
 
@@ -120,7 +118,7 @@ The SDK uses a three-layer architecture for clear separation of concerns:
 
  1. **Rendering Layer (AvatarView)** - Responsible for 3D rendering only
  2. **Playback Layer (AvatarController)** - Manages audio/animation synchronization and playback
- 3. **Network Layer** - Handles WebSocket communication (only in network mode, internal implementation)
+ 3. **Network Layer** - Handles WebSocket communication (only in SDK mode, internal implementation)
 
  ### Core Components
 
@@ -153,14 +151,14 @@ The SDK supports two playback modes, configured in `AvatarKit.initialize()`:
 
  The SDK includes a fallback mechanism to ensure audio playback continues even when animation data is unavailable:
 
- - **Network Mode**: If the server returns an error or fails to provide animation data, the SDK automatically enters audio-only mode and continues playing audio independently
+ - **SDK Mode**: If the server returns an error or fails to provide animation data, the SDK automatically enters audio-only mode and continues playing audio independently
  - **Host Mode**: If empty animation data is provided (empty array or undefined), the SDK automatically enters audio-only mode (see the sketch after this list)
  - Once in audio-only mode, any subsequent animation data for that session will be ignored, and only audio will continue playing
  - The fallback mode is interruptible, just like normal playback mode
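A minimal illustration of the Host-mode fallback described above (a sketch: it assumes an `avatarView` created as in the Host Mode Example, and `audioChunk`/`lateKeyframes` are illustrative 16kHz mono PCM16 data and keyframes):

```typescript
// Passing an empty keyframes array (or undefined) drops this
// conversation into audio-only playback.
const chunks = [{ data: audioChunk, isLast: true }]
const conversationId = await avatarView.avatarController.playback(chunks, [])

// Any keyframes yielded later for this conversation are ignored;
// audio continues to play on its own.
avatarView.avatarController.yieldFramesData(lateKeyframes, conversationId)
```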
 
  ### Data Flow
 
- #### Network Mode Flow
+ #### SDK Mode Flow
 
  ```
  User audio input (16kHz mono PCM16)
@@ -180,15 +178,20 @@ AvatarController (playback loop) → AvatarView.renderRealtimeFrame()
  RenderSystem → WebGPU/WebGL → Canvas rendering
  ```
 
- #### External Data Mode Flow
+ #### Host Mode Flow
 
  ```
  External data source (audio + animation)
 
- AvatarController.playback(initialAudio, initialKeyframes) // Start playback
+ Step 1: Send audio data FIRST to get conversationId
 
- AvatarController.yieldAudioData() // Stream additional audio
- AvatarController.yieldFramesData() // Stream additional animation
+ AvatarController.playback(initialAudio, initialKeyframes) // Returns conversationId
+ OR
+ AvatarController.yieldAudioData(audioChunk) // Returns conversationId
+
+ Step 2: Use conversationId to send animation data
+
+ AvatarController.yieldFramesData(keyframes, conversationId) // Requires conversationId
 
  AvatarController → AnimationPlayer (synchronized playback)
 
@@ -200,8 +203,8 @@ RenderSystem → WebGPU/WebGL → Canvas rendering
  ```
 
  **Note:**
- - In network mode, users provide audio data, SDK handles network communication and animation data reception
- - In external data mode, users provide both audio and animation data, SDK handles synchronized playback only
+ - In SDK mode, users provide audio data; the SDK handles network communication and animation data reception
+ - In Host mode, users provide both audio and animation data; the SDK handles synchronized playback only
 
  ### Audio Format Requirements
 
@@ -262,10 +265,11 @@ AvatarKit.cleanup()
 
  ### AvatarManager
 
- Character resource manager, responsible for downloading, caching, and loading character data.
+ Character resource manager, responsible for downloading, caching, and loading character data. Use the singleton instance via `AvatarManager.shared`.
 
  ```typescript
- const manager = new AvatarManager()
+ // Get the singleton instance
+ const manager = AvatarManager.shared
 
  // Load character
  const avatar = await manager.load(
@@ -296,6 +300,9 @@ const avatarView = new AvatarView(avatar, container)
  // Get playback mode
  const mode = avatarView.playbackMode // 'network' | 'external'
 
+ // Wait for the first frame to render
+ await avatarView.ready // Promise that resolves when the first frame is rendered
+
  // Cleanup resources (must be called before switching characters)
  avatarView.dispose()
  ```
@@ -314,7 +321,7 @@ const newAvatar = await avatarManager.load('new-character-id')
  // Create new AvatarView
  currentAvatarView = new AvatarView(newAvatar, container)
 
- // Network mode: start connection
+ // SDK mode: start connection
  if (currentAvatarView.playbackMode === AvatarPlaybackMode.network) {
    await currentAvatarView.controller.start()
  }
@@ -322,77 +329,83 @@
 
  ### AvatarController
 
- Audio/animation playback controller (playback layer), manages synchronized playback of audio and animation. Automatically handles WebSocket communication in network mode.
+ Audio/animation playback controller (playback layer); manages synchronized playback of audio and animation, and automatically handles WebSocket communication in SDK mode.
 
  **Two Usage Patterns:**
 
- #### Network Mode Methods
+ #### SDK Mode Methods
 
  ```typescript
  // Start WebSocket service
  await avatarView.avatarController.start()
 
- // Send audio data (SDK handles receiving animation data automatically)
- avatarView.avatarController.send(audioData: ArrayBuffer, end: boolean)
+ // Send audio data
+ const conversationId = avatarView.avatarController.send(audioData: ArrayBuffer, end: boolean)
+ // Returns: conversationId - identifies this conversation round
  // audioData: Audio data (ArrayBuffer format, must be 16kHz mono PCM16)
  // - Sample rate: 16kHz (16000 Hz) - backend requirement
  // - Format: PCM16 (16-bit signed integer, little-endian)
  // - Channels: Mono (single channel)
  // - Example: 1 second = 16000 samples × 2 bytes = 32000 bytes
- // end: false (default) - Normal audio data sending, server will accumulate audio data, automatically returns animation data and starts synchronized playback of animation and audio after accumulating enough data
- // end: true - Immediately return animation data, no longer accumulating, used for ending current conversation or scenarios requiring immediate response
+ // end: false (default) - Continue sending audio data for the current conversation
+ // end: true - Marks the end of the current conversation round. After end=true, sending new audio data will interrupt any ongoing playback from the previous round
 
  // Close WebSocket service
  avatarView.avatarController.close()
  ```
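A small usage sketch of these round semantics (hedged: `micChunks`, `lastChunk`, and `nextChunk` are illustrative 16kHz mono PCM16 `ArrayBuffer`s, not names from the SDK):

```typescript
// One conversation round in SDK mode: stream chunks, then close the round.
for (const chunk of micChunks) {
  avatarView.avatarController.send(chunk, false) // playback starts once enough audio accumulates
}
const conversationId = avatarView.avatarController.send(lastChunk, true) // ends this round

// Audio sent after end=true starts a new round and interrupts any
// playback still running from the previous round.
avatarView.avatarController.send(nextChunk, false)
```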
 
- #### External Data Mode Methods
+ #### Host Mode Methods
 
  ```typescript
  // Play back existing audio and animation data (starts a new conversation)
- const reqId = await avatarView.avatarController.playback(
+ const conversationId = await avatarView.avatarController.playback(
    initialAudioChunks?: Array<{ data: Uint8Array, isLast: boolean }>, // Existing audio chunks (16kHz mono PCM16)
    initialKeyframes?: any[] // Existing animation keyframes (obtained from your service)
  )
- // Returns: reqId - New request ID for this conversation session
+ // Returns: conversationId - New conversation ID for this conversation session
 
  // Stream additional audio chunks (after playback() is called)
- const reqId = avatarView.avatarController.yieldAudioData(
+ const conversationId = avatarView.avatarController.yieldAudioData(
    data: Uint8Array, // Audio chunk data
    isLast: boolean = false // Whether this is the last chunk
  )
- // Returns: reqId - Request ID for this audio session
+ // Returns: conversationId - Conversation ID for this audio session
 
  // Stream additional animation keyframes (after playback() is called)
  avatarView.avatarController.yieldFramesData(
    keyframes: any[], // Additional animation keyframes (obtained from your service)
-   reqId: string // Request ID (required). Use getCurrentReqId() or yieldAudioData() to get reqId.
+   conversationId: string // Conversation ID (required). Use getCurrentConversationId() or yieldAudioData() to get a conversationId.
  )
  ```
 
- **⚠️ Important: Request ID (reqId) Management**
+ **⚠️ Important: Conversation ID (conversationId) Management**
 
+ **SDK Mode:**
+ - `send()` returns a conversationId that distinguishes each conversation round
+ - `end=true` marks the end of a conversation round. After `end=true`, sending new audio data will interrupt any ongoing playback from the previous round
+
+ **Host Mode:**
  For each conversation session, you **must**:
- 1. **First send audio data** to get a reqId:
-    - `playback()` returns a reqId when starting a new conversation
-    - `yieldAudioData()` returns a reqId for the current audio session
- 2. **Then use that reqId** to send animation data:
-    - `yieldFramesData()` requires a valid reqId parameter
-    - Animation data with mismatched reqId will be **discarded**
-    - Use `getCurrentReqId()` to retrieve the current active reqId
-
- **Example Flow:**
+ 1. **First send audio data** to get a conversationId (used to distinguish each conversation round):
+    - `playback()` returns a conversationId when playing back existing audio and animation data (replay mode)
+    - `yieldAudioData()` returns a conversationId when streaming new audio data
+ 2. **Then use that conversationId** to send animation data:
+    - `yieldFramesData()` requires a valid conversationId parameter
+    - Animation data with a mismatched conversationId will be **discarded**
+    - Use `getCurrentConversationId()` to retrieve the current active conversationId
+
+ **Example Flow (Host Mode):**
  ```typescript
- // Step 1: Send audio first to get reqId
- const reqId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
- // or
- const reqId = avatarView.avatarController.yieldAudioData(audioChunk, false)
+ // Step 1: Play back existing data first to get a conversationId
+ const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
+ // or stream new audio data
+ const conversationId = avatarView.avatarController.yieldAudioData(audioChunk, false)
 
- // Step 2: Use the reqId to send animation data
- avatarView.avatarController.yieldFramesData(keyframes, reqId)
+ // Step 2: Use the conversationId to send animation data
+ avatarView.avatarController.yieldFramesData(keyframes, conversationId)
  ```
 
- **Why reqId is required:**
+ **Why conversationId is required:**
  - Ensures audio and animation data belong to the same conversation session
  - Prevents data from different sessions from being mixed
  - Automatically discards mismatched animation data for data integrity
@@ -412,18 +425,22 @@ avatarView.avatarController.interrupt()
  // Clear all data and resources
  avatarView.avatarController.clear()
 
+ // Get the current conversation ID (for Host mode)
+ const conversationId = avatarView.avatarController.getCurrentConversationId()
+ // Returns: the current conversationId for the active audio session, or null if there is no active session
+
  // Set event callbacks
- avatarView.avatarController.onConnectionState = (state: ConnectionState) => {} // Network mode only
+ avatarView.avatarController.onConnectionState = (state: ConnectionState) => {} // SDK mode only
  avatarView.avatarController.onAvatarState = (state: AvatarState) => {}
  avatarView.avatarController.onError = (error: Error) => {}
  ```
 
  **Important Notes:**
- - `start()` and `close()` are only available in network mode
- - `playback()`, `yieldAudioData()`, and `yieldFramesData()` are only available in external data mode
- - `pause()`, `resume()`, `interrupt()`, and `clear()` are available in both modes
+ - `start()` and `close()` are only available in SDK mode
+ - `playback()`, `yieldAudioData()`, and `yieldFramesData()` are only available in Host mode
+ - `pause()`, `resume()`, `interrupt()`, `clear()`, and `getCurrentConversationId()` are available in both modes
  - The playback mode is determined when creating `AvatarView` and cannot be changed
- - **Request ID (reqId)**: In external data mode, always send audio data first to obtain a reqId, then use that reqId when sending animation data. Animation data with mismatched reqId will be discarded.
+ - **Conversation ID**: In Host mode, always send audio data first to obtain a conversationId, then use that conversationId when sending animation data. Animation data with a mismatched conversationId will be discarded. Use `getCurrentConversationId()` to retrieve the current active conversationId.
 
  ## 🔧 Configuration
 
@@ -432,7 +449,7 @@ avatarView.avatarController.onError = (error: Error) => {}
  ```typescript
  interface Configuration {
    environment: Environment
-   drivingServiceMode?: DrivingServiceMode // Optional, default is 'sdk' (network mode)
+   drivingServiceMode?: DrivingServiceMode // Optional, default is 'sdk' (SDK mode)
  }
  ```
 
@@ -469,8 +486,8 @@ constructor(avatar: Avatar, container: HTMLElement)
 
  ```typescript
  enum AvatarPlaybackMode {
-   network = 'network', // Network mode: SDK handles WebSocket communication
-   external = 'external' // External data mode: External provides data, SDK handles playback
+   network = 'network', // SDK mode: SDK handles WebSocket communication
+   external = 'external' // Host mode: Host provides data, SDK handles playback
  }
  ```
 
@@ -554,14 +571,12 @@ avatarView.avatarController.onError = (error: Error) => {
 
  ### Lifecycle Management
 
- #### Network Mode Lifecycle
+ #### SDK Mode Lifecycle
 
  ```typescript
  // Initialize
  const container = document.getElementById('avatar-container')
  const avatarView = new AvatarView(avatar, container)
- playbackMode: AvatarPlaybackMode.network
- })
  await avatarView.avatarController.start()
 
  // Use
@@ -572,7 +587,7 @@ avatarView.avatarController.close()
  avatarView.dispose() // Automatically cleans up all resources
  ```
 
- #### External Data Mode Lifecycle
+ #### Host Mode Lifecycle
 
  ```typescript
  // Initialize
@@ -581,12 +596,12 @@ const avatarView = new AvatarView(avatar, container)
 
  // Use
  const initialAudioChunks = [{ data: audioData1, isLast: false }]
- // Step 1: Send audio first to get reqId
- const reqId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
- // Step 2: Stream additional audio (returns reqId)
- const currentReqId = avatarView.avatarController.yieldAudioData(audioChunk, false)
- // Step 3: Use reqId to send animation data (mismatched reqId will be discarded)
- avatarView.avatarController.yieldFramesData(keyframes, currentReqId || reqId)
+ // Step 1: Send audio first to get a conversationId
+ const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
+ // Step 2: Stream additional audio (returns a conversationId)
+ const currentConversationId = avatarView.avatarController.yieldAudioData(audioChunk, false)
+ // Step 3: Use the conversationId to send animation data (a mismatched conversationId will be discarded)
+ avatarView.avatarController.yieldFramesData(keyframes, currentConversationId || conversationId)
 
  // Cleanup
  avatarView.avatarController.clear() // Clear all data and resources
@@ -596,8 +611,8 @@ avatarView.dispose() // Automatically cleans up all resources
  **⚠️ Important Notes:**
  - When disposing of AvatarView instances, you must call `dispose()` to properly clean up resources
  - Failing to clean up properly may cause resource leaks and rendering errors
- - In network mode, call `close()` before `dispose()` to properly close WebSocket connections
- - In external data mode, call `clear()` before `dispose()` to clear all playback data
+ - In SDK mode, call `close()` before `dispose()` to properly close WebSocket connections
+ - In Host mode, call `clear()` before `dispose()` to clear all playback data (a combined teardown sketch follows)
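Reflecting the notes above, a combined teardown helper (a sketch under the documented API; it assumes `AvatarView` is exported by the package alongside `AvatarPlaybackMode`):

```typescript
import { AvatarPlaybackMode, AvatarView } from '@spatialwalk/avatarkit'

// Tear down an AvatarView in either mode, in the documented order.
function teardownAvatarView(avatarView: AvatarView): void {
  if (avatarView.playbackMode === AvatarPlaybackMode.network) {
    avatarView.avatarController.close() // SDK mode: close the WebSocket first
  } else {
    avatarView.avatarController.clear() // Host mode: clear playback data first
  }
  avatarView.dispose() // then release rendering resources
}
```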
 
  ### Memory Optimization
 
@@ -607,7 +622,7 @@ avatarView.dispose() // Automatically cleans up all resources
 
  ### Audio Data Sending
 
- #### Network Mode
+ #### SDK Mode
 
  The `send()` method receives audio data in `ArrayBuffer` format:
 
@@ -619,16 +634,16 @@ The `send()` method receives audio data in `ArrayBuffer` format:
 
  **Usage:**
  - `audioData`: Audio data (ArrayBuffer format, must be 16kHz mono PCM16)
- - `end=false` (default) - Normal audio data sending, server will accumulate audio data, automatically returns animation data and starts synchronized playback of animation and audio after accumulating enough data
- - `end=true` - Immediately return animation data, no longer accumulating, used for ending current conversation or scenarios requiring immediate response
+ - `end=false` (default) - Continue sending audio data for the current conversation
+ - `end=true` - Marks the end of the current conversation round. After `end=true`, sending new audio data will interrupt any ongoing playback from the previous round
  - **Important**: There is no need to wait for `end=true` to start playing; playback starts automatically once enough audio data has accumulated
 
- #### External Data Mode
+ #### Host Mode
 
- The `playback()` method starts playback with existing audio and animation data, generating a new reqId and interrupting any existing conversation. Then use `yieldAudioData()` to stream additional audio:
+ The `playback()` method plays back existing audio and animation data (replay mode), generating a new conversationId and interrupting any existing conversation. Then use `yieldAudioData()` to stream additional audio:
 
  **Audio Format Requirements:**
- - Same as network mode: 16kHz mono PCM16 format
+ - Same as SDK mode: 16kHz mono PCM16 format
  - Audio data should be provided as `Uint8Array` in chunks with an `isLast` flag
 
  **Usage:**
@@ -639,18 +654,18 @@
    { data: audioData1, isLast: false },
    { data: audioData2, isLast: false }
  ]
- const reqId = await avatarController.playback(initialAudioChunks, initialKeyframes)
- // Returns: reqId - New request ID for this conversation session
+ const conversationId = await avatarController.playback(initialAudioChunks, initialKeyframes)
+ // Returns: conversationId - New conversation ID for this conversation session
 
  // Stream additional audio chunks
- const reqId = avatarController.yieldAudioData(audioChunk, isLast)
- // Returns: reqId - Request ID for this audio session
+ const conversationId = avatarController.yieldAudioData(audioChunk, isLast)
+ // Returns: conversationId - Conversation ID for this audio session
  ```
 
- **⚠️ Request ID Workflow:**
- 1. **Send audio first** → Get reqId from `playback()` or `yieldAudioData()`
- 2. **Send animation with reqId** → Use the reqId from step 1 in `yieldFramesData()`
- 3. **Data matching** → Only animation data with matching reqId will be accepted
+ **⚠️ Conversation ID Workflow:**
+ 1. **Play back existing data or send audio first** → Get a conversationId from `playback()` (for existing data) or `yieldAudioData()` (for streaming)
+ 2. **Send animation with the conversationId** → Use the conversationId from step 1 in `yieldFramesData()`
+ 3. **Data matching** → Only animation data with a matching conversationId will be accepted
 
  **Resampling (Both Modes):**
  - If your audio source is at a different sample rate (e.g., 24kHz, 48kHz), you **must** resample it to 16kHz before sending (a conversion sketch follows)
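One possible Float32 → 16kHz PCM16 conversion for browser audio (a hedged sketch, not part of the SDK: production code should prefer a proper resampler such as an `OfflineAudioContext`; the linear interpolation here is only a rough approximation):

```typescript
// Convert Float32 samples (Web Audio API) at inputRate to 16kHz mono PCM16.
function toPcm16At16kHz(input: Float32Array, inputRate: number): Uint8Array {
  const ratio = inputRate / 16000
  const outLength = Math.floor(input.length / ratio)
  const out = new Int16Array(outLength)
  for (let i = 0; i < outLength; i++) {
    // Naive linear-interpolation resampling
    const pos = i * ratio
    const i0 = Math.floor(pos)
    const i1 = Math.min(i0 + 1, input.length - 1)
    const sample = input[i0] + (input[i1] - input[i0]) * (pos - i0)
    // Clamp to [-1, 1] and scale to 16-bit signed PCM
    const clamped = Math.max(-1, Math.min(1, sample))
    out[i] = clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff
  }
  return new Uint8Array(out.buffer)
}
```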
@@ -1,7 +1,7 @@
  var C = Object.defineProperty;
  var g = (h, t, e) => t in h ? C(h, t, { enumerable: !0, configurable: !0, writable: !0, value: e }) : h[t] = e;
  var i = (h, t, e) => g(h, typeof t != "symbol" ? t + "" : t, e);
- import { A as m, e as f, a as c, l as u } from "./index-suaZGA5u.js";
+ import { A as m, e as f, a as c, l as u } from "./index-CuR_S9Ng.js";
  class y {
    constructor(t) {
      // AudioContext is managed internally
@@ -331,4 +331,4 @@ class y {
  export {
    y as StreamingAudioPlayer
  };
- //# sourceMappingURL=StreamingAudioPlayer-a8MwHQ3Q.js.map
+ //# sourceMappingURL=StreamingAudioPlayer-D5P7mU8B.js.map