@spatialwalk/avatarkit 1.0.0-beta.21 → 1.0.0-beta.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.0.0-beta.23] - 2025-01-26
9
+
10
+ ### 🔧 API Changes
11
+ - **Breaking Change** - `playback()` method is no longer supported and has been removed from public API
12
+
13
+ ## [1.0.0-beta.22] - 2025-01-26
14
+
15
+ ### 🔧 API Changes
16
+ - **State Callback Renamed** - `onAvatarState` has been renamed to `onConversationState` for better clarity
17
+ - The callback now uses the `ConversationState` enum with states: `idle` and `playing`
18
+ - **Environment Enum Updated** - `Environment.us` has been renamed to `Environment.intl` for better internationalization support
19
+ - All references to `Environment.us` should be updated to `Environment.intl`
20
+ - Remote config endpoints now use `intl` instead of `us`
21
+
22
+ ### ✨ New Features
23
+ - **Volume Control** - Added volume control API for audio playback
24
+ - `setVolume(volume: number)` - Set audio volume (0.0 to 1.0)
25
+ - `getVolume(): number` - Get current audio volume
26
+ - Volume control only affects the avatar's audio player, not system volume
27
+ - Volume changes take effect immediately, including for currently playing audio
28
+
8
29
  ## [1.0.0-beta.21] - 2025-01-25
9
30
 
10
31
  ### ✨ New Features
@@ -221,7 +242,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
221
242
  ## [1.0.0-beta.5] - 2025-11-14
222
243
 
223
244
  ### 🐛 Bug Fixes
224
- - Fixed missing `AvatarPlaybackMode` enum export in published package
245
+ - Fixed missing `DrivingServiceMode` enum export in published package
225
246
 
226
247
  ---
227
248
 
@@ -286,7 +307,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
286
307
  // New API
287
308
  new AvatarView(avatar, {
288
309
  container: container,
289
- playbackMode: AvatarPlaybackMode.network // or AvatarPlaybackMode.external
310
+ playbackMode: DrivingServiceMode.sdk // or DrivingServiceMode.host
290
311
  })
291
312
  ```
292
313
 
package/README.md CHANGED
@@ -28,11 +28,11 @@ import {
28
28
  AvatarManager,
29
29
  AvatarView,
30
30
  Configuration,
31
- Environment
31
+ Environment,
32
+ DrivingServiceMode
32
33
  } from '@spatialwalk/avatarkit'
33
34
 
34
35
  // 1. Initialize SDK
35
- import { DrivingServiceMode } from '@spatialwalk/avatarkit'
36
36
 
37
37
  const configuration: Configuration = {
38
38
  environment: Environment.test,
@@ -62,19 +62,15 @@ const avatarView = new AvatarView(avatar, container)
62
62
  // 4. Start real-time communication (SDK mode only)
63
63
  await avatarView.avatarController.start()
64
64
 
65
- // 5. Send audio data (SDK mode)
66
- // ⚠️ Important: Audio must be 16kHz mono PCM16 format
67
- // If audio is Uint8Array, you can use slice().buffer to convert to ArrayBuffer
68
- const audioUint8 = new Uint8Array(1024) // Example: 16kHz PCM16 audio data (512 samples = 1024 bytes)
69
- const audioData = audioUint8.slice().buffer // Simplified conversion, works for ArrayBuffer and SharedArrayBuffer
70
- avatarView.avatarController.send(audioData, false) // Send audio data, will automatically start playing after accumulating enough data
65
+ // 5. Send audio data (SDK mode, must be 16kHz mono PCM16 format)
66
+ const audioData = new ArrayBuffer(1024) // Example: 16kHz PCM16 audio data
67
+ avatarView.avatarController.send(audioData, false) // Send audio data
71
68
  avatarView.avatarController.send(audioData, true) // end=true marks the end of current conversation round
72
69
  ```
73
70
 
74
71
  ### Host Mode Example
75
72
 
76
73
  ```typescript
77
- import { AvatarPlaybackMode } from '@spatialwalk/avatarkit'
78
74
 
79
75
  // 1-3. Same as SDK mode (initialize SDK, load character)
80
76
 
@@ -83,22 +79,9 @@ const container = document.getElementById('avatar-container')
83
79
  const avatarView = new AvatarView(avatar, container)
84
80
 
85
81
  // 4. Host Mode Workflow:
86
- // ⚠️ IMPORTANT: In Host mode, you MUST send audio data FIRST to get a conversationId,
87
- // then use that conversationId to send animation data.
88
- // Animation data with mismatched conversationId will be discarded.
89
-
90
- // Option A: Playback existing audio and animation data (replay mode)
91
- const initialAudioChunks = [{ data: audioData1, isLast: false }, { data: audioData2, isLast: false }]
92
- const initialKeyframes = animationData1 // Animation keyframes from your service
93
- // Step 1: Send audio first to get conversationId
94
- const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
95
-
96
- // Option B: Stream new audio and animation data (start a new session directly)
97
- // Step 1: Send audio data first to get conversationId (automatically generates conversationId if starting new session)
98
- const currentConversationId = avatarView.avatarController.yieldAudioData(audioData3, false)
99
- // Step 2: Use the conversationId to send animation data (mismatched conversationId will be discarded)
100
- avatarView.avatarController.yieldFramesData(animationData2, currentConversationId || conversationId)
101
- // Note: To start playback, you need to call playback() with the accumulated data, or ensure enough audio data is sent
82
+ // Send audio data first to get conversationId, then use it to send animation data
83
+ const conversationId = avatarView.avatarController.yieldAudioData(audioData, false)
84
+ avatarView.avatarController.yieldFramesData(animationData, conversationId)
102
85
  ```
103
86
 
104
87
  ### Complete Examples
@@ -186,15 +169,9 @@ RenderSystem → WebGPU/WebGL → Canvas rendering
186
169
  ```
187
170
  External data source (audio + animation)
188
171
 
189
- Step 1: Send audio data FIRST to get conversationId
190
-
191
- AvatarController.playback(initialAudio, initialKeyframes) // Returns conversationId
192
- OR
193
172
  AvatarController.yieldAudioData(audioChunk) // Returns conversationId
194
173
 
195
- Step 2: Use conversationId to send animation data
196
-
197
- AvatarController.yieldFramesData(keyframes, conversationId) // Requires conversationId
174
+ AvatarController.yieldFramesData(keyframes, conversationId)
198
175
 
199
176
  AvatarController → AnimationPlayer (synchronized playback)
200
177
 
@@ -205,10 +182,6 @@ AvatarController (playback loop) → AvatarView.renderRealtimeFrame()
205
182
  RenderSystem → WebGPU/WebGL → Canvas rendering
206
183
  ```
207
184
 
208
- **Note:**
209
- - In SDK mode, users provide audio data, SDK handles network communication and animation data reception
210
- - In Host mode, users provide both audio and animation data, SDK handles synchronized playback only
211
-
212
185
  ### Audio Format Requirements
213
186
 
214
187
  **⚠️ Important:** The SDK requires audio data to be in **16kHz mono PCM16** format:
@@ -288,21 +261,28 @@ manager.clearCache()
288
261
 
289
262
  3D rendering view (rendering layer), responsible for 3D rendering only. Internally automatically creates and manages `AvatarController`.
290
263
 
291
- **Playback Mode Configuration:**
264
+ ```typescript
265
+ constructor(avatar: Avatar, container: HTMLElement)
266
+ ```
267
+
268
+ **Parameters:**
269
+ - `avatar`: Avatar 实例
270
+ - `container`: Canvas 容器元素(必选)
271
+ - Canvas 自动使用容器的完整尺寸(宽度和高度)
272
+ - Canvas 宽高比适应容器尺寸 - 设置容器尺寸以控制宽高比
273
+ - Canvas 会自动添加到容器中
274
+ - SDK automatically handles resize events via ResizeObserver
275
+
276
+ **Playback Mode:**
277
+ - The playback mode is determined by `drivingServiceMode` in `AvatarKit.initialize()` configuration
292
278
  - The playback mode is fixed when creating `AvatarView` and persists throughout its lifecycle
293
279
  - Cannot be changed after creation
294
280
 
295
281
  ```typescript
296
- import { AvatarPlaybackMode } from '@spatialwalk/avatarkit'
297
-
298
282
  // Create view (Canvas is automatically added to container)
299
- // Create view (playback mode is determined by drivingServiceMode in AvatarKit configuration)
300
283
  const container = document.getElementById('avatar-container')
301
284
  const avatarView = new AvatarView(avatar, container)
302
285
 
303
- // Get playback mode
304
- const mode = avatarView.playbackMode // 'network' | 'external'
305
-
306
286
  // Wait for first frame to render
307
287
  await avatarView.ready // Promise that resolves when the first frame is rendered
308
288
 
@@ -324,10 +304,8 @@ const newAvatar = await avatarManager.load('new-character-id')
324
304
  // Create new AvatarView
325
305
  currentAvatarView = new AvatarView(newAvatar, container)
326
306
 
327
- // SDK mode: start connection
328
- if (currentAvatarView.playbackMode === AvatarPlaybackMode.network) {
307
+ // SDK mode: start connection (will throw error if not in SDK mode)
329
308
  await currentAvatarView.controller.start()
330
- }
331
309
  ```
332
310
 
333
311
  ### AvatarController
@@ -342,14 +320,9 @@ Audio/animation playback controller (playback layer), manages synchronized playb
342
320
  // Start WebSocket service
343
321
  await avatarView.avatarController.start()
344
322
 
345
- // Send audio data
323
+ // Send audio data (must be 16kHz mono PCM16 format)
346
324
  const conversationId = avatarView.avatarController.send(audioData: ArrayBuffer, end: boolean)
347
- // Returns: conversationId - Conversation ID for this conversation session (used to distinguish each conversation round)
348
- // audioData: Audio data (ArrayBuffer format, must be 16kHz mono PCM16)
349
- // - Sample rate: 16kHz (16000 Hz) - backend requirement
350
- // - Format: PCM16 (16-bit signed integer, little-endian)
351
- // - Channels: Mono (single channel)
352
- // - Example: 1 second = 16000 samples × 2 bytes = 32000 bytes
325
+ // Returns: conversationId - Conversation ID for this conversation session
353
326
  // end: false (default) - Continue sending audio data for current conversation
354
327
  // end: true - Mark the end of current conversation round. After end=true, sending new audio data will interrupt any ongoing playback from the previous conversation round
355
328
 
@@ -360,25 +333,17 @@ avatarView.avatarController.close()
360
333
  #### Host Mode Methods
361
334
 
362
335
  ```typescript
363
- // Playback existing audio and animation data (starts a new conversation)
364
- const conversationId = await avatarView.avatarController.playback(
365
- initialAudioChunks?: Array<{ data: Uint8Array, isLast: boolean }>, // Existing audio chunks (16kHz mono PCM16)
366
- initialKeyframes?: any[] // Existing animation keyframes (obtained from your service)
367
- )
368
- // Returns: conversationId - New conversation ID for this conversation session
369
-
370
- // Stream audio chunks (can be called directly to start a new session, or after playback() to add more data)
336
+ // Stream audio chunks (must be 16kHz mono PCM16 format)
371
337
  const conversationId = avatarView.avatarController.yieldAudioData(
372
338
  data: Uint8Array, // Audio chunk data
373
339
  isLast: boolean = false // Whether this is the last chunk
374
340
  )
375
341
  // Returns: conversationId - Conversation ID for this audio session
376
- // Note: If no conversationId exists, a new one will be automatically generated
377
342
 
378
343
  // Stream animation keyframes (requires conversationId from audio data)
379
344
  avatarView.avatarController.yieldFramesData(
380
345
  keyframes: any[], // Animation keyframes (obtained from your service)
381
- conversationId: string // Conversation ID (required). Use getCurrentConversationId() or yieldAudioData() to get conversationId.
346
+ conversationId: string // Conversation ID (required)
382
347
  )
383
348
  ```
384
349
 
@@ -386,36 +351,14 @@ avatarView.avatarController.yieldFramesData(
386
351
 
387
352
  **SDK Mode:**
388
353
  - `send()` returns a conversationId to distinguish each conversation round
389
- - `end=true` marks the end of a conversation round. After `end=true`, sending new audio data will interrupt any ongoing playback from the previous conversation round
354
+ - `end=true` marks the end of a conversation round
390
355
 
391
356
  **Host Mode:**
392
- For each conversation session, you **must**:
393
- 1. **First send audio data** to get a conversationId (used to distinguish each conversation round):
394
- - `playback()` returns a conversationId when playback existing audio and animation data (replay mode)
395
- - `yieldAudioData()` returns a conversationId for streaming new audio data
396
- 2. **Then use that conversationId** to send animation data:
357
+ - `yieldAudioData()` returns a conversationId (one is automatically generated when a new session starts)
397
358
  - `yieldFramesData()` requires a valid conversationId parameter
398
359
  - Animation data with mismatched conversationId will be **discarded**
399
360
  - Use `getCurrentConversationId()` to retrieve the current active conversationId
400
361
 
401
- **Example Flow (Host Mode):**
402
- ```typescript
403
- // Option A: Playback existing complete data (replay mode)
404
- const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
405
-
406
- // Option B: Start streaming new data directly
407
- // Step 1: Send audio data first to get conversationId (automatically generates if starting new session)
408
- const conversationId = avatarView.avatarController.yieldAudioData(audioChunk, false)
409
- // Step 2: Use the conversationId to send animation data
410
- avatarView.avatarController.yieldFramesData(keyframes, conversationId)
411
- // Note: To start playback with Option B, call playback() with accumulated data or ensure enough audio is sent
412
- ```
413
-
414
- **Why conversationId is required:**
415
- - Ensures audio and animation data belong to the same conversation session
416
- - Prevents data from different sessions from being mixed
417
- - Automatically discards mismatched animation data for data integrity
418
-
419
362
  #### Common Methods (Both Modes)
420
363
 
421
364
  ```typescript
@@ -435,18 +378,21 @@ avatarView.avatarController.clear()
435
378
  const conversationId = avatarView.avatarController.getCurrentConversationId()
436
379
  // Returns: Current conversationId for the active audio session, or null if no active session
437
380
 
381
+ // Volume control (affects only avatar audio player, not system volume)
382
+ avatarView.avatarController.setVolume(0.5) // Set volume to 50% (0.0 to 1.0)
383
+ const currentVolume = avatarView.avatarController.getVolume() // Get current volume (0.0 to 1.0)
384
+
438
385
  // Set event callbacks
439
386
  avatarView.avatarController.onConnectionState = (state: ConnectionState) => {} // SDK mode only
440
- avatarView.avatarController.onAvatarState = (state: AvatarState) => {}
387
+ avatarView.avatarController.onConversationState = (state: ConversationState) => {}
441
388
  avatarView.avatarController.onError = (error: Error) => {}
442
389
  ```
443
390
 
444
391
  **Important Notes:**
445
392
  - `start()` and `close()` are only available in SDK mode
446
- - `playback()`, `yieldAudioData()`, and `yieldFramesData()` are only available in Host mode
447
- - `pause()`, `resume()`, `interrupt()`, `clear()`, and `getCurrentConversationId()` are available in both modes
393
+ - `yieldAudioData()` and `yieldFramesData()` are only available in Host mode
394
+ - `pause()`, `resume()`, `interrupt()`, `clear()`, `getCurrentConversationId()`, `setVolume()`, and `getVolume()` are available in both modes
448
395
  - The playback mode is determined when creating `AvatarView` and cannot be changed
449
- - **Conversation ID**: In Host mode, always send audio data first to obtain a conversationId, then use that conversationId when sending animation data. Animation data with mismatched conversationId will be discarded. Use `getCurrentConversationId()` to retrieve the current active conversationId.
450
396
 
451
397
  ## 🔧 Configuration
452
398
 
@@ -460,7 +406,7 @@ interface Configuration {
460
406
  ```
461
407
 
462
408
  **Description:**
463
- - `environment`: Specifies the environment (cn/us/test), SDK will automatically use the corresponding API address and WebSocket address based on the environment
409
+ - `environment`: Specifies the environment (cn/intl/test), SDK will automatically use the corresponding API address and WebSocket address based on the environment
464
410
  - `drivingServiceMode`: Specifies the driving service mode
465
411
  - `DrivingServiceMode.sdk` (default): SDK mode - SDK handles WebSocket communication automatically
466
412
  - `DrivingServiceMode.host`: Host mode - Host application provides audio and animation data
@@ -469,34 +415,11 @@ interface Configuration {
469
415
  ```typescript
470
416
  enum Environment {
471
417
  cn = 'cn', // China region
472
- us = 'us', // US region
418
+ intl = 'intl', // International region
473
419
  test = 'test' // Test environment
474
420
  }
475
421
  ```
476
422
 
477
- ### AvatarView Constructor
478
-
479
- ```typescript
480
- constructor(avatar: Avatar, container: HTMLElement)
481
- ```
482
-
483
- **Parameters:**
484
- - `avatar`: Avatar 实例
485
- - `container`: Canvas 容器元素(必选)
486
- - Canvas 自动使用容器的完整尺寸(宽度和高度)
487
- - Canvas 宽高比适应容器尺寸 - 设置容器尺寸以控制宽高比
488
- - Canvas 会自动添加到容器中
489
-
490
- **Note:** 播放模式由 `AvatarKit.initialize()` 配置中的 `drivingServiceMode` 决定,而不是在构造函数参数中
491
- - SDK automatically handles resize events via ResizeObserver
492
-
493
- ```typescript
494
- enum AvatarPlaybackMode {
495
- network = 'network', // SDK mode: SDK handles WebSocket communication
496
- external = 'external' // Host mode: Host provides data, SDK handles playback
497
- }
498
- ```
499
-
500
423
  ### CameraConfig
501
424
 
502
425
  ```typescript
@@ -524,17 +447,23 @@ enum ConnectionState {
524
447
  }
525
448
  ```
526
449
 
527
- ### AvatarState
450
+ ### ConversationState
528
451
 
529
452
  ```typescript
530
- enum AvatarState {
531
- idle = 'idle', // Idle state, showing breathing animation
532
- active = 'active', // Active, waiting for playable content
533
- playing = 'playing', // Playing
534
- paused = 'paused' // Paused (can be resumed)
453
+ enum ConversationState {
454
+ idle = 'idle', // 呼吸态
455
+ playing = 'playing' // 播放态
535
456
  }
536
457
  ```
537
458
 
459
+ **状态说明:**
460
+ - `idle`: 数字人处于呼吸态,等待对话开始
461
+ - `playing`: 数字人正在播放对话内容(包括过渡动画期间)
462
+
463
+ **注意:** 过渡动画期间会提前通知目标状态:
464
+ - 从 `idle` 过渡到 `playing` 时,立即通知 `playing` 状态
465
+ - 从 `playing` 过渡到 `idle` 时,立即通知 `idle` 状态
466
+
538
467
  ## 🎨 Rendering System
539
468
 
540
469
  The SDK supports two rendering backends:
@@ -601,13 +530,8 @@ const container = document.getElementById('avatar-container')
601
530
  const avatarView = new AvatarView(avatar, container)
602
531
 
603
532
  // Use
604
- const initialAudioChunks = [{ data: audioData1, isLast: false }]
605
- // Step 1: Send audio first to get conversationId
606
- const conversationId = await avatarView.avatarController.playback(initialAudioChunks, initialKeyframes)
607
- // Step 2: Stream additional audio (returns conversationId)
608
- const currentConversationId = avatarView.avatarController.yieldAudioData(audioChunk, false)
609
- // Step 3: Use conversationId to send animation data (mismatched conversationId will be discarded)
610
- avatarView.avatarController.yieldFramesData(keyframes, currentConversationId || conversationId)
533
+ const conversationId = avatarView.avatarController.yieldAudioData(audioChunk, false)
534
+ avatarView.avatarController.yieldFramesData(keyframes, conversationId)
611
535
 
612
536
  // Cleanup
613
537
  avatarView.avatarController.clear() // Clear all data and resources
@@ -626,67 +550,6 @@ avatarView.dispose() // Automatically cleans up all resources
626
550
  - Supports dynamic loading/unloading of character and animation resources
627
551
  - Provides memory usage monitoring interface
628
552
 
629
- ### Audio Data Sending
630
-
631
- #### SDK Mode
632
-
633
- The `send()` method receives audio data in `ArrayBuffer` format:
634
-
635
- **Audio Format Requirements:**
636
- - **Sample Rate**: 16kHz (16000 Hz) - **Backend requirement, must be exactly 16kHz**
637
- - **Format**: PCM16 (16-bit signed integer, little-endian)
638
- - **Channels**: Mono (single channel)
639
- - **Data Size**: Each sample is 2 bytes, so 1 second of audio = 16000 samples × 2 bytes = 32000 bytes
640
-
641
- **Usage:**
642
- - `audioData`: Audio data (ArrayBuffer format, must be 16kHz mono PCM16)
643
- - `end=false` (default) - Continue sending audio data for current conversation
644
- - `end=true` - Mark the end of current conversation round. After `end=true`, sending new audio data will interrupt any ongoing playback from the previous conversation round
645
- - **Important**: No need to wait for `end=true` to start playing, it will automatically start playing after accumulating enough audio data
646
-
647
- #### Host Mode
648
-
649
- The `playback()` method is used to playback existing audio and animation data (replay mode), generating a new conversationId and interrupting any existing conversation.
650
-
651
- **Two ways to start a session in Host mode:**
652
- 1. **Use `playback()`** - For replaying existing complete audio and animation data
653
- 2. **Use `yieldAudioData()` directly** - For streaming new audio data (automatically generates conversationId if needed)
654
-
655
- Then use `yieldAudioData()` to stream additional audio:
656
-
657
- **Audio Format Requirements:**
658
- - Same as SDK mode: 16kHz mono PCM16 format
659
- - Audio data should be provided as `Uint8Array` in chunks with `isLast` flag
660
-
661
- **Usage:**
662
- ```typescript
663
- // Playback existing audio and animation data (starts a new conversation)
664
- // Note: Audio and animation data should be obtained from your backend service
665
- const initialAudioChunks = [
666
- { data: audioData1, isLast: false },
667
- { data: audioData2, isLast: false }
668
- ]
669
- const conversationId = await avatarController.playback(initialAudioChunks, initialKeyframes)
670
- // Returns: conversationId - New conversation ID for this conversation session
671
-
672
- // Stream additional audio chunks
673
- const conversationId = avatarController.yieldAudioData(audioChunk, isLast)
674
- // Returns: conversationId - Conversation ID for this audio session
675
- ```
676
-
677
- **⚠️ Conversation ID Workflow:**
678
- 1. **Start a session** → Choose one of two ways:
679
- - **Option A**: Use `playback(initialAudioChunks, initialKeyframes)` to replay existing complete data
680
- - **Option B**: Use `yieldAudioData(audioChunk)` directly to start streaming (automatically generates conversationId)
681
- 2. **Get conversationId** → Both methods return a conversationId
682
- 3. **Send animation with conversationId** → Use the conversationId from step 1 in `yieldFramesData()`
683
- 4. **Data matching** → Only animation data with matching conversationId will be accepted
684
-
685
- **Resampling (Both Modes):**
686
- - If your audio source is at a different sample rate (e.g., 24kHz, 48kHz), you **must** resample it to 16kHz before sending
687
- - For high-quality resampling, use Web Audio API's `OfflineAudioContext` with anti-aliasing filtering
688
- - See example projects (`vanilla`, `react`, `vue`) for complete resampling implementation
689
-
690
553
  ## 🌐 Browser Compatibility
691
554
 
692
555
  - **Chrome/Edge** 90+ (WebGPU recommended)
@@ -1,38 +1,42 @@
1
1
  var C = Object.defineProperty;
2
2
  var g = (h, t, e) => t in h ? C(h, t, { enumerable: !0, configurable: !0, writable: !0, value: e }) : h[t] = e;
3
- var i = (h, t, e) => g(h, typeof t != "symbol" ? t + "" : t, e);
4
- import { A as m, e as f, a as c, l as u } from "./index-ChKhyUK4.js";
3
+ var s = (h, t, e) => g(h, typeof t != "symbol" ? t + "" : t, e);
4
+ import { A as m, e as f, a as c, l as n } from "./index-DYf1u8L7.js";
5
5
  class y {
6
6
  constructor(t) {
7
7
  // AudioContext is managed internally
8
- i(this, "audioContext", null);
9
- i(this, "sampleRate");
10
- i(this, "channelCount");
11
- i(this, "debug");
8
+ s(this, "audioContext", null);
9
+ s(this, "sampleRate");
10
+ s(this, "channelCount");
11
+ s(this, "debug");
12
12
  // Session-level state
13
- i(this, "sessionId");
14
- i(this, "sessionStartTime", 0);
13
+ s(this, "sessionId");
14
+ s(this, "sessionStartTime", 0);
15
15
  // AudioContext time when session started
16
- i(this, "pausedTimeOffset", 0);
16
+ s(this, "pausedTimeOffset", 0);
17
17
  // Accumulated paused time
18
- i(this, "pausedAt", 0);
18
+ s(this, "pausedAt", 0);
19
19
  // Time when paused
20
- i(this, "pausedAudioContextTime", 0);
20
+ s(this, "pausedAudioContextTime", 0);
21
21
  // audioContext.currentTime when paused (for resume calculation)
22
- i(this, "scheduledTime", 0);
22
+ s(this, "scheduledTime", 0);
23
23
  // Next chunk schedule time in AudioContext time
24
24
  // Playback state
25
- i(this, "isPlaying", !1);
26
- i(this, "isPaused", !1);
27
- i(this, "autoStartEnabled", !0);
25
+ s(this, "isPlaying", !1);
26
+ s(this, "isPaused", !1);
27
+ s(this, "autoStartEnabled", !0);
28
28
  // Control whether to auto-start when buffer is ready
29
29
  // Audio buffer queue
30
- i(this, "audioChunks", []);
31
- i(this, "scheduledChunks", 0);
30
+ s(this, "audioChunks", []);
31
+ s(this, "scheduledChunks", 0);
32
32
  // Number of chunks already scheduled
33
- i(this, "activeSources", /* @__PURE__ */ new Set());
33
+ s(this, "activeSources", /* @__PURE__ */ new Set());
34
+ // Volume control
35
+ s(this, "gainNode", null);
36
+ s(this, "volume", 1);
37
+ // Default volume 1.0 (0.0 - 1.0)
34
38
  // Event callbacks
35
- i(this, "onEndedCallback");
39
+ s(this, "onEndedCallback");
36
40
  this.sessionId = `session_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`, this.sampleRate = (t == null ? void 0 : t.sampleRate) ?? m.audio.sampleRate, this.channelCount = (t == null ? void 0 : t.channelCount) ?? 1, this.debug = (t == null ? void 0 : t.debug) ?? !1;
37
41
  }
38
42
  /**
@@ -43,7 +47,7 @@ class y {
43
47
  try {
44
48
  this.audioContext = new AudioContext({
45
49
  sampleRate: this.sampleRate
46
- }), this.audioContext.state === "suspended" && await this.audioContext.resume(), this.log("AudioContext initialized", {
50
+ }), this.gainNode = this.audioContext.createGain(), this.gainNode.gain.value = this.volume, this.gainNode.connect(this.audioContext.destination), this.audioContext.state === "suspended" && await this.audioContext.resume(), this.log("AudioContext initialized", {
47
51
  sessionId: this.sessionId,
48
52
  sampleRate: this.audioContext.sampleRate,
49
53
  state: this.audioContext.state
@@ -53,7 +57,7 @@ class y {
53
57
  throw c.logEvent("activeAudioSessionFailed", "warning", {
54
58
  sessionId: this.sessionId,
55
59
  reason: e
56
- }), u.error("Failed to initialize AudioContext:", e), t instanceof Error ? t : new Error(e);
60
+ }), n.error("Failed to initialize AudioContext:", e), t instanceof Error ? t : new Error(e);
57
61
  }
58
62
  }
59
63
  /**
@@ -61,7 +65,7 @@ class y {
61
65
  */
62
66
  addChunk(t, e = !1) {
63
67
  if (!this.audioContext) {
64
- u.error("AudioContext not initialized");
68
+ n.error("AudioContext not initialized");
65
69
  return;
66
70
  }
67
71
  this.audioChunks.push({ data: t, isLast: e }), this.log(`Added chunk ${this.audioChunks.length}`, {
@@ -132,16 +136,16 @@ class y {
132
136
  }
133
137
  const r = e.data, o = e.isLast, a = this.pcmToAudioBuffer(r);
134
138
  if (!a) {
135
- u.error("Failed to create AudioBuffer from PCM data"), c.logEvent("character_player", "error", {
139
+ n.error("Failed to create AudioBuffer from PCM data"), c.logEvent("character_player", "error", {
136
140
  sessionId: this.sessionId,
137
141
  event: "audio_buffer_creation_failed"
138
142
  });
139
143
  return;
140
144
  }
141
145
  try {
142
- const s = this.audioContext.createBufferSource();
143
- s.buffer = a, s.connect(this.audioContext.destination), s.start(this.scheduledTime), this.activeSources.add(s), s.onended = () => {
144
- this.activeSources.delete(s), o && this.activeSources.size === 0 && (this.log("Last audio chunk ended, marking playback as ended"), this.markEnded());
146
+ const i = this.audioContext.createBufferSource();
147
+ i.buffer = a, i.connect(this.gainNode), i.start(this.scheduledTime), this.activeSources.add(i), i.onended = () => {
148
+ this.activeSources.delete(i), o && this.activeSources.size === 0 && (this.log("Last audio chunk ended, marking playback as ended"), this.markEnded());
145
149
  }, this.scheduledTime += a.duration, this.scheduledChunks++, this.log(`[StreamingAudioPlayer] Scheduled chunk ${t + 1}/${this.audioChunks.length}`, {
146
150
  startTime: this.scheduledTime - a.duration,
147
151
  duration: a.duration,
@@ -149,11 +153,11 @@ class y {
149
153
  isLast: o,
150
154
  activeSources: this.activeSources.size
151
155
  });
152
- } catch (s) {
153
- u.errorWithError("Failed to schedule audio chunk:", s), c.logEvent("character_player", "error", {
156
+ } catch (i) {
157
+ n.errorWithError("Failed to schedule audio chunk:", i), c.logEvent("character_player", "error", {
154
158
  sessionId: this.sessionId,
155
159
  event: "schedule_chunk_failed",
156
- reason: s instanceof Error ? s.message : String(s)
160
+ reason: i instanceof Error ? i.message : String(i)
157
161
  });
158
162
  }
159
163
  }
@@ -165,25 +169,25 @@ class y {
165
169
  if (!this.audioContext)
166
170
  return null;
167
171
  if (t.length === 0) {
168
- const l = Math.floor(this.sampleRate * 0.01), n = this.audioContext.createBuffer(
172
+ const l = Math.floor(this.sampleRate * 0.01), u = this.audioContext.createBuffer(
169
173
  this.channelCount,
170
174
  l,
171
175
  this.sampleRate
172
176
  );
173
177
  for (let d = 0; d < this.channelCount; d++)
174
- n.getChannelData(d).fill(0);
175
- return n;
178
+ u.getChannelData(d).fill(0);
179
+ return u;
176
180
  }
177
181
  const e = new Uint8Array(t), r = new Int16Array(e.buffer, 0, e.length / 2), o = r.length / this.channelCount, a = this.audioContext.createBuffer(
178
182
  this.channelCount,
179
183
  o,
180
184
  this.sampleRate
181
185
  );
182
- for (let s = 0; s < this.channelCount; s++) {
183
- const l = a.getChannelData(s);
184
- for (let n = 0; n < o; n++) {
185
- const d = n * this.channelCount + s;
186
- l[n] = r[d] / 32768;
186
+ for (let i = 0; i < this.channelCount; i++) {
187
+ const l = a.getChannelData(i);
188
+ for (let u = 0; u < o; u++) {
189
+ const d = u * this.channelCount + i;
190
+ l[u] = r[d] / 32768;
187
191
  }
188
192
  }
189
193
  return a;
@@ -204,7 +208,7 @@ class y {
204
208
  */
205
209
  pause() {
206
210
  !this.isPlaying || this.isPaused || !this.audioContext || (this.pausedAt = this.getCurrentTime(), this.pausedAudioContextTime = this.audioContext.currentTime, this.isPaused = !0, this.audioContext.state === "running" && this.audioContext.suspend().catch((t) => {
207
- u.errorWithError("Failed to suspend AudioContext:", t), this.isPaused = !1;
211
+ n.errorWithError("Failed to suspend AudioContext:", t), this.isPaused = !1;
208
212
  }), this.log("Playback paused", {
209
213
  pausedAt: this.pausedAt,
210
214
  pausedAudioContextTime: this.pausedAudioContextTime,
@@ -221,7 +225,7 @@ class y {
221
225
  try {
222
226
  await this.audioContext.resume();
223
227
  } catch (e) {
224
- throw u.errorWithError("Failed to resume AudioContext:", e), e;
228
+ throw n.errorWithError("Failed to resume AudioContext:", e), e;
225
229
  }
226
230
  const t = this.audioContext.currentTime;
227
231
  this.sessionStartTime = this.pausedAudioContextTime - this.pausedAt - this.pausedTimeOffset, this.isPaused = !1, this.scheduledChunks < this.audioChunks.length && this.scheduleAllChunks(), this.log("Playback resumed", {
@@ -307,7 +311,7 @@ class y {
307
311
  * Dispose and cleanup
308
312
  */
309
313
  dispose() {
310
- this.stop(), this.audioContext && (this.audioContext.close(), this.audioContext = null), this.audioChunks = [], this.scheduledChunks = 0, this.sessionStartTime = 0, this.pausedTimeOffset = 0, this.pausedAt = 0, this.pausedAudioContextTime = 0, this.scheduledTime = 0, this.onEndedCallback = void 0, this.log("StreamingAudioPlayer disposed");
314
+ this.stop(), this.audioContext && (this.audioContext.close(), this.audioContext = null, this.gainNode = null), this.audioChunks = [], this.scheduledChunks = 0, this.sessionStartTime = 0, this.pausedTimeOffset = 0, this.pausedAt = 0, this.pausedAudioContextTime = 0, this.scheduledTime = 0, this.onEndedCallback = void 0, this.log("StreamingAudioPlayer disposed");
311
315
  }
312
316
  /**
313
317
  * Flush buffered audio
@@ -321,14 +325,29 @@ class y {
321
325
  }
322
326
  this.scheduledChunks < this.audioChunks.length && this.audioChunks.splice(this.scheduledChunks), this.log("Flushed (soft)", { remainingScheduled: this.scheduledChunks });
323
327
  }
328
+ /**
329
+ * 设置音量 (0.0 - 1.0)
330
+ * 注意:这仅控制数字人音频播放器的音量,不影响系统音量
331
+ * @param volume 音量值,范围 0.0 到 1.0(0.0 为静音,1.0 为最大音量)
332
+ */
333
+ setVolume(t) {
334
+ (t < 0 || t > 1) && (n.warn(`[StreamingAudioPlayer] Volume out of range: ${t}, clamping to [0, 1]`), t = Math.max(0, Math.min(1, t))), this.volume = t, this.gainNode && (this.gainNode.gain.value = t);
335
+ }
336
+ /**
337
+ * 获取当前音量
338
+ * @returns 当前音量值 (0.0 - 1.0)
339
+ */
340
+ getVolume() {
341
+ return this.volume;
342
+ }
324
343
  /**
325
344
  * Debug logging
326
345
  */
327
346
  log(t, e) {
328
- this.debug && u.log(`[StreamingAudioPlayer] ${t}`, e || "");
347
+ this.debug && n.log(`[StreamingAudioPlayer] ${t}`, e || "");
329
348
  }
330
349
  }
331
350
  export {
332
351
  y as StreamingAudioPlayer
333
352
  };
334
- //# sourceMappingURL=StreamingAudioPlayer-DEXcuhRW.js.map
353
+ //# sourceMappingURL=StreamingAudioPlayer-PkzxBP93.js.map