@spatialwalk/avatarkit 1.0.0-beta.62 → 1.0.0-beta.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,24 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [1.0.0-beta.63] - 2026-01-14
6
+
7
+ ### ✨ New Features
8
+ - **Audio Context Initialization API** - Added `initializeAudioContext()` method to `AvatarController`
9
+ - Must be called in a user gesture context (click, touchstart, etc.) before any audio operations
10
+ - Ensures AudioContext is created and initialized in a user gesture context, preventing browser security policy issues
11
+ - All audio operations (`send()`, `yieldAudioData()`, `start()`, `playback()`, etc.) now require prior initialization
12
+
13
+ ### 🔧 Improvements
14
+ - **Initialization Flow** - Removed all lazy initialization logic for audio context
15
+ - Audio context initialization is now centralized in `initializeAudioContext()` method
16
+ - All audio operations check for initialization before proceeding
17
+ - Clear error messages when audio operations are attempted without initialization
18
+
19
+ ### 🐛 Bugfixes
20
+ - **Audio Context User Gesture Requirement** - Fixed an issue where the AudioContext could not be properly initialized when external applications request recording permissions
21
+ - Audio context must now be initialized in user gesture context, ensuring browser security policies are satisfied
22
+
5
23
  ## [1.0.0-beta.62] - 2026-01-14
6
24
 
7
25
  ### ✨ New Features
package/README.md CHANGED
@@ -20,6 +20,10 @@ npm install @spatialwalk/avatarkit
20
20
 
21
21
  ## 🎯 Quick Start
22
22
 
23
+ ### ⚠️ Important: Audio Context Initialization
24
+
25
+ **Before using any audio-related features, you MUST initialize the audio context in a user gesture context** (e.g., inside a `click` or `touchstart` event handler). This is required by browser security (autoplay) policies. If `initializeAudioContext()` is called outside a user gesture, initialization may fail or the AudioContext may remain suspended, in which case no audio will be played.
26
+
23
27
  ### Basic Usage
24
28
 
25
29
  ```typescript
@@ -70,13 +74,21 @@ const avatar = await avatarManager.load('character-id', (progress) => {
70
74
  const container = document.getElementById('avatar-container')
71
75
  const avatarView = new AvatarView(avatar, container)
72
76
 
73
- // 4. Start real-time communication (SDK mode only)
74
- await avatarView.avatarController.start()
75
-
76
- // 5. Send audio data (SDK mode, must be mono PCM16 format matching configured sample rate)
77
- const audioData = new ArrayBuffer(1024) // Example: PCM16 audio data at configured sample rate
78
- avatarView.avatarController.send(audioData, false) // Send audio data
79
- avatarView.avatarController.send(audioData, true) // end=true marks the end of current conversation round
77
+ // 4. ⚠️ CRITICAL: Initialize audio context (MUST be called in user gesture context)
78
+ // This method MUST be called within a user gesture event handler (click, touchstart, etc.)
79
+ // to satisfy browser security policies. Calling it outside a user gesture will fail.
80
+ button.addEventListener('click', async () => {
81
+ // Initialize audio context - MUST be in user gesture context
82
+ await avatarView.controller.initializeAudioContext()
83
+
84
+ // 5. Start real-time communication (SDK mode only)
85
+ await avatarView.controller.start()
86
+
87
+ // 6. Send audio data (SDK mode, must be mono PCM16 format matching configured sample rate)
88
+ const audioData = new ArrayBuffer(1024) // Example: PCM16 audio data at configured sample rate
89
+ avatarView.controller.send(audioData, false) // Send audio data
90
+ avatarView.controller.send(audioData, true) // end=true marks the end of current conversation round
91
+ })
80
92
  ```
81
93
 
82
94
  ### Host Mode Example
@@ -89,10 +101,17 @@ avatarView.avatarController.send(audioData, true) // end=true marks the end of c
89
101
  const container = document.getElementById('avatar-container')
90
102
  const avatarView = new AvatarView(avatar, container)
91
103
 
92
- // 4. Host Mode Workflow:
93
- // Send audio data first to get conversationId, then use it to send animation data
94
- const conversationId = avatarView.avatarController.yieldAudioData(audioData, false)
95
- avatarView.avatarController.yieldFramesData(animationDataArray, conversationId) // animationDataArray: (Uint8Array | ArrayBuffer)[]
104
+ // 4. ⚠️ CRITICAL: Initialize audio context (MUST be called in user gesture context)
105
+ // This method MUST be called within a user gesture event handler (click, touchstart, etc.)
106
+ // to satisfy browser security policies. Calling it outside a user gesture will fail.
107
+ button.addEventListener('click', async () => {
108
+ // Initialize audio context - MUST be in user gesture context
109
+ await avatarView.controller.initializeAudioContext()
110
+
111
+ // 5. Host Mode Workflow:
112
+ // Send audio data first to get conversationId, then use it to send animation data
113
+ const conversationId = avatarView.controller.yieldAudioData(audioData, false)
114
+ avatarView.controller.yieldFramesData(animationDataArray, conversationId) // animationDataArray: (Uint8Array | ArrayBuffer)[]
115
+ })
96
115
  ```
97
116
 
98
117
  ### Complete Examples
@@ -350,34 +369,52 @@ Audio/animation playback controller (playback layer), manages synchronized playb
350
369
  #### SDK Mode Methods
351
370
 
352
371
  ```typescript
353
- // Start WebSocket service
354
- await avatarView.avatarController.start()
355
-
356
- // Send audio data (must be 16kHz mono PCM16 format)
357
- const conversationId = avatarView.avatarController.send(audioData: ArrayBuffer, end: boolean)
358
- // Returns: conversationId - Conversation ID for this conversation session
359
- // end: false (default) - Continue sending audio data for current conversation
360
- // end: true - Mark the end of current conversation round. After end=true, sending new audio data will interrupt any ongoing playback from the previous conversation round
372
+ // ⚠️ CRITICAL: Initialize audio context first (MUST be called in user gesture context)
373
+ // This method MUST be called within a user gesture event handler (click, touchstart, etc.)
374
+ // to satisfy browser security policies. Calling it outside a user gesture will fail.
375
+ // All audio operations (start, send, etc.) require prior initialization.
376
+ button.addEventListener('click', async () => {
377
+ // Initialize audio context - MUST be in user gesture context
378
+ await avatarView.controller.initializeAudioContext()
379
+
380
+ // Start WebSocket service
381
+ await avatarView.controller.start()
382
+
383
+ // Send audio data (must be mono PCM16 format matching the configured sample rate, e.g. 16kHz)
384
+ const conversationId = avatarView.controller.send(audioData: ArrayBuffer, end: boolean)
385
+ // Returns: conversationId - Conversation ID for this conversation session
386
+ // end: false (default) - Continue sending audio data for current conversation
387
+ // end: true - Mark the end of current conversation round. After end=true, sending new audio data will interrupt any ongoing playback from the previous conversation round
388
+ })
361
389
 
362
390
  // Close WebSocket service
363
- avatarView.avatarController.close()
391
+ avatarView.controller.close()
364
392
  ```
365
393
 
366
394
  #### Host Mode Methods
367
395
 
368
396
  ```typescript
369
- // Stream audio chunks (must be 16kHz mono PCM16 format)
370
- const conversationId = avatarView.avatarController.yieldAudioData(
371
- data: Uint8Array, // Audio chunk data
372
- isLast: boolean = false // Whether this is the last chunk
373
- )
374
- // Returns: conversationId - Conversation ID for this audio session
375
-
376
- // Stream animation keyframes (requires conversationId from audio data)
377
- avatarView.avatarController.yieldFramesData(
378
- keyframesDataArray: (Uint8Array | ArrayBuffer)[], // Animation keyframes binary data array (each element is a protobuf encoded Message)
379
- conversationId: string // Conversation ID (required)
380
- )
397
+ // ⚠️ CRITICAL: Initialize audio context first (MUST be called in user gesture context)
398
+ // This method MUST be called within a user gesture event handler (click, touchstart, etc.)
399
+ // to satisfy browser security policies. Calling it outside a user gesture will fail.
400
+ // All audio operations (yieldAudioData, yieldFramesData, etc.) require prior initialization.
401
+ button.addEventListener('click', async () => {
402
+ // Initialize audio context - MUST be in user gesture context
403
+ await avatarView.controller.initializeAudioContext()
404
+
405
+ // Stream audio chunks (must be mono PCM16 format matching the configured sample rate, e.g. 16kHz)
406
+ const conversationId = avatarView.controller.yieldAudioData(
407
+ data: Uint8Array, // Audio chunk data
408
+ isLast: boolean = false // Whether this is the last chunk
409
+ )
410
+ // Returns: conversationId - Conversation ID for this audio session
411
+
412
+ // Stream animation keyframes (requires conversationId from audio data)
413
+ avatarView.controller.yieldFramesData(
414
+ keyframesDataArray: (Uint8Array | ArrayBuffer)[], // Animation keyframes binary data array (each element is a protobuf encoded Message)
415
+ conversationId: string // Conversation ID (required)
416
+ )
417
+ })
381
418
  ```
382
419
 
383
420
  **⚠️ Important: Conversation ID (conversationId) Management**
@@ -1,7 +1,7 @@
1
1
  var __defProp = Object.defineProperty;
2
2
  var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
3
3
  var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
4
- import { A as APP_CONFIG, l as logger, e as errorToMessage, a as logEvent } from "./index-Bhjn1nq3.js";
4
+ import { A as APP_CONFIG, l as logger, e as errorToMessage, a as logEvent } from "./index-C1md-jKJ.js";
5
5
  class StreamingAudioPlayer {
6
6
  constructor(options) {
7
7
  __publicField(this, "audioContext", null);
@@ -44,6 +44,8 @@ export declare class AvatarController {
44
44
  playbackMode?: DrivingServiceMode;
45
45
  });
46
46
  getCurrentConversationId(): string | null;
47
+ initializeAudioContext(): Promise<void>;
48
+ private checkAudioContextInitialized;
47
49
  start(): Promise<void>;
48
50
  send(audioData: ArrayBuffer, end?: boolean): string | null;
49
51
  close(): void;
@@ -7624,7 +7624,7 @@ const _AnimationPlayer = class _AnimationPlayer {
7624
7624
  if (this.streamingPlayer) {
7625
7625
  return;
7626
7626
  }
7627
- const { StreamingAudioPlayer } = await import("./StreamingAudioPlayer-BWsAt_s7.js");
7627
+ const { StreamingAudioPlayer } = await import("./StreamingAudioPlayer-CO9WTktN.js");
7628
7628
  const { AvatarSDK: AvatarSDK2 } = await Promise.resolve().then(() => AvatarSDK$1);
7629
7629
  const audioFormat = AvatarSDK2.getAudioFormat();
7630
7630
  this.streamingPlayer = new StreamingAudioPlayer({
@@ -8961,7 +8961,7 @@ class AvatarSDK {
8961
8961
  }
8962
8962
  __publicField(AvatarSDK, "_isInitialized", false);
8963
8963
  __publicField(AvatarSDK, "_configuration", null);
8964
- __publicField(AvatarSDK, "_version", "1.0.0-beta.62");
8964
+ __publicField(AvatarSDK, "_version", "1.0.0-beta.63");
8965
8965
  __publicField(AvatarSDK, "_avatarCore", null);
8966
8966
  __publicField(AvatarSDK, "_dynamicSdkConfig", null);
8967
8967
  const AvatarSDK$1 = Object.freeze(Object.defineProperty({
@@ -10741,38 +10741,71 @@ class AvatarController {
10741
10741
  getCurrentConversationId() {
10742
10742
  return this.getEffectiveConversationId();
10743
10743
  }
10744
- async start() {
10745
- if (!this.networkLayer) {
10746
- throw new SPAvatarError(
10747
- "Network layer not available. Use SDK mode.",
10748
- "NETWORK_LAYER_NOT_AVAILABLE"
10749
- );
10744
+ async initializeAudioContext() {
10745
+ var _a;
10746
+ if ((_a = this.animationPlayer) == null ? void 0 : _a.isStreamingReady()) {
10747
+ return;
10750
10748
  }
10751
10749
  if (!this.animationPlayer) {
10752
10750
  this.animationPlayer = new AnimationPlayer();
10751
+ }
10752
+ if (!this.animationPlayer.isStreamingReady()) {
10753
10753
  try {
10754
10754
  await this.animationPlayer.createAndInitializeStreamingPlayer();
10755
10755
  } catch (error) {
10756
10756
  const message = error instanceof Error ? error.message : String(error);
10757
- logger.error("[AvatarController] Failed to create streaming player:", message);
10758
- logEvent("character_player", "error", {
10759
- avatar_id: this.avatar.id,
10760
- event: "streaming_player_init_failed",
10761
- reason: message
10762
- });
10763
- throw error;
10757
+ logger.error("[AvatarController] Failed to initialize audio context:", message);
10758
+ throw new SPAvatarError(
10759
+ `Failed to initialize audio context: ${message}`,
10760
+ "AUDIO_CONTEXT_INIT_FAILED"
10761
+ );
10762
+ }
10763
+ }
10764
+ const streamingPlayer = this.animationPlayer.getStreamingPlayer();
10765
+ if (streamingPlayer) {
10766
+ const audioContext = streamingPlayer.audioContext;
10767
+ if (audioContext && audioContext.state === "suspended") {
10768
+ try {
10769
+ await audioContext.resume();
10770
+ } catch (err) {
10771
+ logger.warn("[AvatarController] Failed to resume AudioContext during initialization:", err);
10772
+ }
10764
10773
  }
10765
10774
  }
10775
+ }
10776
+ checkAudioContextInitialized() {
10777
+ var _a;
10778
+ if (!((_a = this.animationPlayer) == null ? void 0 : _a.isStreamingReady())) {
10779
+ throw new SPAvatarError(
10780
+ "Audio context not initialized. Call initializeAudioContext() in a user gesture context first.",
10781
+ "AUDIO_CONTEXT_NOT_INITIALIZED"
10782
+ );
10783
+ }
10784
+ }
10785
+ async start() {
10786
+ if (!this.networkLayer) {
10787
+ throw new SPAvatarError(
10788
+ "Network layer not available. Use SDK mode.",
10789
+ "NETWORK_LAYER_NOT_AVAILABLE"
10790
+ );
10791
+ }
10792
+ this.checkAudioContextInitialized();
10766
10793
  await this.networkLayer.connect(this.avatar.id);
10767
10794
  }
10768
10795
  send(audioData, end = false) {
10769
- var _a, _b, _c;
10796
+ var _a, _b, _c, _d;
10797
+ try {
10798
+ this.checkAudioContextInitialized();
10799
+ } catch (error) {
10800
+ (_a = this.onError) == null ? void 0 : _a.call(this, error);
10801
+ return null;
10802
+ }
10770
10803
  if (!this.networkLayer) {
10771
- (_a = this.onError) == null ? void 0 : _a.call(this, new SPAvatarError("Network layer not available", "NETWORK_LAYER_NOT_AVAILABLE"));
10804
+ (_b = this.onError) == null ? void 0 : _b.call(this, new SPAvatarError("Network layer not available", "NETWORK_LAYER_NOT_AVAILABLE"));
10772
10805
  return null;
10773
10806
  }
10774
10807
  if (!this.networkLayer.canSend()) {
10775
- (_b = this.onError) == null ? void 0 : _b.call(this, new SPAvatarError("Service not connected", "NOT_CONNECTED"));
10808
+ (_c = this.onError) == null ? void 0 : _c.call(this, new SPAvatarError("Service not connected", "NOT_CONNECTED"));
10776
10809
  logEvent("character_manager", "warning", {
10777
10810
  avatar_id: this.avatar.id,
10778
10811
  event: "send_not_connected"
@@ -10790,7 +10823,7 @@ class AvatarController {
10790
10823
  }
10791
10824
  if (!this.isPlaying && this.currentState === AvatarState.idle) {
10792
10825
  this.currentState = AvatarState.active;
10793
- (_c = this.onConversationState) == null ? void 0 : _c.call(this, this.mapToConversationState(AvatarState.active));
10826
+ (_d = this.onConversationState) == null ? void 0 : _d.call(this, this.mapToConversationState(AvatarState.active));
10794
10827
  }
10795
10828
  return this.networkLayer.getCurrentConversationId();
10796
10829
  }
@@ -10810,18 +10843,13 @@ class AvatarController {
10810
10843
  (_a = this.onConnectionState) == null ? void 0 : _a.call(this, ConnectionState.disconnected);
10811
10844
  }
10812
10845
  async playback(initialAudioChunks, initialKeyframes) {
10846
+ this.checkAudioContextInitialized();
10813
10847
  if (this.isPlaying || this.currentConversationId) {
10814
10848
  this.interrupt();
10815
10849
  }
10816
10850
  this.currentConversationId = this.generateAndLogNewConversationId();
10817
10851
  this.reqEnd = false;
10818
10852
  this.clearPlaybackData();
10819
- if (!this.animationPlayer) {
10820
- this.animationPlayer = new AnimationPlayer();
10821
- }
10822
- if (!this.animationPlayer.isStreamingReady()) {
10823
- await this.animationPlayer.createAndInitializeStreamingPlayer();
10824
- }
10825
10853
  if (initialAudioChunks && initialAudioChunks.length > 0) {
10826
10854
  this.pendingAudioChunks.push(...initialAudioChunks);
10827
10855
  }
@@ -10848,7 +10876,13 @@ class AvatarController {
10848
10876
  return this.currentConversationId;
10849
10877
  }
10850
10878
  yieldAudioData(data, isLast = false) {
10851
- var _a, _b;
10879
+ var _a, _b, _c;
10880
+ try {
10881
+ this.checkAudioContextInitialized();
10882
+ } catch (error) {
10883
+ (_a = this.onError) == null ? void 0 : _a.call(this, error);
10884
+ return null;
10885
+ }
10852
10886
  if (this.reqEnd && this.isPlaying && this.currentConversationId) {
10853
10887
  this.interrupt();
10854
10888
  this.currentConversationId = this.generateAndLogNewConversationId();
@@ -10879,12 +10913,12 @@ class AvatarController {
10879
10913
  metrics.tap2Timestamp = Date.now();
10880
10914
  }
10881
10915
  }
10882
- if (this.isPlaying && ((_a = this.animationPlayer) == null ? void 0 : _a.isStreamingReady())) {
10916
+ if (this.isPlaying && ((_b = this.animationPlayer) == null ? void 0 : _b.isStreamingReady())) {
10883
10917
  this.animationPlayer.addAudioChunk(data, isLast);
10884
10918
  } else {
10885
10919
  if (data.length > 0 || isLast) {
10886
10920
  this.pendingAudioChunks.push({ data, isLast });
10887
- (_b = this.onConversationState) == null ? void 0 : _b.call(this, this.mapToConversationState(AvatarState.active));
10921
+ (_c = this.onConversationState) == null ? void 0 : _c.call(this, this.mapToConversationState(AvatarState.active));
10888
10922
  }
10889
10923
  }
10890
10924
  return this.currentConversationId;
@@ -11237,6 +11271,7 @@ class AvatarController {
11237
11271
  }
11238
11272
  async startStreamingPlaybackInternal() {
11239
11273
  var _a, _b, _c;
11274
+ this.checkAudioContextInitialized();
11240
11275
  if (this.isPlaying) {
11241
11276
  this.isStartingPlayback = false;
11242
11277
  return;
@@ -11245,30 +11280,15 @@ class AvatarController {
11245
11280
  return;
11246
11281
  }
11247
11282
  this.isStartingPlayback = true;
11248
- if (!this.animationPlayer) {
11249
- this.animationPlayer = new AnimationPlayer();
11250
- }
11251
- if (!this.animationPlayer.isStreamingReady()) {
11252
- try {
11253
- await this.animationPlayer.createAndInitializeStreamingPlayer();
11254
- } catch (error) {
11255
- this.isStartingPlayback = false;
11256
- const message = error instanceof Error ? error.message : String(error);
11257
- logger.error("[AvatarController] Failed to create streaming player:", message);
11258
- logEvent("character_player", "error", {
11259
- avatar_id: this.avatar.id,
11260
- event: "streaming_player_init_failed",
11261
- reason: message
11262
- });
11263
- throw error;
11264
- }
11265
- }
11266
11283
  if (!this.currentKeyframes || this.currentKeyframes.length === 0) {
11267
11284
  this.isStartingPlayback = false;
11268
11285
  logger.warn("[AvatarController] No animation data to play");
11269
11286
  return;
11270
11287
  }
11271
11288
  try {
11289
+ if (!this.animationPlayer) {
11290
+ throw new SPAvatarError("Animation player not initialized", "ANIMATION_PLAYER_NOT_INITIALIZED");
11291
+ }
11272
11292
  await this.animationPlayer.prepareStreamingPlayer(() => {
11273
11293
  var _a2, _b2;
11274
11294
  this.isPlaying = false;
@@ -11490,21 +11510,7 @@ class AvatarController {
11490
11510
  async startAudioOnlyPlayback() {
11491
11511
  var _a, _b;
11492
11512
  if (!this.animationPlayer) {
11493
- this.animationPlayer = new AnimationPlayer();
11494
- }
11495
- if (!this.animationPlayer.isStreamingReady()) {
11496
- try {
11497
- await this.animationPlayer.createAndInitializeStreamingPlayer();
11498
- } catch (error) {
11499
- const message = error instanceof Error ? error.message : String(error);
11500
- logger.error("[AvatarController] Failed to create streaming player for audio-only mode:", message);
11501
- logEvent("character_player", "error", {
11502
- avatar_id: this.avatar.id,
11503
- event: "audio_only_streaming_player_init_failed",
11504
- reason: message
11505
- });
11506
- throw error;
11507
- }
11513
+ throw new SPAvatarError("Animation player not initialized", "ANIMATION_PLAYER_NOT_INITIALIZED");
11508
11514
  }
11509
11515
  try {
11510
11516
  await this.animationPlayer.prepareStreamingPlayer(() => {
@@ -11587,7 +11593,9 @@ class AvatarController {
11587
11593
  }
11588
11594
  addAudioChunkToBuffer(data, isLast) {
11589
11595
  if (!this.animationPlayer) {
11590
- this.animationPlayer = new AnimationPlayer();
11596
+ logger.warn("[AvatarController] animationPlayer is null in addAudioChunkToBuffer, this should not happen");
11597
+ this.pendingAudioChunks.push({ data, isLast });
11598
+ return;
11591
11599
  }
11592
11600
  if (this.isPlaying && this.animationPlayer.isStreamingReady()) {
11593
11601
  this.animationPlayer.addAudioChunk(data, isLast);
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { b, c, f, d, j, g, C, i, D, E, k, h, L, R, S, m } from "./index-Bhjn1nq3.js";
1
+ import { b, c, f, d, j, g, C, i, D, E, k, h, L, R, S, m } from "./index-C1md-jKJ.js";
2
2
  export {
3
3
  b as Avatar,
4
4
  c as AvatarController,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@spatialwalk/avatarkit",
3
3
  "type": "module",
4
- "version": "1.0.0-beta.62",
4
+ "version": "1.0.0-beta.63",
5
5
  "description": "SPAvatar SDK - 3D Gaussian Splatting Avatar Rendering SDK",
6
6
  "author": "SPAvatar Team",
7
7
  "license": "MIT",