@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.62 → 1.0.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -78,7 +78,7 @@ sdk.setListenerFromLSD(listenerPos, cameraPos, lookAtPos);
78
78
  ### Web Audio Algorithms
79
79
  - **Coordinate normalization** – Unreal sends centimeters; `SpatialAudioManager` auto-detects large values and converts to meters once.
80
80
  - **Orientation math** – `setListenerFromLSD()` builds forward/right/up vectors from camera/LookAt to keep the listener aligned with head movement.
81
- - **Dynamic distance gain** – `updateSpatialAudio()` measures distance from listener → source and applies a smooth rolloff curve, so distant avatars fade to silence.
81
+ - **Dynamic distance gain** – `updateSpatialAudio()` measures distance from listener → source and applies a quadratic rolloff curve (0.5m-10m range). Voices gradually fade from 100% (0.5m clear) to complete silence at 10m+. Distance calculated from listener's HEAD position to participant's standing position.
82
82
  - **Noise handling** – the AudioWorklet denoiser now runs an adaptive multi-band gate (per W3C AudioWorklet guidance) before the high/low-pass filters, stripping constant HVAC/fan noise even when the speaker is close. A newly added silence gate mutes tracks entirely after ~250 ms of sub-noise-floor energy, eliminating hiss during dead air without touching spatial cues.
83
83
 
84
84
  #### Noise-Cancellation Stack (What’s Included)
@@ -112,6 +112,13 @@ export declare class SpatialAudioManager extends EventManager {
112
112
  * @param muted True to mute, false to unmute
113
113
  */
114
114
  setParticipantMuted(participantId: string, muted: boolean): void;
115
+ /**
116
+ * Update listener position and orientation
117
+ * The "listener" is YOU - where your ears/head are positioned
118
+ *
119
+ * @param position Your HEAD position (camera position), not body position!
120
+ * @param orientation Which way your head is facing (forward and up vectors)
121
+ */
115
122
  setListenerPosition(position: Position, orientation: {
116
123
  forwardX: number;
117
124
  forwardY: number;
@@ -122,7 +129,10 @@ export declare class SpatialAudioManager extends EventManager {
122
129
  }): void;
123
130
  /**
124
131
  * Update listener orientation from LSD camera direction
125
- * @param cameraPos Camera position in world space
132
+ * IMPORTANT: Uses CAMERA position (head) as listener, not body position!
133
+ *
134
+ * @param listenerPos Player body position (for reference, not used as listener)
135
+ * @param cameraPos Camera/HEAD position - THIS is the actual listener position for audio
126
136
  * @param lookAtPos Look-at position (where camera is pointing)
127
137
  */
128
138
  setListenerFromLSD(listenerPos: Position, cameraPos: Position, lookAtPos: Position): void;
@@ -171,13 +181,13 @@ export declare class SpatialAudioManager extends EventManager {
171
181
  /**
172
182
  * Calculate gain based on distance using logarithmic scale
173
183
  * Distance range: 0.5m to 10m
174
- * Gain range: 100% to 0%
184
+ * Gain range: 100% to 0% from this function; the caller clamps the applied gain to a 20% minimum for better audibility
175
185
  * Uses quadratic equation for human ear perception
176
186
  */
177
187
  private calculateLogarithmicGain;
178
188
  /**
179
- * Apply stereo panning to participant audio
180
- * Converts panning percentages to StereoPanner values
189
+ * Apply stereo panning to participant audio using StereoPannerNode
190
+ * This provides STABLE left-right panning without jitter
181
191
  */
182
192
  private applyStereoPanning;
183
193
  private ensureDenoiseWorklet;
@@ -99,6 +99,7 @@ class SpatialAudioManager extends EventManager_1.EventManager {
99
99
  const stream = new MediaStream([track]);
100
100
  const source = this.audioContext.createMediaStreamSource(stream);
101
101
  const panner = this.audioContext.createPanner();
102
+ const stereoPanner = this.audioContext.createStereoPanner(); // For stable L/R panning
102
103
  const analyser = this.audioContext.createAnalyser();
103
104
  const gain = this.audioContext.createGain();
104
105
  const proximityGain = this.audioContext.createGain();
@@ -142,13 +143,14 @@ class SpatialAudioManager extends EventManager_1.EventManager {
142
143
  }
143
144
  // Create BiquadFilter nodes for static/noise reduction
144
145
  // Based on: https://tagdiwalaviral.medium.com/struggles-of-noise-reduction-in-rtc-part-2-2526f8179442
145
- // HIGHPASS FILTER: Remove low-frequency rumble (< 80Hz)
146
+ // HIGHPASS FILTER: Remove low-frequency rumble and plosives (< 80Hz)
146
147
  // Human voice fundamental: 80-300Hz, harmonics: 300Hz-8kHz
147
148
  // This cuts BELOW voice range while preserving full voice spectrum
149
+ // Lower Q for gentler slope = less phase distortion = fewer onset artifacts
148
150
  const highpassFilter = this.audioContext.createBiquadFilter();
149
151
  highpassFilter.type = "highpass";
150
152
  highpassFilter.frequency.value = 80; // Cut frequencies below 80Hz (removes rumble/pops)
151
- highpassFilter.Q.value = 1.0; // Quality factor
153
+ highpassFilter.Q.value = 0.707; // Butterworth response (gentler, reduces plosives)
152
154
  // LOWPASS FILTER: Remove high-frequency hiss (> 8000Hz)
153
155
  // Voice harmonics extend to ~8kHz - this preserves full voice richness
154
156
  // while removing digital artifacts and hiss ABOVE useful voice range
@@ -156,14 +158,14 @@ class SpatialAudioManager extends EventManager_1.EventManager {
156
158
  lowpassFilter.type = "lowpass";
157
159
  lowpassFilter.frequency.value = 8000; // Cut frequencies above 8kHz (preserves voice harmonics)
158
160
  lowpassFilter.Q.value = 1.0; // Quality factor
159
- // VOICE BAND EMPHASIS: Boost 80-300Hz fundamental range for clarity
161
+ // VOICE BAND EMPHASIS: Subtle boost 80-300Hz fundamental range for clarity
160
162
  // This emphasizes the base pitch without affecting harmonics
161
- // Helps reduce the "pop" when someone starts speaking
163
+ // Reduced gain to prevent onset artifacts ("mic pop" when speaking starts)
162
164
  const voiceBandFilter = this.audioContext.createBiquadFilter();
163
165
  voiceBandFilter.type = "peaking";
164
166
  voiceBandFilter.frequency.value = 180; // Center of voice fundamental (80-300Hz)
165
- voiceBandFilter.Q.value = 1.5; // Moderate width (~100-260Hz affected)
166
- voiceBandFilter.gain.value = 2; // +2dB boost for clarity
167
+ voiceBandFilter.Q.value = 0.8; // Wider, gentler curve (reduces artifacts)
168
+ voiceBandFilter.gain.value = 1; // +1dB subtle boost (was 2dB - too aggressive)
167
169
  const dynamicLowpass = this.audioContext.createBiquadFilter();
168
170
  dynamicLowpass.type = "lowpass";
169
171
  dynamicLowpass.frequency.value = 7500; // Fixed for all angles
@@ -180,14 +182,25 @@ class SpatialAudioManager extends EventManager_1.EventManager {
180
182
  panner.coneOuterAngle = 360;
181
183
  panner.coneOuterGain = 0.3; // Some sound even outside cone
182
184
  // Configure gain for individual participant volume control
183
- gain.gain.value = 1.0;
185
+ gain.gain.value = 1.5; // Boost initial gain (was 1.0)
186
+ // ADD COMPRESSOR: Prevents sudden peaks and "pops" when speaking starts
187
+ // This is KEY to eliminating onset artifacts
188
+ const participantCompressor = this.audioContext.createDynamicsCompressor();
189
+ participantCompressor.threshold.value = -30; // Higher threshold (less compression)
190
+ participantCompressor.knee.value = 10; // Smooth knee for natural sound
191
+ participantCompressor.ratio.value = 2; // 2:1 gentle ratio (was 3:1)
192
+ participantCompressor.attack.value = 0.003; // 3ms fast attack for transients
193
+ participantCompressor.release.value = 0.15; // 150ms release for natural decay
184
194
  let currentNode = source;
195
+ // First apply compressor to tame initial transients (CRITICAL for preventing pops)
196
+ currentNode.connect(participantCompressor);
197
+ currentNode = participantCompressor;
185
198
  if (denoiseNode) {
186
199
  currentNode.connect(denoiseNode);
187
200
  currentNode = denoiseNode;
188
201
  }
189
202
  // Audio chain with voice optimization filters
190
- // Chain: source -> [denoise] -> highpass -> voiceBand -> lowpass -> dynamicLowpass -> proximityGain -> panner -> analyser -> gain -> masterGain
203
+ // Chain: source -> compressor -> [denoise] -> highpass -> voiceBand -> lowpass -> dynamicLowpass -> proximityGain -> panner -> analyser -> gain -> masterGain
191
204
  currentNode.connect(highpassFilter);
192
205
  highpassFilter.connect(voiceBandFilter);
193
206
  voiceBandFilter.connect(lowpassFilter);
@@ -198,9 +211,10 @@ class SpatialAudioManager extends EventManager_1.EventManager {
198
211
  analyser.connect(this.masterGainNode);
199
212
  }
200
213
  else {
201
- // Standard spatialized path with full audio chain
202
- // Audio Chain: source -> filters -> panner -> analyser -> gain -> masterGain -> compressor -> destination
203
- proximityGain.connect(panner);
214
+ // Standard spatialized path with stereo panner
215
+ // Audio Chain: source -> compressor -> filters -> stereoPanner -> panner -> analyser -> gain -> masterGain -> compressor -> destination
216
+ proximityGain.connect(stereoPanner); // Stereo panner for stable L/R
217
+ stereoPanner.connect(panner); // Then 3D panner for distance
204
218
  panner.connect(analyser);
205
219
  analyser.connect(gain);
206
220
  gain.connect(this.masterGainNode);
@@ -208,9 +222,11 @@ class SpatialAudioManager extends EventManager_1.EventManager {
208
222
  this.participantNodes.set(participantId, {
209
223
  source,
210
224
  panner,
225
+ stereoPanner,
211
226
  analyser,
212
227
  gain,
213
228
  proximityGain,
229
+ compressor: participantCompressor,
214
230
  highpassFilter,
215
231
  lowpassFilter,
216
232
  voiceBandFilter,
@@ -301,10 +317,10 @@ class SpatialAudioManager extends EventManager_1.EventManager {
301
317
  const nodes = this.participantNodes.get(participantId);
302
318
  if (nodes?.panner) {
303
319
  const normalizedPosition = this.normalizePositionUnits(position);
304
- const listenerPos = this.listenerPosition;
305
- // Calculate distance (in meters)
320
+ const listenerPos = this.listenerPosition; // This is HEAD position (from setListenerFromLSD)
321
+ // Calculate distance (in meters) - from HEAD to source
306
322
  const distance = this.getDistanceBetween(listenerPos, normalizedPosition);
307
- // Calculate angle between listener and source
323
+ // Calculate angle between listener HEAD and source
308
324
  const angle = this.calculateAngle(listenerPos, normalizedPosition, this.listenerDirection.forward);
309
325
  // Calculate stereo panning based on angle
310
326
  const panning = this.calculatePanning(angle);
@@ -314,10 +330,13 @@ class SpatialAudioManager extends EventManager_1.EventManager {
314
330
  this.applyStereoPanning(participantId, panning);
315
331
  // Apply gain with smooth transition to reduce clicking/popping
316
332
  const gainValue = gain / 100; // Convert to 0-1 range
317
- nodes.gain.gain.setTargetAtTime(gainValue, this.audioContext.currentTime, 0.05 // Smooth transition over 50ms to reduce clicking
318
- );
319
- // Apply proximity gain for additional distance-based attenuation
320
- nodes.proximityGain.gain.setTargetAtTime(gainValue, this.audioContext.currentTime, 0.05);
333
+ // Use setTargetAtTime for smoother, more natural transitions
334
+ // This prevents the "pop" when someone starts speaking
335
+ const currentTime = this.audioContext.currentTime;
336
+ const rampTime = 0.08; // 80ms smooth ramp
337
+ // Ensure we never ramp to exactly 0 (causes issues)
338
+ const targetGain = Math.max(0.2, gainValue); // Minimum 20% gain (was 0.001)
339
+ nodes.gain.gain.setTargetAtTime(targetGain, currentTime, rampTime);
321
340
  // Update 3D position for PannerNode (still used for vertical positioning)
322
341
  nodes.panner.positionY.setValueAtTime(normalizedPosition.y, this.audioContext.currentTime);
323
342
  nodes.panner.positionZ.setValueAtTime(normalizedPosition.z, this.audioContext.currentTime);
@@ -348,17 +367,28 @@ class SpatialAudioManager extends EventManager_1.EventManager {
348
367
  nodes.gain.gain.setValueAtTime(muted ? 0 : 1, this.audioContext.currentTime);
349
368
  }
350
369
  }
370
+ /**
371
+ * Update listener position and orientation
372
+ * The "listener" is YOU - where your ears/head are positioned
373
+ *
374
+ * @param position Your HEAD position (camera position), not body position!
375
+ * @param orientation Which way your head is facing (forward and up vectors)
376
+ */
351
377
  setListenerPosition(position, orientation) {
352
378
  const normalizedPosition = this.normalizePositionUnits(position);
353
379
  this.applyListenerTransform(normalizedPosition, orientation);
354
380
  }
355
381
  /**
356
382
  * Update listener orientation from LSD camera direction
357
- * @param cameraPos Camera position in world space
383
+ * IMPORTANT: Uses CAMERA position (head) as listener, not body position!
384
+ *
385
+ * @param listenerPos Player body position (for reference, not used as listener)
386
+ * @param cameraPos Camera/HEAD position - THIS is the actual listener position for audio
358
387
  * @param lookAtPos Look-at position (where camera is pointing)
359
388
  */
360
389
  setListenerFromLSD(listenerPos, cameraPos, lookAtPos) {
361
- const normalizedListener = this.normalizePositionUnits(listenerPos);
390
+ // USE CAMERA POSITION AS LISTENER (head position, not body!)
391
+ const normalizedListener = this.normalizePositionUnits(cameraPos); // ✅ Changed from listenerPos
362
392
  const normalizedCamera = this.normalizePositionUnits(cameraPos);
363
393
  const normalizedLookAt = this.normalizePositionUnits(lookAtPos);
364
394
  // Calculate forward vector (from camera to look-at point)
@@ -719,32 +749,31 @@ class SpatialAudioManager extends EventManager_1.EventManager {
719
749
  /**
720
750
  * Calculate gain based on distance using logarithmic scale
721
751
  * Distance range: 0.5m to 10m
722
- * Gain range: 100% to 0%
752
+ * Gain range: 100% to 0% from this function; the caller clamps the applied gain to a 20% minimum for better audibility
723
753
  * Uses quadratic equation for human ear perception
724
754
  */
725
755
  calculateLogarithmicGain(distance) {
726
- const minDistance = 0.5; // meters
727
- const maxDistance = 5.0; // meters
756
+ const minDistance = 0.5; // meters - clear voice starts here
757
+ const maxDistance = 10.0; // meters - complete silence beyond this
728
758
  // Clamp distance
729
759
  if (distance <= minDistance)
730
- return 100;
760
+ return 100; // Full volume at 0.5m or closer
731
761
  if (distance >= maxDistance)
732
- return 0;
762
+ return 0; // Complete silence at 10m or beyond
733
763
  // Normalize distance to 0-1 range
734
764
  const normalizedDistance = (distance - minDistance) / (maxDistance - minDistance);
735
765
  // Apply quadratic falloff for natural perception
736
- // gain = 100 * (1 - x²)
737
- // This creates a logarithmic-like curve that sounds linear to human ear
766
+ // gain = 100 * (1 - x)² - gradual fade from 100% to 0%
738
767
  const gain = 100 * Math.pow(1 - normalizedDistance, 2);
739
- return Math.max(0, Math.min(100, gain));
768
+ return Math.max(0, Math.min(100, gain)); // Clamp between 0-100%
740
769
  }
741
770
  /**
742
- * Apply stereo panning to participant audio
743
- * Converts panning percentages to StereoPanner values
771
+ * Apply stereo panning to participant audio using StereoPannerNode
772
+ * This provides STABLE left-right panning without jitter
744
773
  */
745
774
  applyStereoPanning(participantId, panning) {
746
775
  const nodes = this.participantNodes.get(participantId);
747
- if (!nodes?.panner)
776
+ if (!nodes?.stereoPanner)
748
777
  return;
749
778
  // Convert left/right percentages to pan value (-1 to +1)
750
779
  // If left=100, right=0 → pan = -1 (full left)
@@ -757,9 +786,10 @@ class SpatialAudioManager extends EventManager_1.EventManager {
757
786
  if (leftRatio + rightRatio > 0) {
758
787
  panValue = (rightRatio - leftRatio);
759
788
  }
760
- // Adjust X position for left-right panning (-1 = left, +1 = right)
789
+ // Use StereoPannerNode for stable, glitch-free panning
790
+ // This is MUCH more stable than manipulating PannerNode.positionX
761
791
  const currentTime = this.audioContext.currentTime;
762
- nodes.panner.positionX.setTargetAtTime(panValue * 5, currentTime, 0.05);
792
+ nodes.stereoPanner.pan.setTargetAtTime(panValue, currentTime, 0.05);
763
793
  }
764
794
  async ensureDenoiseWorklet() {
765
795
  if (!this.isDenoiserEnabled()) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@newgameplusinc/odyssey-audio-video-sdk-dev",
3
- "version": "1.0.62",
3
+ "version": "1.0.64",
4
4
  "description": "Odyssey Spatial Audio & Video SDK using MediaSoup for real-time communication",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",