@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.61 → 1.0.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -112,6 +112,13 @@ export declare class SpatialAudioManager extends EventManager {
112
112
  * @param muted True to mute, false to unmute
113
113
  */
114
114
  setParticipantMuted(participantId: string, muted: boolean): void;
115
+ /**
116
+ * Update listener position and orientation
117
+ * The "listener" is YOU - where your ears/head are positioned
118
+ *
119
+ * @param position Your HEAD position (camera position), not body position!
120
+ * @param orientation Which way your head is facing (forward and up vectors)
121
+ */
115
122
  setListenerPosition(position: Position, orientation: {
116
123
  forwardX: number;
117
124
  forwardY: number;
@@ -122,7 +129,10 @@ export declare class SpatialAudioManager extends EventManager {
122
129
  }): void;
123
130
  /**
124
131
  * Update listener orientation from LSD camera direction
125
- * @param cameraPos Camera position in world space
132
+ * IMPORTANT: Uses CAMERA position (head) as listener, not body position!
133
+ *
134
+ * @param listenerPos Player body position (for reference, not used as listener)
135
+ * @param cameraPos Camera/HEAD position - THIS is the actual listener position for audio
126
136
  * @param lookAtPos Look-at position (where camera is pointing)
127
137
  */
128
138
  setListenerFromLSD(listenerPos: Position, cameraPos: Position, lookAtPos: Position): void;
@@ -137,12 +147,49 @@ export declare class SpatialAudioManager extends EventManager {
137
147
  private normalizePositionUnits;
138
148
  private getVectorFromListener;
139
149
  private applyDirectionalSuppression;
150
+ /**
151
+ * Dynamically adjust highpass filter based on voice characteristics
152
+ * Analyzes audio spectrum and sets filter between 85-300Hz
153
+ */
154
+ private adjustVoiceAdaptiveFilter;
140
155
  private calculateClarityScore;
141
156
  private calculateProximityWeight;
142
157
  private calculateDirectionFocus;
143
158
  private normalizeVector;
144
159
  private clamp;
145
160
  private isDenoiserEnabled;
161
+ /**
162
+ * Calculate angle between listener and sound source in degrees (0-360)
163
+ * 0° = front, 90° = right, 180° = back, 270° = left
164
+ */
165
+ private calculateAngle;
166
+ /**
167
+ * Calculate stereo panning based on angle (0-360°)
168
+ * Returns { left: 0-100, right: 0-100 }
169
+ *
170
+ * Reference angles:
171
+ * 0° (front): L100, R100
172
+ * 45° (front-right): L50, R100
173
+ * 90° (right): L0, R100
174
+ * 135° (back-right): L0, R75
174
+ * 180° (back): L0, R50 (left jumps to 50 just past 180°)
175
+ * 225° (back-left): L75, R25
177
+ * 270° (left): L100, R0
178
+ * 315° (front-left): L100, R50
179
+ */
180
+ private calculatePanning;
181
+ /**
182
+ * Calculate gain based on distance using a quadratic falloff (despite the method name, no logarithm is involved)
183
+ * Distance range: 0.5m to 5m
184
+ * Gain range: 100% to 0%
185
+ * Uses quadratic equation for human ear perception
186
+ */
187
+ private calculateLogarithmicGain;
188
+ /**
189
+ * Apply stereo panning to participant audio
190
+ * Converts panning percentages to a pan value applied to the PannerNode X position
191
+ */
192
+ private applyStereoPanning;
146
193
  private ensureDenoiseWorklet;
147
194
  private resolveOptions;
148
195
  }
@@ -142,17 +142,32 @@ class SpatialAudioManager extends EventManager_1.EventManager {
142
142
  }
143
143
  // Create BiquadFilter nodes for static/noise reduction
144
144
  // Based on: https://tagdiwalaviral.medium.com/struggles-of-noise-reduction-in-rtc-part-2-2526f8179442
145
+ // HIGHPASS FILTER: Remove low-frequency rumble and plosives (< 80Hz)
146
+ // Human voice fundamental: 80-300Hz, harmonics: 300Hz-8kHz
147
+ // This cuts BELOW voice range while preserving full voice spectrum
148
+ // Lower Q for gentler slope = less phase distortion = fewer onset artifacts
145
149
  const highpassFilter = this.audioContext.createBiquadFilter();
146
150
  highpassFilter.type = "highpass";
147
- highpassFilter.frequency.value = 85; // Conservative value to preserve male voice depth
148
- highpassFilter.Q.value = 1.0; // Quality factor
151
+ highpassFilter.frequency.value = 80; // Cut frequencies below 80Hz (removes rumble/pops)
152
+ highpassFilter.Q.value = 0.707; // Butterworth response (gentler, reduces plosives)
153
+ // LOWPASS FILTER: Remove high-frequency hiss (> 8000Hz)
154
+ // Voice harmonics extend to ~8kHz - this preserves full voice richness
155
+ // while removing digital artifacts and hiss ABOVE useful voice range
149
156
  const lowpassFilter = this.audioContext.createBiquadFilter();
150
157
  lowpassFilter.type = "lowpass";
151
- lowpassFilter.frequency.value = 7500; // Below 8kHz to avoid flat/muffled sound
158
+ lowpassFilter.frequency.value = 8000; // Cut frequencies above 8kHz (preserves voice harmonics)
152
159
  lowpassFilter.Q.value = 1.0; // Quality factor
160
+ // VOICE BAND EMPHASIS: Subtle boost 80-300Hz fundamental range for clarity
161
+ // This emphasizes the base pitch without affecting harmonics
162
+ // Reduced gain to prevent onset artifacts ("mic pop" when speaking starts)
163
+ const voiceBandFilter = this.audioContext.createBiquadFilter();
164
+ voiceBandFilter.type = "peaking";
165
+ voiceBandFilter.frequency.value = 180; // Center of voice fundamental (80-300Hz)
166
+ voiceBandFilter.Q.value = 0.8; // Wider, gentler curve (reduces artifacts)
167
+ voiceBandFilter.gain.value = 1; // +1dB subtle boost (was 2dB - too aggressive)
153
168
  const dynamicLowpass = this.audioContext.createBiquadFilter();
154
169
  dynamicLowpass.type = "lowpass";
155
- dynamicLowpass.frequency.value = 7600;
170
+ dynamicLowpass.frequency.value = 7500; // Fixed for all angles
156
171
  dynamicLowpass.Q.value = 0.8;
157
172
  proximityGain.gain.value = 1.0;
158
173
  // Configure Panner for realistic 3D spatial audio
@@ -167,13 +182,27 @@ class SpatialAudioManager extends EventManager_1.EventManager {
167
182
  panner.coneOuterGain = 0.3; // Some sound even outside cone
168
183
  // Configure gain for individual participant volume control
169
184
  gain.gain.value = 1.0;
185
+ // ADD COMPRESSOR: Prevents sudden peaks and "pops" when speaking starts
186
+ // This is KEY to eliminating onset artifacts
187
+ const participantCompressor = this.audioContext.createDynamicsCompressor();
188
+ participantCompressor.threshold.value = -24; // Start compressing at -24dB
189
+ participantCompressor.knee.value = 10; // Smooth knee for natural sound
190
+ participantCompressor.ratio.value = 3; // 3:1 compression ratio
191
+ participantCompressor.attack.value = 0.003; // 3ms fast attack for transients
192
+ participantCompressor.release.value = 0.15; // 150ms release for natural decay
170
193
  let currentNode = source;
194
+ // First apply compressor to tame initial transients (CRITICAL for preventing pops)
195
+ currentNode.connect(participantCompressor);
196
+ currentNode = participantCompressor;
171
197
  if (denoiseNode) {
172
198
  currentNode.connect(denoiseNode);
173
199
  currentNode = denoiseNode;
174
200
  }
201
+ // Audio chain with voice optimization filters
202
+ // Chain: source -> compressor -> [denoise] -> highpass -> voiceBand -> lowpass -> dynamicLowpass -> proximityGain -> panner -> analyser -> gain -> masterGain
175
203
  currentNode.connect(highpassFilter);
176
- highpassFilter.connect(lowpassFilter);
204
+ highpassFilter.connect(voiceBandFilter);
205
+ voiceBandFilter.connect(lowpassFilter);
177
206
  lowpassFilter.connect(dynamicLowpass);
178
207
  dynamicLowpass.connect(proximityGain);
179
208
  if (bypassSpatialization) {
@@ -194,8 +223,10 @@ class SpatialAudioManager extends EventManager_1.EventManager {
194
223
  analyser,
195
224
  gain,
196
225
  proximityGain,
226
+ compressor: participantCompressor,
197
227
  highpassFilter,
198
228
  lowpassFilter,
229
+ voiceBandFilter,
199
230
  dynamicLowpass,
200
231
  denoiseNode,
201
232
  stream,
@@ -282,35 +313,39 @@ class SpatialAudioManager extends EventManager_1.EventManager {
282
313
  updateSpatialAudio(participantId, position, direction) {
283
314
  const nodes = this.participantNodes.get(participantId);
284
315
  if (nodes?.panner) {
285
- const distanceConfig = this.getDistanceConfig();
286
316
  const normalizedPosition = this.normalizePositionUnits(position);
287
- const targetPosition = this.applySpatialBoostIfNeeded(normalizedPosition);
288
- // Update position (where the sound is coming from)
289
- nodes.panner.positionX.setValueAtTime(targetPosition.x, this.audioContext.currentTime);
290
- nodes.panner.positionY.setValueAtTime(targetPosition.y, this.audioContext.currentTime);
291
- nodes.panner.positionZ.setValueAtTime(targetPosition.z, this.audioContext.currentTime);
292
- // Update orientation (where the participant is facing)
293
- // This makes the audio source directional based on participant's direction
294
- if (direction) {
295
- // Normalize direction vector
296
- const length = Math.sqrt(direction.x * direction.x +
297
- direction.y * direction.y +
298
- direction.z * direction.z);
299
- if (length > 0.001) {
300
- const normX = direction.x / length;
301
- const normY = direction.y / length;
302
- const normZ = direction.z / length;
303
- nodes.panner.orientationX.setValueAtTime(normX, this.audioContext.currentTime);
304
- nodes.panner.orientationY.setValueAtTime(normY, this.audioContext.currentTime);
305
- nodes.panner.orientationZ.setValueAtTime(normZ, this.audioContext.currentTime);
306
- }
317
+ const listenerPos = this.listenerPosition; // This is HEAD position (from setListenerFromLSD)
318
+ // Calculate distance (in meters) - from HEAD to source
319
+ const distance = this.getDistanceBetween(listenerPos, normalizedPosition);
320
+ // Calculate angle between listener HEAD and source
321
+ const angle = this.calculateAngle(listenerPos, normalizedPosition, this.listenerDirection.forward);
322
+ // Calculate stereo panning based on angle
323
+ const panning = this.calculatePanning(angle);
324
+ // Calculate logarithmic gain based on distance
325
+ const gain = this.calculateLogarithmicGain(distance);
326
+ // Apply panning
327
+ this.applyStereoPanning(participantId, panning);
328
+ // Apply gain with smooth transition to reduce clicking/popping
329
+ const gainValue = gain / 100; // Convert to 0-1 range
330
+ // Use setTargetAtTime for a smooth exponential approach to the target gain
331
+ // This prevents the "pop" when someone starts speaking
332
+ const currentTime = this.audioContext.currentTime;
333
+ const rampTime = 0.08; // 80ms smooth ramp (was 50ms - increased for gentler onset)
334
+ // Ensure we never ramp to exactly 0 (causes issues)
335
+ const targetGain = Math.max(0.001, gainValue);
336
+ nodes.gain.gain.setTargetAtTime(targetGain, currentTime, rampTime);
337
+ nodes.proximityGain.gain.setTargetAtTime(targetGain, currentTime, rampTime);
338
+ // Update 3D position for PannerNode (still used for vertical positioning)
339
+ nodes.panner.positionY.setValueAtTime(normalizedPosition.y, this.audioContext.currentTime);
340
+ nodes.panner.positionZ.setValueAtTime(normalizedPosition.z, this.audioContext.currentTime);
341
+ // Log for debugging (remove in production)
342
+ if (Math.random() < 0.01) { // Log 1% of updates to avoid spam
343
+ console.log(`[Spatial Audio] Participant: ${participantId}`);
344
+ console.log(` Distance: ${distance.toFixed(2)}m`);
345
+ console.log(` Angle: ${angle.toFixed(1)}°`);
346
+ console.log(` Panning: L${panning.left.toFixed(0)}% R${panning.right.toFixed(0)}%`);
347
+ console.log(` Gain: ${gain.toFixed(0)}%`);
307
348
  }
308
- const listenerPos = this.listenerPosition;
309
- const vectorToSource = this.getVectorFromListener(targetPosition);
310
- const distance = this.getDistanceBetween(listenerPos, targetPosition);
311
- this.applyDirectionalSuppression(participantId, distance, vectorToSource);
312
- const distanceGain = this.calculateDistanceGain(distanceConfig, distance);
313
- nodes.gain.gain.setTargetAtTime(distanceGain, this.audioContext.currentTime, 0.05);
314
349
  }
315
350
  }
316
351
  /**
@@ -330,17 +365,28 @@ class SpatialAudioManager extends EventManager_1.EventManager {
330
365
  nodes.gain.gain.setValueAtTime(muted ? 0 : 1, this.audioContext.currentTime);
331
366
  }
332
367
  }
368
+ /**
369
+ * Update listener position and orientation
370
+ * The "listener" is YOU - where your ears/head are positioned
371
+ *
372
+ * @param position Your HEAD position (camera position), not body position!
373
+ * @param orientation Which way your head is facing (forward and up vectors)
374
+ */
333
375
  setListenerPosition(position, orientation) {
334
376
  const normalizedPosition = this.normalizePositionUnits(position);
335
377
  this.applyListenerTransform(normalizedPosition, orientation);
336
378
  }
337
379
  /**
338
380
  * Update listener orientation from LSD camera direction
339
- * @param cameraPos Camera position in world space
381
+ * IMPORTANT: Uses CAMERA position (head) as listener, not body position!
382
+ *
383
+ * @param listenerPos Player body position (for reference, not used as listener)
384
+ * @param cameraPos Camera/HEAD position - THIS is the actual listener position for audio
340
385
  * @param lookAtPos Look-at position (where camera is pointing)
341
386
  */
342
387
  setListenerFromLSD(listenerPos, cameraPos, lookAtPos) {
343
- const normalizedListener = this.normalizePositionUnits(listenerPos);
388
+ // USE CAMERA POSITION AS LISTENER (head position, not body!)
389
+ const normalizedListener = this.normalizePositionUnits(cameraPos); // ✅ Changed from listenerPos
344
390
  const normalizedCamera = this.normalizePositionUnits(cameraPos);
345
391
  const normalizedLookAt = this.normalizePositionUnits(lookAtPos);
346
392
  // Calculate forward vector (from camera to look-at point)
@@ -535,9 +581,45 @@ class SpatialAudioManager extends EventManager_1.EventManager {
535
581
  }
536
582
  const clarityScore = this.calculateClarityScore(distance, vectorToSource);
537
583
  const targetGain = 0.48 + clarityScore * 0.72; // 0.48 → 1.20
538
- const targetLowpass = 3600 + clarityScore * 4600; // 3.6kHz → ~8.2kHz
584
+ // Only adjust gain based on angle, not frequency
539
585
  nodes.proximityGain.gain.setTargetAtTime(targetGain, this.audioContext.currentTime, 0.08);
540
- nodes.dynamicLowpass.frequency.setTargetAtTime(targetLowpass, this.audioContext.currentTime, 0.12);
586
+ // Analyze voice and adjust highpass filter dynamically (85-300Hz)
587
+ this.adjustVoiceAdaptiveFilter(participantId);
588
+ }
589
+ /**
590
+ * Dynamically adjust highpass filter based on voice characteristics
591
+ * Analyzes audio spectrum and sets filter between 85-300Hz
592
+ */
593
+ adjustVoiceAdaptiveFilter(participantId) {
594
+ const nodes = this.participantNodes.get(participantId);
595
+ if (!nodes?.analyser) {
596
+ return;
597
+ }
598
+ const bufferLength = nodes.analyser.frequencyBinCount;
599
+ const dataArray = new Uint8Array(bufferLength);
600
+ nodes.analyser.getByteFrequencyData(dataArray);
601
+ // Calculate spectral centroid in low frequency range (0-500Hz)
602
+ const sampleRate = this.audioContext.sampleRate;
603
+ const nyquist = sampleRate / 2;
604
+ const binWidth = nyquist / bufferLength;
605
+ let weightedSum = 0;
606
+ let totalEnergy = 0;
607
+ const maxBin = Math.floor(500 / binWidth); // Only analyze up to 500Hz
608
+ for (let i = 0; i < Math.min(maxBin, bufferLength); i++) {
609
+ const frequency = i * binWidth;
610
+ const magnitude = dataArray[i] / 255.0;
611
+ weightedSum += frequency * magnitude;
612
+ totalEnergy += magnitude;
613
+ }
614
+ if (totalEnergy > 0.01) {
615
+ const centroid = weightedSum / totalEnergy;
616
+ // Map centroid to highpass frequency (85-300Hz)
617
+ // Lower centroid = deeper voice = use lower highpass (preserve bass)
618
+ // Higher centroid = higher voice = use higher highpass (remove mud)
619
+ const targetFreq = Math.max(85, Math.min(300, 85 + (centroid - 100) * 0.5));
620
+ nodes.highpassFilter.frequency.setTargetAtTime(targetFreq, this.audioContext.currentTime, 0.15 // Smooth transition
621
+ );
622
+ }
541
623
  }
542
624
  calculateClarityScore(distance, vectorToSource) {
543
625
  const proximityWeight = this.calculateProximityWeight(distance);
@@ -581,6 +663,132 @@ class SpatialAudioManager extends EventManager_1.EventManager {
581
663
  isDenoiserEnabled() {
582
664
  return this.options.denoiser?.enabled !== false;
583
665
  }
666
+ /**
667
+ * Calculate angle between listener and sound source in degrees (0-360)
668
+ * 0° = front, 90° = right, 180° = back, 270° = left
669
+ */
670
+ calculateAngle(listenerPos, sourcePos, listenerForward) {
671
+ // Vector from listener to source
672
+ const dx = sourcePos.x - listenerPos.x;
673
+ const dy = sourcePos.y - listenerPos.y;
674
+ // Project onto horizontal plane (assuming Z is up)
675
+ // Use listener's forward direction to determine angle
676
+ const forwardX = listenerForward.x;
677
+ const forwardY = listenerForward.y;
678
+ // Calculate angle using atan2
679
+ const angleToSource = Math.atan2(dy, dx);
680
+ const forwardAngle = Math.atan2(forwardY, forwardX);
681
+ // Relative angle in radians
682
+ let relativeAngle = angleToSource - forwardAngle;
683
+ // Normalize to 0-2π
684
+ while (relativeAngle < 0)
685
+ relativeAngle += Math.PI * 2;
686
+ while (relativeAngle >= Math.PI * 2)
687
+ relativeAngle -= Math.PI * 2;
688
+ // Convert to degrees (0-360)
689
+ return (relativeAngle * 180 / Math.PI);
690
+ }
691
+ /**
692
+ * Calculate stereo panning based on angle (0-360°)
693
+ * Returns { left: 0-100, right: 0-100 }
694
+ *
695
+ * Reference angles:
696
+ * 0° (front): L100, R100
697
+ * 45° (front-right): L50, R100
698
+ * 90° (right): L0, R100
699
+ * 135° (back-right): L0, R75
700
+ * 180° (back): L0, R50 (left jumps to 50 just past 180°)
701
+ * 225° (back-left): L75, R25
702
+ * 270° (left): L100, R0
703
+ * 315° (front-left): L100, R50
704
+ */
705
+ calculatePanning(angle) {
706
+ // Normalize angle to 0-360
707
+ while (angle < 0)
708
+ angle += 360;
709
+ while (angle >= 360)
710
+ angle -= 360;
711
+ let left = 100;
712
+ let right = 100;
713
+ if (angle <= 90) {
714
+ // Front-right quadrant (0° to 90°)
715
+ // Left decreases from 100 to 0
716
+ // Right stays at 100
717
+ left = 100 * (1 - angle / 90);
718
+ right = 100;
719
+ }
720
+ else if (angle <= 180) {
721
+ // Back-right quadrant (90° to 180°)
722
+ // Left stays at 0
723
+ // Right decreases from 100 to 50
724
+ left = 0;
725
+ right = 100 - 50 * ((angle - 90) / 90);
726
+ }
727
+ else if (angle <= 270) {
728
+ // Back-left quadrant (180° to 270°)
729
+ // Left increases from 50 to 100
730
+ // Right decreases from 50 to 0
731
+ const progress = (angle - 180) / 90;
732
+ left = 50 + 50 * progress;
733
+ right = 50 * (1 - progress);
734
+ }
735
+ else {
736
+ // Front-left quadrant (270° to 360°)
737
+ // Left stays at 100
738
+ // Right increases from 0 to 100
739
+ left = 100;
740
+ right = 100 * ((angle - 270) / 90);
741
+ }
742
+ return {
743
+ left: Math.max(0, Math.min(100, left)),
744
+ right: Math.max(0, Math.min(100, right))
745
+ };
746
+ }
747
+ /**
748
+ * Calculate gain based on distance using a quadratic falloff (despite the method name, no logarithm is involved)
749
+ * Distance range: 0.5m to 5m
750
+ * Gain range: 100% to 0%
751
+ * Uses quadratic equation for human ear perception
752
+ */
753
+ calculateLogarithmicGain(distance) {
754
+ const minDistance = 0.5; // meters
755
+ const maxDistance = 5.0; // meters
756
+ // Clamp distance
757
+ if (distance <= minDistance)
758
+ return 100;
759
+ if (distance >= maxDistance)
760
+ return 0;
761
+ // Normalize distance to 0-1 range
762
+ const normalizedDistance = (distance - minDistance) / (maxDistance - minDistance);
763
+ // Apply quadratic falloff for natural perception
764
+ // gain = 100 * (1 - x)²
765
+ // This creates a logarithmic-like curve that sounds linear to human ear
766
+ const gain = 100 * Math.pow(1 - normalizedDistance, 2);
767
+ return Math.max(0, Math.min(100, gain));
768
+ }
769
+ /**
770
+ * Apply stereo panning to participant audio
771
+ * Converts panning percentages to a pan value applied to the PannerNode X position
772
+ */
773
+ applyStereoPanning(participantId, panning) {
774
+ const nodes = this.participantNodes.get(participantId);
775
+ if (!nodes?.panner)
776
+ return;
777
+ // Convert left/right percentages to pan value (-1 to +1)
778
+ // If left=100, right=0 → pan = -1 (full left)
779
+ // If left=0, right=100 → pan = +1 (full right)
780
+ // If left=100, right=100 → pan = 0 (center)
781
+ const leftRatio = panning.left / 100;
782
+ const rightRatio = panning.right / 100;
783
+ // Calculate pan position
784
+ let panValue = 0;
785
+ if (leftRatio + rightRatio > 0) {
786
+ panValue = (rightRatio - leftRatio);
787
+ }
788
+ // Adjust X position for left-right panning (-1 = left, +1 = right)
789
+ const currentTime = this.audioContext.currentTime;
790
+ nodes.panner.positionX.setTargetAtTime(panValue * 5, currentTime, 0.05);
791
+ }
584
792
  async ensureDenoiseWorklet() {
585
793
  if (!this.isDenoiserEnabled()) {
586
794
  return;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@newgameplusinc/odyssey-audio-video-sdk-dev",
3
- "version": "1.0.61",
3
+ "version": "1.0.63",
4
4
  "description": "Odyssey Spatial Audio & Video SDK using MediaSoup for real-time communication",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",