@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.12 β†’ 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -13,7 +13,7 @@ It mirrors the production SDK used by Odyssey V2 and ships ready-to-drop into an
13
13
 - 🧭 **Accurate pose propagation** – `updatePosition()` streams listener pose to the SFU while `participant-position-updated` keeps the local store in sync.
14
14
 - 🎧 **Studio-grade spatial audio** – each remote participant gets a dedicated Web Audio graph: denoiser → high-pass → low-pass → HRTF `PannerNode` → adaptive gain → master compressor.
15
15
 - 🎥 **Camera-ready streams** – video tracks are exposed separately so UI layers can render muted `<video>` tags while audio stays inside Web Audio.
16
- - πŸŽ™οΈ **Clean microphone uplink** – optional `enhanceOutgoingAudioTrack` helper runs mic input through denoiser + EQ + compressor before hitting the SFU.
16
+ - πŸŽ™οΈ **Clean microphone uplink (opt‑in)** – when `outboundTuning.enabled=true`, `enhanceOutgoingAudioTrack` runs mic input through denoiser + EQ + compressor before hitting the SFU.
17
17
  - πŸ” **EventEmitter contract** – subscribe to `room-joined`, `consumer-created`, `participant-position-updated`, etc., without touching Socket.IO directly.
18
18
 
19
19
  ## Quick Start
@@ -81,6 +81,7 @@ sdk.setListenerFromLSD(listenerPos, cameraPos, lookAtPos);
81
81
 - **Orientation math** – `setListenerFromLSD()` builds forward/right/up vectors from camera/LookAt to keep the listener aligned with head movement.
82
82
 - **Dynamic distance gain** – `updateSpatialAudio()` measures distance from listener → source and applies a smooth rolloff curve, so distant avatars fade to silence.
83
83
 - **Noise handling** – optional AudioWorklet denoiser plus high/low-pass filters trim rumble & hiss before HRTF processing.
84
+ - **Dynamic gate (opt-in)** – enable via `noiseGate.enabled=true` to let the SDK automatically clamp remote tracks when they're idle.
84
85
 
85
86
  #### How Spatial Audio Is Built
86
87
 1. **Telemetry ingestion** – each LSD packet is passed through `setListenerFromLSD(listenerPos, cameraPos, lookAtPos)` so the Web Audio listener matches the player's real head/camera pose.
@@ -90,11 +91,12 @@ sdk.setListenerFromLSD(listenerPos, cameraPos, lookAtPos);
90
91
 4. **Distance-aware gain** – the manager stores the latest listener pose and computes the Euclidean distance to each remote participant on every update. A custom rolloff curve adjusts gain before the compressor, giving the "someone on my left / far away" perception without blowing out master levels.
91
92
 5. **Left/right rendering** – because the panner uses `panningModel = "HRTF"`, browsers feed the processed signal into the user's audio hardware with head-related transfer functions, producing natural interaural time/intensity differences.
92
93
 
93
- #### How Microphone Audio Is Tuned Before Sending
94
+ #### How Microphone Audio Is Tuned Before Sending (Opt-In)
95
+ > Disabled by default. Enable via `new SpatialAudioManager({ outboundTuning: { enabled: true } })`.
94
96
 1. **Hardware constraints first** – the SDK requests `noiseSuppression`, `echoCancellation`, and `autoGainControl` on the raw `MediaStreamTrack` (plus Chromium-specific `goog*` flags).
95
97
 2. **Web Audio pre-flight** – `enhanceOutgoingAudioTrack(track)` clones the mic into a dedicated `AudioContext` and runs it through the chain: `Denoiser → 50/60 Hz notches → Low-shelf rumble cut → High-pass (95 Hz) → Low-pass (7.2 kHz) → High-shelf tame → Presence boost → Dynamics compressor → Adaptive gate`.
96
98
 3. **Adaptive gate** – a lightweight RMS monitor clamps the gate gain when only background hiss remains, but opens instantly when speech energy rises.
97
- 4. **Clean stream to SFU** – the processed track is what you pass to `produceTrack`, so every participant receives the filtered audio (and your local store uses the same track for mute toggles).
99
+ 4. **Clean stream to SFU** – the processed track is what you pass to `produceTrack`, so every participant receives the filtered audio (and your local store uses the same track for mute toggles). Toggle the feature off to fall back to raw WebRTC audio instantly.
98
100
 
99
101
 ## Video Flow (Capture ↔ Rendering)
100
102
 
@@ -12,9 +12,17 @@ type DenoiserOptions = {
12
12
  noiseFloor?: number;
13
13
  release?: number;
14
14
  };
15
+ type NoiseGateOptions = {
16
+ enabled?: boolean;
17
+ };
18
+ type OutboundTuningOptions = {
19
+ enabled?: boolean;
20
+ };
15
21
  type SpatialAudioOptions = {
16
22
  distance?: SpatialAudioDistanceConfig;
17
23
  denoiser?: DenoiserOptions;
24
+ noiseGate?: NoiseGateOptions;
25
+ outboundTuning?: OutboundTuningOptions;
18
26
  };
19
27
  export declare class SpatialAudioManager extends EventManager {
20
28
  private audioContext;
@@ -31,6 +39,8 @@ export declare class SpatialAudioManager extends EventManager {
31
39
  private stabilityState;
32
40
  private outgoingProcessors;
33
41
  private listenerDirection;
42
+ private noiseGateEnabled;
43
+ private outboundTuningEnabled;
34
44
  constructor(options?: SpatialAudioOptions);
35
45
  getAudioContext(): AudioContext;
36
46
  /**
@@ -104,6 +114,10 @@ export declare class SpatialAudioManager extends EventManager {
104
114
  private applyHardwareNoiseConstraints;
105
115
  private startOutboundMonitor;
106
116
  private cleanupOutboundProcessor;
117
+ private toWebAudioPosition;
118
+ private toWebAudioDirection;
119
+ private convertListenerOrientation;
120
+ private normalizeVector;
107
121
  private ensureDenoiseWorklet;
108
122
  private resolveOptions;
109
123
  }
@@ -17,11 +17,13 @@ class SpatialAudioManager extends EventManager_1.EventManager {
17
17
  up: { x: 0, y: 0, z: 1 },
18
18
  };
19
19
  this.options = this.resolveOptions(options);
20
+ this.noiseGateEnabled = this.options.noiseGate?.enabled ?? false;
21
+ this.outboundTuningEnabled = this.options.outboundTuning?.enabled ?? false;
20
22
  // Use high sample rate for best audio quality
21
23
  this.audioContext = new AudioContext({ sampleRate: 48000 });
22
24
  // Master gain
23
25
  this.masterGainNode = this.audioContext.createGain();
24
- this.masterGainNode.gain.value = 5.0;
26
+ this.masterGainNode.gain.value = 1.8; // Lower headroom to avoid hiss from boosted noise floor
25
27
  // Compressor for dynamic range control and preventing distortion
26
28
  this.compressor = this.audioContext.createDynamicsCompressor();
27
29
  this.compressor.threshold.value = -24; // dB
@@ -126,16 +128,20 @@ class SpatialAudioManager extends EventManager_1.EventManager {
126
128
  }
127
129
  currentNode.connect(highpassFilter);
128
130
  highpassFilter.connect(lowpassFilter);
129
- lowpassFilter.connect(noiseGate);
131
+ let postFilterNode = lowpassFilter;
132
+ if (this.noiseGateEnabled) {
133
+ lowpassFilter.connect(noiseGate);
134
+ postFilterNode = noiseGate;
135
+ }
130
136
  if (bypassSpatialization) {
131
137
  console.log(`πŸ”Š TESTING: Connecting audio directly to destination (bypassing spatial audio) for ${participantId}`);
132
- noiseGate.connect(analyser);
138
+ postFilterNode.connect(analyser);
133
139
  analyser.connect(this.masterGainNode);
134
140
  }
135
141
  else {
136
142
  // Standard spatialized path with full audio chain
137
- // Audio Chain: source -> filters -> noiseGate -> panner -> analyser -> gain -> masterGain -> compressor -> destination
138
- noiseGate.connect(panner);
143
+ // Audio Chain: source -> filters -> (optional gate) -> panner -> analyser -> gain -> masterGain -> compressor -> destination
144
+ postFilterNode.connect(panner);
139
145
  panner.connect(analyser);
140
146
  analyser.connect(gain);
141
147
  gain.connect(this.masterGainNode);
@@ -156,7 +162,7 @@ class SpatialAudioManager extends EventManager_1.EventManager {
156
162
  targetGain: 1,
157
163
  networkMuted: false,
158
164
  });
159
- if (typeof track.onmute !== "undefined") {
165
+ if (this.noiseGateEnabled && typeof track.onmute !== "undefined") {
160
166
  track.onmute = () => this.handleTrackStability(participantId, true);
161
167
  track.onunmute = () => this.handleTrackStability(participantId, false);
162
168
  }
@@ -176,11 +182,13 @@ class SpatialAudioManager extends EventManager_1.EventManager {
176
182
  rolloffFactor: panner.rolloffFactor,
177
183
  },
178
184
  });
179
- // Start monitoring audio levels
180
- this.startMonitoring(participantId);
185
+ // Start monitoring audio levels if gate enabled
186
+ if (this.noiseGateEnabled) {
187
+ this.startMonitoring(participantId);
188
+ }
181
189
  }
182
190
  async enhanceOutgoingAudioTrack(track) {
183
- if (track.kind !== "audio") {
191
+ if (track.kind !== "audio" || !this.outboundTuningEnabled) {
184
192
  return track;
185
193
  }
186
194
  const existingProcessor = Array.from(this.outgoingProcessors.values()).find((processor) => processor.originalTrack === track);
@@ -305,6 +313,9 @@ class SpatialAudioManager extends EventManager_1.EventManager {
305
313
  return processedTrack;
306
314
  }
307
315
  startMonitoring(participantId) {
316
+ if (!this.noiseGateEnabled) {
317
+ return;
318
+ }
308
319
  const nodes = this.participantNodes.get(participantId);
309
320
  if (!nodes)
310
321
  return;
@@ -329,8 +340,9 @@ class SpatialAudioManager extends EventManager_1.EventManager {
329
340
  const smoothing = 0.2;
330
341
  stability.smoothedLevel =
331
342
  stability.smoothedLevel * (1 - smoothing) + normalizedLevel * smoothing;
332
- const gateOpenThreshold = 0.035; // empirical speech/noise split
333
- const gateCloseThreshold = 0.015;
343
+ const gateOpenThreshold = 0.028; // tuned for speech presence
344
+ const gateCloseThreshold = 0.012;
345
+ const noiseFloorGain = 0.12;
334
346
  let targetGain = stability.targetGain;
335
347
  if (stability.networkMuted) {
336
348
  targetGain = 0;
@@ -339,13 +351,13 @@ class SpatialAudioManager extends EventManager_1.EventManager {
339
351
  targetGain = 0;
340
352
  }
341
353
  else if (stability.smoothedLevel < gateOpenThreshold) {
342
- targetGain = 0.35;
354
+ targetGain = noiseFloorGain;
343
355
  }
344
356
  else {
345
357
  targetGain = 1;
346
358
  }
347
359
  if (Math.abs(targetGain - stability.targetGain) > 0.05) {
348
- const ramp = targetGain > stability.targetGain ? 0.03 : 0.12;
360
+ const ramp = targetGain > stability.targetGain ? 0.04 : 0.18;
349
361
  noiseGate.gain.setTargetAtTime(targetGain, this.audioContext.currentTime, ramp);
350
362
  stability.targetGain = targetGain;
351
363
  }
@@ -380,6 +392,9 @@ class SpatialAudioManager extends EventManager_1.EventManager {
380
392
  this.monitoringIntervals.set(participantId, interval);
381
393
  }
382
394
  handleTrackStability(participantId, muted) {
395
+ if (!this.noiseGateEnabled) {
396
+ return;
397
+ }
383
398
  const nodes = this.participantNodes.get(participantId);
384
399
  if (!nodes)
385
400
  return;
@@ -414,7 +429,8 @@ class SpatialAudioManager extends EventManager_1.EventManager {
414
429
  if (nodes?.panner) {
415
430
  const distanceConfig = this.getDistanceConfig();
416
431
  const normalizedPosition = this.normalizePositionUnits(position);
417
- const targetPosition = this.applySpatialBoostIfNeeded(normalizedPosition);
432
+ const webAudioPosition = this.toWebAudioPosition(normalizedPosition);
433
+ const targetPosition = this.applySpatialBoostIfNeeded(webAudioPosition);
418
434
  // Update position (where the sound is coming from)
419
435
  nodes.panner.positionX.setValueAtTime(targetPosition.x, this.audioContext.currentTime);
420
436
  nodes.panner.positionY.setValueAtTime(targetPosition.y, this.audioContext.currentTime);
@@ -422,18 +438,10 @@ class SpatialAudioManager extends EventManager_1.EventManager {
422
438
  // Update orientation (where the participant is facing)
423
439
  // This makes the audio source directional based on participant's direction
424
440
  if (direction) {
425
- // Normalize direction vector
426
- const length = Math.sqrt(direction.x * direction.x +
427
- direction.y * direction.y +
428
- direction.z * direction.z);
429
- if (length > 0.001) {
430
- const normX = direction.x / length;
431
- const normY = direction.y / length;
432
- const normZ = direction.z / length;
433
- nodes.panner.orientationX.setValueAtTime(normX, this.audioContext.currentTime);
434
- nodes.panner.orientationY.setValueAtTime(normY, this.audioContext.currentTime);
435
- nodes.panner.orientationZ.setValueAtTime(normZ, this.audioContext.currentTime);
436
- }
441
+ const convertedDirection = this.toWebAudioDirection(direction);
442
+ nodes.panner.orientationX.setValueAtTime(convertedDirection.x, this.audioContext.currentTime);
443
+ nodes.panner.orientationY.setValueAtTime(convertedDirection.y, this.audioContext.currentTime);
444
+ nodes.panner.orientationZ.setValueAtTime(convertedDirection.z, this.audioContext.currentTime);
437
445
  }
438
446
  const listenerPos = this.listenerPosition;
439
447
  const distance = this.getDistanceBetween(listenerPos, targetPosition);
@@ -513,32 +521,23 @@ class SpatialAudioManager extends EventManager_1.EventManager {
513
521
  if (!listener) {
514
522
  return;
515
523
  }
516
- this.listenerPosition = { ...normalizedPosition };
524
+ const convertedPosition = this.toWebAudioPosition(normalizedPosition);
525
+ const convertedOrientation = this.convertListenerOrientation(orientation);
526
+ this.listenerPosition = { ...convertedPosition };
517
527
  this.listenerInitialized = true;
518
- this.listenerDirection = {
519
- forward: {
520
- x: orientation.forwardX,
521
- y: orientation.forwardY,
522
- z: orientation.forwardZ,
523
- },
524
- up: {
525
- x: orientation.upX,
526
- y: orientation.upY,
527
- z: orientation.upZ,
528
- },
529
- };
528
+ this.listenerDirection = convertedOrientation;
530
529
  if (listener.positionX) {
531
- listener.positionX.setValueAtTime(normalizedPosition.x, this.audioContext.currentTime);
532
- listener.positionY.setValueAtTime(normalizedPosition.y, this.audioContext.currentTime);
533
- listener.positionZ.setValueAtTime(normalizedPosition.z, this.audioContext.currentTime);
530
+ listener.positionX.setValueAtTime(convertedPosition.x, this.audioContext.currentTime);
531
+ listener.positionY.setValueAtTime(convertedPosition.y, this.audioContext.currentTime);
532
+ listener.positionZ.setValueAtTime(convertedPosition.z, this.audioContext.currentTime);
534
533
  }
535
534
  if (listener.forwardX) {
536
- listener.forwardX.setValueAtTime(orientation.forwardX, this.audioContext.currentTime);
537
- listener.forwardY.setValueAtTime(orientation.forwardY, this.audioContext.currentTime);
538
- listener.forwardZ.setValueAtTime(orientation.forwardZ, this.audioContext.currentTime);
539
- listener.upX.setValueAtTime(orientation.upX, this.audioContext.currentTime);
540
- listener.upY.setValueAtTime(orientation.upY, this.audioContext.currentTime);
541
- listener.upZ.setValueAtTime(orientation.upZ, this.audioContext.currentTime);
535
+ listener.forwardX.setValueAtTime(convertedOrientation.forward.x, this.audioContext.currentTime);
536
+ listener.forwardY.setValueAtTime(convertedOrientation.forward.y, this.audioContext.currentTime);
537
+ listener.forwardZ.setValueAtTime(convertedOrientation.forward.z, this.audioContext.currentTime);
538
+ listener.upX.setValueAtTime(convertedOrientation.up.x, this.audioContext.currentTime);
539
+ listener.upY.setValueAtTime(convertedOrientation.up.y, this.audioContext.currentTime);
540
+ listener.upZ.setValueAtTime(convertedOrientation.up.z, this.audioContext.currentTime);
542
541
  }
543
542
  if (Math.random() < 0.01) {
544
543
  console.log(`🎧 [Spatial Audio] Listener updated:`, {
@@ -638,7 +637,7 @@ class SpatialAudioManager extends EventManager_1.EventManager {
638
637
  const normalized = (distance - config.refDistance) /
639
638
  Math.max(config.maxDistance - config.refDistance, 0.001);
640
639
  const shaped = Math.pow(Math.max(0, 1 - normalized), Math.max(1.2, config.rolloffFactor * 1.05));
641
- return Math.min(1, Math.max(0.01, shaped));
640
+ return Math.min(1, Math.max(0.001, shaped));
642
641
  }
643
642
  normalizePositionUnits(position) {
644
643
  const distanceConfig = this.getDistanceConfig();
@@ -729,17 +728,54 @@ class SpatialAudioManager extends EventManager_1.EventManager {
729
728
  clearInterval(processor.monitor);
730
729
  processor.processedTrack.removeEventListener("ended", processor.cleanupListener);
731
730
  processor.originalTrack.removeEventListener("ended", processor.cleanupListener);
732
- try {
733
- processor.originalTrack.stop();
734
- }
735
- catch (error) {
736
- console.warn("⚠️ Unable to stop original track during cleanup", error);
737
- }
738
731
  processor.destinationStream.getTracks().forEach((t) => t.stop());
739
732
  processor.sourceStream.getTracks().forEach((t) => t.stop());
740
733
  processor.context.close();
741
734
  this.outgoingProcessors.delete(processorId);
742
735
  }
736
+ toWebAudioPosition(position) {
737
+ return {
738
+ x: position.y,
739
+ y: position.z,
740
+ z: -position.x,
741
+ };
742
+ }
743
+ toWebAudioDirection(vector) {
744
+ return this.normalizeVector(this.toWebAudioPosition(vector));
745
+ }
746
+ convertListenerOrientation(orientation) {
747
+ const forward = this.toWebAudioDirection({
748
+ x: orientation.forwardX,
749
+ y: orientation.forwardY,
750
+ z: orientation.forwardZ,
751
+ });
752
+ const upRaw = this.toWebAudioDirection({
753
+ x: orientation.upX,
754
+ y: orientation.upY,
755
+ z: orientation.upZ,
756
+ });
757
+ const dot = forward.x * upRaw.x + forward.y * upRaw.y + forward.z * upRaw.z;
758
+ const upOrtho = {
759
+ x: upRaw.x - dot * forward.x,
760
+ y: upRaw.y - dot * forward.y,
761
+ z: upRaw.z - dot * forward.z,
762
+ };
763
+ return {
764
+ forward,
765
+ up: this.normalizeVector(upOrtho, { x: 0, y: 1, z: 0 }),
766
+ };
767
+ }
768
+ normalizeVector(vector, fallback = { x: 0, y: 0, z: -1 }) {
769
+ const length = Math.hypot(vector.x, vector.y, vector.z);
770
+ if (length < 1e-5) {
771
+ return { ...fallback };
772
+ }
773
+ return {
774
+ x: vector.x / length,
775
+ y: vector.y / length,
776
+ z: vector.z / length,
777
+ };
778
+ }
743
779
  async ensureDenoiseWorklet(targetContext = this.audioContext) {
744
780
  if (!this.isDenoiserEnabled()) {
745
781
  return;
@@ -843,6 +879,12 @@ registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
843
879
  noiseFloor: 0.004,
844
880
  release: 0.18,
845
881
  };
882
+ const noiseGateDefaults = {
883
+ enabled: true,
884
+ };
885
+ const outboundDefaults = {
886
+ enabled: false,
887
+ };
846
888
  return {
847
889
  distance: {
848
890
  refDistance: options?.distance?.refDistance ?? distanceDefaults.refDistance,
@@ -856,6 +898,12 @@ registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
856
898
  noiseFloor: options?.denoiser?.noiseFloor ?? denoiserDefaults.noiseFloor,
857
899
  release: options?.denoiser?.release ?? denoiserDefaults.release,
858
900
  },
901
+ noiseGate: {
902
+ enabled: options?.noiseGate?.enabled ?? noiseGateDefaults.enabled,
903
+ },
904
+ outboundTuning: {
905
+ enabled: options?.outboundTuning?.enabled ?? outboundDefaults.enabled,
906
+ },
859
907
  };
860
908
  }
861
909
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@newgameplusinc/odyssey-audio-video-sdk-dev",
3
- "version": "1.0.12",
3
+ "version": "1.0.14",
4
4
  "description": "Odyssey Spatial Audio & Video SDK using MediaSoup for real-time communication",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",