@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.20 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -6
- package/dist/SpatialAudioManager.d.ts +4 -0
- package/dist/SpatialAudioManager.js +93 -19
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -79,20 +79,32 @@ sdk.setListenerFromLSD(listenerPos, cameraPos, lookAtPos);
|
|
|
79
79
|
- **Coordinate normalization** – Unreal sends centimeters; `SpatialAudioManager` auto-detects large values and converts to meters once.
|
|
80
80
|
- **Orientation math** – `setListenerFromLSD()` builds forward/right/up vectors from camera/LookAt to keep the listener aligned with head movement.
|
|
81
81
|
- **Dynamic distance gain** – `updateSpatialAudio()` measures distance from listener → source and applies a smooth rolloff curve, so distant avatars fade to silence.
|
|
82
|
-
- **Noise handling** – the AudioWorklet denoiser now runs an adaptive multi-band gate (per W3C AudioWorklet guidance) before the high/low-pass filters, stripping constant HVAC/fan noise even when the speaker is close.
|
|
82
|
+
- **Noise handling** – the AudioWorklet denoiser now runs an adaptive multi-band gate (per W3C AudioWorklet guidance) before the high/low-pass filters, stripping constant HVAC/fan noise even when the speaker is close. A newly added silence gate ramps a track to full silence once its energy has stayed below `silenceFloor` for the hold window (`silenceHoldMs`, 420 ms by default), eliminating hiss during dead air without touching spatial cues.
|
|
83
|
+
|
|
84
|
+
#### Noise-Cancellation Stack (What’s Included)
|
|
85
|
+
1. **Adaptive denoiser worklet** – learns each participant’s noise floor in real time, then applies a multi-band downward expander plus dynamic low/high-pass shaping.
|
|
86
|
+
2. **Optional voice enhancement** – autocorrelation-derived confidence (inspired by the tuner article) can raise the reduction floor when speech is present to keep vocals bright.
|
|
87
|
+
3. **Silence gate** – if energy stays below `silenceFloor` for a configurable hold window, the track ramps to true silence, removing hiss when nobody speaks.
|
|
88
|
+
4. **Classic filters** – fixed high-pass/low-pass filters shave off rumble and hiss before signals reach the HRTF panner.
|
|
89
|
+
|
|
90
|
+
These layers run entirely in Web Audio, so you can ship “AirPods-style” background rejection in any browser that supports AudioWorklet, without native code.
|
|
83
91
|
```ts
|
|
84
92
|
const sdk = new OdysseySpatialComms(serverUrl, {
|
|
85
93
|
denoiser: {
|
|
86
|
-
threshold: 0.
|
|
87
|
-
maxReduction: 0.
|
|
88
|
-
hissCut: 0.
|
|
89
|
-
holdMs:
|
|
94
|
+
threshold: 0.0082,
|
|
95
|
+
maxReduction: 0.85,
|
|
96
|
+
hissCut: 0.5,
|
|
97
|
+
holdMs: 190,
|
|
90
98
|
voiceBoost: 0.7,
|
|
91
99
|
voiceSensitivity: 0.3,
|
|
100
|
+
voiceEnhancement: true,
|
|
101
|
+
silenceFloor: 0.0006,
|
|
102
|
+
silenceHoldMs: 420,
|
|
103
|
+
silenceReleaseMs: 260,
|
|
92
104
|
},
|
|
93
105
|
});
|
|
94
106
|
```
|
|
95
|
-
|
|
107
|
+
Voice enhancement (autocorrelation-based speech detection) is **off by default** to keep the gate extra quiet; the example above opts in explicitly with `voiceEnhancement: true`. Enable it when you want brighter close-talk voicing. The denoiser settings work alongside the new, gentler proximity curve, which keeps far-away participants airy instead of “all in one room.”
|
|
96
108
|
|
|
97
109
|
#### How Spatial Audio Is Built
|
|
98
110
|
1. **Telemetry ingestion** – each LSD packet is passed through `setListenerFromLSD(listenerPos, cameraPos, lookAtPos)` so the Web Audio listener matches the player’s real head/camera pose.
|
|
@@ -19,6 +19,10 @@ type DenoiserOptions = {
|
|
|
19
19
|
learnRate?: number;
|
|
20
20
|
voiceBoost?: number;
|
|
21
21
|
voiceSensitivity?: number;
|
|
22
|
+
voiceEnhancement?: boolean;
|
|
23
|
+
silenceFloor?: number;
|
|
24
|
+
silenceHoldMs?: number;
|
|
25
|
+
silenceReleaseMs?: number;
|
|
22
26
|
};
|
|
23
27
|
export type SpatialAudioOptions = {
|
|
24
28
|
distance?: SpatialAudioDistanceConfig;
|
|
@@ -90,6 +90,10 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
90
90
|
learnRate: this.options.denoiser?.learnRate,
|
|
91
91
|
voiceBoost: this.options.denoiser?.voiceBoost,
|
|
92
92
|
voiceSensitivity: this.options.denoiser?.voiceSensitivity,
|
|
93
|
+
voiceEnhancement: this.options.denoiser?.voiceEnhancement,
|
|
94
|
+
silenceFloor: this.options.denoiser?.silenceFloor,
|
|
95
|
+
silenceHoldMs: this.options.denoiser?.silenceHoldMs,
|
|
96
|
+
silenceReleaseMs: this.options.denoiser?.silenceReleaseMs,
|
|
93
97
|
},
|
|
94
98
|
});
|
|
95
99
|
}
|
|
@@ -506,8 +510,8 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
506
510
|
return;
|
|
507
511
|
}
|
|
508
512
|
const clarityScore = this.calculateClarityScore(distance, vectorToSource);
|
|
509
|
-
const targetGain = 0.
|
|
510
|
-
const targetLowpass =
|
|
513
|
+
const targetGain = 0.48 + clarityScore * 0.72; // 0.48 → 1.20
|
|
514
|
+
const targetLowpass = 3600 + clarityScore * 4600; // 3.6kHz → ~8.2kHz
|
|
511
515
|
nodes.proximityGain.gain.setTargetAtTime(targetGain, this.audioContext.currentTime, 0.08);
|
|
512
516
|
nodes.dynamicLowpass.frequency.setTargetAtTime(targetLowpass, this.audioContext.currentTime, 0.12);
|
|
513
517
|
if (Math.random() < 0.005) {
|
|
@@ -526,8 +530,8 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
526
530
|
return this.clamp(0.2 + proximityWeight * 0.6 + focusWeight * 0.2, 0, 1);
|
|
527
531
|
}
|
|
528
532
|
calculateProximityWeight(distance) {
|
|
529
|
-
const closeRange =
|
|
530
|
-
const fadeRange =
|
|
533
|
+
const closeRange = 0.85;
|
|
534
|
+
const fadeRange = 18;
|
|
531
535
|
if (distance <= closeRange) {
|
|
532
536
|
return 1;
|
|
533
537
|
}
|
|
@@ -598,7 +602,21 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
598
602
|
this.learnRate = this._sanitize(cfg.learnRate, 0.001, 0.3, 0.08);
|
|
599
603
|
this.voiceBoost = this._sanitize(cfg.voiceBoost, 0, 1, 0.6);
|
|
600
604
|
this.voiceSensitivity = this._sanitize(cfg.voiceSensitivity, 0.05, 0.9, 0.35);
|
|
601
|
-
this.
|
|
605
|
+
this.voiceEnhancement = cfg.voiceEnhancement === true;
|
|
606
|
+
this.silenceFloor = this._sanitize(cfg.silenceFloor, 0.0002, 0.02, 0.0012);
|
|
607
|
+
this.silenceHoldSamples = Math.max(
|
|
608
|
+
8,
|
|
609
|
+
Math.round(
|
|
610
|
+
sampleRate * this._sanitize(cfg.silenceHoldMs, 40, 1200, 260) / 1000
|
|
611
|
+
)
|
|
612
|
+
);
|
|
613
|
+
this.silenceReleaseSamples = Math.max(
|
|
614
|
+
8,
|
|
615
|
+
Math.round(
|
|
616
|
+
sampleRate * this._sanitize(cfg.silenceReleaseMs, 30, 800, 140) / 1000
|
|
617
|
+
)
|
|
618
|
+
);
|
|
619
|
+
this.historySize = this.voiceEnhancement ? 512 : 0;
|
|
602
620
|
this.channelState = [];
|
|
603
621
|
this.hfAlpha = Math.exp(-2 * Math.PI * 3200 / sampleRate);
|
|
604
622
|
}
|
|
@@ -618,17 +636,24 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
618
636
|
gain: 1,
|
|
619
637
|
quietSamples: 0,
|
|
620
638
|
lpState: 0,
|
|
621
|
-
history: new Float32Array(this.historySize),
|
|
639
|
+
history: this.voiceEnhancement ? new Float32Array(this.historySize) : null,
|
|
622
640
|
historyIndex: 0,
|
|
623
641
|
historyFilled: 0,
|
|
624
|
-
tempBuffer: new Float32Array(this.historySize),
|
|
642
|
+
tempBuffer: this.voiceEnhancement ? new Float32Array(this.historySize) : null,
|
|
625
643
|
voiceConfidence: 0,
|
|
644
|
+
silenceSamples: 0,
|
|
645
|
+
silenceReleaseCounter: 0,
|
|
646
|
+
isSilenced: false,
|
|
647
|
+
muteGain: 1,
|
|
626
648
|
};
|
|
627
649
|
}
|
|
628
650
|
return this.channelState[index];
|
|
629
651
|
}
|
|
630
652
|
|
|
631
653
|
_pushHistory(state, sample) {
|
|
654
|
+
if (!this.voiceEnhancement || !state.history) {
|
|
655
|
+
return;
|
|
656
|
+
}
|
|
632
657
|
state.history[state.historyIndex] = sample;
|
|
633
658
|
state.historyIndex = (state.historyIndex + 1) % state.history.length;
|
|
634
659
|
if (state.historyFilled < state.history.length) {
|
|
@@ -637,6 +662,11 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
637
662
|
}
|
|
638
663
|
|
|
639
664
|
_updateVoiceConfidence(state) {
|
|
665
|
+
if (!this.voiceEnhancement || !state.history || !state.tempBuffer) {
|
|
666
|
+
state.voiceConfidence += (0 - state.voiceConfidence) * 0.2;
|
|
667
|
+
return state.voiceConfidence;
|
|
668
|
+
}
|
|
669
|
+
|
|
640
670
|
if (state.historyFilled < state.history.length * 0.6) {
|
|
641
671
|
state.voiceConfidence += (0 - state.voiceConfidence) * 0.15;
|
|
642
672
|
return state.voiceConfidence;
|
|
@@ -700,7 +730,9 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
700
730
|
}
|
|
701
731
|
|
|
702
732
|
const state = this._ensureState(channel);
|
|
703
|
-
const speechPresence = this.
|
|
733
|
+
const speechPresence = this.voiceEnhancement
|
|
734
|
+
? this.voiceBoost * state.voiceConfidence
|
|
735
|
+
: 0;
|
|
704
736
|
|
|
705
737
|
for (let i = 0; i < inChannel.length; i++) {
|
|
706
738
|
const sample = inChannel[i];
|
|
@@ -716,6 +748,33 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
716
748
|
state.quietSamples = 0;
|
|
717
749
|
}
|
|
718
750
|
|
|
751
|
+
if (state.envelope < this.silenceFloor && speechPresence < 0.2) {
|
|
752
|
+
state.silenceSamples++;
|
|
753
|
+
} else {
|
|
754
|
+
state.silenceSamples = Math.max(0, state.silenceSamples - 2);
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
if (!state.isSilenced && state.silenceSamples > this.silenceHoldSamples) {
|
|
758
|
+
state.isSilenced = true;
|
|
759
|
+
state.silenceReleaseCounter = 0;
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
if (state.isSilenced) {
|
|
763
|
+
if (
|
|
764
|
+
state.envelope > this.silenceFloor * 1.8 ||
|
|
765
|
+
speechPresence > 0.35
|
|
766
|
+
) {
|
|
767
|
+
state.silenceReleaseCounter++;
|
|
768
|
+
if (state.silenceReleaseCounter > this.silenceReleaseSamples) {
|
|
769
|
+
state.isSilenced = false;
|
|
770
|
+
state.silenceSamples = 0;
|
|
771
|
+
state.silenceReleaseCounter = 0;
|
|
772
|
+
}
|
|
773
|
+
} else {
|
|
774
|
+
state.silenceReleaseCounter = 0;
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
|
|
719
778
|
const ratio = state.noise / Math.max(state.envelope, 1e-6);
|
|
720
779
|
let gainTarget = 1 - Math.min(0.98, Math.pow(ratio, this.expansionRatio));
|
|
721
780
|
gainTarget = Math.max(0, Math.min(1, gainTarget));
|
|
@@ -724,12 +783,15 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
724
783
|
gainTarget *= 1 - this.maxReduction;
|
|
725
784
|
}
|
|
726
785
|
|
|
727
|
-
const reductionFloor =
|
|
786
|
+
const reductionFloor = this.voiceEnhancement
|
|
787
|
+
? 1 - this.maxReduction * (1 - Math.min(1, speechPresence * 0.85))
|
|
788
|
+
: 1 - this.maxReduction;
|
|
728
789
|
if (gainTarget < reductionFloor) {
|
|
729
790
|
gainTarget = reductionFloor;
|
|
730
791
|
}
|
|
731
792
|
|
|
732
|
-
const dynamicRelease = this.release *
|
|
793
|
+
const dynamicRelease = this.release *
|
|
794
|
+
(this.voiceEnhancement && speechPresence > 0.1 ? 0.6 : 1);
|
|
733
795
|
state.gain += (gainTarget - state.gain) * dynamicRelease;
|
|
734
796
|
let processed = sample * state.gain;
|
|
735
797
|
|
|
@@ -742,6 +804,10 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
742
804
|
const hissGain = 1 - hissRatio * (this.hissCut * (1 - 0.4 * speechPresence));
|
|
743
805
|
processed = state.lpState + highComponent * hissGain;
|
|
744
806
|
|
|
807
|
+
const muteTarget = state.isSilenced ? 0 : 1;
|
|
808
|
+
state.muteGain += (muteTarget - state.muteGain) * 0.35;
|
|
809
|
+
processed *= state.muteGain;
|
|
810
|
+
|
|
745
811
|
outChannel[i] = processed;
|
|
746
812
|
}
|
|
747
813
|
|
|
@@ -779,17 +845,21 @@ registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
|
|
|
779
845
|
};
|
|
780
846
|
const denoiserDefaults = {
|
|
781
847
|
enabled: true,
|
|
782
|
-
threshold: 0.
|
|
783
|
-
noiseFloor: 0.
|
|
784
|
-
release: 0.
|
|
785
|
-
attack: 0.
|
|
786
|
-
holdMs:
|
|
787
|
-
maxReduction: 0.
|
|
788
|
-
hissCut: 0.
|
|
789
|
-
expansionRatio: 2.
|
|
790
|
-
learnRate: 0.
|
|
848
|
+
threshold: 0.0082,
|
|
849
|
+
noiseFloor: 0.0022,
|
|
850
|
+
release: 0.28,
|
|
851
|
+
attack: 0.32,
|
|
852
|
+
holdMs: 190,
|
|
853
|
+
maxReduction: 0.85,
|
|
854
|
+
hissCut: 0.5,
|
|
855
|
+
expansionRatio: 2.1,
|
|
856
|
+
learnRate: 0.05,
|
|
791
857
|
voiceBoost: 0.6,
|
|
792
858
|
voiceSensitivity: 0.35,
|
|
859
|
+
voiceEnhancement: false,
|
|
860
|
+
silenceFloor: 0.0006,
|
|
861
|
+
silenceHoldMs: 420,
|
|
862
|
+
silenceReleaseMs: 260,
|
|
793
863
|
};
|
|
794
864
|
return {
|
|
795
865
|
distance: {
|
|
@@ -811,6 +881,10 @@ registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
|
|
|
811
881
|
learnRate: options?.denoiser?.learnRate ?? denoiserDefaults.learnRate,
|
|
812
882
|
voiceBoost: options?.denoiser?.voiceBoost ?? denoiserDefaults.voiceBoost,
|
|
813
883
|
voiceSensitivity: options?.denoiser?.voiceSensitivity ?? denoiserDefaults.voiceSensitivity,
|
|
884
|
+
voiceEnhancement: options?.denoiser?.voiceEnhancement ?? denoiserDefaults.voiceEnhancement,
|
|
885
|
+
silenceFloor: options?.denoiser?.silenceFloor ?? denoiserDefaults.silenceFloor,
|
|
886
|
+
silenceHoldMs: options?.denoiser?.silenceHoldMs ?? denoiserDefaults.silenceHoldMs,
|
|
887
|
+
silenceReleaseMs: options?.denoiser?.silenceReleaseMs ?? denoiserDefaults.silenceReleaseMs,
|
|
814
888
|
},
|
|
815
889
|
};
|
|
816
890
|
}
|
package/package.json
CHANGED