@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.24 → 1.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -7
- package/dist/SpatialAudioManager.d.ts +4 -0
- package/dist/SpatialAudioManager.js +65 -21
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -83,6 +83,8 @@ sdk.setListenerFromLSD(listenerPos, cameraPos, lookAtPos);
|
|
|
83
83
|
|
|
84
84
|
#### Noise-Cancellation Stack (What’s Included)
|
|
85
85
|
1. **Adaptive denoiser worklet** – learns each participant’s noise floor in real time, then applies a multi-band downward expander plus dynamic low/high-pass shaping.
|
|
86
|
+
- `speechBoost` lifts the low/mid band in proportion to speech confidence, keeping consonants bright without reintroducing floor noise.
|
|
87
|
+
- `highBandGate` plus `highBandAttack`/`highBandRelease` clamp constant fan hiss in the 4–12 kHz band whenever `speechPresence` is low, so background whoosh never leaks through live mics.
|
|
86
88
|
2. **Optional voice enhancement** – autocorrelation-derived confidence (inspired by the tuner article) can raise the reduction floor when speech is present to keep vocals bright.
|
|
87
89
|
3. **Silence gate** – if energy stays below `silenceFloor` for a configurable hold window, the track ramps to true silence, then wakes instantly once voice energy returns.
|
|
88
90
|
4. **Classic filters** – fixed high-pass/low-pass filters shave off rumble and hiss before signals reach the HRTF panner.
|
|
@@ -91,16 +93,20 @@ These layers run entirely in Web Audio, so you can ship “AirPods-style” back
|
|
|
91
93
|
```ts
|
|
92
94
|
const sdk = new OdysseySpatialComms(serverUrl, {
|
|
93
95
|
denoiser: {
|
|
94
|
-
threshold: 0.
|
|
95
|
-
maxReduction: 0.
|
|
96
|
-
hissCut: 0.
|
|
97
|
-
holdMs:
|
|
96
|
+
threshold: 0.008,
|
|
97
|
+
maxReduction: 0.88,
|
|
98
|
+
hissCut: 0.52,
|
|
99
|
+
holdMs: 260,
|
|
98
100
|
voiceBoost: 0.65,
|
|
99
101
|
voiceSensitivity: 0.33,
|
|
100
102
|
voiceEnhancement: true,
|
|
101
|
-
silenceFloor: 0.
|
|
102
|
-
silenceHoldMs:
|
|
103
|
-
silenceReleaseMs:
|
|
103
|
+
silenceFloor: 0.00075,
|
|
104
|
+
silenceHoldMs: 520,
|
|
105
|
+
silenceReleaseMs: 160,
|
|
106
|
+
speechBoost: 0.35,
|
|
107
|
+
highBandGate: 0.7,
|
|
108
|
+
highBandAttack: 0.25,
|
|
109
|
+
highBandRelease: 0.12,
|
|
104
110
|
},
|
|
105
111
|
});
|
|
106
112
|
```
|
|
@@ -23,6 +23,10 @@ type DenoiserOptions = {
|
|
|
23
23
|
silenceFloor?: number;
|
|
24
24
|
silenceHoldMs?: number;
|
|
25
25
|
silenceReleaseMs?: number;
|
|
26
|
+
speechBoost?: number;
|
|
27
|
+
highBandGate?: number;
|
|
28
|
+
highBandAttack?: number;
|
|
29
|
+
highBandRelease?: number;
|
|
26
30
|
};
|
|
27
31
|
export type SpatialAudioOptions = {
|
|
28
32
|
distance?: SpatialAudioDistanceConfig;
|
|
@@ -94,6 +94,10 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
94
94
|
silenceFloor: this.options.denoiser?.silenceFloor,
|
|
95
95
|
silenceHoldMs: this.options.denoiser?.silenceHoldMs,
|
|
96
96
|
silenceReleaseMs: this.options.denoiser?.silenceReleaseMs,
|
|
97
|
+
speechBoost: this.options.denoiser?.speechBoost,
|
|
98
|
+
highBandGate: this.options.denoiser?.highBandGate,
|
|
99
|
+
highBandAttack: this.options.denoiser?.highBandAttack,
|
|
100
|
+
highBandRelease: this.options.denoiser?.highBandRelease,
|
|
97
101
|
},
|
|
98
102
|
});
|
|
99
103
|
}
|
|
@@ -603,8 +607,13 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
603
607
|
this.voiceBoost = this._sanitize(cfg.voiceBoost, 0, 1, 0.6);
|
|
604
608
|
this.voiceSensitivity = this._sanitize(cfg.voiceSensitivity, 0.05, 0.9, 0.35);
|
|
605
609
|
this.voiceEnhancement = cfg.voiceEnhancement === true;
|
|
610
|
+
this.speechBoost = this._sanitize(cfg.speechBoost, 0, 1.5, 0.35);
|
|
611
|
+
this.highBandGate = this._sanitize(cfg.highBandGate, 0, 1, 0.7);
|
|
612
|
+
this.highBandAttack = this._sanitize(cfg.highBandAttack, 0.01, 1, 0.25);
|
|
613
|
+
this.highBandRelease = this._sanitize(cfg.highBandRelease, 0.01, 1, 0.12);
|
|
606
614
|
this.silenceFloor = this._sanitize(cfg.silenceFloor, 0.0002, 0.02, 0.00085);
|
|
607
615
|
this.gateGraceSamples = Math.round(sampleRate * 0.45);
|
|
616
|
+
this.postSpeechHoldSamples = Math.round(sampleRate * 0.35);
|
|
608
617
|
this.silenceHoldSamples = Math.max(
|
|
609
618
|
8,
|
|
610
619
|
Math.round(
|
|
@@ -647,6 +656,9 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
647
656
|
isSilenced: false,
|
|
648
657
|
muteGain: 1,
|
|
649
658
|
graceSamplesRemaining: this.gateGraceSamples,
|
|
659
|
+
postSpeechHold: 0,
|
|
660
|
+
highBandEnv: this.silenceFloor,
|
|
661
|
+
broadbandEnv: this.silenceFloor,
|
|
650
662
|
};
|
|
651
663
|
}
|
|
652
664
|
return this.channelState[index];
|
|
@@ -743,6 +755,13 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
743
755
|
|
|
744
756
|
state.envelope += (magnitude - state.envelope) * this.attack;
|
|
745
757
|
|
|
758
|
+
if (speechPresence > 0.12 || state.envelope > this.threshold * 1.1) {
|
|
759
|
+
state.graceSamplesRemaining = this.gateGraceSamples;
|
|
760
|
+
state.postSpeechHold = this.postSpeechHoldSamples;
|
|
761
|
+
} else if (state.postSpeechHold > 0) {
|
|
762
|
+
state.postSpeechHold--;
|
|
763
|
+
}
|
|
764
|
+
|
|
746
765
|
if (state.envelope < this.threshold) {
|
|
747
766
|
state.noise += (state.envelope - state.noise) * this.learnRate;
|
|
748
767
|
state.quietSamples++;
|
|
@@ -750,14 +769,14 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
750
769
|
state.quietSamples = 0;
|
|
751
770
|
}
|
|
752
771
|
|
|
753
|
-
if (state.graceSamplesRemaining > 0) {
|
|
772
|
+
if (state.graceSamplesRemaining > 0 || state.postSpeechHold > 0) {
|
|
754
773
|
state.graceSamplesRemaining--;
|
|
755
774
|
state.isSilenced = false;
|
|
756
775
|
state.silenceSamples = 0;
|
|
757
776
|
state.silenceReleaseCounter = 0;
|
|
758
777
|
} else {
|
|
759
778
|
const belowFloor = state.envelope < this.silenceFloor;
|
|
760
|
-
if (belowFloor && speechPresence < 0.
|
|
779
|
+
if (belowFloor && speechPresence < 0.15) {
|
|
761
780
|
state.silenceSamples++;
|
|
762
781
|
} else {
|
|
763
782
|
state.silenceSamples = Math.max(0, state.silenceSamples - 3);
|
|
@@ -769,12 +788,14 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
769
788
|
}
|
|
770
789
|
|
|
771
790
|
if (state.isSilenced) {
|
|
772
|
-
const wakeFromEnergy = state.envelope > this.silenceFloor * 1.
|
|
773
|
-
const wakeFromVoice = speechPresence > 0.
|
|
791
|
+
const wakeFromEnergy = state.envelope > this.silenceFloor * 1.2;
|
|
792
|
+
const wakeFromVoice = speechPresence > 0.15;
|
|
774
793
|
if (wakeFromEnergy || wakeFromVoice) {
|
|
775
794
|
state.isSilenced = false;
|
|
776
795
|
state.silenceSamples = 0;
|
|
777
796
|
state.silenceReleaseCounter = 0;
|
|
797
|
+
state.postSpeechHold = this.postSpeechHoldSamples;
|
|
798
|
+
state.graceSamplesRemaining = this.gateGraceSamples;
|
|
778
799
|
} else {
|
|
779
800
|
state.silenceReleaseCounter++;
|
|
780
801
|
if (state.silenceReleaseCounter > this.silenceReleaseSamples) {
|
|
@@ -809,16 +830,31 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
809
830
|
let processed = sample * state.gain;
|
|
810
831
|
|
|
811
832
|
state.lpState = this.hfAlpha * state.lpState + (1 - this.hfAlpha) * processed;
|
|
812
|
-
const
|
|
833
|
+
const lowComponent = state.lpState;
|
|
834
|
+
const highComponent = processed - lowComponent;
|
|
835
|
+
|
|
836
|
+
// Track broadband/high-band envelopes for multi-band gating
|
|
837
|
+
state.broadbandEnv += (Math.abs(processed) - state.broadbandEnv) * 0.12;
|
|
838
|
+
const highEnvDelta = Math.abs(highComponent) - state.highBandEnv;
|
|
839
|
+
const highEnvCoef = highEnvDelta > 0 ? this.highBandAttack : this.highBandRelease;
|
|
840
|
+
state.highBandEnv += highEnvDelta * highEnvCoef;
|
|
841
|
+
|
|
813
842
|
const hissRatio = Math.min(
|
|
814
843
|
1,
|
|
815
|
-
Math.abs(highComponent) / (Math.abs(
|
|
844
|
+
Math.abs(highComponent) / (Math.abs(lowComponent) + 1e-5)
|
|
816
845
|
);
|
|
817
846
|
const hissGain = 1 - hissRatio * (this.hissCut * (1 - 0.4 * speechPresence));
|
|
818
|
-
processed = state.lpState + highComponent * hissGain;
|
|
819
847
|
|
|
820
|
-
const
|
|
821
|
-
|
|
848
|
+
const highEnvRatio = state.highBandEnv / (state.broadbandEnv + 1e-5);
|
|
849
|
+
const gateAmount = this.highBandGate * Math.max(0, highEnvRatio - speechPresence * 0.5);
|
|
850
|
+
const gatedHigh = highComponent * hissGain * (1 - gateAmount);
|
|
851
|
+
|
|
852
|
+
const speechLift = 1 + this.speechBoost * speechPresence;
|
|
853
|
+
processed = lowComponent * speechLift + gatedHigh;
|
|
854
|
+
|
|
855
|
+
const muteTarget = state.isSilenced ? 0.05 : 1;
|
|
856
|
+
const smoothing = state.isSilenced ? 0.12 : 0.42;
|
|
857
|
+
state.muteGain += (muteTarget - state.muteGain) * smoothing;
|
|
822
858
|
processed *= state.muteGain;
|
|
823
859
|
|
|
824
860
|
outChannel[i] = processed;
|
|
@@ -858,21 +894,25 @@ registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
|
|
|
858
894
|
};
|
|
859
895
|
const denoiserDefaults = {
|
|
860
896
|
enabled: true,
|
|
861
|
-
threshold: 0.
|
|
862
|
-
noiseFloor: 0.
|
|
863
|
-
release: 0.
|
|
864
|
-
attack: 0.
|
|
865
|
-
holdMs:
|
|
866
|
-
maxReduction: 0.
|
|
867
|
-
hissCut: 0.
|
|
868
|
-
expansionRatio: 2.
|
|
869
|
-
learnRate: 0.
|
|
897
|
+
threshold: 0.008,
|
|
898
|
+
noiseFloor: 0.002,
|
|
899
|
+
release: 0.22,
|
|
900
|
+
attack: 0.36,
|
|
901
|
+
holdMs: 260,
|
|
902
|
+
maxReduction: 0.88,
|
|
903
|
+
hissCut: 0.52,
|
|
904
|
+
expansionRatio: 2.15,
|
|
905
|
+
learnRate: 0.045,
|
|
870
906
|
voiceBoost: 0.65,
|
|
871
907
|
voiceSensitivity: 0.33,
|
|
872
908
|
voiceEnhancement: false,
|
|
873
|
-
silenceFloor: 0.
|
|
874
|
-
silenceHoldMs:
|
|
875
|
-
silenceReleaseMs:
|
|
909
|
+
silenceFloor: 0.00075,
|
|
910
|
+
silenceHoldMs: 520,
|
|
911
|
+
silenceReleaseMs: 160,
|
|
912
|
+
speechBoost: 0.35,
|
|
913
|
+
highBandGate: 0.7,
|
|
914
|
+
highBandAttack: 0.25,
|
|
915
|
+
highBandRelease: 0.12,
|
|
876
916
|
};
|
|
877
917
|
return {
|
|
878
918
|
distance: {
|
|
@@ -898,6 +938,10 @@ registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
|
|
|
898
938
|
silenceFloor: options?.denoiser?.silenceFloor ?? denoiserDefaults.silenceFloor,
|
|
899
939
|
silenceHoldMs: options?.denoiser?.silenceHoldMs ?? denoiserDefaults.silenceHoldMs,
|
|
900
940
|
silenceReleaseMs: options?.denoiser?.silenceReleaseMs ?? denoiserDefaults.silenceReleaseMs,
|
|
941
|
+
speechBoost: options?.denoiser?.speechBoost ?? denoiserDefaults.speechBoost,
|
|
942
|
+
highBandGate: options?.denoiser?.highBandGate ?? denoiserDefaults.highBandGate,
|
|
943
|
+
highBandAttack: options?.denoiser?.highBandAttack ?? denoiserDefaults.highBandAttack,
|
|
944
|
+
highBandRelease: options?.denoiser?.highBandRelease ?? denoiserDefaults.highBandRelease,
|
|
901
945
|
},
|
|
902
946
|
};
|
|
903
947
|
}
|
package/package.json
CHANGED