@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.19 → 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -1
- package/dist/SpatialAudioManager.d.ts +7 -1
- package/dist/SpatialAudioManager.js +159 -11
- package/dist/index.d.ts +2 -1
- package/dist/index.js +2 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -79,7 +79,32 @@ sdk.setListenerFromLSD(listenerPos, cameraPos, lookAtPos);
|
|
|
79
79
|
- **Coordinate normalization** – Unreal sends centimeters; `SpatialAudioManager` auto-detects large values and converts to meters once.
|
|
80
80
|
- **Orientation math** – `setListenerFromLSD()` builds forward/right/up vectors from camera/LookAt to keep the listener aligned with head movement.
|
|
81
81
|
- **Dynamic distance gain** – `updateSpatialAudio()` measures distance from listener → source and applies a smooth rolloff curve, so distant avatars fade to silence.
|
|
82
|
-
- **Noise handling** – the AudioWorklet denoiser now runs an adaptive multi-band gate (per W3C AudioWorklet guidance) before the high/low-pass filters, stripping constant HVAC/fan noise even when the speaker is close.
|
|
82
|
+
- **Noise handling** – the AudioWorklet denoiser now runs an adaptive multi-band gate (per W3C AudioWorklet guidance) before the high/low-pass filters, stripping constant HVAC/fan noise even when the speaker is close. A newly added silence gate mutes tracks entirely after ~250 ms of sub-noise-floor energy, eliminating hiss during dead air without touching spatial cues.
|
|
83
|
+
|
|
84
|
+
#### Noise-Cancellation Stack (What’s Included)
|
|
85
|
+
1. **Adaptive denoiser worklet** – learns each participant’s noise floor in real time, then applies a multi-band downward expander plus dynamic low/high-pass shaping.
|
|
86
|
+
2. **Optional voice enhancement** – autocorrelation-derived confidence (inspired by the tuner article) can raise the reduction floor when speech is present to keep vocals bright.
|
|
87
|
+
3. **Silence gate** – if energy stays below `silenceFloor` for a configurable hold window, the track ramps to true silence, removing hiss when nobody speaks.
|
|
88
|
+
4. **Classic filters** – fixed high-pass/low-pass filters shave off rumble and hiss before signals reach the HRTF panner.
|
|
89
|
+
|
|
90
|
+
These layers run entirely in Web Audio, so you can ship “AirPods-style” background rejection in any browser without native code.
|
|
91
|
+
```ts
|
|
92
|
+
const sdk = new OdysseySpatialComms(serverUrl, {
|
|
93
|
+
denoiser: {
|
|
94
|
+
threshold: 0.0085,
|
|
95
|
+
maxReduction: 0.94,
|
|
96
|
+
hissCut: 0.7,
|
|
97
|
+
holdMs: 180,
|
|
98
|
+
voiceBoost: 0.7,
|
|
99
|
+
voiceSensitivity: 0.3,
|
|
100
|
+
voiceEnhancement: true,
|
|
101
|
+
silenceFloor: 0.001,
|
|
102
|
+
silenceHoldMs: 220,
|
|
103
|
+
silenceReleaseMs: 160,
|
|
104
|
+
},
|
|
105
|
+
});
|
|
106
|
+
```
|
|
107
|
+
Voice enhancement (autocorrelation-based speech detection) is **off by default** to keep the gate extra quiet; enable it when you want brighter close-talk voicing. Tweak these knobs if you need even more “AirPods Pro” style isolation.
|
|
83
108
|
|
|
84
109
|
#### How Spatial Audio Is Built
|
|
85
110
|
1. **Telemetry ingestion** – each LSD packet is passed through `setListenerFromLSD(listenerPos, cameraPos, lookAtPos)` so the Web Audio listener matches the player’s real head/camera pose.
|
|
@@ -17,8 +17,14 @@ type DenoiserOptions = {
|
|
|
17
17
|
hissCut?: number;
|
|
18
18
|
expansionRatio?: number;
|
|
19
19
|
learnRate?: number;
|
|
20
|
+
voiceBoost?: number;
|
|
21
|
+
voiceSensitivity?: number;
|
|
22
|
+
voiceEnhancement?: boolean;
|
|
23
|
+
silenceFloor?: number;
|
|
24
|
+
silenceHoldMs?: number;
|
|
25
|
+
silenceReleaseMs?: number;
|
|
20
26
|
};
|
|
21
|
-
type SpatialAudioOptions = {
|
|
27
|
+
export type SpatialAudioOptions = {
|
|
22
28
|
distance?: SpatialAudioDistanceConfig;
|
|
23
29
|
denoiser?: DenoiserOptions;
|
|
24
30
|
};
|
|
@@ -88,6 +88,12 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
88
88
|
hissCut: this.options.denoiser?.hissCut,
|
|
89
89
|
expansionRatio: this.options.denoiser?.expansionRatio,
|
|
90
90
|
learnRate: this.options.denoiser?.learnRate,
|
|
91
|
+
voiceBoost: this.options.denoiser?.voiceBoost,
|
|
92
|
+
voiceSensitivity: this.options.denoiser?.voiceSensitivity,
|
|
93
|
+
voiceEnhancement: this.options.denoiser?.voiceEnhancement,
|
|
94
|
+
silenceFloor: this.options.denoiser?.silenceFloor,
|
|
95
|
+
silenceHoldMs: this.options.denoiser?.silenceHoldMs,
|
|
96
|
+
silenceReleaseMs: this.options.denoiser?.silenceReleaseMs,
|
|
91
97
|
},
|
|
92
98
|
});
|
|
93
99
|
}
|
|
@@ -594,6 +600,23 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
594
600
|
this.hissCut = this._sanitize(cfg.hissCut, 0, 1, 0.45);
|
|
595
601
|
this.expansionRatio = this._sanitize(cfg.expansionRatio, 1.1, 4, 1.8);
|
|
596
602
|
this.learnRate = this._sanitize(cfg.learnRate, 0.001, 0.3, 0.08);
|
|
603
|
+
this.voiceBoost = this._sanitize(cfg.voiceBoost, 0, 1, 0.6);
|
|
604
|
+
this.voiceSensitivity = this._sanitize(cfg.voiceSensitivity, 0.05, 0.9, 0.35);
|
|
605
|
+
this.voiceEnhancement = cfg.voiceEnhancement === true;
|
|
606
|
+
this.silenceFloor = this._sanitize(cfg.silenceFloor, 0.0002, 0.02, 0.0012);
|
|
607
|
+
this.silenceHoldSamples = Math.max(
|
|
608
|
+
8,
|
|
609
|
+
Math.round(
|
|
610
|
+
sampleRate * this._sanitize(cfg.silenceHoldMs, 40, 1200, 260) / 1000
|
|
611
|
+
)
|
|
612
|
+
);
|
|
613
|
+
this.silenceReleaseSamples = Math.max(
|
|
614
|
+
8,
|
|
615
|
+
Math.round(
|
|
616
|
+
sampleRate * this._sanitize(cfg.silenceReleaseMs, 30, 800, 140) / 1000
|
|
617
|
+
)
|
|
618
|
+
);
|
|
619
|
+
this.historySize = this.voiceEnhancement ? 512 : 0;
|
|
597
620
|
this.channelState = [];
|
|
598
621
|
this.hfAlpha = Math.exp(-2 * Math.PI * 3200 / sampleRate);
|
|
599
622
|
}
|
|
@@ -613,11 +636,78 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
613
636
|
gain: 1,
|
|
614
637
|
quietSamples: 0,
|
|
615
638
|
lpState: 0,
|
|
639
|
+
history: this.voiceEnhancement ? new Float32Array(this.historySize) : null,
|
|
640
|
+
historyIndex: 0,
|
|
641
|
+
historyFilled: 0,
|
|
642
|
+
tempBuffer: this.voiceEnhancement ? new Float32Array(this.historySize) : null,
|
|
643
|
+
voiceConfidence: 0,
|
|
644
|
+
silenceSamples: 0,
|
|
645
|
+
silenceReleaseCounter: 0,
|
|
646
|
+
isSilenced: false,
|
|
647
|
+
muteGain: 1,
|
|
616
648
|
};
|
|
617
649
|
}
|
|
618
650
|
return this.channelState[index];
|
|
619
651
|
}
|
|
620
652
|
|
|
653
|
+
_pushHistory(state, sample) {
|
|
654
|
+
if (!this.voiceEnhancement || !state.history) {
|
|
655
|
+
return;
|
|
656
|
+
}
|
|
657
|
+
state.history[state.historyIndex] = sample;
|
|
658
|
+
state.historyIndex = (state.historyIndex + 1) % state.history.length;
|
|
659
|
+
if (state.historyFilled < state.history.length) {
|
|
660
|
+
state.historyFilled++;
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
_updateVoiceConfidence(state) {
|
|
665
|
+
if (!this.voiceEnhancement || !state.history || !state.tempBuffer) {
|
|
666
|
+
state.voiceConfidence += (0 - state.voiceConfidence) * 0.2;
|
|
667
|
+
return state.voiceConfidence;
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
if (state.historyFilled < state.history.length * 0.6) {
|
|
671
|
+
state.voiceConfidence += (0 - state.voiceConfidence) * 0.15;
|
|
672
|
+
return state.voiceConfidence;
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
const len = state.history.length;
|
|
676
|
+
let writeIndex = state.historyIndex;
|
|
677
|
+
for (let i = 0; i < len; i++) {
|
|
678
|
+
state.tempBuffer[i] = state.history[writeIndex];
|
|
679
|
+
writeIndex = (writeIndex + 1) % len;
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
const minLag = 30;
|
|
683
|
+
const maxLag = 240;
|
|
684
|
+
let best = 0;
|
|
685
|
+
for (let lag = minLag; lag <= maxLag; lag += 2) {
|
|
686
|
+
let sum = 0;
|
|
687
|
+
let energyA = 0;
|
|
688
|
+
let energyB = 0;
|
|
689
|
+
for (let i = lag; i < len; i++) {
|
|
690
|
+
const a = state.tempBuffer[i];
|
|
691
|
+
const b = state.tempBuffer[i - lag];
|
|
692
|
+
sum += a * b;
|
|
693
|
+
energyA += a * a;
|
|
694
|
+
energyB += b * b;
|
|
695
|
+
}
|
|
696
|
+
const denom = Math.sqrt(energyA * energyB) + 1e-8;
|
|
697
|
+
const corr = Math.abs(sum) / denom;
|
|
698
|
+
if (corr > best) {
|
|
699
|
+
best = corr;
|
|
700
|
+
}
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
const normalized = Math.max(
|
|
704
|
+
0,
|
|
705
|
+
Math.min(1, (best - this.voiceSensitivity) / (1 - this.voiceSensitivity))
|
|
706
|
+
);
|
|
707
|
+
state.voiceConfidence += (normalized - state.voiceConfidence) * 0.2;
|
|
708
|
+
return state.voiceConfidence;
|
|
709
|
+
}
|
|
710
|
+
|
|
621
711
|
process(inputs, outputs) {
|
|
622
712
|
const input = inputs[0];
|
|
623
713
|
const output = outputs[0];
|
|
@@ -640,9 +730,13 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
640
730
|
}
|
|
641
731
|
|
|
642
732
|
const state = this._ensureState(channel);
|
|
733
|
+
const speechPresence = this.voiceEnhancement
|
|
734
|
+
? this.voiceBoost * state.voiceConfidence
|
|
735
|
+
: 0;
|
|
643
736
|
|
|
644
737
|
for (let i = 0; i < inChannel.length; i++) {
|
|
645
738
|
const sample = inChannel[i];
|
|
739
|
+
this._pushHistory(state, sample);
|
|
646
740
|
const magnitude = Math.abs(sample);
|
|
647
741
|
|
|
648
742
|
state.envelope += (magnitude - state.envelope) * this.attack;
|
|
@@ -654,6 +748,33 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
654
748
|
state.quietSamples = 0;
|
|
655
749
|
}
|
|
656
750
|
|
|
751
|
+
if (state.envelope < this.silenceFloor && speechPresence < 0.2) {
|
|
752
|
+
state.silenceSamples++;
|
|
753
|
+
} else {
|
|
754
|
+
state.silenceSamples = Math.max(0, state.silenceSamples - 2);
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
if (!state.isSilenced && state.silenceSamples > this.silenceHoldSamples) {
|
|
758
|
+
state.isSilenced = true;
|
|
759
|
+
state.silenceReleaseCounter = 0;
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
if (state.isSilenced) {
|
|
763
|
+
if (
|
|
764
|
+
state.envelope > this.silenceFloor * 1.8 ||
|
|
765
|
+
speechPresence > 0.35
|
|
766
|
+
) {
|
|
767
|
+
state.silenceReleaseCounter++;
|
|
768
|
+
if (state.silenceReleaseCounter > this.silenceReleaseSamples) {
|
|
769
|
+
state.isSilenced = false;
|
|
770
|
+
state.silenceSamples = 0;
|
|
771
|
+
state.silenceReleaseCounter = 0;
|
|
772
|
+
}
|
|
773
|
+
} else {
|
|
774
|
+
state.silenceReleaseCounter = 0;
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
|
|
657
778
|
const ratio = state.noise / Math.max(state.envelope, 1e-6);
|
|
658
779
|
let gainTarget = 1 - Math.min(0.98, Math.pow(ratio, this.expansionRatio));
|
|
659
780
|
gainTarget = Math.max(0, Math.min(1, gainTarget));
|
|
@@ -662,7 +783,16 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
662
783
|
gainTarget *= 1 - this.maxReduction;
|
|
663
784
|
}
|
|
664
785
|
|
|
665
|
-
|
|
786
|
+
const reductionFloor = this.voiceEnhancement
|
|
787
|
+
? 1 - this.maxReduction * (1 - Math.min(1, speechPresence * 0.85))
|
|
788
|
+
: 1 - this.maxReduction;
|
|
789
|
+
if (gainTarget < reductionFloor) {
|
|
790
|
+
gainTarget = reductionFloor;
|
|
791
|
+
}
|
|
792
|
+
|
|
793
|
+
const dynamicRelease = this.release *
|
|
794
|
+
(this.voiceEnhancement && speechPresence > 0.1 ? 0.6 : 1);
|
|
795
|
+
state.gain += (gainTarget - state.gain) * dynamicRelease;
|
|
666
796
|
let processed = sample * state.gain;
|
|
667
797
|
|
|
668
798
|
state.lpState = this.hfAlpha * state.lpState + (1 - this.hfAlpha) * processed;
|
|
@@ -671,11 +801,17 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
671
801
|
1,
|
|
672
802
|
Math.abs(highComponent) / (Math.abs(state.lpState) + 1e-5)
|
|
673
803
|
);
|
|
674
|
-
const hissGain = 1 - hissRatio * this.hissCut;
|
|
804
|
+
const hissGain = 1 - hissRatio * (this.hissCut * (1 - 0.4 * speechPresence));
|
|
675
805
|
processed = state.lpState + highComponent * hissGain;
|
|
676
806
|
|
|
807
|
+
const muteTarget = state.isSilenced ? 0 : 1;
|
|
808
|
+
state.muteGain += (muteTarget - state.muteGain) * 0.35;
|
|
809
|
+
processed *= state.muteGain;
|
|
810
|
+
|
|
677
811
|
outChannel[i] = processed;
|
|
678
812
|
}
|
|
813
|
+
|
|
814
|
+
this._updateVoiceConfidence(state);
|
|
679
815
|
}
|
|
680
816
|
|
|
681
817
|
return true;
|
|
@@ -709,15 +845,21 @@ registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
|
|
|
709
845
|
};
|
|
710
846
|
const denoiserDefaults = {
|
|
711
847
|
enabled: true,
|
|
712
|
-
threshold: 0.
|
|
713
|
-
noiseFloor: 0.
|
|
714
|
-
release: 0.
|
|
715
|
-
attack: 0.
|
|
716
|
-
holdMs:
|
|
717
|
-
maxReduction: 0.
|
|
718
|
-
hissCut: 0.
|
|
719
|
-
expansionRatio:
|
|
720
|
-
learnRate: 0.
|
|
848
|
+
threshold: 0.009,
|
|
849
|
+
noiseFloor: 0.0025,
|
|
850
|
+
release: 0.24,
|
|
851
|
+
attack: 0.25,
|
|
852
|
+
holdMs: 150,
|
|
853
|
+
maxReduction: 0.92,
|
|
854
|
+
hissCut: 0.62,
|
|
855
|
+
expansionRatio: 2.35,
|
|
856
|
+
learnRate: 0.06,
|
|
857
|
+
voiceBoost: 0.6,
|
|
858
|
+
voiceSensitivity: 0.35,
|
|
859
|
+
voiceEnhancement: false,
|
|
860
|
+
silenceFloor: 0.0012,
|
|
861
|
+
silenceHoldMs: 260,
|
|
862
|
+
silenceReleaseMs: 140,
|
|
721
863
|
};
|
|
722
864
|
return {
|
|
723
865
|
distance: {
|
|
@@ -737,6 +879,12 @@ registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
|
|
|
737
879
|
hissCut: options?.denoiser?.hissCut ?? denoiserDefaults.hissCut,
|
|
738
880
|
expansionRatio: options?.denoiser?.expansionRatio ?? denoiserDefaults.expansionRatio,
|
|
739
881
|
learnRate: options?.denoiser?.learnRate ?? denoiserDefaults.learnRate,
|
|
882
|
+
voiceBoost: options?.denoiser?.voiceBoost ?? denoiserDefaults.voiceBoost,
|
|
883
|
+
voiceSensitivity: options?.denoiser?.voiceSensitivity ?? denoiserDefaults.voiceSensitivity,
|
|
884
|
+
voiceEnhancement: options?.denoiser?.voiceEnhancement ?? denoiserDefaults.voiceEnhancement,
|
|
885
|
+
silenceFloor: options?.denoiser?.silenceFloor ?? denoiserDefaults.silenceFloor,
|
|
886
|
+
silenceHoldMs: options?.denoiser?.silenceHoldMs ?? denoiserDefaults.silenceHoldMs,
|
|
887
|
+
silenceReleaseMs: options?.denoiser?.silenceReleaseMs ?? denoiserDefaults.silenceReleaseMs,
|
|
740
888
|
},
|
|
741
889
|
};
|
|
742
890
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { EventManager } from "./EventManager";
|
|
2
|
+
import { SpatialAudioOptions } from "./SpatialAudioManager";
|
|
2
3
|
import { Direction, MediaState, OdysseyEvent, Participant, Position, RoomJoinedData } from "./types";
|
|
3
4
|
export declare class OdysseySpatialComms extends EventManager {
|
|
4
5
|
private socket;
|
|
@@ -9,7 +10,7 @@ export declare class OdysseySpatialComms extends EventManager {
|
|
|
9
10
|
private localParticipant;
|
|
10
11
|
private mediasoupManager;
|
|
11
12
|
private spatialAudioManager;
|
|
12
|
-
constructor(serverUrl: string);
|
|
13
|
+
constructor(serverUrl: string, spatialOptions?: SpatialAudioOptions);
|
|
13
14
|
on(event: OdysseyEvent, listener: (...args: any[]) => void): this;
|
|
14
15
|
emit(event: OdysseyEvent, ...args: any[]): boolean;
|
|
15
16
|
joinRoom(data: {
|
package/dist/index.js
CHANGED
|
@@ -6,7 +6,7 @@ const EventManager_1 = require("./EventManager");
|
|
|
6
6
|
const MediasoupManager_1 = require("./MediasoupManager");
|
|
7
7
|
const SpatialAudioManager_1 = require("./SpatialAudioManager");
|
|
8
8
|
class OdysseySpatialComms extends EventManager_1.EventManager {
|
|
9
|
-
constructor(serverUrl) {
|
|
9
|
+
constructor(serverUrl, spatialOptions) {
|
|
10
10
|
super(); // Initialize the EventEmitter base class
|
|
11
11
|
this.room = null;
|
|
12
12
|
this.localParticipant = null;
|
|
@@ -14,7 +14,7 @@ class OdysseySpatialComms extends EventManager_1.EventManager {
|
|
|
14
14
|
transports: ["websocket"],
|
|
15
15
|
});
|
|
16
16
|
this.mediasoupManager = new MediasoupManager_1.MediasoupManager(this.socket);
|
|
17
|
-
this.spatialAudioManager = new SpatialAudioManager_1.SpatialAudioManager();
|
|
17
|
+
this.spatialAudioManager = new SpatialAudioManager_1.SpatialAudioManager(spatialOptions);
|
|
18
18
|
// Set max listeners to prevent warning
|
|
19
19
|
this.setMaxListeners(50);
|
|
20
20
|
this.listenForEvents();
|
package/package.json
CHANGED