@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.18 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -1
- package/dist/SpatialAudioManager.d.ts +9 -2
- package/dist/SpatialAudioManager.js +170 -27
- package/dist/index.d.ts +2 -1
- package/dist/index.js +2 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -79,7 +79,20 @@ sdk.setListenerFromLSD(listenerPos, cameraPos, lookAtPos);
|
|
|
79
79
|
- **Coordinate normalization** – Unreal sends centimeters; `SpatialAudioManager` auto-detects large values and converts to meters once.
|
|
80
80
|
- **Orientation math** – `setListenerFromLSD()` builds forward/right/up vectors from camera/LookAt to keep the listener aligned with head movement.
|
|
81
81
|
- **Dynamic distance gain** – `updateSpatialAudio()` measures distance from listener → source and applies a smooth rolloff curve, so distant avatars fade to silence.
|
|
82
|
-
- **Noise handling** –
|
|
82
|
+
- **Noise handling** – the AudioWorklet denoiser now runs an adaptive multi-band gate (per W3C AudioWorklet guidance) before the high/low-pass filters, stripping constant HVAC/fan noise even when the speaker is close.
|
|
83
|
+
```ts
|
|
84
|
+
const sdk = new OdysseySpatialComms(serverUrl, {
|
|
85
|
+
denoiser: {
|
|
86
|
+
threshold: 0.0085,
|
|
87
|
+
maxReduction: 0.94,
|
|
88
|
+
hissCut: 0.7,
|
|
89
|
+
holdMs: 180,
|
|
90
|
+
voiceBoost: 0.7,
|
|
91
|
+
voiceSensitivity: 0.3,
|
|
92
|
+
},
|
|
93
|
+
});
|
|
94
|
+
```
|
|
95
|
+
Tweak these knobs if you need even more “AirPods Pro” style isolation.
|
|
83
96
|
|
|
84
97
|
#### How Spatial Audio Is Built
|
|
85
98
|
1. **Telemetry ingestion** – each LSD packet is passed through `setListenerFromLSD(listenerPos, cameraPos, lookAtPos)` so the Web Audio listener matches the player’s real head/camera pose.
|
|
@@ -11,8 +11,16 @@ type DenoiserOptions = {
|
|
|
11
11
|
threshold?: number;
|
|
12
12
|
noiseFloor?: number;
|
|
13
13
|
release?: number;
|
|
14
|
+
attack?: number;
|
|
15
|
+
holdMs?: number;
|
|
16
|
+
maxReduction?: number;
|
|
17
|
+
hissCut?: number;
|
|
18
|
+
expansionRatio?: number;
|
|
19
|
+
learnRate?: number;
|
|
20
|
+
voiceBoost?: number;
|
|
21
|
+
voiceSensitivity?: number;
|
|
14
22
|
};
|
|
15
|
-
type SpatialAudioOptions = {
|
|
23
|
+
export type SpatialAudioOptions = {
|
|
16
24
|
distance?: SpatialAudioDistanceConfig;
|
|
17
25
|
denoiser?: DenoiserOptions;
|
|
18
26
|
};
|
|
@@ -25,7 +33,6 @@ export declare class SpatialAudioManager extends EventManager {
|
|
|
25
33
|
private options;
|
|
26
34
|
private denoiseWorkletReady;
|
|
27
35
|
private denoiseWorkletUrl?;
|
|
28
|
-
private denoiserWasmBytes?;
|
|
29
36
|
private listenerPosition;
|
|
30
37
|
private listenerInitialized;
|
|
31
38
|
private listenerDirection;
|
|
@@ -82,9 +82,14 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
82
82
|
threshold: this.options.denoiser?.threshold,
|
|
83
83
|
noiseFloor: this.options.denoiser?.noiseFloor,
|
|
84
84
|
release: this.options.denoiser?.release,
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
85
|
+
attack: this.options.denoiser?.attack,
|
|
86
|
+
holdMs: this.options.denoiser?.holdMs,
|
|
87
|
+
maxReduction: this.options.denoiser?.maxReduction,
|
|
88
|
+
hissCut: this.options.denoiser?.hissCut,
|
|
89
|
+
expansionRatio: this.options.denoiser?.expansionRatio,
|
|
90
|
+
learnRate: this.options.denoiser?.learnRate,
|
|
91
|
+
voiceBoost: this.options.denoiser?.voiceBoost,
|
|
92
|
+
voiceSensitivity: this.options.denoiser?.voiceSensitivity,
|
|
88
93
|
},
|
|
89
94
|
});
|
|
90
95
|
}
|
|
@@ -577,48 +582,170 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
577
582
|
super();
|
|
578
583
|
const cfg = (options && options.processorOptions) || {};
|
|
579
584
|
this.enabled = cfg.enabled !== false;
|
|
580
|
-
this.threshold =
|
|
581
|
-
this.noiseFloor =
|
|
582
|
-
this.
|
|
583
|
-
this.
|
|
585
|
+
this.threshold = this._sanitize(cfg.threshold, 0.003, 0.05, 0.012);
|
|
586
|
+
this.noiseFloor = this._sanitize(cfg.noiseFloor, 0.0005, 0.05, 0.004);
|
|
587
|
+
this.attack = this._sanitize(cfg.attack, 0.01, 0.9, 0.35);
|
|
588
|
+
this.release = this._sanitize(cfg.release, 0.01, 0.9, 0.18);
|
|
589
|
+
this.holdSamples = Math.max(
|
|
590
|
+
8,
|
|
591
|
+
Math.round(
|
|
592
|
+
sampleRate * this._sanitize(cfg.holdMs, 10, 400, 110) / 1000
|
|
593
|
+
)
|
|
594
|
+
);
|
|
595
|
+
this.maxReduction = this._sanitize(cfg.maxReduction, 0.1, 0.95, 0.85);
|
|
596
|
+
this.hissCut = this._sanitize(cfg.hissCut, 0, 1, 0.45);
|
|
597
|
+
this.expansionRatio = this._sanitize(cfg.expansionRatio, 1.1, 4, 1.8);
|
|
598
|
+
this.learnRate = this._sanitize(cfg.learnRate, 0.001, 0.3, 0.08);
|
|
599
|
+
this.voiceBoost = this._sanitize(cfg.voiceBoost, 0, 1, 0.6);
|
|
600
|
+
this.voiceSensitivity = this._sanitize(cfg.voiceSensitivity, 0.05, 0.9, 0.35);
|
|
601
|
+
this.historySize = 512;
|
|
602
|
+
this.channelState = [];
|
|
603
|
+
this.hfAlpha = Math.exp(-2 * Math.PI * 3200 / sampleRate);
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
_sanitize(value, min, max, fallback) {
|
|
607
|
+
if (typeof value !== 'number' || !isFinite(value)) {
|
|
608
|
+
return fallback;
|
|
609
|
+
}
|
|
610
|
+
return Math.min(max, Math.max(min, value));
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
_ensureState(index) {
|
|
614
|
+
if (!this.channelState[index]) {
|
|
615
|
+
this.channelState[index] = {
|
|
616
|
+
envelope: this.noiseFloor,
|
|
617
|
+
noise: this.noiseFloor,
|
|
618
|
+
gain: 1,
|
|
619
|
+
quietSamples: 0,
|
|
620
|
+
lpState: 0,
|
|
621
|
+
history: new Float32Array(this.historySize),
|
|
622
|
+
historyIndex: 0,
|
|
623
|
+
historyFilled: 0,
|
|
624
|
+
tempBuffer: new Float32Array(this.historySize),
|
|
625
|
+
voiceConfidence: 0,
|
|
626
|
+
};
|
|
627
|
+
}
|
|
628
|
+
return this.channelState[index];
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
_pushHistory(state, sample) {
|
|
632
|
+
state.history[state.historyIndex] = sample;
|
|
633
|
+
state.historyIndex = (state.historyIndex + 1) % state.history.length;
|
|
634
|
+
if (state.historyFilled < state.history.length) {
|
|
635
|
+
state.historyFilled++;
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
_updateVoiceConfidence(state) {
|
|
640
|
+
if (state.historyFilled < state.history.length * 0.6) {
|
|
641
|
+
state.voiceConfidence += (0 - state.voiceConfidence) * 0.15;
|
|
642
|
+
return state.voiceConfidence;
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
const len = state.history.length;
|
|
646
|
+
let writeIndex = state.historyIndex;
|
|
647
|
+
for (let i = 0; i < len; i++) {
|
|
648
|
+
state.tempBuffer[i] = state.history[writeIndex];
|
|
649
|
+
writeIndex = (writeIndex + 1) % len;
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
const minLag = 30;
|
|
653
|
+
const maxLag = 240;
|
|
654
|
+
let best = 0;
|
|
655
|
+
for (let lag = minLag; lag <= maxLag; lag += 2) {
|
|
656
|
+
let sum = 0;
|
|
657
|
+
let energyA = 0;
|
|
658
|
+
let energyB = 0;
|
|
659
|
+
for (let i = lag; i < len; i++) {
|
|
660
|
+
const a = state.tempBuffer[i];
|
|
661
|
+
const b = state.tempBuffer[i - lag];
|
|
662
|
+
sum += a * b;
|
|
663
|
+
energyA += a * a;
|
|
664
|
+
energyB += b * b;
|
|
665
|
+
}
|
|
666
|
+
const denom = Math.sqrt(energyA * energyB) + 1e-8;
|
|
667
|
+
const corr = Math.abs(sum) / denom;
|
|
668
|
+
if (corr > best) {
|
|
669
|
+
best = corr;
|
|
670
|
+
}
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
const normalized = Math.max(
|
|
674
|
+
0,
|
|
675
|
+
Math.min(1, (best - this.voiceSensitivity) / (1 - this.voiceSensitivity))
|
|
676
|
+
);
|
|
677
|
+
state.voiceConfidence += (normalized - state.voiceConfidence) * 0.2;
|
|
678
|
+
return state.voiceConfidence;
|
|
584
679
|
}
|
|
585
680
|
|
|
586
681
|
process(inputs, outputs) {
|
|
587
682
|
const input = inputs[0];
|
|
588
683
|
const output = outputs[0];
|
|
589
|
-
if (!input ||
|
|
684
|
+
if (!input || !output) {
|
|
590
685
|
return true;
|
|
591
686
|
}
|
|
592
687
|
|
|
593
|
-
for (let channel = 0; channel <
|
|
688
|
+
for (let channel = 0; channel < output.length; channel++) {
|
|
594
689
|
const inChannel = input[channel];
|
|
595
690
|
const outChannel = output[channel];
|
|
596
691
|
if (!inChannel || !outChannel) {
|
|
597
692
|
continue;
|
|
598
693
|
}
|
|
599
694
|
|
|
600
|
-
|
|
695
|
+
if (!this.enabled) {
|
|
696
|
+
for (let i = 0; i < inChannel.length; i++) {
|
|
697
|
+
outChannel[i] = inChannel[i];
|
|
698
|
+
}
|
|
699
|
+
continue;
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
const state = this._ensureState(channel);
|
|
703
|
+
const speechPresence = this.voiceBoost * state.voiceConfidence;
|
|
704
|
+
|
|
601
705
|
for (let i = 0; i < inChannel.length; i++) {
|
|
602
706
|
const sample = inChannel[i];
|
|
603
|
-
|
|
604
|
-
|
|
707
|
+
this._pushHistory(state, sample);
|
|
708
|
+
const magnitude = Math.abs(sample);
|
|
605
709
|
|
|
606
|
-
|
|
607
|
-
this.smoothedLevel += (rms - this.smoothedLevel) * this.release;
|
|
608
|
-
const dynamicThreshold = Math.max(
|
|
609
|
-
this.noiseFloor,
|
|
610
|
-
this.threshold * 0.6 + this.smoothedLevel * 0.4
|
|
611
|
-
);
|
|
710
|
+
state.envelope += (magnitude - state.envelope) * this.attack;
|
|
612
711
|
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
712
|
+
if (state.envelope < this.threshold) {
|
|
713
|
+
state.noise += (state.envelope - state.noise) * this.learnRate;
|
|
714
|
+
state.quietSamples++;
|
|
715
|
+
} else {
|
|
716
|
+
state.quietSamples = 0;
|
|
616
717
|
}
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
718
|
+
|
|
719
|
+
const ratio = state.noise / Math.max(state.envelope, 1e-6);
|
|
720
|
+
let gainTarget = 1 - Math.min(0.98, Math.pow(ratio, this.expansionRatio));
|
|
721
|
+
gainTarget = Math.max(0, Math.min(1, gainTarget));
|
|
722
|
+
|
|
723
|
+
if (state.quietSamples > this.holdSamples) {
|
|
724
|
+
gainTarget *= 1 - this.maxReduction;
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
const reductionFloor = 1 - this.maxReduction * (1 - Math.min(1, speechPresence * 0.85));
|
|
728
|
+
if (gainTarget < reductionFloor) {
|
|
729
|
+
gainTarget = reductionFloor;
|
|
620
730
|
}
|
|
731
|
+
|
|
732
|
+
const dynamicRelease = this.release * (speechPresence > 0.1 ? 0.6 : 1);
|
|
733
|
+
state.gain += (gainTarget - state.gain) * dynamicRelease;
|
|
734
|
+
let processed = sample * state.gain;
|
|
735
|
+
|
|
736
|
+
state.lpState = this.hfAlpha * state.lpState + (1 - this.hfAlpha) * processed;
|
|
737
|
+
const highComponent = processed - state.lpState;
|
|
738
|
+
const hissRatio = Math.min(
|
|
739
|
+
1,
|
|
740
|
+
Math.abs(highComponent) / (Math.abs(state.lpState) + 1e-5)
|
|
741
|
+
);
|
|
742
|
+
const hissGain = 1 - hissRatio * (this.hissCut * (1 - 0.4 * speechPresence));
|
|
743
|
+
processed = state.lpState + highComponent * hissGain;
|
|
744
|
+
|
|
745
|
+
outChannel[i] = processed;
|
|
621
746
|
}
|
|
747
|
+
|
|
748
|
+
this._updateVoiceConfidence(state);
|
|
622
749
|
}
|
|
623
750
|
|
|
624
751
|
return true;
|
|
@@ -652,9 +779,17 @@ registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
|
|
|
652
779
|
};
|
|
653
780
|
const denoiserDefaults = {
|
|
654
781
|
enabled: true,
|
|
655
|
-
threshold: 0.
|
|
656
|
-
noiseFloor: 0.
|
|
657
|
-
release: 0.
|
|
782
|
+
threshold: 0.009,
|
|
783
|
+
noiseFloor: 0.0025,
|
|
784
|
+
release: 0.24,
|
|
785
|
+
attack: 0.25,
|
|
786
|
+
holdMs: 150,
|
|
787
|
+
maxReduction: 0.92,
|
|
788
|
+
hissCut: 0.62,
|
|
789
|
+
expansionRatio: 2.35,
|
|
790
|
+
learnRate: 0.06,
|
|
791
|
+
voiceBoost: 0.6,
|
|
792
|
+
voiceSensitivity: 0.35,
|
|
658
793
|
};
|
|
659
794
|
return {
|
|
660
795
|
distance: {
|
|
@@ -668,6 +803,14 @@ registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
|
|
|
668
803
|
threshold: options?.denoiser?.threshold ?? denoiserDefaults.threshold,
|
|
669
804
|
noiseFloor: options?.denoiser?.noiseFloor ?? denoiserDefaults.noiseFloor,
|
|
670
805
|
release: options?.denoiser?.release ?? denoiserDefaults.release,
|
|
806
|
+
attack: options?.denoiser?.attack ?? denoiserDefaults.attack,
|
|
807
|
+
holdMs: options?.denoiser?.holdMs ?? denoiserDefaults.holdMs,
|
|
808
|
+
maxReduction: options?.denoiser?.maxReduction ?? denoiserDefaults.maxReduction,
|
|
809
|
+
hissCut: options?.denoiser?.hissCut ?? denoiserDefaults.hissCut,
|
|
810
|
+
expansionRatio: options?.denoiser?.expansionRatio ?? denoiserDefaults.expansionRatio,
|
|
811
|
+
learnRate: options?.denoiser?.learnRate ?? denoiserDefaults.learnRate,
|
|
812
|
+
voiceBoost: options?.denoiser?.voiceBoost ?? denoiserDefaults.voiceBoost,
|
|
813
|
+
voiceSensitivity: options?.denoiser?.voiceSensitivity ?? denoiserDefaults.voiceSensitivity,
|
|
671
814
|
},
|
|
672
815
|
};
|
|
673
816
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { EventManager } from "./EventManager";
|
|
2
|
+
import { SpatialAudioOptions } from "./SpatialAudioManager";
|
|
2
3
|
import { Direction, MediaState, OdysseyEvent, Participant, Position, RoomJoinedData } from "./types";
|
|
3
4
|
export declare class OdysseySpatialComms extends EventManager {
|
|
4
5
|
private socket;
|
|
@@ -9,7 +10,7 @@ export declare class OdysseySpatialComms extends EventManager {
|
|
|
9
10
|
private localParticipant;
|
|
10
11
|
private mediasoupManager;
|
|
11
12
|
private spatialAudioManager;
|
|
12
|
-
constructor(serverUrl: string);
|
|
13
|
+
constructor(serverUrl: string, spatialOptions?: SpatialAudioOptions);
|
|
13
14
|
on(event: OdysseyEvent, listener: (...args: any[]) => void): this;
|
|
14
15
|
emit(event: OdysseyEvent, ...args: any[]): boolean;
|
|
15
16
|
joinRoom(data: {
|
package/dist/index.js
CHANGED
|
@@ -6,7 +6,7 @@ const EventManager_1 = require("./EventManager");
|
|
|
6
6
|
const MediasoupManager_1 = require("./MediasoupManager");
|
|
7
7
|
const SpatialAudioManager_1 = require("./SpatialAudioManager");
|
|
8
8
|
class OdysseySpatialComms extends EventManager_1.EventManager {
|
|
9
|
-
constructor(serverUrl) {
|
|
9
|
+
constructor(serverUrl, spatialOptions) {
|
|
10
10
|
super(); // Initialize the EventEmitter base class
|
|
11
11
|
this.room = null;
|
|
12
12
|
this.localParticipant = null;
|
|
@@ -14,7 +14,7 @@ class OdysseySpatialComms extends EventManager_1.EventManager {
|
|
|
14
14
|
transports: ["websocket"],
|
|
15
15
|
});
|
|
16
16
|
this.mediasoupManager = new MediasoupManager_1.MediasoupManager(this.socket);
|
|
17
|
-
this.spatialAudioManager = new SpatialAudioManager_1.SpatialAudioManager();
|
|
17
|
+
this.spatialAudioManager = new SpatialAudioManager_1.SpatialAudioManager(spatialOptions);
|
|
18
18
|
// Set max listeners to prevent warning
|
|
19
19
|
this.setMaxListeners(50);
|
|
20
20
|
this.listenForEvents();
|
package/package.json
CHANGED