@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.258 → 1.0.260

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/README.md +45 -1
  2. package/dist/audio/AudioNodeFactory.d.ts +130 -0
  3. package/dist/audio/AudioNodeFactory.js +158 -0
  4. package/dist/audio/AudioPipeline.d.ts +89 -0
  5. package/dist/audio/AudioPipeline.js +138 -0
  6. package/dist/{MLNoiseSuppressor.d.ts → audio/MLNoiseSuppressor.d.ts} +7 -7
  7. package/dist/{MLNoiseSuppressor.js → audio/MLNoiseSuppressor.js} +19 -52
  8. package/dist/audio/index.d.ts +6 -0
  9. package/dist/audio/index.js +22 -0
  10. package/dist/channels/huddle/HuddleChannel.d.ts +87 -0
  11. package/dist/channels/huddle/HuddleChannel.js +152 -0
  12. package/dist/channels/huddle/HuddleTypes.d.ts +85 -0
  13. package/dist/channels/huddle/HuddleTypes.js +25 -0
  14. package/dist/channels/huddle/index.d.ts +5 -0
  15. package/dist/channels/huddle/index.js +21 -0
  16. package/dist/channels/index.d.ts +5 -0
  17. package/dist/channels/index.js +21 -0
  18. package/dist/channels/spatial/SpatialAudioChannel.d.ts +144 -0
  19. package/dist/channels/spatial/SpatialAudioChannel.js +476 -0
  20. package/dist/channels/spatial/SpatialAudioTypes.d.ts +85 -0
  21. package/dist/channels/spatial/SpatialAudioTypes.js +42 -0
  22. package/dist/channels/spatial/index.d.ts +5 -0
  23. package/dist/channels/spatial/index.js +21 -0
  24. package/dist/{EventManager.d.ts → core/EventManager.d.ts} +4 -2
  25. package/dist/{EventManager.js → core/EventManager.js} +5 -3
  26. package/dist/{MediasoupManager.d.ts → core/MediasoupManager.d.ts} +10 -4
  27. package/dist/{MediasoupManager.js → core/MediasoupManager.js} +56 -44
  28. package/dist/core/index.d.ts +5 -0
  29. package/dist/core/index.js +21 -0
  30. package/dist/index.d.ts +2 -2
  31. package/dist/index.js +43 -9
  32. package/dist/sdk/index.d.ts +36 -0
  33. package/dist/sdk/index.js +121 -0
  34. package/dist/types/events.d.ts +154 -0
  35. package/dist/{types.js → types/events.js} +3 -0
  36. package/dist/types/index.d.ts +7 -0
  37. package/dist/types/index.js +23 -0
  38. package/dist/types/participant.d.ts +65 -0
  39. package/dist/types/participant.js +5 -0
  40. package/dist/types/position.d.ts +47 -0
  41. package/dist/types/position.js +9 -0
  42. package/dist/types/room.d.ts +82 -0
  43. package/dist/types/room.js +5 -0
  44. package/dist/utils/audio/clarity-score.d.ts +33 -0
  45. package/dist/utils/audio/clarity-score.js +81 -0
  46. package/dist/utils/audio/index.d.ts +5 -0
  47. package/dist/utils/audio/index.js +21 -0
  48. package/dist/utils/audio/voice-filter.d.ts +30 -0
  49. package/dist/utils/audio/voice-filter.js +70 -0
  50. package/dist/utils/index.d.ts +7 -0
  51. package/dist/utils/index.js +23 -0
  52. package/dist/utils/position/coordinates.d.ts +37 -0
  53. package/dist/utils/position/coordinates.js +61 -0
  54. package/dist/utils/position/index.d.ts +6 -0
  55. package/dist/utils/position/index.js +22 -0
  56. package/dist/utils/position/normalize.d.ts +37 -0
  57. package/dist/utils/position/normalize.js +78 -0
  58. package/dist/utils/position/snap.d.ts +51 -0
  59. package/dist/utils/position/snap.js +81 -0
  60. package/dist/utils/smoothing/gain-smoothing.d.ts +45 -0
  61. package/dist/utils/smoothing/gain-smoothing.js +77 -0
  62. package/dist/utils/smoothing/index.d.ts +5 -0
  63. package/dist/utils/smoothing/index.js +21 -0
  64. package/dist/utils/smoothing/pan-smoothing.d.ts +43 -0
  65. package/dist/utils/smoothing/pan-smoothing.js +85 -0
  66. package/dist/utils/spatial/angle-calc.d.ts +24 -0
  67. package/dist/utils/spatial/angle-calc.js +69 -0
  68. package/dist/utils/spatial/distance-calc.d.ts +33 -0
  69. package/dist/utils/spatial/distance-calc.js +48 -0
  70. package/dist/utils/spatial/gain-calc.d.ts +37 -0
  71. package/dist/utils/spatial/gain-calc.js +52 -0
  72. package/dist/utils/spatial/head-position.d.ts +32 -0
  73. package/dist/utils/spatial/head-position.js +76 -0
  74. package/dist/utils/spatial/index.d.ts +9 -0
  75. package/dist/utils/spatial/index.js +25 -0
  76. package/dist/utils/spatial/listener-calc.d.ts +28 -0
  77. package/dist/utils/spatial/listener-calc.js +74 -0
  78. package/dist/utils/spatial/pan-calc.d.ts +48 -0
  79. package/dist/utils/spatial/pan-calc.js +80 -0
  80. package/package.json +1 -1
  81. package/dist/SpatialAudioManager.d.ts +0 -271
  82. package/dist/SpatialAudioManager.js +0 -1512
  83. package/dist/types.d.ts +0 -73
@@ -1,1512 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.SpatialAudioManager = void 0;
4
- const EventManager_1 = require("./EventManager");
5
- const MLNoiseSuppressor_1 = require("./MLNoiseSuppressor");
6
- class SpatialAudioManager extends EventManager_1.EventManager {
7
- constructor(options) {
8
- super();
9
- this.participantNodes = new Map();
10
- this.monitoringIntervals = new Map();
11
- this.denoiseWorkletReady = null;
12
- this.listenerPosition = { x: 0, y: 0, z: 0 };
13
- this.listenerInitialized = false;
14
- this.listenerDirection = {
15
- forward: { x: 0, y: 1, z: 0 },
16
- up: { x: 0, y: 0, z: 1 },
17
- };
18
- // Unit right-vector derived from listener yaw; used to project sources into listener space.
19
- this.listenerRight = { x: 1, z: 0 };
20
- this.localParticipantId = ''; // Local participant ID (listener)
21
- // MASTER MUTE: Mutes all incoming audio (used before "Join Space" is clicked)
22
- this.isMasterMuted = false;
23
- // PAN SMOOTHING: Prevents random left/right jumping
24
- // Stores the previous smoothed pan value for each participant
25
- this.smoothedPanValues = new Map();
26
- // PAN SMOOTHING: Tracks last applied pan value to skip micro-changes
27
- this.lastPanValue = new Map();
28
- // UPDATE THROTTLE: Tracks last spatial update time to prevent too-frequent updates
29
- this.lastSpatialUpdateTime = new Map();
30
- // DEBUG: Throttle spatial audio debug logs
31
- this._spatialDebugTimes = new Map();
32
- // Smoothing factor: 0.0 = no smoothing (instant), 1.0 = infinite smoothing (never changes)
33
- // 0.35 provides good stability without feeling sluggish
34
- this.panSmoothingFactor = 0.35;
35
- // Minimum pan change threshold - changes smaller than this are ignored to prevent jitter
36
- // 0.02 = 2% of pan range must change to trigger update (filters network jitter)
37
- this.panChangeThreshold = 0.02;
38
- // Dead-zone around center (0) to prevent oscillation when nearly face-to-face
39
- this.panCenterDeadZone = 0.03;
40
- // POSITION SNAPPING: Prevents jitter from micro-movements when "stationary"
41
- // Caches last known "snapped" position for each speaker
42
- this.cachedSpeakerPositions = new Map();
43
- // Cache for last snapped listener position
44
- this.cachedListenerPosition = { x: 0, y: 0, z: 0 };
45
- // Track if listener position has been set at least once
46
- this.listenerPositionInitialized = false;
47
- // Minimum position change (in meters) to trigger recalculation
48
- // 0.30m = 30cm - ignores pixel streaming jitter, physics wobble, breathing
49
- this.positionSnapThreshold = 0.30;
50
- // NOTE: Rate limiting variables removed - setTargetAtTime provides sufficient smoothing
51
- // The smoothPanValue() and position snapping handle jitter reduction
52
- // ML Noise Suppressor (TensorFlow.js-based)
53
- this.mlSuppressor = null;
54
- // NOTE: We currently LOAD the TFJS model, but we do not yet run it inside the
55
- // real-time WebAudio graph (AudioWorklet). This flag only indicates the model
56
- // is ready to be used.
57
- this.mlModelReady = false;
58
- this.options = this.resolveOptions(options);
59
- // Use high sample rate for best audio quality
60
- this.audioContext = new AudioContext({ sampleRate: 48000 });
61
- // Master gain - REDUCED to prevent echo/feedback
62
- this.masterGainNode = this.audioContext.createGain();
63
- this.masterGainNode.gain.value = 1.0; // Unity gain for crystal clear audio (was 2.5)
64
- // Compressor for dynamic range control - VERY GENTLE to prevent pumping/echo
65
- this.compressor = this.audioContext.createDynamicsCompressor();
66
- this.compressor.threshold.value = -15; // Even higher threshold = less compression
67
- this.compressor.knee.value = 40; // Soft knee for natural sound
68
- this.compressor.ratio.value = 2.5; // Very gentle ratio (was 3)
69
- this.compressor.attack.value = 0.02; // 20ms attack - very gentle
70
- this.compressor.release.value = 0.25; // 250ms release - smooth recovery
71
- // Connect master chain: masterGain -> compressor -> destination
72
- this.masterGainNode.connect(this.compressor);
73
- this.compressor.connect(this.audioContext.destination);
74
- }
75
- getAudioContext() {
76
- return this.audioContext;
77
- }
78
- /**
79
- * Initialize ML-based noise suppression (TensorFlow.js)
80
- * Falls back to AudioWorklet denoiser if ML initialization fails
81
- */
82
- async initializeMLNoiseSuppression(modelUrl) {
83
- try {
84
- this.mlSuppressor = new MLNoiseSuppressor_1.MLNoiseSuppressor();
85
- await this.mlSuppressor.initialize(modelUrl);
86
- if (this.mlSuppressor.isReady()) {
87
- this.mlModelReady = true;
88
- }
89
- else {
90
- throw new Error('ML processor not ready after initialization');
91
- }
92
- }
93
- catch (error) {
94
- this.mlSuppressor = null;
95
- this.mlModelReady = false;
96
- // AudioWorklet denoiser will be used as fallback (existing behavior)
97
- }
98
- }
99
- /**
100
- * Get current noise suppression mode
101
- */
102
- getNoiseSuppressionMode() {
103
- if (this.isDenoiserEnabled()) {
104
- return 'audioworklet';
105
- }
106
- return 'none';
107
- }
108
- /**
109
- * True if the TFJS model assets were loaded and the model is ready.
110
- * This does NOT mean it is currently processing the live WebAudio stream.
111
- */
112
- isMLModelLoaded() {
113
- return this.mlModelReady && !!this.mlSuppressor?.isReady();
114
- }
115
- /**
116
- * Setup spatial audio for a participant
117
- *
118
- * CRITICAL: Each participant gets their OWN audio processing chain:
119
- * Stream -> Source -> Panner -> Analyser -> Gain -> Compressor -> Output
120
- *
121
- * This ensures:
122
- * - Each voice is positioned independently in 3D space
123
- * - No server-side mixing required
124
- * - Scalable to unlimited participants (browser handles the mixing)
125
- *
126
- * @param participantId Unique ID for this participant
127
- * @param track Audio track from MediaSoup consumer
128
- * @param bypassSpatialization For testing - bypasses 3D positioning
129
- */
130
- async setupSpatialAudioForParticipant(participantId, track, bypassSpatialization = false // Default to false
131
- ) {
132
- if (this.audioContext.state === "suspended") {
133
- await this.audioContext.resume();
134
- }
135
- // Create stream with noise suppression constraints
136
- const stream = new MediaStream([track]);
137
- const source = this.audioContext.createMediaStreamSource(stream);
138
- const panner = this.audioContext.createPanner();
139
- const stereoPanner = this.audioContext.createStereoPanner(); // For stable L/R panning
140
- // Force a predictable mono signal before applying StereoPannerNode.
141
- // If the inbound track is stereo (or becomes stereo in the graph), StereoPannerNode can
142
- // still leak energy into both ears. Downmixing prevents that.
143
- const monoSplitter = this.audioContext.createChannelSplitter(2);
144
- const monoMerger = this.audioContext.createChannelMerger(1);
145
- const monoGainL = this.audioContext.createGain();
146
- const monoGainR = this.audioContext.createGain();
147
- monoGainL.gain.value = 0.5;
148
- monoGainR.gain.value = 0.5;
149
- // CRITICAL: Convert mono back to stereo for StereoPanner
150
- // monoMerger outputs 1 channel, but StereoPanner needs 2 channels
151
- const stereoUpmixer = this.audioContext.createChannelMerger(2);
152
- const analyser = this.audioContext.createAnalyser();
153
- const gain = this.audioContext.createGain();
154
- const proximityGain = this.audioContext.createGain();
155
- let denoiseNode;
156
- // ML-based denoising: Currently prepared but requires AudioWorklet integration
157
- // For now, using traditional AudioWorklet denoiser as it provides real-time processing
158
- // TODO: Integrate ML processor into AudioWorklet for hybrid ML+DSP approach
159
- if (this.isDenoiserEnabled() && typeof this.audioContext.audioWorklet !== "undefined") {
160
- try {
161
- await this.ensureDenoiseWorklet();
162
- denoiseNode = new AudioWorkletNode(this.audioContext, "odyssey-denoise", {
163
- numberOfInputs: 1,
164
- numberOfOutputs: 1,
165
- processorOptions: {
166
- enabled: this.options.denoiser?.enabled !== false,
167
- threshold: this.options.denoiser?.threshold,
168
- noiseFloor: this.options.denoiser?.noiseFloor,
169
- release: this.options.denoiser?.release,
170
- attack: this.options.denoiser?.attack,
171
- holdMs: this.options.denoiser?.holdMs,
172
- maxReduction: this.options.denoiser?.maxReduction,
173
- hissCut: this.options.denoiser?.hissCut,
174
- expansionRatio: this.options.denoiser?.expansionRatio,
175
- learnRate: this.options.denoiser?.learnRate,
176
- voiceBoost: this.options.denoiser?.voiceBoost,
177
- voiceSensitivity: this.options.denoiser?.voiceSensitivity,
178
- voiceEnhancement: this.options.denoiser?.voiceEnhancement,
179
- silenceFloor: this.options.denoiser?.silenceFloor,
180
- silenceHoldMs: this.options.denoiser?.silenceHoldMs,
181
- silenceReleaseMs: this.options.denoiser?.silenceReleaseMs,
182
- speechBoost: this.options.denoiser?.speechBoost,
183
- highBandGate: this.options.denoiser?.highBandGate,
184
- highBandAttack: this.options.denoiser?.highBandAttack,
185
- highBandRelease: this.options.denoiser?.highBandRelease,
186
- },
187
- });
188
- }
189
- catch (error) {
190
- denoiseNode = undefined;
191
- }
192
- }
193
- // Create BiquadFilter nodes for static/noise reduction
194
- // Based on: https://tagdiwalaviral.medium.com/struggles-of-noise-reduction-in-rtc-part-2-2526f8179442
195
- // HIGHPASS FILTER: Remove low-frequency rumble and plosives
196
- // Higher cutoff (100Hz) reduces room boom/echo effect
197
- const highpassFilter = this.audioContext.createBiquadFilter();
198
- highpassFilter.type = "highpass";
199
- highpassFilter.frequency.value = 100; // Cut below 100Hz (removes room boom/rumble)
200
- highpassFilter.Q.value = 0.5; // Very gentle slope - prevents resonance
201
- // LOWPASS FILTER: Remove high-frequency hiss
202
- // Slightly higher for clearer voice
203
- const lowpassFilter = this.audioContext.createBiquadFilter();
204
- lowpassFilter.type = "lowpass";
205
- lowpassFilter.frequency.value = 10000; // Cut above 10kHz (more open sound)
206
- lowpassFilter.Q.value = 0.5; // Low Q prevents ringing/echo
207
- // VOICE BAND EMPHASIS: DISABLED - was causing resonance/bathroom effect
208
- // The peaking filter at 180Hz can cause room-like coloration
209
- const voiceBandFilter = this.audioContext.createBiquadFilter();
210
- voiceBandFilter.type = "peaking";
211
- voiceBandFilter.frequency.value = 180;
212
- voiceBandFilter.Q.value = 0.5;
213
- voiceBandFilter.gain.value = 0; // DISABLED - no boost (was 1dB causing echo)
214
- const dynamicLowpass = this.audioContext.createBiquadFilter();
215
- dynamicLowpass.type = "lowpass";
216
- dynamicLowpass.frequency.value = 12000; // Higher = more natural (was 7500)
217
- dynamicLowpass.Q.value = 0.5; // Low Q prevents ringing
218
- proximityGain.gain.value = 1.0;
219
- // Configure Panner for realistic 3D spatial audio
220
- const distanceConfig = this.getDistanceConfig();
221
- panner.panningModel = "HRTF"; // Head-Related Transfer Function for realistic 3D
222
- panner.distanceModel = "inverse"; // Natural distance falloff
223
- panner.refDistance = distanceConfig.refDistance ?? 1.2;
224
- panner.maxDistance = distanceConfig.maxDistance ?? 15; // Cannot hear beyond 15 meters
225
- panner.rolloffFactor = distanceConfig.rolloffFactor ?? 1.35; // How quickly sound fades with distance
226
- panner.coneInnerAngle = 360; // Omnidirectional sound source
227
- panner.coneOuterAngle = 360;
228
- panner.coneOuterGain = 0.3; // Some sound even outside cone
229
- // Configure gain for individual participant volume control
230
- gain.gain.value = 1.0; // Unity gain for clean audio
231
- // Per-participant LIMITER (not compressor) - only catches peaks
232
- // Compressors can cause pumping/echo - limiter is transparent until clipping
233
- const participantCompressor = this.audioContext.createDynamicsCompressor();
234
- participantCompressor.threshold.value = -6; // Only activate near clipping (was -40!)
235
- participantCompressor.knee.value = 3; // Hard knee = limiter behavior (was 20)
236
- participantCompressor.ratio.value = 20; // High ratio = limiter (was 1.5)
237
- participantCompressor.attack.value = 0.001; // 1ms - catch peaks fast
238
- participantCompressor.release.value = 0.05; // 50ms - fast release prevents pumping
239
- let currentNode = source;
240
- // First apply compressor to tame initial transients (CRITICAL for preventing pops)
241
- currentNode.connect(participantCompressor);
242
- currentNode = participantCompressor;
243
- if (denoiseNode) {
244
- currentNode.connect(denoiseNode);
245
- currentNode = denoiseNode;
246
- }
247
- // Audio chain with voice optimization filters
248
- // Chain: source -> compressor -> [denoise] -> highpass -> voiceBand -> lowpass -> dynamicLowpass -> proximityGain -> panner -> analyser -> gain -> masterGain
249
- currentNode.connect(highpassFilter);
250
- highpassFilter.connect(voiceBandFilter);
251
- voiceBandFilter.connect(lowpassFilter);
252
- lowpassFilter.connect(dynamicLowpass);
253
- dynamicLowpass.connect(proximityGain);
254
- // Base routing (always): proximityGain -> mono downmix -> stereo upmix -> analyser
255
- proximityGain.connect(monoSplitter);
256
- monoSplitter.connect(monoGainL, 0);
257
- monoSplitter.connect(monoGainR, 1);
258
- monoGainL.connect(monoMerger, 0, 0);
259
- monoGainR.connect(monoMerger, 0, 0);
260
- // Convert mono to stereo (same signal on both channels) for StereoPanner
261
- monoMerger.connect(stereoUpmixer, 0, 0); // mono -> left channel
262
- monoMerger.connect(stereoUpmixer, 0, 1); // mono -> right channel
263
- stereoUpmixer.connect(analyser);
264
- // Output routing depends on spatialization mode:
265
- // - Spatial: analyser -> stereoPanner -> gain -> master
266
- // - Non-spatial: analyser -> gain -> master
267
- if (bypassSpatialization) {
268
- analyser.connect(gain);
269
- }
270
- else {
271
- analyser.connect(stereoPanner);
272
- stereoPanner.connect(gain);
273
- }
274
- gain.connect(this.masterGainNode);
275
- this.participantNodes.set(participantId, {
276
- source,
277
- panner,
278
- stereoPanner,
279
- monoSplitter,
280
- monoGainL,
281
- monoGainR,
282
- monoMerger,
283
- stereoUpmixer,
284
- analyser,
285
- gain,
286
- proximityGain,
287
- compressor: participantCompressor,
288
- highpassFilter,
289
- lowpassFilter,
290
- voiceBandFilter,
291
- dynamicLowpass,
292
- denoiseNode,
293
- stream,
294
- });
295
- // Start monitoring audio levels
296
- this.startMonitoring(participantId);
297
- }
298
- startMonitoring(participantId) {
299
- const nodes = this.participantNodes.get(participantId);
300
- if (!nodes)
301
- return;
302
- const { analyser, stream } = nodes;
303
- const dataArray = new Uint8Array(analyser.frequencyBinCount);
304
- // Clear any existing interval for this participant
305
- if (this.monitoringIntervals.has(participantId)) {
306
- clearInterval(this.monitoringIntervals.get(participantId));
307
- }
308
- const interval = setInterval(() => {
309
- analyser.getByteTimeDomainData(dataArray);
310
- let sum = 0;
311
- for (const amplitude of dataArray) {
312
- sum += Math.abs(amplitude - 128);
313
- }
314
- const average = sum / dataArray.length;
315
- const audioLevel = (average / 128) * 255; // Scale to 0-255
316
- // Silent monitoring - no logs needed
317
- }, 2000); // Check every 2 seconds
318
- this.monitoringIntervals.set(participantId, interval);
319
- }
320
- /**
321
- * Toggle spatialization for a participant (for huddle/spatial switching)
322
- * @param participantId The participant to update
323
- * @param enableSpatialization True for spatial audio, false for non-spatial (huddle)
324
- */
325
- setParticipantSpatialization(participantId, enableSpatialization) {
326
- const nodes = this.participantNodes.get(participantId);
327
- if (!nodes) {
328
- return;
329
- }
330
- try {
331
- // Keep input side unchanged; only re-route analyser output.
332
- nodes.analyser.disconnect();
333
- nodes.stereoPanner.disconnect();
334
- if (enableSpatialization) {
335
- nodes.analyser.connect(nodes.stereoPanner);
336
- nodes.stereoPanner.connect(nodes.gain);
337
- }
338
- else {
339
- nodes.analyser.connect(nodes.gain);
340
- }
341
- }
342
- catch (error) {
343
- // Error toggling spatialization - fail silently
344
- }
345
- }
346
- /**
347
- * Update spatial audio position and orientation for a participant
348
- *
349
- * This is called every time we receive position/direction updates from the server.
350
- *
351
- * Position: Where the participant is in 3D space (their location)
352
- * Direction: Which way they're facing (their forward vector)
353
- *
354
- * Example:
355
- * - Position: (x: -200, y: 0, z: 100) = 2m to your left
356
- * - Direction: (x: 0, y: 1, z: 0) = facing forward (away from you)
357
- * - Result: Sound comes from your left, oriented as if speaking away
358
- *
359
- * The Web Audio API's PannerNode uses HRTF to create realistic 3D audio
360
- * based on these parameters plus the listener's position/orientation.
361
- *
362
- * @param participantId Who to update
363
- * @param position Where they are (from socket data)
364
- * @param direction Which way they're facing (from socket data)
365
- */
366
- updateSpatialAudio(participantId, position, direction, _spatialMeta) {
367
- const nodes = this.participantNodes.get(participantId);
368
- if (!nodes) {
369
- return;
370
- }
371
- if (!nodes.panner) {
372
- return;
373
- }
374
- // NOTE: Removed throttle - it was causing audio dropouts
375
- // The smoothing in applyStereoPanning and gain ramping handles click prevention
376
- if (nodes?.panner) {
377
- // POSITION-BASED SPATIAL AUDIO
378
- // All calculations are in world space (datum at 0,0,0)
379
- //
380
- // Step 1: Get speaker head position (with jitter reduction)
381
- const normalizedBodyPosition = this.normalizePositionUnits(position);
382
- // SNAP: Reduce jitter by ignoring micro-movements (<15cm)
383
- const snappedSpeakerPos = this.snapPosition(normalizedBodyPosition, participantId);
384
- const speakerHeadPosition = this.computeHeadPosition(snappedSpeakerPos);
385
- // Use cached listener position if initialized, otherwise use current position
386
- const listenerPos = this.listenerPositionInitialized
387
- ? this.cachedListenerPosition
388
- : this.listenerPosition;
389
- // Step 2: Calculate 3D distance (Euclidean distance from datum-based positions)
390
- // distance = √(Δx² + Δy² + Δz²)
391
- const distance = this.getDistanceBetween(listenerPos, speakerHeadPosition);
392
- // DEBUG: Log distance and gain every 2 seconds
393
- const now = Date.now();
394
- const lastLog = this._spatialDebugTimes.get(participantId) || 0;
395
- if (now - lastLog > 2000) {
396
- this._spatialDebugTimes.set(participantId, now);
397
- const calculatedGainDebug = this.calculateLogarithmicGain(distance);
398
- console.log(`🔊 SPATIAL DEBUG [${participantId.substring(0, 8)}]: dist=${distance.toFixed(2)}m, gain=${calculatedGainDebug}%, listenerInit=${this.listenerPositionInitialized}, listener=(${listenerPos.x.toFixed(1)},${listenerPos.y.toFixed(1)},${listenerPos.z.toFixed(1)}), speaker=(${speakerHeadPosition.x.toFixed(1)},${speakerHeadPosition.y.toFixed(1)},${speakerHeadPosition.z.toFixed(1)})`);
399
- }
400
- // Step 3: Calculate relative vector (speaker relative to listener)
401
- // vecToSource = speaker.pos - listener.pos
402
- const vecToSource = {
403
- x: speakerHeadPosition.x - listenerPos.x,
404
- z: speakerHeadPosition.z - listenerPos.z,
405
- };
406
- // Step 4: Project onto listener's right-ear axis using dot product
407
- // dxLocal = vecToSource · listenerRight
408
- // Positive = sound is to the RIGHT of listener
409
- // Negative = sound is to the LEFT of listener
410
- // Near zero = sound is FRONT or BACK (center)
411
- const dxLocal = vecToSource.x * this.listenerRight.x + vecToSource.z * this.listenerRight.z;
412
- // Calculate dzLocal (forward/back component)
413
- // For CLOCKWISE rotation: Forward = right rotated 90° CW: (x,z) -> (-z,x)
414
- // At yaw=0°: right=(1,0) → forward=(0,1)=+Z ✓
415
- // At yaw=90°: right=(0,-1) → forward=(1,0)=+X ✓
416
- const listenerForward = { x: -this.listenerRight.z, z: this.listenerRight.x };
417
- const dzLocal = vecToSource.x * listenerForward.x + vecToSource.z * listenerForward.z;
418
- // TRUE 360° SPATIAL AUDIO PANNING
419
- // Calculate angle from listener to source using atan2
420
- // This gives full left/right separation:
421
- // - 90° (right side) = pan +1.0 (100% RIGHT ear, 0% LEFT ear)
422
- // - 270° (left side) = pan -1.0 (100% LEFT ear, 0% RIGHT ear)
423
- // - 0° (front) or 180° (back) = pan 0.0 (CENTER)
424
- const angleToSource = Math.atan2(dxLocal, dzLocal); // Radians: -π to +π
425
- const rawPanValue = Math.sin(angleToSource); // -1 to +1
426
- // SMOOTH THE PAN VALUE to prevent random left/right jumping
427
- const smoothedPanValue = this.smoothPanValue(participantId, rawPanValue);
428
- const panning = this.panningFromPanValue(smoothedPanValue, dxLocal);
429
- // Calculate gain based on distance
430
- const calculatedGain = this.calculateLogarithmicGain(distance);
431
- const gainValue = calculatedGain / 100; // Convert to 0-1 range
432
- // DEBUG: Log RSD processing in SDK with ALL data for matching with Vue
433
- console.log(`[SDK-RSD] id=${participantId.substring(0, 8)} | speakerPos=(${speakerHeadPosition.x.toFixed(1)},${speakerHeadPosition.y.toFixed(1)},${speakerHeadPosition.z.toFixed(1)}) | listenerPos=(${listenerPos.x.toFixed(1)},${listenerPos.y.toFixed(1)},${listenerPos.z.toFixed(1)}) | dist=${distance.toFixed(2)}m | gain=${(gainValue * 100).toFixed(0)}% | pan=${smoothedPanValue.toFixed(2)} (L:${panning.left.toFixed(0)}% R:${panning.right.toFixed(0)}%)`);
434
- // Apply panning
435
- this.applyStereoPanning(participantId, panning);
436
- // Apply gain with Web Audio's built-in smoothing (setTargetAtTime)
437
- // This provides all the smoothing needed - no additional caching required
438
- const currentTime = this.audioContext.currentTime;
439
- try {
440
- // setTargetAtTime provides smooth exponential interpolation
441
- // Time constant 0.1 = ~300ms to settle
442
- nodes.gain.gain.setTargetAtTime(gainValue, currentTime, 0.1);
443
- }
444
- catch (err) {
445
- // Fallback: If scheduling fails, set value directly (rare edge case)
446
- nodes.gain.gain.value = gainValue;
447
- }
448
- }
449
- }
450
- /**
451
- * Mute or unmute a participant's audio
452
- * Used for channel-based audio routing (huddle vs spatial)
453
- * @param participantId The participant to mute/unmute
454
- * @param muted True to mute, false to unmute
455
- */
456
- setParticipantMuted(participantId, muted) {
457
- const nodes = this.participantNodes.get(participantId);
458
- if (!nodes) {
459
- // Audio nodes don't exist yet - this is normal if called before consumer is set up
460
- return;
461
- }
462
- if (nodes.gain) {
463
- // Smooth ramp to 0 (muted) or 1 (unmuted) - prevents click
464
- // Time constant 0.05 = ~150ms to reach target (3 time constants)
465
- nodes.gain.gain.setTargetAtTime(muted ? 0 : 1, this.audioContext.currentTime, 0.05);
466
- }
467
- }
468
- /**
469
- * Master mute/unmute for ALL audio output
470
- * Use this to prevent audio from playing until user explicitly joins the space.
471
- * @param muted True to mute all audio, false to unmute
472
- */
473
- setMasterMuted(muted) {
474
- this.isMasterMuted = muted;
475
- // Smooth transition for master gain to prevent clicks
476
- const targetGain = muted ? 0 : 1;
477
- this.masterGainNode.gain.setTargetAtTime(targetGain, this.audioContext.currentTime, 0.05 // ~150ms transition
478
- );
479
- }
480
- /**
481
- * Get current master mute state
482
- * @returns True if master mute is enabled
483
- */
484
- getMasterMuted() {
485
- return this.isMasterMuted;
486
- }
487
- /**
488
- * Update listener position and orientation
489
- * The \"listener\" is YOU - where your ears/head are positioned
490
- *
491
- * @param position Your HEAD position (camera position), not body position!
492
- * @param orientation Which way your head is facing (forward and up vectors)
493
- */
494
- setListenerPosition(position, orientation) {
495
- const normalizedPosition = this.normalizePositionUnits(position);
496
- this.applyListenerTransform(normalizedPosition, orientation);
497
- }
498
- /**
499
- * POSITION-ONLY MODE: Set listener HEAD position (no direction needed)
500
- * IMPORTANT: Uses CAMERA position (head) as listener, not body position!
501
- *
502
- * @param listenerPos Player body position (for reference, not used as listener)
503
- * @param cameraPos Camera/HEAD position - THIS is the actual listener position for audio
504
- * @param lookAtPos Look-at position (where camera is pointing) - stored but not used for panning
505
- * @param rot Rotation data (pitch, yaw, roll) - stored but not used for panning
506
- */
507
- setListenerFromLSD(listenerPos, cameraPos, lookAtPos, rot, localParticipantId) {
508
- // DEBUG: Log LSD data received in SDK
509
- console.log(`[SDK-LSD] cam=(${cameraPos.x.toFixed(1)},${cameraPos.y.toFixed(1)},${cameraPos.z.toFixed(1)}) rot=(${rot?.x?.toFixed(1)},${rot?.y?.toFixed(1)},${rot?.z?.toFixed(1)})`);
510
- // Store local participant ID for logging
511
- if (localParticipantId && !this.localParticipantId) {
512
- this.localParticipantId = localParticipantId;
513
- }
514
- // USE CAMERA POSITION AS LISTENER (head position, not body!)
515
- const normalizedListener = this.normalizePositionUnits(cameraPos);
516
- // SNAP: Reduce jitter by ignoring micro-movements (<15cm)
517
- // This prevents gain/pan fluctuation when listener is "stationary"
518
- const snappedListener = this.snapPosition(normalizedListener, 'listener');
519
- // Store listener position (used for X-axis panning and distance calculation)
520
- this.listenerPosition = snappedListener;
521
- // CRITICAL: Derive listener right-vector from yaw (rot.y in degrees)
522
- // This determines which ear hears the sound based on 360-degree rotation
523
- //
524
- // COORDINATE SYSTEM: X=right, Y=up, Z=forward (datum at 0,0,0)
525
- //
526
- // Rotation convention (right-hand rule around Y-axis):
527
- // rot.y = 0° → facing +Z (forward/north), right ear points to +X
528
- // rot.y = 90° → facing +X (right/east), right ear points to -Z
529
- // rot.y = 180° → facing -Z (backward/south), right ear points to -X
530
- // rot.y = 270° → facing -X (left/west), right ear points to +Z
531
- if (rot && typeof rot.y === "number") {
532
- const yawRad = (rot.y * Math.PI) / 180;
533
- // Right ear vector - CLOCKWISE rotation (standard game engine convention)
534
- // At yaw=0: facing +Z, right ear at +X → (1, 0)
535
- // At yaw=90: facing +X, right ear at -Z → (0, -1)
536
- // At yaw=180: facing -Z, right ear at -X → (-1, 0)
537
- // At yaw=270: facing -X, right ear at +Z → (0, 1)
538
- this.listenerRight = {
539
- x: Math.cos(yawRad),
540
- z: -Math.sin(yawRad), // NEGATIVE for clockwise rotation
541
- };
542
- }
543
- else {
544
- // Fallback: assume facing forward with right ear to +X
545
- this.listenerRight = { x: 1, z: 0 };
546
- }
547
- // Position-only mode: No forward vector calculations needed
548
- // Panning is calculated purely from X-axis position difference in updateSpatialAudio()
549
- // LEGACY CODE BELOW - Kept for reference but not executed
550
- if (false) {
551
- const normalizedCamera = this.normalizePositionUnits(cameraPos);
552
- const normalizedLookAt = this.normalizePositionUnits(lookAtPos);
553
- // Calculate forward vector (from camera to look-at point)
554
- const forwardX = normalizedLookAt.x - normalizedCamera.x;
555
- const forwardY = normalizedLookAt.y - normalizedCamera.y;
556
- const forwardZ = normalizedLookAt.z - normalizedCamera.z;
557
- // Normalize forward vector
558
- const forwardLen = Math.sqrt(forwardX * forwardX + forwardY * forwardY + forwardZ * forwardZ);
559
- if (forwardLen < 0.001) {
560
- return;
561
- }
562
- const fwdX = forwardX / forwardLen;
563
- const fwdY = forwardY / forwardLen;
564
- const fwdZ = forwardZ / forwardLen;
565
- // Calculate right vector (cross product of world up and forward)
566
- // Web Audio API uses Y-up coordinate system, Unreal uses Z-up
567
- // We need to transform: Unreal (X,Y,Z) -> WebAudio (X,Z,-Y)
568
- const worldUp = { x: 0, y: 1, z: 0 }; // Web Audio Y-up
569
- const rightX = worldUp.y * fwdZ - worldUp.z * fwdY;
570
- const rightY = worldUp.z * fwdX - worldUp.x * fwdZ;
571
- const rightZ = worldUp.x * fwdY - worldUp.y * fwdX;
572
- const rightLen = Math.sqrt(rightX * rightX + rightY * rightY + rightZ * rightZ);
573
- if (rightLen < 0.001) {
574
- // Forward is parallel to world up, use fallback
575
- this.applyListenerTransform(normalizedListener, {
576
- forwardX: fwdX,
577
- forwardY: fwdY,
578
- forwardZ: fwdZ,
579
- upX: 0,
580
- upY: 1,
581
- upZ: 0,
582
- });
583
- return;
584
- }
585
- const rX = rightX / rightLen;
586
- const rY = rightY / rightLen;
587
- const rZ = rightZ / rightLen;
588
- // Calculate true up vector (cross product of forward and right)
589
- const upX = fwdY * rZ - fwdZ * rY;
590
- const upY = fwdZ * rX - fwdX * rZ;
591
- const upZ = fwdX * rY - fwdY * rX;
592
- this.applyListenerTransform(normalizedListener, {
593
- forwardX: fwdX,
594
- forwardY: fwdY,
595
- forwardZ: fwdZ,
596
- upX,
597
- upY,
598
- upZ,
599
- });
600
- } // End of legacy code block
601
- }
602
- applyListenerTransform(normalizedPosition, orientation) {
603
- const { listener } = this.audioContext;
604
- if (!listener) {
605
- return;
606
- }
607
- this.listenerPosition = { ...normalizedPosition };
608
- this.listenerInitialized = true;
609
- this.listenerDirection = {
610
- forward: {
611
- x: orientation.forwardX,
612
- y: orientation.forwardY,
613
- z: orientation.forwardZ,
614
- },
615
- up: {
616
- x: orientation.upX,
617
- y: orientation.upY,
618
- z: orientation.upZ,
619
- },
620
- };
621
- if (listener.positionX) {
622
- listener.positionX.setValueAtTime(normalizedPosition.x, this.audioContext.currentTime);
623
- listener.positionY.setValueAtTime(normalizedPosition.y, this.audioContext.currentTime);
624
- listener.positionZ.setValueAtTime(normalizedPosition.z, this.audioContext.currentTime);
625
- }
626
- if (listener.forwardX) {
627
- listener.forwardX.setValueAtTime(orientation.forwardX, this.audioContext.currentTime);
628
- listener.forwardY.setValueAtTime(orientation.forwardY, this.audioContext.currentTime);
629
- listener.forwardZ.setValueAtTime(orientation.forwardZ, this.audioContext.currentTime);
630
- listener.upX.setValueAtTime(orientation.upX, this.audioContext.currentTime);
631
- listener.upY.setValueAtTime(orientation.upY, this.audioContext.currentTime);
632
- listener.upZ.setValueAtTime(orientation.upZ, this.audioContext.currentTime);
633
- }
634
- }
635
- removeParticipant(participantId) {
636
- // Stop monitoring
637
- if (this.monitoringIntervals.has(participantId)) {
638
- clearInterval(this.monitoringIntervals.get(participantId));
639
- this.monitoringIntervals.delete(participantId);
640
- }
641
- const nodes = this.participantNodes.get(participantId);
642
- if (nodes) {
643
- nodes.source.disconnect();
644
- nodes.panner.disconnect();
645
- nodes.proximityGain.disconnect();
646
- nodes.monoSplitter.disconnect();
647
- nodes.monoGainL.disconnect();
648
- nodes.monoGainR.disconnect();
649
- nodes.monoMerger.disconnect();
650
- nodes.stereoUpmixer.disconnect();
651
- nodes.stereoPanner.disconnect();
652
- nodes.analyser.disconnect();
653
- nodes.gain.disconnect();
654
- if (nodes.denoiseNode) {
655
- nodes.denoiseNode.disconnect();
656
- }
657
- // DON'T stop tracks - they're managed by MediaSoup
658
- // Just disconnect the audio nodes from the graph
659
- // nodes.stream.getTracks().forEach((track) => track.stop());
660
- this.participantNodes.delete(participantId);
661
- // Clean up smoothed pan value tracking
662
- this.smoothedPanValues.delete(participantId);
663
- // Clean up cached speaker position
664
- this.cachedSpeakerPositions.delete(participantId);
665
- }
666
- }
667
- async resumeAudioContext() {
668
- if (this.audioContext.state === "suspended") {
669
- await this.audioContext.resume();
670
- }
671
- }
672
- getAudioContextState() {
673
- return this.audioContext.state;
674
- }
675
- getDistanceConfig() {
676
- return {
677
- refDistance: this.options.distance?.refDistance ?? 0.5, // Normal volume within 0.5m (very close)
678
- maxDistance: this.options.distance?.maxDistance ?? 15, // Cannot hear after 15 meters
679
- rolloffFactor: this.options.distance?.rolloffFactor ?? 2.5, // Aggressive falloff for realistic distance attenuation
680
- unit: this.options.distance?.unit ?? "auto",
681
- };
682
- }
683
- applySpatialBoostIfNeeded(position) {
684
- if (!this.listenerInitialized) {
685
- return position;
686
- }
687
- const boost = (this.options.distance?.rolloffFactor || 1) * 0.85;
688
- if (!isFinite(boost) || boost <= 1.01) {
689
- return position;
690
- }
691
- const listener = this.listenerPosition;
692
- return {
693
- x: listener.x + (position.x - listener.x) * boost,
694
- y: listener.y + (position.y - listener.y) * Math.min(boost, 1.2),
695
- z: listener.z + (position.z - listener.z) * boost,
696
- };
697
- }
698
- getDistanceBetween(a, b) {
699
- const dx = b.x - a.x;
700
- const dy = b.y - a.y;
701
- const dz = b.z - a.z;
702
- return Math.sqrt(dx * dx + dy * dy + dz * dz);
703
- }
704
- calculateDistanceGain(config, distance) {
705
- if (!this.listenerInitialized) {
706
- return 1;
707
- }
708
- if (distance <= config.refDistance) {
709
- return 1;
710
- }
711
- if (distance >= config.maxDistance) {
712
- return 0;
713
- }
714
- const normalized = (distance - config.refDistance) /
715
- Math.max(config.maxDistance - config.refDistance, 0.001);
716
- const shaped = Math.pow(Math.max(0, 1 - normalized), Math.max(1.2, config.rolloffFactor * 1.05));
717
- return Math.min(1, Math.max(0.01, shaped));
718
- }
719
- normalizePositionUnits(position) {
720
- const distanceConfig = this.getDistanceConfig();
721
- if (distanceConfig.unit === "meters") {
722
- return { ...position };
723
- }
724
- if (distanceConfig.unit === "centimeters") {
725
- return {
726
- x: position.x / 100,
727
- y: position.y / 100,
728
- z: position.z / 100,
729
- };
730
- }
731
- const maxAxis = Math.max(Math.abs(position.x), Math.abs(position.y), Math.abs(position.z));
732
- if (maxAxis > 50) {
733
- // Likely centimeters coming from server
734
- return {
735
- x: position.x / 100,
736
- y: position.y / 100,
737
- z: position.z / 100,
738
- };
739
- }
740
- return { ...position };
741
- }
742
- /**
743
- * Snap position to grid to prevent jitter from micro-movements
744
- * If the position hasn't changed significantly, return the cached position
745
- * This prevents gain/pan fluctuation when users are "stationary"
746
- *
747
- * @param position New incoming position
748
- * @param participantId Participant ID for caching (use 'listener' for listener)
749
- * @returns Snapped position (either new or cached)
750
- */
751
- snapPosition(position, participantId) {
752
- const isListener = participantId === 'listener';
753
- const cached = isListener
754
- ? this.cachedListenerPosition
755
- : this.cachedSpeakerPositions.get(participantId);
756
- // If no cached position or first time, use this one as the baseline
757
- const isFirstTime = !cached || (!this.listenerPositionInitialized && isListener);
758
- if (isFirstTime) {
759
- if (isListener) {
760
- this.cachedListenerPosition = { ...position };
761
- this.listenerPositionInitialized = true;
762
- }
763
- else {
764
- this.cachedSpeakerPositions.set(participantId, { ...position });
765
- }
766
- return position;
767
- }
768
- // Calculate how far the position has moved
769
- const dx = position.x - cached.x;
770
- const dy = position.y - cached.y;
771
- const dz = position.z - cached.z;
772
- const movedDistance = Math.sqrt(dx * dx + dy * dy + dz * dz);
773
- // If moved more than threshold, update cache and use new position
774
- if (movedDistance > this.positionSnapThreshold) {
775
- if (isListener) {
776
- this.cachedListenerPosition = { ...position };
777
- }
778
- else {
779
- this.cachedSpeakerPositions.set(participantId, { ...position });
780
- }
781
- return position;
782
- }
783
- // Position hasn't changed significantly - return cached position
784
- // This prevents jitter in gain/pan calculations
785
- return cached;
786
- }
787
- getVectorFromListener(targetPosition) {
788
- if (!this.listenerInitialized) {
789
- return { ...targetPosition };
790
- }
791
- return {
792
- x: targetPosition.x - this.listenerPosition.x,
793
- y: targetPosition.y - this.listenerPosition.y,
794
- z: targetPosition.z - this.listenerPosition.z,
795
- };
796
- }
797
- applyDirectionalSuppression(participantId, distance, vectorToSource) {
798
- const nodes = this.participantNodes.get(participantId);
799
- if (!nodes) {
800
- return;
801
- }
802
- const clarityScore = this.calculateClarityScore(distance, vectorToSource);
803
- const targetGain = 0.48 + clarityScore * 0.72; // 0.48 → 1.20
804
- // Only adjust gain based on angle, not frequency
805
- nodes.proximityGain.gain.setTargetAtTime(targetGain, this.audioContext.currentTime, 0.08);
806
- // Analyze voice and adjust highpass filter dynamically (85-300Hz)
807
- this.adjustVoiceAdaptiveFilter(participantId);
808
- }
809
- /**
810
- * Dynamically adjust highpass filter based on voice characteristics
811
- * Analyzes audio spectrum and sets filter between 85-300Hz
812
- */
813
- adjustVoiceAdaptiveFilter(participantId) {
814
- const nodes = this.participantNodes.get(participantId);
815
- if (!nodes?.analyser) {
816
- return;
817
- }
818
- const bufferLength = nodes.analyser.frequencyBinCount;
819
- const dataArray = new Uint8Array(bufferLength);
820
- nodes.analyser.getByteFrequencyData(dataArray);
821
- // Calculate spectral centroid in low frequency range (0-500Hz)
822
- const sampleRate = this.audioContext.sampleRate;
823
- const nyquist = sampleRate / 2;
824
- const binWidth = nyquist / bufferLength;
825
- let weightedSum = 0;
826
- let totalEnergy = 0;
827
- const maxBin = Math.floor(500 / binWidth); // Only analyze up to 500Hz
828
- for (let i = 0; i < Math.min(maxBin, bufferLength); i++) {
829
- const frequency = i * binWidth;
830
- const magnitude = dataArray[i] / 255.0;
831
- weightedSum += frequency * magnitude;
832
- totalEnergy += magnitude;
833
- }
834
- if (totalEnergy > 0.01) {
835
- const centroid = weightedSum / totalEnergy;
836
- // Map centroid to highpass frequency (85-300Hz)
837
- // Lower centroid = deeper voice = use lower highpass (preserve bass)
838
- // Higher centroid = higher voice = use higher highpass (remove mud)
839
- const targetFreq = Math.max(85, Math.min(300, 85 + (centroid - 100) * 0.5));
840
- nodes.highpassFilter.frequency.setTargetAtTime(targetFreq, this.audioContext.currentTime, 0.15 // Smooth transition
841
- );
842
- }
843
- }
844
- calculateClarityScore(distance, vectorToSource) {
845
- const proximityWeight = this.calculateProximityWeight(distance);
846
- const focusWeight = this.calculateDirectionFocus(vectorToSource);
847
- return this.clamp(0.2 + proximityWeight * 0.6 + focusWeight * 0.2, 0, 1);
848
- }
849
- calculateProximityWeight(distance) {
850
- const closeRange = 0.85;
851
- const fadeRange = 18;
852
- if (distance <= closeRange) {
853
- return 1;
854
- }
855
- if (distance >= fadeRange) {
856
- return 0;
857
- }
858
- return 1 - (distance - closeRange) / (fadeRange - closeRange);
859
- }
860
- calculateDirectionFocus(vectorToSource) {
861
- if (!this.listenerInitialized) {
862
- return 0.5;
863
- }
864
- const forward = this.normalizeVector(this.listenerDirection.forward);
865
- const source = this.normalizeVector(vectorToSource, { x: 0, y: 0, z: 1 });
866
- const dot = forward.x * source.x + forward.y * source.y + forward.z * source.z;
867
- return this.clamp((dot + 1) / 2, 0, 1);
868
- }
869
- normalizeVector(vector, fallback = { x: 0, y: 0, z: 1 }) {
870
- const length = Math.hypot(vector.x, vector.y, vector.z);
871
- if (length < 1e-4) {
872
- return { ...fallback };
873
- }
874
- return {
875
- x: vector.x / length,
876
- y: vector.y / length,
877
- z: vector.z / length,
878
- };
879
- }
880
- clamp(value, min, max) {
881
- return Math.min(max, Math.max(min, value));
882
- }
883
- isDenoiserEnabled() {
884
- // DISABLED BY DEFAULT: The AudioWorklet denoiser causes "chit-chit/pit" noise artifacts
885
- // Set options.denoiser.enabled = true explicitly to enable
886
- return this.options.denoiser?.enabled === true;
887
- }
888
- /**
889
- * Compute estimated head/mouth position from body position
890
- * Body position is typically at feet/base - add head height offset
891
- * Average human head height: 1.6m (adjustable based on avatar)
892
- */
893
- computeHeadPosition(bodyPosition) {
894
- return {
895
- x: bodyPosition.x,
896
- y: bodyPosition.y + 1.6, // Add typical head height in meters
897
- z: bodyPosition.z,
898
- };
899
- }
900
- calculatePanning(angle) {
901
- const rad = (angle * Math.PI) / 180;
902
- const panValue = Math.sin(rad);
903
- return this.panningFromPanValue(panValue);
904
- }
905
- panningFromPanValue(pan, dxLocal) {
906
- const clamped = this.clamp(pan, -1, 1);
907
- // Map pan to asymmetric gains while keeping center at 100/100
908
- const left = 100 * (1 - Math.max(0, clamped));
909
- const right = 100 * (1 + Math.min(0, clamped));
910
- return {
911
- left: this.clamp(left, 0, 100),
912
- right: this.clamp(right, 0, 100),
913
- };
914
- }
915
/**
 * SMOOTH PAN VALUE to prevent random left/right jumping.
 * Uses an exponential moving average (EMA) to smooth out jittery position data,
 * with three stabilizers layered on top:
 *   - a DEAD-ZONE around center (panCenterDeadZone) that snaps near-zero pans
 *     to exactly 0, preventing face-to-face left/right oscillation;
 *   - a change threshold (panChangeThreshold) below which the previous value
 *     is kept unchanged, suppressing micro-jitter;
 *   - adaptive smoothing factors: moderate (0.5) near center, heavy (0.7) by
 *     default, and very heavy (0.85) for full sign flips, which are treated
 *     as likely jitter outliers.
 * @param participantId The participant to smooth pan for
 * @param newPanValue The new calculated pan value (-1 to +1)
 * @returns Smoothed pan value
 */
smoothPanValue(participantId, newPanValue) {
    const previousPan = this.smoothedPanValues.get(participantId);
    // DEAD-ZONE: If new pan is very close to center, snap to exactly 0.
    // This prevents left/right oscillation when standing face-to-face.
    let targetPan = newPanValue;
    if (Math.abs(newPanValue) < this.panCenterDeadZone) {
        targetPan = 0;
    }
    // If no previous value, initialize with current value.
    if (previousPan === undefined) {
        this.smoothedPanValues.set(participantId, targetPan);
        return targetPan;
    }
    // Calculate the change from previous pan.
    const panChange = Math.abs(targetPan - previousPan);
    // If change is below threshold, keep previous value (prevents micro-jitter).
    if (panChange < this.panChangeThreshold) {
        return previousPan;
    }
    // HEAVY SMOOTHING: Always use a high smoothing factor to prevent audio
    // disruption. Even for large changes we want smooth transitions to avoid
    // audio dropouts; the StereoPannerNode's setTargetAtTime provides
    // additional smoothing downstream.
    let effectiveSmoothingFactor = 0.7; // high smoothing = slow response but stable audio
    const signFlipped = (previousPan > 0 && targetPan < 0) || (previousPan < 0 && targetPan > 0);
    const bothNearCenter = Math.abs(previousPan) < 0.2 && Math.abs(targetPan) < 0.2;
    // Only reduce smoothing for center transitions (natural movement).
    if (bothNearCenter) {
        // Near center - use moderate smoothing.
        effectiveSmoothingFactor = 0.5;
    }
    else if (signFlipped && panChange > 1.0) {
        // FULL FLIP (e.g., +0.67 to -0.82) - this is likely jitter, use HEAVY
        // smoothing to reject the outlier and keep audio stable.
        effectiveSmoothingFactor = 0.85; // very heavy smoothing for suspected jitter
    }
    // Apply exponential moving average smoothing:
    //   smoothedValue = previousValue * factor + newValue * (1 - factor)
    // Higher factor = more smoothing (slower response).
    const smoothedPan = previousPan * effectiveSmoothingFactor + targetPan * (1 - effectiveSmoothingFactor);
    // Apply dead-zone to the final smoothed value as well.
    const finalPan = Math.abs(smoothedPan) < this.panCenterDeadZone ? 0 : smoothedPan;
    // Store for next update.
    this.smoothedPanValues.set(participantId, finalPan);
    return finalPan;
}
969
- computeAzimuthFromPositions(listener, source) {
970
- const vx = source.x - listener.x;
971
- const vz = source.z - listener.z;
972
- const angleRad = Math.atan2(vx, vz); // 0° = forward (Z+), 90° = right (X+)
973
- const deg = (angleRad * 180) / Math.PI;
974
- const normalized = (deg + 360) % 360;
975
- return normalized;
976
- }
977
/**
 * OLD METHOD - Kept for reference but not used in position-only mode.
 * Calculate angle between listener and sound source in degrees (0-360):
 * 0° = front, 90° = right, 180° = back, 270° = left.
 *
 * @param listenerPos Listener position ({x, y, z})
 * @param sourcePos Sound source position ({x, y, z})
 * @param listenerForward Listener forward vector; projected onto X/Z before use
 * @returns Angle in degrees [0, 360); 0 when source coincides with listener
 */
calculateAngle(listenerPos, sourcePos, listenerForward) {
    // IMPORTANT: In Odyssey runtime coords, Y behaves like height.
    // Use X/Z as the horizontal plane for 360° panning.
    const vx = sourcePos.x - listenerPos.x;
    const vz = sourcePos.z - listenerPos.z;
    // Project listener forward onto X/Z plane (prevents flips when looking up/down).
    let fx = listenerForward.x;
    let fz = listenerForward.z;
    const fLen = Math.hypot(fx, fz);
    if (fLen < 1e-4) {
        // Fallback: if forward is nearly vertical/invalid, assume world-forward (+Z).
        fx = 0;
        fz = 1;
    }
    else {
        fx /= fLen;
        fz /= fLen;
    }
    const vLen = Math.hypot(vx, vz);
    if (vLen < 1e-4) {
        // Source is (horizontally) at the listener — treat as directly in front.
        return 0;
    }
    const nx = vx / vLen;
    const nz = vz / vLen;
    // Signed angle from forward->source.
    // cross(forward, source) sign determines left/right.
    // We want: 0°=front, 90°=right, 180°=back, 270°=left.
    const dot = fx * nx + fz * nz;
    const cross = fx * nz - fz * nx;
    let radians = Math.atan2(cross, dot);
    // atan2 gives [-π, π]; map to [0, 2π).
    if (radians < 0) {
        radians += Math.PI * 2;
    }
    const degrees = (radians * 180) / Math.PI;
    // Convert to our convention where +90 is to the right.
    // The cross sign above produces +90 for left in a standard XZ system,
    // so we invert.
    const angle = (360 - degrees) % 360;
    return angle;
}
1023
- /**
1024
- * Calculate gain based on distance using logarithmic scale
1025
- * Distance range: 0.5m to 15m
1026
- * Gain range: 100% to 20% (reduced minimum for audibility at distance)
1027
- * Uses smoother curve for more consistent audio across distances
1028
- *
1029
- * TUNED FOR CONSISTENT VOICE CLARITY:
1030
- * - Near range (0-3m): Full volume for clear conversation
1031
- * - Medium range (3-8m): Gradual falloff, still easily audible
1032
- * - Far range (8-15m): Soft but still present
1033
- */
1034
- /**
1035
- * Calculate gain based on distance - AGGRESSIVE FALLOFF
1036
- *
1037
- * Uses inverse-square law (realistic sound propagation) with floor:
1038
- * Gain = 1 / (1 + k * distance²)
1039
- *
1040
- * Distance → Gain mapping:
1041
- * - 0-1m → 100% (full volume, very close)
1042
- * - 2m → ~80%
1043
- * - 3m → ~55%
1044
- * - 5m → ~30%
1045
- * - 8m → ~15%
1046
- * - 10m+ → 5% (minimum, barely audible)
1047
- */
1048
- calculateLogarithmicGain(distance) {
1049
- const minDistance = 1.0; // Full volume at 1m or closer
1050
- const minGain = 15; // Minimum 15% at far distances (still audible)
1051
- const falloffRate = 0.12; // Controls how fast volume drops (gentler)
1052
- // Full volume within minimum distance
1053
- if (distance <= minDistance)
1054
- return 100;
1055
- // Inverse square falloff: gain = 1 / (1 + k * d²)
1056
- // This models realistic sound propagation in air
1057
- const effectiveDistance = distance - minDistance;
1058
- const attenuation = 1 / (1 + falloffRate * effectiveDistance * effectiveDistance);
1059
- // Scale to percentage range: minGain to 100
1060
- const gain = minGain + attenuation * (100 - minGain);
1061
- return Math.round(gain);
1062
- }
1063
- // NOTE: smoothGainValue removed - Web Audio's setTargetAtTime provides sufficient smoothing
1064
- // The previous rate-limiting approach was causing gain to get stuck at low values
1065
- /**
1066
- * Apply stereo panning to participant audio using StereoPannerNode
1067
- * This provides STABLE left-right panning without jitter
1068
- */
1069
- applyStereoPanning(participantId, panning) {
1070
- const nodes = this.participantNodes.get(participantId);
1071
- if (!nodes?.stereoPanner)
1072
- return;
1073
- // Convert left/right percentages to pan value (-1 to +1)
1074
- // If left=100, right=0 → pan = -1 (full left)
1075
- // If left=0, right=100 → pan = +1 (full right)
1076
- // If left=100, right=100 → pan = 0 (center)
1077
- const leftRatio = panning.left / 100;
1078
- const rightRatio = panning.right / 100;
1079
- // Calculate pan position
1080
- let panValue = 0;
1081
- if (leftRatio + rightRatio > 0) {
1082
- panValue = (rightRatio - leftRatio);
1083
- }
1084
- // NO RATE LIMITING - setTargetAtTime provides smooth transitions
1085
- // Rate limiting was causing sluggish panning when turning quickly
1086
- // The smoothPanValue() function already handles jitter reduction
1087
- // Apply pan with smooth transition
1088
- const currentTime = this.audioContext.currentTime;
1089
- // Use shorter time constant (0.05 = ~150ms to settle) for responsive panning
1090
- // This is smooth enough to prevent clicks but fast enough to track movement
1091
- nodes.stereoPanner.pan.setTargetAtTime(panValue, currentTime, 0.05);
1092
- }
1093
- async ensureDenoiseWorklet() {
1094
- if (!this.isDenoiserEnabled()) {
1095
- return;
1096
- }
1097
- if (!("audioWorklet" in this.audioContext)) {
1098
- this.options.denoiser = {
1099
- ...(this.options.denoiser || {}),
1100
- enabled: false,
1101
- };
1102
- return;
1103
- }
1104
- if (this.denoiseWorkletReady) {
1105
- return this.denoiseWorkletReady;
1106
- }
1107
- const processorSource = `class OdysseyDenoiseProcessor extends AudioWorkletProcessor {
1108
- constructor(options) {
1109
- super();
1110
- const cfg = (options && options.processorOptions) || {};
1111
- this.enabled = cfg.enabled !== false;
1112
- // TUNED FOR MAXIMUM VOICE CLARITY: Extremely gentle - NEVER cut speech
1113
- this.threshold = this._sanitize(cfg.threshold, 0.0005, 0.05, 0.002); // Very low threshold
1114
- this.noiseFloor = this._sanitize(cfg.noiseFloor, 0.0001, 0.02, 0.0005); // Very low noise floor
1115
- this.attack = this._sanitize(cfg.attack, 0.01, 0.9, 0.15); // Faster attack to catch speech onset
1116
- this.release = this._sanitize(cfg.release, 0.01, 0.95, 0.35); // Slower release - CRITICAL for word endings
1117
- this.holdSamples = Math.max(
1118
- 8,
1119
- Math.round(
1120
- sampleRate * this._sanitize(cfg.holdMs, 10, 600, 200) / 1000 // LONGER hold (200ms) - prevents mid-word cuts
1121
- )
1122
- );
1123
- // maxReduction: VERY GENTLE - only reduce obvious noise, never speech
1124
- this.maxReduction = this._sanitize(cfg.maxReduction, 0.05, 0.5, 0.15); // CRITICAL: Very low = preserve all speech
1125
- this.hissCut = this._sanitize(cfg.hissCut, 0, 1, 0.2); // Less aggressive hiss cut
1126
- this.expansionRatio = this._sanitize(cfg.expansionRatio, 1.05, 3, 1.1); // Very gentle expansion
1127
- this.learnRate = this._sanitize(cfg.learnRate, 0.001, 0.2, 0.04); // Slower learning
1128
- this.voiceBoost = this._sanitize(cfg.voiceBoost, 0, 1, 0.3);
1129
- this.voiceSensitivity = this._sanitize(cfg.voiceSensitivity, 0.02, 0.9, 0.15); // More sensitive to voice
1130
- this.voiceEnhancement = cfg.voiceEnhancement === true;
1131
- this.speechBoost = this._sanitize(cfg.speechBoost, 0, 1.5, 0.2);
1132
- this.highBandGate = this._sanitize(cfg.highBandGate, 0, 1, 0.25); // Less aggressive high band gate
1133
- this.highBandAttack = this._sanitize(cfg.highBandAttack, 0.01, 1, 0.1);
1134
- this.highBandRelease = this._sanitize(cfg.highBandRelease, 0.01, 1, 0.05);
1135
- this.silenceFloor = this._sanitize(cfg.silenceFloor, 0.0001, 0.01, 0.0004); // Much lower
1136
- this.gateGraceSamples = Math.round(sampleRate * 0.4); // Longer grace period (400ms)
1137
- this.postSpeechHoldSamples = Math.round(sampleRate * 0.35); // Longer post-speech hold (350ms)
1138
- this.silenceHoldSamples = Math.max(
1139
- 8,
1140
- Math.round(
1141
- sampleRate * this._sanitize(cfg.silenceHoldMs, 80, 2000, 300) / 1000 // Much longer silence hold
1142
- )
1143
- );
1144
- this.silenceReleaseSamples = Math.max(
1145
- 8,
1146
- Math.round(
1147
- sampleRate * this._sanitize(cfg.silenceReleaseMs, 50, 1000, 150) / 1000 // Longer release
1148
- )
1149
- );
1150
- this.historySize = this.voiceEnhancement ? 512 : 0;
1151
- this.channelState = [];
1152
- this.hfAlpha = Math.exp(-2 * Math.PI * 3200 / sampleRate);
1153
- this.feedbackDetectionWindow = Math.round(sampleRate * 0.8); // Longer feedback window
1154
- this.feedbackSamples = 0;
1155
- this.feedbackActive = false;
1156
- // NEW: Minimum gain floor to NEVER completely mute
1157
- // RAISED from 0.25 to 0.5 to preserve ALL speech
1158
- this.minGainFloor = 0.5; // Always let at least 50% through - preserves soft consonants
1159
- }
1160
-
1161
- _sanitize(value, min, max, fallback) {
1162
- if (typeof value !== 'number' || !isFinite(value)) {
1163
- return fallback;
1164
- }
1165
- return Math.min(max, Math.max(min, value));
1166
- }
1167
-
1168
- _ensureState(index) {
1169
- if (!this.channelState[index]) {
1170
- this.channelState[index] = {
1171
- envelope: this.noiseFloor,
1172
- noise: this.noiseFloor,
1173
- gain: 1,
1174
- quietSamples: 0,
1175
- lpState: 0,
1176
- history: this.voiceEnhancement ? new Float32Array(this.historySize) : null,
1177
- historyIndex: 0,
1178
- historyFilled: 0,
1179
- tempBuffer: this.voiceEnhancement ? new Float32Array(this.historySize) : null,
1180
- voiceConfidence: 0,
1181
- silenceSamples: 0,
1182
- silenceReleaseCounter: 0,
1183
- isSilenced: false,
1184
- muteGain: 1,
1185
- graceSamplesRemaining: this.gateGraceSamples,
1186
- postSpeechHold: 0,
1187
- highBandEnv: this.silenceFloor,
1188
- broadbandEnv: this.silenceFloor,
1189
- feedbackCounter: 0,
1190
- feedbackSuppress: 1.0,
1191
- avgLevel: 0,
1192
- levelVariance: 0,
1193
- };
1194
- }
1195
- return this.channelState[index];
1196
- }
1197
-
1198
- _pushHistory(state, sample) {
1199
- if (!this.voiceEnhancement || !state.history) {
1200
- return;
1201
- }
1202
- state.history[state.historyIndex] = sample;
1203
- state.historyIndex = (state.historyIndex + 1) % state.history.length;
1204
- if (state.historyFilled < state.history.length) {
1205
- state.historyFilled++;
1206
- }
1207
- }
1208
-
1209
- _updateVoiceConfidence(state) {
1210
- if (!this.voiceEnhancement || !state.history || !state.tempBuffer) {
1211
- state.voiceConfidence += (0 - state.voiceConfidence) * 0.2;
1212
- return state.voiceConfidence;
1213
- }
1214
-
1215
- if (state.historyFilled < state.history.length * 0.6) {
1216
- state.voiceConfidence += (0 - state.voiceConfidence) * 0.15;
1217
- return state.voiceConfidence;
1218
- }
1219
-
1220
- const len = state.history.length;
1221
- let writeIndex = state.historyIndex;
1222
- for (let i = 0; i < len; i++) {
1223
- state.tempBuffer[i] = state.history[writeIndex];
1224
- writeIndex = (writeIndex + 1) % len;
1225
- }
1226
-
1227
- const minLag = 30;
1228
- const maxLag = 240;
1229
- let best = 0;
1230
- for (let lag = minLag; lag <= maxLag; lag += 2) {
1231
- let sum = 0;
1232
- let energyA = 0;
1233
- let energyB = 0;
1234
- for (let i = lag; i < len; i++) {
1235
- const a = state.tempBuffer[i];
1236
- const b = state.tempBuffer[i - lag];
1237
- sum += a * b;
1238
- energyA += a * a;
1239
- energyB += b * b;
1240
- }
1241
- const denom = Math.sqrt(energyA * energyB) + 1e-8;
1242
- const corr = Math.abs(sum) / denom;
1243
- if (corr > best) {
1244
- best = corr;
1245
- }
1246
- }
1247
-
1248
- const normalized = Math.max(
1249
- 0,
1250
- Math.min(1, (best - this.voiceSensitivity) / (1 - this.voiceSensitivity))
1251
- );
1252
- state.voiceConfidence += (normalized - state.voiceConfidence) * 0.2;
1253
- return state.voiceConfidence;
1254
- }
1255
-
1256
- process(inputs, outputs) {
1257
- const input = inputs[0];
1258
- const output = outputs[0];
1259
- if (!input || !output) {
1260
- return true;
1261
- }
1262
-
1263
- for (let channel = 0; channel < output.length; channel++) {
1264
- const inChannel = input[channel];
1265
- const outChannel = output[channel];
1266
- if (!inChannel || !outChannel) {
1267
- continue;
1268
- }
1269
-
1270
- if (!this.enabled) {
1271
- for (let i = 0; i < inChannel.length; i++) {
1272
- outChannel[i] = inChannel[i];
1273
- }
1274
- continue;
1275
- }
1276
-
1277
- const state = this._ensureState(channel);
1278
- const speechPresence = this.voiceEnhancement
1279
- ? this.voiceBoost * state.voiceConfidence
1280
- : 0;
1281
-
1282
- for (let i = 0; i < inChannel.length; i++) {
1283
- const sample = inChannel[i];
1284
- this._pushHistory(state, sample);
1285
- const magnitude = Math.abs(sample);
1286
-
1287
- state.envelope += (magnitude - state.envelope) * this.attack;
1288
-
1289
- if (speechPresence > 0.12 || state.envelope > this.threshold * 1.1) {
1290
- state.graceSamplesRemaining = this.gateGraceSamples;
1291
- state.postSpeechHold = this.postSpeechHoldSamples;
1292
- } else if (state.postSpeechHold > 0) {
1293
- state.postSpeechHold--;
1294
- }
1295
-
1296
- if (state.envelope < this.threshold) {
1297
- state.noise += (state.envelope - state.noise) * this.learnRate;
1298
- state.quietSamples++;
1299
- } else {
1300
- state.quietSamples = 0;
1301
- }
1302
-
1303
- if (state.graceSamplesRemaining > 0 || state.postSpeechHold > 0) {
1304
- state.graceSamplesRemaining--;
1305
- state.isSilenced = false;
1306
- state.silenceSamples = 0;
1307
- state.silenceReleaseCounter = 0;
1308
- } else {
1309
- const belowFloor = state.envelope < this.silenceFloor;
1310
- if (belowFloor && speechPresence < 0.15) {
1311
- state.silenceSamples++;
1312
- } else {
1313
- state.silenceSamples = Math.max(0, state.silenceSamples - 3);
1314
- }
1315
-
1316
- if (!state.isSilenced && state.silenceSamples > this.silenceHoldSamples) {
1317
- state.isSilenced = true;
1318
- state.silenceReleaseCounter = 0;
1319
- }
1320
-
1321
- if (state.isSilenced) {
1322
- const wakeFromEnergy = state.envelope > this.silenceFloor * 1.2;
1323
- const wakeFromVoice = speechPresence > 0.15;
1324
- if (wakeFromEnergy || wakeFromVoice) {
1325
- state.isSilenced = false;
1326
- state.silenceSamples = 0;
1327
- state.silenceReleaseCounter = 0;
1328
- state.postSpeechHold = this.postSpeechHoldSamples;
1329
- state.graceSamplesRemaining = this.gateGraceSamples;
1330
- } else {
1331
- state.silenceReleaseCounter++;
1332
- if (state.silenceReleaseCounter > this.silenceReleaseSamples) {
1333
- state.isSilenced = false;
1334
- state.silenceSamples = 0;
1335
- state.silenceReleaseCounter = 0;
1336
- }
1337
- }
1338
- } else {
1339
- state.silenceReleaseCounter = 0;
1340
- }
1341
- }
1342
-
1343
- const ratio = state.noise / Math.max(state.envelope, 1e-6);
1344
- let gainTarget = 1 - Math.min(0.75, Math.pow(ratio, this.expansionRatio)); // Less aggressive
1345
- gainTarget = Math.max(this.minGainFloor, Math.min(1, gainTarget)); // NEVER below minGainFloor
1346
-
1347
- if (state.quietSamples > this.holdSamples) {
1348
- // Very gentle reduction during quiet - preserve soft speech
1349
- gainTarget = Math.max(this.minGainFloor, gainTarget * (1 - this.maxReduction * 0.2));
1350
- }
1351
-
1352
- const isLowLevelNoise = state.envelope > this.silenceFloor * 2.0 &&
1353
- state.envelope < this.threshold * 0.6;
1354
- const noVoicePresent = speechPresence < 0.08; // More strict voice check
1355
-
1356
- if (isLowLevelNoise && noVoicePresent) {
1357
- state.feedbackCounter++;
1358
- state.avgLevel = state.avgLevel * 0.98 + state.envelope * 0.02; // Slower averaging
1359
- const diff = Math.abs(state.envelope - state.avgLevel);
1360
- state.levelVariance = state.levelVariance * 0.98 + diff * 0.02;
1361
-
1362
- // Only suppress if VERY consistent (feedback is very steady)
1363
- if (state.feedbackCounter > this.feedbackDetectionWindow &&
1364
- state.levelVariance < state.avgLevel * 0.15) {
1365
- state.feedbackSuppress = Math.max(0.3, state.feedbackSuppress - 0.01); // Less aggressive
1366
- }
1367
- } else {
1368
- state.feedbackCounter = Math.max(0, state.feedbackCounter - 5);
1369
- state.feedbackSuppress = Math.min(1.0, state.feedbackSuppress + 0.05);
1370
- }
1371
-
1372
- const reductionFloor = this.voiceEnhancement
1373
- ? 1 - this.maxReduction * (1 - Math.min(1, speechPresence * 0.9))
1374
- : 1 - this.maxReduction;
1375
- // CRITICAL: Never go below minGainFloor
1376
- if (gainTarget < Math.max(reductionFloor, this.minGainFloor)) {
1377
- gainTarget = Math.max(reductionFloor, this.minGainFloor);
1378
- }
1379
-
1380
- gainTarget *= state.feedbackSuppress;
1381
- // Ensure gainTarget never goes below floor after feedback suppression
1382
- gainTarget = Math.max(gainTarget, this.minGainFloor);
1383
-
1384
- const dynamicRelease = this.release *
1385
- (this.voiceEnhancement && speechPresence > 0.1 ? 0.7 : 1);
1386
- state.gain += (gainTarget - state.gain) * dynamicRelease;
1387
- // Final gain floor check
1388
- state.gain = Math.max(state.gain, this.minGainFloor * 0.8);
1389
- let processed = sample * state.gain;
1390
-
1391
- state.lpState = this.hfAlpha * state.lpState + (1 - this.hfAlpha) * processed;
1392
- const lowComponent = state.lpState;
1393
- const highComponent = processed - lowComponent;
1394
-
1395
- state.broadbandEnv += (Math.abs(processed) - state.broadbandEnv) * 0.08; // Slower tracking
1396
- const highEnvDelta = Math.abs(highComponent) - state.highBandEnv;
1397
- const highEnvCoef = highEnvDelta > 0 ? this.highBandAttack : this.highBandRelease;
1398
- state.highBandEnv += highEnvDelta * highEnvCoef;
1399
-
1400
- const hissRatio = Math.min(
1401
- 1,
1402
- Math.abs(highComponent) / (Math.abs(lowComponent) + 1e-5)
1403
- );
1404
- // Less aggressive hiss removal - preserve consonants (S, T, F, etc.)
1405
- const hissGain = 1 - hissRatio * (this.hissCut * (1 - 0.6 * speechPresence));
1406
-
1407
- const highEnvRatio = state.highBandEnv / (state.broadbandEnv + 1e-5);
1408
- const gateAmount = this.highBandGate * Math.max(0, highEnvRatio - speechPresence * 0.6);
1409
- const gatedHigh = highComponent * hissGain * (1 - gateAmount * 0.7); // Less gate
1410
-
1411
- const speechLift = 1 + this.speechBoost * speechPresence;
1412
- processed = lowComponent * speechLift + gatedHigh;
1413
-
1414
- // CRITICAL FIX: Much higher minimum muteGain - never mute below 50%
1415
- // This ensures soft speech and consonants are ALWAYS audible
1416
- const muteTarget = state.isSilenced ? 0.5 : 1;
1417
- const smoothing = state.isSilenced ? 0.03 : 0.25; // Even slower mute transition
1418
- state.muteGain += (muteTarget - state.muteGain) * smoothing;
1419
- processed *= state.muteGain;
1420
-
1421
- outChannel[i] = processed;
1422
- }
1423
-
1424
- this._updateVoiceConfidence(state);
1425
- }
1426
-
1427
- return true;
1428
- }
1429
- }
1430
-
1431
- registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
1432
- `;
1433
- const blob = new Blob([processorSource], {
1434
- type: "application/javascript",
1435
- });
1436
- this.denoiseWorkletUrl = URL.createObjectURL(blob);
1437
- this.denoiseWorkletReady = this.audioContext.audioWorklet
1438
- .addModule(this.denoiseWorkletUrl)
1439
- .catch((error) => {
1440
- this.options.denoiser = {
1441
- ...(this.options.denoiser || {}),
1442
- enabled: false,
1443
- };
1444
- throw error;
1445
- });
1446
- return this.denoiseWorkletReady;
1447
- }
1448
- resolveOptions(options) {
1449
- const distanceDefaults = {
1450
- refDistance: 1.2,
1451
- maxDistance: 15, // Cannot hear beyond 15 meters
1452
- rolloffFactor: 1.35,
1453
- unit: "auto",
1454
- };
1455
- const denoiserDefaults = {
1456
- enabled: false, // DISABLED BY DEFAULT - test to see if denoiser causes audio cutting
1457
- // TUNED FOR MAXIMUM SPEECH CLARITY: Extremely gentle - only remove obvious noise
1458
- // These settings GUARANTEE no speech cutting - minGainFloor=0.5 means at least 50% always passes
1459
- threshold: 0.002, // Very low threshold - catch even soft speech
1460
- noiseFloor: 0.0005, // Very low noise floor
1461
- release: 0.4, // Slower release - preserve word endings completely
1462
- attack: 0.1, // Faster attack - catch speech onset instantly
1463
- holdMs: 200, // Long hold (200ms) - prevent mid-word cuts
1464
- maxReduction: 0.15, // VERY GENTLE - max 15% reduction, never cut speech
1465
- hissCut: 0.15, // Less hiss cut - preserve S, T, F consonants
1466
- expansionRatio: 1.05, // Almost no expansion
1467
- learnRate: 0.03, // Slower learning - don't adapt to speech as noise
1468
- voiceBoost: 0.35,
1469
- voiceSensitivity: 0.1, // Very sensitive to voice detection
1470
- voiceEnhancement: false,
1471
- silenceFloor: 0.0002, // Very low silence floor
1472
- silenceHoldMs: 400, // Much longer before silence gate (400ms)
1473
- silenceReleaseMs: 200, // Slower release from silence
1474
- speechBoost: 0.25,
1475
- highBandGate: 0.15, // Much less aggressive high band gate
1476
- highBandAttack: 0.08,
1477
- highBandRelease: 0.03,
1478
- };
1479
- return {
1480
- distance: {
1481
- refDistance: options?.distance?.refDistance ?? distanceDefaults.refDistance,
1482
- maxDistance: options?.distance?.maxDistance ?? distanceDefaults.maxDistance,
1483
- rolloffFactor: options?.distance?.rolloffFactor ?? distanceDefaults.rolloffFactor,
1484
- unit: options?.distance?.unit ?? distanceDefaults.unit,
1485
- },
1486
- denoiser: {
1487
- enabled: options?.denoiser?.enabled ?? denoiserDefaults.enabled,
1488
- threshold: options?.denoiser?.threshold ?? denoiserDefaults.threshold,
1489
- noiseFloor: options?.denoiser?.noiseFloor ?? denoiserDefaults.noiseFloor,
1490
- release: options?.denoiser?.release ?? denoiserDefaults.release,
1491
- attack: options?.denoiser?.attack ?? denoiserDefaults.attack,
1492
- holdMs: options?.denoiser?.holdMs ?? denoiserDefaults.holdMs,
1493
- maxReduction: options?.denoiser?.maxReduction ?? denoiserDefaults.maxReduction,
1494
- hissCut: options?.denoiser?.hissCut ?? denoiserDefaults.hissCut,
1495
- expansionRatio: options?.denoiser?.expansionRatio ?? denoiserDefaults.expansionRatio,
1496
- learnRate: options?.denoiser?.learnRate ?? denoiserDefaults.learnRate,
1497
- voiceBoost: options?.denoiser?.voiceBoost ?? denoiserDefaults.voiceBoost,
1498
- voiceSensitivity: options?.denoiser?.voiceSensitivity ?? denoiserDefaults.voiceSensitivity,
1499
- voiceEnhancement: options?.denoiser?.voiceEnhancement ?? denoiserDefaults.voiceEnhancement,
1500
- silenceFloor: options?.denoiser?.silenceFloor ?? denoiserDefaults.silenceFloor,
1501
- silenceHoldMs: options?.denoiser?.silenceHoldMs ?? denoiserDefaults.silenceHoldMs,
1502
- silenceReleaseMs: options?.denoiser?.silenceReleaseMs ?? denoiserDefaults.silenceReleaseMs,
1503
- speechBoost: options?.denoiser?.speechBoost ?? denoiserDefaults.speechBoost,
1504
- highBandGate: options?.denoiser?.highBandGate ?? denoiserDefaults.highBandGate,
1505
- highBandAttack: options?.denoiser?.highBandAttack ?? denoiserDefaults.highBandAttack,
1506
- highBandRelease: options?.denoiser?.highBandRelease ?? denoiserDefaults.highBandRelease,
1507
- },
1508
- };
1509
- }
1510
- }
1511
// CommonJS export of the spatial audio manager class.
exports.SpatialAudioManager = SpatialAudioManager;
// Static build stamp so consumers/logs can identify which SDK build is loaded.
SpatialAudioManager.BUILD_STAMP = "2026-01-07";