@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.257 → 1.0.259

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/README.md +45 -1
  2. package/dist/audio/AudioNodeFactory.d.ts +130 -0
  3. package/dist/audio/AudioNodeFactory.js +158 -0
  4. package/dist/audio/AudioPipeline.d.ts +89 -0
  5. package/dist/audio/AudioPipeline.js +138 -0
  6. package/dist/{MLNoiseSuppressor.d.ts → audio/MLNoiseSuppressor.d.ts} +7 -7
  7. package/dist/{MLNoiseSuppressor.js → audio/MLNoiseSuppressor.js} +13 -41
  8. package/dist/audio/index.d.ts +6 -0
  9. package/dist/audio/index.js +22 -0
  10. package/dist/channels/huddle/HuddleChannel.d.ts +87 -0
  11. package/dist/channels/huddle/HuddleChannel.js +152 -0
  12. package/dist/channels/huddle/HuddleTypes.d.ts +85 -0
  13. package/dist/channels/huddle/HuddleTypes.js +25 -0
  14. package/dist/channels/huddle/index.d.ts +5 -0
  15. package/dist/channels/huddle/index.js +21 -0
  16. package/dist/channels/index.d.ts +5 -0
  17. package/dist/channels/index.js +21 -0
  18. package/dist/channels/spatial/SpatialAudioChannel.d.ts +144 -0
  19. package/dist/channels/spatial/SpatialAudioChannel.js +455 -0
  20. package/dist/channels/spatial/SpatialAudioTypes.d.ts +85 -0
  21. package/dist/channels/spatial/SpatialAudioTypes.js +42 -0
  22. package/dist/channels/spatial/index.d.ts +5 -0
  23. package/dist/channels/spatial/index.js +21 -0
  24. package/dist/{EventManager.d.ts → core/EventManager.d.ts} +4 -2
  25. package/dist/{EventManager.js → core/EventManager.js} +5 -3
  26. package/dist/{MediasoupManager.d.ts → core/MediasoupManager.d.ts} +10 -4
  27. package/dist/{MediasoupManager.js → core/MediasoupManager.js} +31 -42
  28. package/dist/core/index.d.ts +5 -0
  29. package/dist/core/index.js +21 -0
  30. package/dist/index.d.ts +2 -2
  31. package/dist/index.js +30 -4
  32. package/dist/sdk/index.d.ts +36 -0
  33. package/dist/sdk/index.js +121 -0
  34. package/dist/types/events.d.ts +154 -0
  35. package/dist/{types.js → types/events.js} +3 -0
  36. package/dist/types/index.d.ts +7 -0
  37. package/dist/types/index.js +23 -0
  38. package/dist/types/participant.d.ts +65 -0
  39. package/dist/types/participant.js +5 -0
  40. package/dist/types/position.d.ts +47 -0
  41. package/dist/types/position.js +9 -0
  42. package/dist/types/room.d.ts +82 -0
  43. package/dist/types/room.js +5 -0
  44. package/dist/utils/audio/clarity-score.d.ts +33 -0
  45. package/dist/utils/audio/clarity-score.js +81 -0
  46. package/dist/utils/audio/index.d.ts +5 -0
  47. package/dist/utils/audio/index.js +21 -0
  48. package/dist/utils/audio/voice-filter.d.ts +30 -0
  49. package/dist/utils/audio/voice-filter.js +70 -0
  50. package/dist/utils/index.d.ts +7 -0
  51. package/dist/utils/index.js +23 -0
  52. package/dist/utils/position/coordinates.d.ts +37 -0
  53. package/dist/utils/position/coordinates.js +61 -0
  54. package/dist/utils/position/index.d.ts +6 -0
  55. package/dist/utils/position/index.js +22 -0
  56. package/dist/utils/position/normalize.d.ts +37 -0
  57. package/dist/utils/position/normalize.js +78 -0
  58. package/dist/utils/position/snap.d.ts +51 -0
  59. package/dist/utils/position/snap.js +81 -0
  60. package/dist/utils/smoothing/gain-smoothing.d.ts +45 -0
  61. package/dist/utils/smoothing/gain-smoothing.js +77 -0
  62. package/dist/utils/smoothing/index.d.ts +5 -0
  63. package/dist/utils/smoothing/index.js +21 -0
  64. package/dist/utils/smoothing/pan-smoothing.d.ts +43 -0
  65. package/dist/utils/smoothing/pan-smoothing.js +85 -0
  66. package/dist/utils/spatial/angle-calc.d.ts +24 -0
  67. package/dist/utils/spatial/angle-calc.js +69 -0
  68. package/dist/utils/spatial/distance-calc.d.ts +33 -0
  69. package/dist/utils/spatial/distance-calc.js +48 -0
  70. package/dist/utils/spatial/gain-calc.d.ts +37 -0
  71. package/dist/utils/spatial/gain-calc.js +52 -0
  72. package/dist/utils/spatial/head-position.d.ts +32 -0
  73. package/dist/utils/spatial/head-position.js +76 -0
  74. package/dist/utils/spatial/index.d.ts +9 -0
  75. package/dist/utils/spatial/index.js +25 -0
  76. package/dist/utils/spatial/listener-calc.d.ts +28 -0
  77. package/dist/utils/spatial/listener-calc.js +74 -0
  78. package/dist/utils/spatial/pan-calc.d.ts +48 -0
  79. package/dist/utils/spatial/pan-calc.js +80 -0
  80. package/package.json +1 -1
  81. package/dist/SpatialAudioManager.d.ts +0 -272
  82. package/dist/SpatialAudioManager.js +0 -1537
  83. package/dist/types.d.ts +0 -73
@@ -1,1537 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.SpatialAudioManager = void 0;
4
- const EventManager_1 = require("./EventManager");
5
- const MLNoiseSuppressor_1 = require("./MLNoiseSuppressor");
6
- class SpatialAudioManager extends EventManager_1.EventManager {
7
- constructor(options) {
8
- super();
9
- this.participantNodes = new Map();
10
- this.monitoringIntervals = new Map();
11
- this.denoiseWorkletReady = null;
12
- this.listenerPosition = { x: 0, y: 0, z: 0 };
13
- this.listenerInitialized = false;
14
- this.listenerDirection = {
15
- forward: { x: 0, y: 1, z: 0 },
16
- up: { x: 0, y: 0, z: 1 },
17
- };
18
- // Unit right-vector derived from listener yaw; used to project sources into listener space.
19
- this.listenerRight = { x: 1, z: 0 };
20
- this.localParticipantId = ''; // Local participant ID (listener)
21
- // MASTER MUTE: Mutes all incoming audio (used before "Join Space" is clicked)
22
- this.isMasterMuted = false;
23
- // PAN SMOOTHING: Prevents random left/right jumping
24
- // Stores the previous smoothed pan value for each participant
25
- this.smoothedPanValues = new Map();
26
- // PAN SMOOTHING: Tracks last applied pan value to skip micro-changes
27
- this.lastPanValue = new Map();
28
- // UPDATE THROTTLE: Tracks last spatial update time to prevent too-frequent updates
29
- this.lastSpatialUpdateTime = new Map();
30
- // DEBUG: Throttle spatial audio debug logs
31
- this._spatialDebugTimes = new Map();
32
- // Smoothing factor: 0.0 = no smoothing (instant), 1.0 = infinite smoothing (never changes)
33
- // 0.35 provides good stability without feeling sluggish
34
- this.panSmoothingFactor = 0.35;
35
- // Minimum pan change threshold - changes smaller than this are ignored to prevent jitter
36
- // 0.02 = 2% of pan range must change to trigger update (filters network jitter)
37
- this.panChangeThreshold = 0.02;
38
- // Dead-zone around center (0) to prevent oscillation when nearly face-to-face
39
- this.panCenterDeadZone = 0.03;
40
- // POSITION SNAPPING: Prevents jitter from micro-movements when "stationary"
41
- // Caches last known "snapped" position for each speaker
42
- this.cachedSpeakerPositions = new Map();
43
- // Cache for last snapped listener position
44
- this.cachedListenerPosition = { x: 0, y: 0, z: 0 };
45
- // Minimum position change (in meters) to trigger recalculation
46
- // 0.30m = 30cm - ignores pixel streaming jitter, physics wobble, breathing
47
- // ALIGNED with server-side 100cm filter - this catches smaller jitter
48
- this.positionSnapThreshold = 0.30;
49
- // GAIN STABILIZATION: Prevents gain fluctuations when distance is stable
50
- // Caches last calculated gain value for each participant
51
- this.cachedGainValues = new Map();
52
- // Minimum gain change (0-1 scale) to trigger update
53
- // 0.05 = 5% change required - filters out tiny distance jitter
54
- this.gainChangeThreshold = 0.05;
55
- // NOTE: Rate limiting variables removed - setTargetAtTime provides sufficient smoothing
56
- // The smoothPanValue() and position snapping handle jitter reduction
57
- // ML Noise Suppressor (TensorFlow.js-based)
58
- this.mlSuppressor = null;
59
- // NOTE: We currently LOAD the TFJS model, but we do not yet run it inside the
60
- // real-time WebAudio graph (AudioWorklet). This flag only indicates the model
61
- // is ready to be used.
62
- this.mlModelReady = false;
63
- this.options = this.resolveOptions(options);
64
- // Use high sample rate for best audio quality
65
- this.audioContext = new AudioContext({ sampleRate: 48000 });
66
- // Master gain - REDUCED to prevent echo/feedback
67
- this.masterGainNode = this.audioContext.createGain();
68
- this.masterGainNode.gain.value = 1.0; // Unity gain for crystal clear audio (was 2.5)
69
- // Compressor for dynamic range control - VERY GENTLE to prevent pumping/echo
70
- this.compressor = this.audioContext.createDynamicsCompressor();
71
- this.compressor.threshold.value = -15; // Even higher threshold = less compression
72
- this.compressor.knee.value = 40; // Soft knee for natural sound
73
- this.compressor.ratio.value = 2.5; // Very gentle ratio (was 3)
74
- this.compressor.attack.value = 0.02; // 20ms attack - very gentle
75
- this.compressor.release.value = 0.25; // 250ms release - smooth recovery
76
- // Connect master chain: masterGain -> compressor -> destination
77
- this.masterGainNode.connect(this.compressor);
78
- this.compressor.connect(this.audioContext.destination);
79
- }
80
- getAudioContext() {
81
- return this.audioContext;
82
- }
83
- /**
84
- * Initialize ML-based noise suppression (TensorFlow.js)
85
- * Falls back to AudioWorklet denoiser if ML initialization fails
86
- */
87
- async initializeMLNoiseSuppression(modelUrl) {
88
- try {
89
- this.mlSuppressor = new MLNoiseSuppressor_1.MLNoiseSuppressor();
90
- await this.mlSuppressor.initialize(modelUrl);
91
- if (this.mlSuppressor.isReady()) {
92
- this.mlModelReady = true;
93
- }
94
- else {
95
- throw new Error('ML processor not ready after initialization');
96
- }
97
- }
98
- catch (error) {
99
- this.mlSuppressor = null;
100
- this.mlModelReady = false;
101
- // AudioWorklet denoiser will be used as fallback (existing behavior)
102
- }
103
- }
104
- /**
105
- * Get current noise suppression mode
106
- */
107
- getNoiseSuppressionMode() {
108
- if (this.isDenoiserEnabled()) {
109
- return 'audioworklet';
110
- }
111
- return 'none';
112
- }
113
- /**
114
- * True if the TFJS model assets were loaded and the model is ready.
115
- * This does NOT mean it is currently processing the live WebAudio stream.
116
- */
117
- isMLModelLoaded() {
118
- return this.mlModelReady && !!this.mlSuppressor?.isReady();
119
- }
120
- /**
121
- * Setup spatial audio for a participant
122
- *
123
- * CRITICAL: Each participant gets their OWN audio processing chain:
124
- * Stream -> Source -> Panner -> Analyser -> Gain -> Compressor -> Output
125
- *
126
- * This ensures:
127
- * - Each voice is positioned independently in 3D space
128
- * - No server-side mixing required
129
- * - Scalable to unlimited participants (browser handles the mixing)
130
- *
131
- * @param participantId Unique ID for this participant
132
- * @param track Audio track from MediaSoup consumer
133
- * @param bypassSpatialization For testing - bypasses 3D positioning
134
- */
135
- async setupSpatialAudioForParticipant(participantId, track, bypassSpatialization = false // Default to false
136
- ) {
137
- if (this.audioContext.state === "suspended") {
138
- await this.audioContext.resume();
139
- }
140
- // Create stream with noise suppression constraints
141
- const stream = new MediaStream([track]);
142
- const source = this.audioContext.createMediaStreamSource(stream);
143
- const panner = this.audioContext.createPanner();
144
- const stereoPanner = this.audioContext.createStereoPanner(); // For stable L/R panning
145
- // Force a predictable mono signal before applying StereoPannerNode.
146
- // If the inbound track is stereo (or becomes stereo in the graph), StereoPannerNode can
147
- // still leak energy into both ears. Downmixing prevents that.
148
- const monoSplitter = this.audioContext.createChannelSplitter(2);
149
- const monoMerger = this.audioContext.createChannelMerger(1);
150
- const monoGainL = this.audioContext.createGain();
151
- const monoGainR = this.audioContext.createGain();
152
- monoGainL.gain.value = 0.5;
153
- monoGainR.gain.value = 0.5;
154
- // CRITICAL: Convert mono back to stereo for StereoPanner
155
- // monoMerger outputs 1 channel, but StereoPanner needs 2 channels
156
- const stereoUpmixer = this.audioContext.createChannelMerger(2);
157
- const analyser = this.audioContext.createAnalyser();
158
- const gain = this.audioContext.createGain();
159
- const proximityGain = this.audioContext.createGain();
160
- let denoiseNode;
161
- // ML-based denoising: Currently prepared but requires AudioWorklet integration
162
- // For now, using traditional AudioWorklet denoiser as it provides real-time processing
163
- // TODO: Integrate ML processor into AudioWorklet for hybrid ML+DSP approach
164
- if (this.isDenoiserEnabled() && typeof this.audioContext.audioWorklet !== "undefined") {
165
- try {
166
- await this.ensureDenoiseWorklet();
167
- denoiseNode = new AudioWorkletNode(this.audioContext, "odyssey-denoise", {
168
- numberOfInputs: 1,
169
- numberOfOutputs: 1,
170
- processorOptions: {
171
- enabled: this.options.denoiser?.enabled !== false,
172
- threshold: this.options.denoiser?.threshold,
173
- noiseFloor: this.options.denoiser?.noiseFloor,
174
- release: this.options.denoiser?.release,
175
- attack: this.options.denoiser?.attack,
176
- holdMs: this.options.denoiser?.holdMs,
177
- maxReduction: this.options.denoiser?.maxReduction,
178
- hissCut: this.options.denoiser?.hissCut,
179
- expansionRatio: this.options.denoiser?.expansionRatio,
180
- learnRate: this.options.denoiser?.learnRate,
181
- voiceBoost: this.options.denoiser?.voiceBoost,
182
- voiceSensitivity: this.options.denoiser?.voiceSensitivity,
183
- voiceEnhancement: this.options.denoiser?.voiceEnhancement,
184
- silenceFloor: this.options.denoiser?.silenceFloor,
185
- silenceHoldMs: this.options.denoiser?.silenceHoldMs,
186
- silenceReleaseMs: this.options.denoiser?.silenceReleaseMs,
187
- speechBoost: this.options.denoiser?.speechBoost,
188
- highBandGate: this.options.denoiser?.highBandGate,
189
- highBandAttack: this.options.denoiser?.highBandAttack,
190
- highBandRelease: this.options.denoiser?.highBandRelease,
191
- },
192
- });
193
- }
194
- catch (error) {
195
- denoiseNode = undefined;
196
- }
197
- }
198
- // Create BiquadFilter nodes for static/noise reduction
199
- // Based on: https://tagdiwalaviral.medium.com/struggles-of-noise-reduction-in-rtc-part-2-2526f8179442
200
- // HIGHPASS FILTER: Remove low-frequency rumble and plosives
201
- // Higher cutoff (100Hz) reduces room boom/echo effect
202
- const highpassFilter = this.audioContext.createBiquadFilter();
203
- highpassFilter.type = "highpass";
204
- highpassFilter.frequency.value = 100; // Cut below 100Hz (removes room boom/rumble)
205
- highpassFilter.Q.value = 0.5; // Very gentle slope - prevents resonance
206
- // LOWPASS FILTER: Remove high-frequency hiss
207
- // Slightly higher for clearer voice
208
- const lowpassFilter = this.audioContext.createBiquadFilter();
209
- lowpassFilter.type = "lowpass";
210
- lowpassFilter.frequency.value = 10000; // Cut above 10kHz (more open sound)
211
- lowpassFilter.Q.value = 0.5; // Low Q prevents ringing/echo
212
- // VOICE BAND EMPHASIS: DISABLED - was causing resonance/bathroom effect
213
- // The peaking filter at 180Hz can cause room-like coloration
214
- const voiceBandFilter = this.audioContext.createBiquadFilter();
215
- voiceBandFilter.type = "peaking";
216
- voiceBandFilter.frequency.value = 180;
217
- voiceBandFilter.Q.value = 0.5;
218
- voiceBandFilter.gain.value = 0; // DISABLED - no boost (was 1dB causing echo)
219
- const dynamicLowpass = this.audioContext.createBiquadFilter();
220
- dynamicLowpass.type = "lowpass";
221
- dynamicLowpass.frequency.value = 12000; // Higher = more natural (was 7500)
222
- dynamicLowpass.Q.value = 0.5; // Low Q prevents ringing
223
- proximityGain.gain.value = 1.0;
224
- // Configure Panner for realistic 3D spatial audio
225
- const distanceConfig = this.getDistanceConfig();
226
- panner.panningModel = "HRTF"; // Head-Related Transfer Function for realistic 3D
227
- panner.distanceModel = "inverse"; // Natural distance falloff
228
- panner.refDistance = distanceConfig.refDistance ?? 1.2;
229
- panner.maxDistance = distanceConfig.maxDistance ?? 15; // Cannot hear beyond 15 meters
230
- panner.rolloffFactor = distanceConfig.rolloffFactor ?? 1.35; // How quickly sound fades with distance
231
- panner.coneInnerAngle = 360; // Omnidirectional sound source
232
- panner.coneOuterAngle = 360;
233
- panner.coneOuterGain = 0.3; // Some sound even outside cone
234
- // Configure gain for individual participant volume control
235
- gain.gain.value = 1.0; // Unity gain for clean audio
236
- // Per-participant LIMITER (not compressor) - only catches peaks
237
- // Compressors can cause pumping/echo - limiter is transparent until clipping
238
- const participantCompressor = this.audioContext.createDynamicsCompressor();
239
- participantCompressor.threshold.value = -6; // Only activate near clipping (was -40!)
240
- participantCompressor.knee.value = 3; // Hard knee = limiter behavior (was 20)
241
- participantCompressor.ratio.value = 20; // High ratio = limiter (was 1.5)
242
- participantCompressor.attack.value = 0.001; // 1ms - catch peaks fast
243
- participantCompressor.release.value = 0.05; // 50ms - fast release prevents pumping
244
- let currentNode = source;
245
- // First apply compressor to tame initial transients (CRITICAL for preventing pops)
246
- currentNode.connect(participantCompressor);
247
- currentNode = participantCompressor;
248
- if (denoiseNode) {
249
- currentNode.connect(denoiseNode);
250
- currentNode = denoiseNode;
251
- }
252
- // Audio chain with voice optimization filters
253
- // Chain: source -> compressor -> [denoise] -> highpass -> voiceBand -> lowpass -> dynamicLowpass -> proximityGain -> panner -> analyser -> gain -> masterGain
254
- currentNode.connect(highpassFilter);
255
- highpassFilter.connect(voiceBandFilter);
256
- voiceBandFilter.connect(lowpassFilter);
257
- lowpassFilter.connect(dynamicLowpass);
258
- dynamicLowpass.connect(proximityGain);
259
- // Base routing (always): proximityGain -> mono downmix -> stereo upmix -> analyser
260
- proximityGain.connect(monoSplitter);
261
- monoSplitter.connect(monoGainL, 0);
262
- monoSplitter.connect(monoGainR, 1);
263
- monoGainL.connect(monoMerger, 0, 0);
264
- monoGainR.connect(monoMerger, 0, 0);
265
- // Convert mono to stereo (same signal on both channels) for StereoPanner
266
- monoMerger.connect(stereoUpmixer, 0, 0); // mono -> left channel
267
- monoMerger.connect(stereoUpmixer, 0, 1); // mono -> right channel
268
- stereoUpmixer.connect(analyser);
269
- // Output routing depends on spatialization mode:
270
- // - Spatial: analyser -> stereoPanner -> gain -> master
271
- // - Non-spatial: analyser -> gain -> master
272
- if (bypassSpatialization) {
273
- analyser.connect(gain);
274
- }
275
- else {
276
- analyser.connect(stereoPanner);
277
- stereoPanner.connect(gain);
278
- }
279
- gain.connect(this.masterGainNode);
280
- this.participantNodes.set(participantId, {
281
- source,
282
- panner,
283
- stereoPanner,
284
- monoSplitter,
285
- monoGainL,
286
- monoGainR,
287
- monoMerger,
288
- stereoUpmixer,
289
- analyser,
290
- gain,
291
- proximityGain,
292
- compressor: participantCompressor,
293
- highpassFilter,
294
- lowpassFilter,
295
- voiceBandFilter,
296
- dynamicLowpass,
297
- denoiseNode,
298
- stream,
299
- });
300
- // Start monitoring audio levels
301
- this.startMonitoring(participantId);
302
- }
303
- startMonitoring(participantId) {
304
- const nodes = this.participantNodes.get(participantId);
305
- if (!nodes)
306
- return;
307
- const { analyser, stream } = nodes;
308
- const dataArray = new Uint8Array(analyser.frequencyBinCount);
309
- // Clear any existing interval for this participant
310
- if (this.monitoringIntervals.has(participantId)) {
311
- clearInterval(this.monitoringIntervals.get(participantId));
312
- }
313
- const interval = setInterval(() => {
314
- analyser.getByteTimeDomainData(dataArray);
315
- let sum = 0;
316
- for (const amplitude of dataArray) {
317
- sum += Math.abs(amplitude - 128);
318
- }
319
- const average = sum / dataArray.length;
320
- const audioLevel = (average / 128) * 255; // Scale to 0-255
321
- // Silent monitoring - no logs needed
322
- }, 2000); // Check every 2 seconds
323
- this.monitoringIntervals.set(participantId, interval);
324
- }
325
- /**
326
- * Toggle spatialization for a participant (for huddle/spatial switching)
327
- * @param participantId The participant to update
328
- * @param enableSpatialization True for spatial audio, false for non-spatial (huddle)
329
- */
330
- setParticipantSpatialization(participantId, enableSpatialization) {
331
- const nodes = this.participantNodes.get(participantId);
332
- if (!nodes) {
333
- return;
334
- }
335
- try {
336
- // Keep input side unchanged; only re-route analyser output.
337
- nodes.analyser.disconnect();
338
- nodes.stereoPanner.disconnect();
339
- if (enableSpatialization) {
340
- nodes.analyser.connect(nodes.stereoPanner);
341
- nodes.stereoPanner.connect(nodes.gain);
342
- }
343
- else {
344
- nodes.analyser.connect(nodes.gain);
345
- }
346
- }
347
- catch (error) {
348
- // Error toggling spatialization - fail silently
349
- }
350
- }
351
- /**
352
- * Update spatial audio position and orientation for a participant
353
- *
354
- * This is called every time we receive position/direction updates from the server.
355
- *
356
- * Position: Where the participant is in 3D space (their location)
357
- * Direction: Which way they're facing (their forward vector)
358
- *
359
- * Example:
360
- * - Position: (x: -200, y: 0, z: 100) = 2m to your left
361
- * - Direction: (x: 0, y: 1, z: 0) = facing forward (away from you)
362
- * - Result: Sound comes from your left, oriented as if speaking away
363
- *
364
- * The Web Audio API's PannerNode uses HRTF to create realistic 3D audio
365
- * based on these parameters plus the listener's position/orientation.
366
- *
367
- * @param participantId Who to update
368
- * @param position Where they are (from socket data)
369
- * @param direction Which way they're facing (from socket data)
370
- */
371
- updateSpatialAudio(participantId, position, direction, _spatialMeta) {
372
- const nodes = this.participantNodes.get(participantId);
373
- if (!nodes) {
374
- return;
375
- }
376
- if (!nodes.panner) {
377
- return;
378
- }
379
- // NOTE: Removed throttle - it was causing audio dropouts
380
- // The smoothing in applyStereoPanning and gain ramping handles click prevention
381
- if (nodes?.panner) {
382
- // POSITION-BASED SPATIAL AUDIO
383
- // All calculations are in world space (datum at 0,0,0)
384
- //
385
- // Step 1: Get speaker head position (with jitter reduction)
386
- const normalizedBodyPosition = this.normalizePositionUnits(position);
387
- // SNAP: Reduce jitter by ignoring micro-movements (<15cm)
388
- const snappedSpeakerPos = this.snapPosition(normalizedBodyPosition, participantId);
389
- const speakerHeadPosition = this.computeHeadPosition(snappedSpeakerPos);
390
- // SNAP: Use cached listener position to reduce jitter
391
- const listenerPos = this.cachedListenerPosition.x !== 0 || this.cachedListenerPosition.z !== 0
392
- ? this.cachedListenerPosition
393
- : this.listenerPosition;
394
- // DEBUG: Log positions for troubleshooting distance issues
395
- const distX = speakerHeadPosition.x - listenerPos.x;
396
- const distY = speakerHeadPosition.y - listenerPos.y;
397
- const distZ = speakerHeadPosition.z - listenerPos.z;
398
- // Step 2: Calculate 3D distance (Euclidean distance from datum-based positions)
399
- // distance = √(Δx² + Δy² + Δz²)
400
- const distance = this.getDistanceBetween(listenerPos, speakerHeadPosition);
401
- // HARD CUTOFF: Fade out completely if beyond max distance (15m)
402
- // This prevents any audio processing for distant participants
403
- const maxDistance = 15.0;
404
- if (distance >= maxDistance) {
405
- // Smooth fade to zero - prevents click (100ms fade)
406
- nodes.gain.gain.setTargetAtTime(0, this.audioContext.currentTime, 0.033);
407
- this.cachedGainValues.set(participantId, 0);
408
- return; // Skip all other processing
409
- }
410
- // Step 3: Calculate relative vector (speaker relative to listener)
411
- // vecToSource = speaker.pos - listener.pos
412
- const vecToSource = {
413
- x: speakerHeadPosition.x - listenerPos.x,
414
- z: speakerHeadPosition.z - listenerPos.z,
415
- };
416
- // Step 4: Project onto listener's right-ear axis using dot product
417
- // dxLocal = vecToSource · listenerRight
418
- // Positive = sound is to the RIGHT of listener
419
- // Negative = sound is to the LEFT of listener
420
- // Near zero = sound is FRONT or BACK (center)
421
- const dxLocal = vecToSource.x * this.listenerRight.x + vecToSource.z * this.listenerRight.z;
422
- // Calculate dzLocal (forward/back component)
423
- // For CLOCKWISE rotation: Forward = right rotated 90° CW: (x,z) -> (-z,x)
424
- // At yaw=0°: right=(1,0) → forward=(0,1)=+Z ✓
425
- // At yaw=90°: right=(0,-1) → forward=(1,0)=+X ✓
426
- const listenerForward = { x: -this.listenerRight.z, z: this.listenerRight.x };
427
- const dzLocal = vecToSource.x * listenerForward.x + vecToSource.z * listenerForward.z;
428
- // TRUE 360° SPATIAL AUDIO PANNING
429
- // Calculate angle from listener to source using atan2
430
- // This gives full left/right separation:
431
- // - 90° (right side) = pan +1.0 (100% RIGHT ear, 0% LEFT ear)
432
- // - 270° (left side) = pan -1.0 (100% LEFT ear, 0% RIGHT ear)
433
- // - 0° (front) or 180° (back) = pan 0.0 (CENTER)
434
- const angleToSource = Math.atan2(dxLocal, dzLocal); // Radians: -π to +π
435
- const rawPanValue = Math.sin(angleToSource); // -1 to +1
436
- // SMOOTH THE PAN VALUE to prevent random left/right jumping
437
- const smoothedPanValue = this.smoothPanValue(participantId, rawPanValue);
438
- const panning = this.panningFromPanValue(smoothedPanValue, dxLocal);
439
- // Calculate gain based on distance
440
- const calculatedGain = this.calculateLogarithmicGain(distance);
441
- const newGainValue = calculatedGain / 100; // Convert to 0-1 range
442
- // SIMPLE GAIN STABILIZATION: Only update if change exceeds threshold
443
- const cachedGain = this.cachedGainValues.get(participantId);
444
- let finalGainValue = newGainValue;
445
- if (cachedGain !== undefined) {
446
- const gainChange = Math.abs(newGainValue - cachedGain);
447
- if (gainChange < this.gainChangeThreshold) {
448
- // Change too small - keep cached value for stability
449
- finalGainValue = cachedGain;
450
- }
451
- else {
452
- // Significant change - update cache
453
- this.cachedGainValues.set(participantId, newGainValue);
454
- }
455
- }
456
- else {
457
- // First time - cache the value
458
- this.cachedGainValues.set(participantId, newGainValue);
459
- }
460
- // Apply panning
461
- this.applyStereoPanning(participantId, panning);
462
- // Apply gain with Web Audio's built-in smoothing (setTargetAtTime)
463
- const currentTime = this.audioContext.currentTime;
464
- try {
465
- // setTargetAtTime provides smooth exponential interpolation
466
- // Time constant 0.1 = ~300ms to settle
467
- nodes.gain.gain.setTargetAtTime(finalGainValue, currentTime, 0.1);
468
- }
469
- catch (err) {
470
- // Fallback: If scheduling fails, set value directly (rare edge case)
471
- nodes.gain.gain.value = finalGainValue;
472
- }
473
- }
474
- }
475
- /**
476
- * Mute or unmute a participant's audio
477
- * Used for channel-based audio routing (huddle vs spatial)
478
- * @param participantId The participant to mute/unmute
479
- * @param muted True to mute, false to unmute
480
- */
481
- setParticipantMuted(participantId, muted) {
482
- const nodes = this.participantNodes.get(participantId);
483
- if (!nodes) {
484
- // Audio nodes don't exist yet - this is normal if called before consumer is set up
485
- return;
486
- }
487
- if (nodes.gain) {
488
- // Smooth ramp to 0 (muted) or 1 (unmuted) - prevents click
489
- // Time constant 0.05 = ~150ms to reach target (3 time constants)
490
- nodes.gain.gain.setTargetAtTime(muted ? 0 : 1, this.audioContext.currentTime, 0.05);
491
- // Update cached gain value when muting/unmuting
492
- this.cachedGainValues.set(participantId, muted ? 0 : 1);
493
- }
494
- }
495
- /**
496
- * Master mute/unmute for ALL audio output
497
- * Use this to prevent audio from playing until user explicitly joins the space.
498
- * @param muted True to mute all audio, false to unmute
499
- */
500
- setMasterMuted(muted) {
501
- this.isMasterMuted = muted;
502
- // Smooth transition for master gain to prevent clicks
503
- const targetGain = muted ? 0 : 1;
504
- this.masterGainNode.gain.setTargetAtTime(targetGain, this.audioContext.currentTime, 0.05 // ~150ms transition
505
- );
506
- }
507
- /**
508
- * Get current master mute state
509
- * @returns True if master mute is enabled
510
- */
511
- getMasterMuted() {
512
- return this.isMasterMuted;
513
- }
514
- /**
515
- * Update listener position and orientation
516
- * The \"listener\" is YOU - where your ears/head are positioned
517
- *
518
- * @param position Your HEAD position (camera position), not body position!
519
- * @param orientation Which way your head is facing (forward and up vectors)
520
- */
521
- setListenerPosition(position, orientation) {
522
- const normalizedPosition = this.normalizePositionUnits(position);
523
- this.applyListenerTransform(normalizedPosition, orientation);
524
- }
525
- /**
526
- * POSITION-ONLY MODE: Set listener HEAD position (no direction needed)
527
- * IMPORTANT: Uses CAMERA position (head) as listener, not body position!
528
- *
529
- * @param listenerPos Player body position (for reference, not used as listener)
530
- * @param cameraPos Camera/HEAD position - THIS is the actual listener position for audio
531
- * @param lookAtPos Look-at position (where camera is pointing) - stored but not used for panning
532
- * @param rot Rotation data (pitch, yaw, roll) - stored but not used for panning
533
- */
534
- setListenerFromLSD(listenerPos, cameraPos, lookAtPos, rot, localParticipantId) {
535
- // Store local participant ID for logging
536
- if (localParticipantId && !this.localParticipantId) {
537
- this.localParticipantId = localParticipantId;
538
- }
539
- // USE CAMERA POSITION AS LISTENER (head position, not body!)
540
- const normalizedListener = this.normalizePositionUnits(cameraPos);
541
- // SNAP: Reduce jitter by ignoring micro-movements (<15cm)
542
- // This prevents gain/pan fluctuation when listener is "stationary"
543
- const snappedListener = this.snapPosition(normalizedListener, 'listener');
544
- // Store listener position (used for X-axis panning and distance calculation)
545
- this.listenerPosition = snappedListener;
546
- // CRITICAL: Derive listener right-vector from yaw (rot.y in degrees)
547
- // This determines which ear hears the sound based on 360-degree rotation
548
- //
549
- // COORDINATE SYSTEM: X=right, Y=up, Z=forward (datum at 0,0,0)
550
- //
551
- // Rotation convention (right-hand rule around Y-axis):
552
- // rot.y = 0° → facing +Z (forward/north), right ear points to +X
553
- // rot.y = 90° → facing +X (right/east), right ear points to -Z
554
- // rot.y = 180° → facing -Z (backward/south), right ear points to -X
555
- // rot.y = 270° → facing -X (left/west), right ear points to +Z
556
- if (rot && typeof rot.y === "number") {
557
- const yawRad = (rot.y * Math.PI) / 180;
558
- // Right ear vector - CLOCKWISE rotation (standard game engine convention)
559
- // At yaw=0: facing +Z, right ear at +X → (1, 0)
560
- // At yaw=90: facing +X, right ear at -Z → (0, -1)
561
- // At yaw=180: facing -Z, right ear at -X → (-1, 0)
562
- // At yaw=270: facing -X, right ear at +Z → (0, 1)
563
- this.listenerRight = {
564
- x: Math.cos(yawRad),
565
- z: -Math.sin(yawRad), // NEGATIVE for clockwise rotation
566
- };
567
- }
568
- else {
569
- // Fallback: assume facing forward with right ear to +X
570
- this.listenerRight = { x: 1, z: 0 };
571
- }
572
- // Position-only mode: No forward vector calculations needed
573
- // Panning is calculated purely from X-axis position difference in updateSpatialAudio()
574
- // LEGACY CODE BELOW - Kept for reference but not executed
575
- if (false) {
576
- const normalizedCamera = this.normalizePositionUnits(cameraPos);
577
- const normalizedLookAt = this.normalizePositionUnits(lookAtPos);
578
- // Calculate forward vector (from camera to look-at point)
579
- const forwardX = normalizedLookAt.x - normalizedCamera.x;
580
- const forwardY = normalizedLookAt.y - normalizedCamera.y;
581
- const forwardZ = normalizedLookAt.z - normalizedCamera.z;
582
- // Normalize forward vector
583
- const forwardLen = Math.sqrt(forwardX * forwardX + forwardY * forwardY + forwardZ * forwardZ);
584
- if (forwardLen < 0.001) {
585
- return;
586
- }
587
- const fwdX = forwardX / forwardLen;
588
- const fwdY = forwardY / forwardLen;
589
- const fwdZ = forwardZ / forwardLen;
590
- // Calculate right vector (cross product of world up and forward)
591
- // Web Audio API uses Y-up coordinate system, Unreal uses Z-up
592
- // We need to transform: Unreal (X,Y,Z) -> WebAudio (X,Z,-Y)
593
- const worldUp = { x: 0, y: 1, z: 0 }; // Web Audio Y-up
594
- const rightX = worldUp.y * fwdZ - worldUp.z * fwdY;
595
- const rightY = worldUp.z * fwdX - worldUp.x * fwdZ;
596
- const rightZ = worldUp.x * fwdY - worldUp.y * fwdX;
597
- const rightLen = Math.sqrt(rightX * rightX + rightY * rightY + rightZ * rightZ);
598
- if (rightLen < 0.001) {
599
- // Forward is parallel to world up, use fallback
600
- this.applyListenerTransform(normalizedListener, {
601
- forwardX: fwdX,
602
- forwardY: fwdY,
603
- forwardZ: fwdZ,
604
- upX: 0,
605
- upY: 1,
606
- upZ: 0,
607
- });
608
- return;
609
- }
610
- const rX = rightX / rightLen;
611
- const rY = rightY / rightLen;
612
- const rZ = rightZ / rightLen;
613
- // Calculate true up vector (cross product of forward and right)
614
- const upX = fwdY * rZ - fwdZ * rY;
615
- const upY = fwdZ * rX - fwdX * rZ;
616
- const upZ = fwdX * rY - fwdY * rX;
617
- this.applyListenerTransform(normalizedListener, {
618
- forwardX: fwdX,
619
- forwardY: fwdY,
620
- forwardZ: fwdZ,
621
- upX,
622
- upY,
623
- upZ,
624
- });
625
- } // End of legacy code block
626
- }
627
- applyListenerTransform(normalizedPosition, orientation) {
628
- const { listener } = this.audioContext;
629
- if (!listener) {
630
- return;
631
- }
632
- this.listenerPosition = { ...normalizedPosition };
633
- this.listenerInitialized = true;
634
- this.listenerDirection = {
635
- forward: {
636
- x: orientation.forwardX,
637
- y: orientation.forwardY,
638
- z: orientation.forwardZ,
639
- },
640
- up: {
641
- x: orientation.upX,
642
- y: orientation.upY,
643
- z: orientation.upZ,
644
- },
645
- };
646
- if (listener.positionX) {
647
- listener.positionX.setValueAtTime(normalizedPosition.x, this.audioContext.currentTime);
648
- listener.positionY.setValueAtTime(normalizedPosition.y, this.audioContext.currentTime);
649
- listener.positionZ.setValueAtTime(normalizedPosition.z, this.audioContext.currentTime);
650
- }
651
- if (listener.forwardX) {
652
- listener.forwardX.setValueAtTime(orientation.forwardX, this.audioContext.currentTime);
653
- listener.forwardY.setValueAtTime(orientation.forwardY, this.audioContext.currentTime);
654
- listener.forwardZ.setValueAtTime(orientation.forwardZ, this.audioContext.currentTime);
655
- listener.upX.setValueAtTime(orientation.upX, this.audioContext.currentTime);
656
- listener.upY.setValueAtTime(orientation.upY, this.audioContext.currentTime);
657
- listener.upZ.setValueAtTime(orientation.upZ, this.audioContext.currentTime);
658
- }
659
- }
660
- removeParticipant(participantId) {
661
- // Stop monitoring
662
- if (this.monitoringIntervals.has(participantId)) {
663
- clearInterval(this.monitoringIntervals.get(participantId));
664
- this.monitoringIntervals.delete(participantId);
665
- }
666
- const nodes = this.participantNodes.get(participantId);
667
- if (nodes) {
668
- nodes.source.disconnect();
669
- nodes.panner.disconnect();
670
- nodes.proximityGain.disconnect();
671
- nodes.monoSplitter.disconnect();
672
- nodes.monoGainL.disconnect();
673
- nodes.monoGainR.disconnect();
674
- nodes.monoMerger.disconnect();
675
- nodes.stereoUpmixer.disconnect();
676
- nodes.stereoPanner.disconnect();
677
- nodes.analyser.disconnect();
678
- nodes.gain.disconnect();
679
- if (nodes.denoiseNode) {
680
- nodes.denoiseNode.disconnect();
681
- }
682
- // DON'T stop tracks - they're managed by MediaSoup
683
- // Just disconnect the audio nodes from the graph
684
- // nodes.stream.getTracks().forEach((track) => track.stop());
685
- this.participantNodes.delete(participantId);
686
- // Clean up smoothed pan value tracking
687
- this.smoothedPanValues.delete(participantId);
688
- // Clean up cached gain values
689
- this.cachedGainValues.delete(participantId);
690
- // Clean up cached speaker position
691
- this.cachedSpeakerPositions.delete(participantId);
692
- }
693
- }
694
- async resumeAudioContext() {
695
- if (this.audioContext.state === "suspended") {
696
- await this.audioContext.resume();
697
- }
698
- }
699
- getAudioContextState() {
700
- return this.audioContext.state;
701
- }
702
- getDistanceConfig() {
703
- return {
704
- refDistance: this.options.distance?.refDistance ?? 0.5, // Normal volume within 0.5m (very close)
705
- maxDistance: this.options.distance?.maxDistance ?? 15, // Cannot hear after 15 meters
706
- rolloffFactor: this.options.distance?.rolloffFactor ?? 2.5, // Aggressive falloff for realistic distance attenuation
707
- unit: this.options.distance?.unit ?? "auto",
708
- };
709
- }
710
- applySpatialBoostIfNeeded(position) {
711
- if (!this.listenerInitialized) {
712
- return position;
713
- }
714
- const boost = (this.options.distance?.rolloffFactor || 1) * 0.85;
715
- if (!isFinite(boost) || boost <= 1.01) {
716
- return position;
717
- }
718
- const listener = this.listenerPosition;
719
- return {
720
- x: listener.x + (position.x - listener.x) * boost,
721
- y: listener.y + (position.y - listener.y) * Math.min(boost, 1.2),
722
- z: listener.z + (position.z - listener.z) * boost,
723
- };
724
- }
725
- getDistanceBetween(a, b) {
726
- const dx = b.x - a.x;
727
- const dy = b.y - a.y;
728
- const dz = b.z - a.z;
729
- return Math.sqrt(dx * dx + dy * dy + dz * dz);
730
- }
731
- calculateDistanceGain(config, distance) {
732
- if (!this.listenerInitialized) {
733
- return 1;
734
- }
735
- if (distance <= config.refDistance) {
736
- return 1;
737
- }
738
- if (distance >= config.maxDistance) {
739
- return 0;
740
- }
741
- const normalized = (distance - config.refDistance) /
742
- Math.max(config.maxDistance - config.refDistance, 0.001);
743
- const shaped = Math.pow(Math.max(0, 1 - normalized), Math.max(1.2, config.rolloffFactor * 1.05));
744
- return Math.min(1, Math.max(0.01, shaped));
745
- }
746
- normalizePositionUnits(position) {
747
- const distanceConfig = this.getDistanceConfig();
748
- if (distanceConfig.unit === "meters") {
749
- return { ...position };
750
- }
751
- if (distanceConfig.unit === "centimeters") {
752
- return {
753
- x: position.x / 100,
754
- y: position.y / 100,
755
- z: position.z / 100,
756
- };
757
- }
758
- const maxAxis = Math.max(Math.abs(position.x), Math.abs(position.y), Math.abs(position.z));
759
- if (maxAxis > 50) {
760
- // Likely centimeters coming from server
761
- return {
762
- x: position.x / 100,
763
- y: position.y / 100,
764
- z: position.z / 100,
765
- };
766
- }
767
- return { ...position };
768
- }
769
- /**
770
- * Snap position to grid to prevent jitter from micro-movements
771
- * If the position hasn't changed significantly, return the cached position
772
- * This prevents gain/pan fluctuation when users are "stationary"
773
- *
774
- * @param position New incoming position
775
- * @param participantId Participant ID for caching (use 'listener' for listener)
776
- * @returns Snapped position (either new or cached)
777
- */
778
- snapPosition(position, participantId) {
779
- const isListener = participantId === 'listener';
780
- const cached = isListener
781
- ? this.cachedListenerPosition
782
- : this.cachedSpeakerPositions.get(participantId);
783
- // If no cached position, use this one as the baseline
784
- if (!cached || (cached.x === 0 && cached.y === 0 && cached.z === 0)) {
785
- if (isListener) {
786
- this.cachedListenerPosition = { ...position };
787
- }
788
- else {
789
- this.cachedSpeakerPositions.set(participantId, { ...position });
790
- }
791
- return position;
792
- }
793
- // Calculate how far the position has moved
794
- const dx = position.x - cached.x;
795
- const dy = position.y - cached.y;
796
- const dz = position.z - cached.z;
797
- const movedDistance = Math.sqrt(dx * dx + dy * dy + dz * dz);
798
- // If moved more than threshold, update cache and use new position
799
- if (movedDistance > this.positionSnapThreshold) {
800
- if (isListener) {
801
- this.cachedListenerPosition = { ...position };
802
- }
803
- else {
804
- this.cachedSpeakerPositions.set(participantId, { ...position });
805
- }
806
- return position;
807
- }
808
- // Position hasn't changed significantly - return cached position
809
- // This prevents jitter in gain/pan calculations
810
- return cached;
811
- }
812
- getVectorFromListener(targetPosition) {
813
- if (!this.listenerInitialized) {
814
- return { ...targetPosition };
815
- }
816
- return {
817
- x: targetPosition.x - this.listenerPosition.x,
818
- y: targetPosition.y - this.listenerPosition.y,
819
- z: targetPosition.z - this.listenerPosition.z,
820
- };
821
- }
822
- applyDirectionalSuppression(participantId, distance, vectorToSource) {
823
- const nodes = this.participantNodes.get(participantId);
824
- if (!nodes) {
825
- return;
826
- }
827
- const clarityScore = this.calculateClarityScore(distance, vectorToSource);
828
- const targetGain = 0.48 + clarityScore * 0.72; // 0.48 → 1.20
829
- // Only adjust gain based on angle, not frequency
830
- nodes.proximityGain.gain.setTargetAtTime(targetGain, this.audioContext.currentTime, 0.08);
831
- // Analyze voice and adjust highpass filter dynamically (85-300Hz)
832
- this.adjustVoiceAdaptiveFilter(participantId);
833
- }
834
- /**
835
- * Dynamically adjust highpass filter based on voice characteristics
836
- * Analyzes audio spectrum and sets filter between 85-300Hz
837
- */
838
- adjustVoiceAdaptiveFilter(participantId) {
839
- const nodes = this.participantNodes.get(participantId);
840
- if (!nodes?.analyser) {
841
- return;
842
- }
843
- const bufferLength = nodes.analyser.frequencyBinCount;
844
- const dataArray = new Uint8Array(bufferLength);
845
- nodes.analyser.getByteFrequencyData(dataArray);
846
- // Calculate spectral centroid in low frequency range (0-500Hz)
847
- const sampleRate = this.audioContext.sampleRate;
848
- const nyquist = sampleRate / 2;
849
- const binWidth = nyquist / bufferLength;
850
- let weightedSum = 0;
851
- let totalEnergy = 0;
852
- const maxBin = Math.floor(500 / binWidth); // Only analyze up to 500Hz
853
- for (let i = 0; i < Math.min(maxBin, bufferLength); i++) {
854
- const frequency = i * binWidth;
855
- const magnitude = dataArray[i] / 255.0;
856
- weightedSum += frequency * magnitude;
857
- totalEnergy += magnitude;
858
- }
859
- if (totalEnergy > 0.01) {
860
- const centroid = weightedSum / totalEnergy;
861
- // Map centroid to highpass frequency (85-300Hz)
862
- // Lower centroid = deeper voice = use lower highpass (preserve bass)
863
- // Higher centroid = higher voice = use higher highpass (remove mud)
864
- const targetFreq = Math.max(85, Math.min(300, 85 + (centroid - 100) * 0.5));
865
- nodes.highpassFilter.frequency.setTargetAtTime(targetFreq, this.audioContext.currentTime, 0.15 // Smooth transition
866
- );
867
- }
868
- }
869
- calculateClarityScore(distance, vectorToSource) {
870
- const proximityWeight = this.calculateProximityWeight(distance);
871
- const focusWeight = this.calculateDirectionFocus(vectorToSource);
872
- return this.clamp(0.2 + proximityWeight * 0.6 + focusWeight * 0.2, 0, 1);
873
- }
874
- calculateProximityWeight(distance) {
875
- const closeRange = 0.85;
876
- const fadeRange = 18;
877
- if (distance <= closeRange) {
878
- return 1;
879
- }
880
- if (distance >= fadeRange) {
881
- return 0;
882
- }
883
- return 1 - (distance - closeRange) / (fadeRange - closeRange);
884
- }
885
- calculateDirectionFocus(vectorToSource) {
886
- if (!this.listenerInitialized) {
887
- return 0.5;
888
- }
889
- const forward = this.normalizeVector(this.listenerDirection.forward);
890
- const source = this.normalizeVector(vectorToSource, { x: 0, y: 0, z: 1 });
891
- const dot = forward.x * source.x + forward.y * source.y + forward.z * source.z;
892
- return this.clamp((dot + 1) / 2, 0, 1);
893
- }
894
- normalizeVector(vector, fallback = { x: 0, y: 0, z: 1 }) {
895
- const length = Math.hypot(vector.x, vector.y, vector.z);
896
- if (length < 1e-4) {
897
- return { ...fallback };
898
- }
899
- return {
900
- x: vector.x / length,
901
- y: vector.y / length,
902
- z: vector.z / length,
903
- };
904
- }
905
- clamp(value, min, max) {
906
- return Math.min(max, Math.max(min, value));
907
- }
908
- isDenoiserEnabled() {
909
- // DISABLED BY DEFAULT: The AudioWorklet denoiser causes "chit-chit/pit" noise artifacts
910
- // Set options.denoiser.enabled = true explicitly to enable
911
- return this.options.denoiser?.enabled === true;
912
- }
913
- /**
914
- * Compute estimated head/mouth position from body position
915
- * Body position is typically at feet/base - add head height offset
916
- * Average human head height: 1.6m (adjustable based on avatar)
917
- */
918
- computeHeadPosition(bodyPosition) {
919
- return {
920
- x: bodyPosition.x,
921
- y: bodyPosition.y + 1.6, // Add typical head height in meters
922
- z: bodyPosition.z,
923
- };
924
- }
925
- calculatePanning(angle) {
926
- const rad = (angle * Math.PI) / 180;
927
- const panValue = Math.sin(rad);
928
- return this.panningFromPanValue(panValue);
929
- }
930
- panningFromPanValue(pan, dxLocal) {
931
- const clamped = this.clamp(pan, -1, 1);
932
- // Map pan to asymmetric gains while keeping center at 100/100
933
- const left = 100 * (1 - Math.max(0, clamped));
934
- const right = 100 * (1 + Math.min(0, clamped));
935
- return {
936
- left: this.clamp(left, 0, 100),
937
- right: this.clamp(right, 0, 100),
938
- };
939
- }
940
- /**
941
- * SMOOTH PAN VALUE to prevent random left/right jumping
942
- * Uses exponential moving average (EMA) to smooth out jittery position data
943
- * With SNAP behavior for large direction changes (e.g., turning around)
944
- * With DEAD-ZONE around center to prevent face-to-face oscillation
945
- * @param participantId The participant to smooth pan for
946
- * @param newPanValue The new calculated pan value (-1 to +1)
947
- * @returns Smoothed pan value
948
- */
949
- smoothPanValue(participantId, newPanValue) {
950
- const previousPan = this.smoothedPanValues.get(participantId);
951
- // DEAD-ZONE: If new pan is very close to center, snap to exactly 0
952
- // This prevents left/right oscillation when standing face-to-face
953
- let targetPan = newPanValue;
954
- if (Math.abs(newPanValue) < this.panCenterDeadZone) {
955
- targetPan = 0;
956
- }
957
- // If no previous value, initialize with current value
958
- if (previousPan === undefined) {
959
- this.smoothedPanValues.set(participantId, targetPan);
960
- return targetPan;
961
- }
962
- // Calculate the change from previous pan
963
- const panChange = Math.abs(targetPan - previousPan);
964
- // If change is below threshold, keep previous value (prevents micro-jitter)
965
- if (panChange < this.panChangeThreshold) {
966
- return previousPan;
967
- }
968
- // HEAVY SMOOTHING: Always use high smoothing factor to prevent audio disruption
969
- // Even for large changes, we want smooth transitions to avoid audio dropouts
970
- // The StereoPannerNode's setTargetAtTime provides additional smoothing
971
- let effectiveSmoothingFactor = 0.7; // High smoothing = slow response but stable audio
972
- const signFlipped = (previousPan > 0 && targetPan < 0) || (previousPan < 0 && targetPan > 0);
973
- const bothNearCenter = Math.abs(previousPan) < 0.2 && Math.abs(targetPan) < 0.2;
974
- // Only reduce smoothing for center transitions (natural movement)
975
- if (bothNearCenter) {
976
- // Near center - use moderate smoothing
977
- effectiveSmoothingFactor = 0.5;
978
- }
979
- else if (signFlipped && panChange > 1.0) {
980
- // FULL FLIP (e.g., +0.67 to -0.82) - this is likely jitter, use HEAVY smoothing
981
- // to reject the outlier and keep audio stable
982
- effectiveSmoothingFactor = 0.85; // Very heavy smoothing for suspected jitter
983
- }
984
- // Apply exponential moving average smoothing
985
- // smoothedValue = previousValue * factor + newValue * (1 - factor)
986
- // Higher factor = more smoothing (slower response)
987
- const smoothedPan = previousPan * effectiveSmoothingFactor + targetPan * (1 - effectiveSmoothingFactor);
988
- // Apply dead-zone to final smoothed value as well
989
- const finalPan = Math.abs(smoothedPan) < this.panCenterDeadZone ? 0 : smoothedPan;
990
- // Store for next update
991
- this.smoothedPanValues.set(participantId, finalPan);
992
- return finalPan;
993
- }
994
- computeAzimuthFromPositions(listener, source) {
995
- const vx = source.x - listener.x;
996
- const vz = source.z - listener.z;
997
- const angleRad = Math.atan2(vx, vz); // 0° = forward (Z+), 90° = right (X+)
998
- const deg = (angleRad * 180) / Math.PI;
999
- const normalized = (deg + 360) % 360;
1000
- return normalized;
1001
- }
1002
/**
 * OLD METHOD - Kept for reference but not used in position-only mode.
 * Calculate the signed angle between the listener's forward direction
 * and the sound source, in degrees (0-360).
 * Convention: 0° = front, 90° = right, 180° = back, 270° = left.
 *
 * @param listenerPos Listener position {x, y, z}
 * @param sourcePos Source position {x, y, z}
 * @param listenerForward Listener forward vector (may be non-normalized)
 * @returns Angle in degrees within [0, 360); 0 when source coincides with listener
 */
calculateAngle(listenerPos, sourcePos, listenerForward) {
    // IMPORTANT: In Odyssey runtime coords, Y behaves like height.
    // Use X/Z as the horizontal plane for 360° panning.
    const vx = sourcePos.x - listenerPos.x;
    const vz = sourcePos.z - listenerPos.z;
    // Project listener forward onto X/Z plane (prevents flips when looking up/down).
    let fx = listenerForward.x;
    let fz = listenerForward.z;
    const fLen = Math.hypot(fx, fz);
    if (fLen < 1e-4) {
        // Fallback: if forward is nearly vertical/invalid, assume world-forward (+Z).
        fx = 0;
        fz = 1;
    }
    else {
        // Normalize the projected forward vector.
        fx /= fLen;
        fz /= fLen;
    }
    const vLen = Math.hypot(vx, vz);
    if (vLen < 1e-4) {
        // Source is (horizontally) on top of the listener: treat as straight ahead.
        return 0;
    }
    const nx = vx / vLen;
    const nz = vz / vLen;
    // Signed angle from forward->source.
    // cross(forward, source) sign determines left/right.
    // We want: 0°=front, 90°=right, 180°=back, 270°=left.
    const dot = fx * nx + fz * nz;
    const cross = fx * nz - fz * nx;
    let radians = Math.atan2(cross, dot);
    // atan2 gives [-π, π]; map to [0, 2π)
    if (radians < 0) {
        radians += Math.PI * 2;
    }
    const degrees = (radians * 180) / Math.PI;
    // Convert to our convention where +90 is to the right.
    // The cross sign above produces +90 for left in a standard XZ system,
    // so we invert.
    const angle = (360 - degrees) % 360;
    return angle;
}
1048
/**
 * Calculate gain (percent) from distance — AGGRESSIVE FALLOFF.
 *
 * Uses inverse-square law (realistic sound propagation in air) with a floor:
 *   attenuation = 1 / (1 + k * (d - minDistance)²), scaled into [minGain, 100].
 *
 * NOTE(review): earlier doc comments here claimed a 20% / 5% minimum and a
 * different distance table; with the constants actually in the code
 * (minGain = 15, falloffRate = 0.12) the approximate mapping is:
 *   ≤1m  → 100% (full volume, very close)
 *   2m   → ~91%
 *   3m   → ~72%
 *   5m   → ~44%
 *   8m   → ~27%
 *   15m+ → ~19% approaching the 15% floor (still audible)
 *
 * @param distance Distance between listener and source, in meters
 * @returns Integer gain percentage in [15, 100]
 */
calculateLogarithmicGain(distance) {
    const minDistance = 1.0; // Full volume at 1m or closer
    const minGain = 15; // Minimum 15% at far distances (still audible)
    const falloffRate = 0.12; // Controls how fast volume drops (gentler)
    // Full volume within minimum distance
    if (distance <= minDistance)
        return 100;
    // Inverse square falloff: gain = 1 / (1 + k * d²)
    // This models realistic sound propagation in air
    const effectiveDistance = distance - minDistance;
    const attenuation = 1 / (1 + falloffRate * effectiveDistance * effectiveDistance);
    // Scale to percentage range: minGain to 100
    const gain = minGain + attenuation * (100 - minGain);
    return Math.round(gain);
}
1088
- // NOTE: smoothGainValue removed - Web Audio's setTargetAtTime provides sufficient smoothing
1089
- // The previous rate-limiting approach was causing gain to get stuck at low values
1090
- /**
1091
- * Apply stereo panning to participant audio using StereoPannerNode
1092
- * This provides STABLE left-right panning without jitter
1093
- */
1094
- applyStereoPanning(participantId, panning) {
1095
- const nodes = this.participantNodes.get(participantId);
1096
- if (!nodes?.stereoPanner)
1097
- return;
1098
- // Convert left/right percentages to pan value (-1 to +1)
1099
- // If left=100, right=0 → pan = -1 (full left)
1100
- // If left=0, right=100 → pan = +1 (full right)
1101
- // If left=100, right=100 → pan = 0 (center)
1102
- const leftRatio = panning.left / 100;
1103
- const rightRatio = panning.right / 100;
1104
- // Calculate pan position
1105
- let panValue = 0;
1106
- if (leftRatio + rightRatio > 0) {
1107
- panValue = (rightRatio - leftRatio);
1108
- }
1109
- // NO RATE LIMITING - setTargetAtTime provides smooth transitions
1110
- // Rate limiting was causing sluggish panning when turning quickly
1111
- // The smoothPanValue() function already handles jitter reduction
1112
- // Apply pan with smooth transition
1113
- const currentTime = this.audioContext.currentTime;
1114
- // Use shorter time constant (0.05 = ~150ms to settle) for responsive panning
1115
- // This is smooth enough to prevent clicks but fast enough to track movement
1116
- nodes.stereoPanner.pan.setTargetAtTime(panValue, currentTime, 0.05);
1117
- }
1118
- async ensureDenoiseWorklet() {
1119
- if (!this.isDenoiserEnabled()) {
1120
- return;
1121
- }
1122
- if (!("audioWorklet" in this.audioContext)) {
1123
- this.options.denoiser = {
1124
- ...(this.options.denoiser || {}),
1125
- enabled: false,
1126
- };
1127
- return;
1128
- }
1129
- if (this.denoiseWorkletReady) {
1130
- return this.denoiseWorkletReady;
1131
- }
1132
- const processorSource = `class OdysseyDenoiseProcessor extends AudioWorkletProcessor {
1133
- constructor(options) {
1134
- super();
1135
- const cfg = (options && options.processorOptions) || {};
1136
- this.enabled = cfg.enabled !== false;
1137
- // TUNED FOR MAXIMUM VOICE CLARITY: Extremely gentle - NEVER cut speech
1138
- this.threshold = this._sanitize(cfg.threshold, 0.0005, 0.05, 0.002); // Very low threshold
1139
- this.noiseFloor = this._sanitize(cfg.noiseFloor, 0.0001, 0.02, 0.0005); // Very low noise floor
1140
- this.attack = this._sanitize(cfg.attack, 0.01, 0.9, 0.15); // Faster attack to catch speech onset
1141
- this.release = this._sanitize(cfg.release, 0.01, 0.95, 0.35); // Slower release - CRITICAL for word endings
1142
- this.holdSamples = Math.max(
1143
- 8,
1144
- Math.round(
1145
- sampleRate * this._sanitize(cfg.holdMs, 10, 600, 200) / 1000 // LONGER hold (200ms) - prevents mid-word cuts
1146
- )
1147
- );
1148
- // maxReduction: VERY GENTLE - only reduce obvious noise, never speech
1149
- this.maxReduction = this._sanitize(cfg.maxReduction, 0.05, 0.5, 0.15); // CRITICAL: Very low = preserve all speech
1150
- this.hissCut = this._sanitize(cfg.hissCut, 0, 1, 0.2); // Less aggressive hiss cut
1151
- this.expansionRatio = this._sanitize(cfg.expansionRatio, 1.05, 3, 1.1); // Very gentle expansion
1152
- this.learnRate = this._sanitize(cfg.learnRate, 0.001, 0.2, 0.04); // Slower learning
1153
- this.voiceBoost = this._sanitize(cfg.voiceBoost, 0, 1, 0.3);
1154
- this.voiceSensitivity = this._sanitize(cfg.voiceSensitivity, 0.02, 0.9, 0.15); // More sensitive to voice
1155
- this.voiceEnhancement = cfg.voiceEnhancement === true;
1156
- this.speechBoost = this._sanitize(cfg.speechBoost, 0, 1.5, 0.2);
1157
- this.highBandGate = this._sanitize(cfg.highBandGate, 0, 1, 0.25); // Less aggressive high band gate
1158
- this.highBandAttack = this._sanitize(cfg.highBandAttack, 0.01, 1, 0.1);
1159
- this.highBandRelease = this._sanitize(cfg.highBandRelease, 0.01, 1, 0.05);
1160
- this.silenceFloor = this._sanitize(cfg.silenceFloor, 0.0001, 0.01, 0.0004); // Much lower
1161
- this.gateGraceSamples = Math.round(sampleRate * 0.4); // Longer grace period (400ms)
1162
- this.postSpeechHoldSamples = Math.round(sampleRate * 0.35); // Longer post-speech hold (350ms)
1163
- this.silenceHoldSamples = Math.max(
1164
- 8,
1165
- Math.round(
1166
- sampleRate * this._sanitize(cfg.silenceHoldMs, 80, 2000, 300) / 1000 // Much longer silence hold
1167
- )
1168
- );
1169
- this.silenceReleaseSamples = Math.max(
1170
- 8,
1171
- Math.round(
1172
- sampleRate * this._sanitize(cfg.silenceReleaseMs, 50, 1000, 150) / 1000 // Longer release
1173
- )
1174
- );
1175
- this.historySize = this.voiceEnhancement ? 512 : 0;
1176
- this.channelState = [];
1177
- this.hfAlpha = Math.exp(-2 * Math.PI * 3200 / sampleRate);
1178
- this.feedbackDetectionWindow = Math.round(sampleRate * 0.8); // Longer feedback window
1179
- this.feedbackSamples = 0;
1180
- this.feedbackActive = false;
1181
- // NEW: Minimum gain floor to NEVER completely mute
1182
- // RAISED from 0.25 to 0.5 to preserve ALL speech
1183
- this.minGainFloor = 0.5; // Always let at least 50% through - preserves soft consonants
1184
- }
1185
-
1186
- _sanitize(value, min, max, fallback) {
1187
- if (typeof value !== 'number' || !isFinite(value)) {
1188
- return fallback;
1189
- }
1190
- return Math.min(max, Math.max(min, value));
1191
- }
1192
-
1193
- _ensureState(index) {
1194
- if (!this.channelState[index]) {
1195
- this.channelState[index] = {
1196
- envelope: this.noiseFloor,
1197
- noise: this.noiseFloor,
1198
- gain: 1,
1199
- quietSamples: 0,
1200
- lpState: 0,
1201
- history: this.voiceEnhancement ? new Float32Array(this.historySize) : null,
1202
- historyIndex: 0,
1203
- historyFilled: 0,
1204
- tempBuffer: this.voiceEnhancement ? new Float32Array(this.historySize) : null,
1205
- voiceConfidence: 0,
1206
- silenceSamples: 0,
1207
- silenceReleaseCounter: 0,
1208
- isSilenced: false,
1209
- muteGain: 1,
1210
- graceSamplesRemaining: this.gateGraceSamples,
1211
- postSpeechHold: 0,
1212
- highBandEnv: this.silenceFloor,
1213
- broadbandEnv: this.silenceFloor,
1214
- feedbackCounter: 0,
1215
- feedbackSuppress: 1.0,
1216
- avgLevel: 0,
1217
- levelVariance: 0,
1218
- };
1219
- }
1220
- return this.channelState[index];
1221
- }
1222
-
1223
- _pushHistory(state, sample) {
1224
- if (!this.voiceEnhancement || !state.history) {
1225
- return;
1226
- }
1227
- state.history[state.historyIndex] = sample;
1228
- state.historyIndex = (state.historyIndex + 1) % state.history.length;
1229
- if (state.historyFilled < state.history.length) {
1230
- state.historyFilled++;
1231
- }
1232
- }
1233
-
1234
- _updateVoiceConfidence(state) {
1235
- if (!this.voiceEnhancement || !state.history || !state.tempBuffer) {
1236
- state.voiceConfidence += (0 - state.voiceConfidence) * 0.2;
1237
- return state.voiceConfidence;
1238
- }
1239
-
1240
- if (state.historyFilled < state.history.length * 0.6) {
1241
- state.voiceConfidence += (0 - state.voiceConfidence) * 0.15;
1242
- return state.voiceConfidence;
1243
- }
1244
-
1245
- const len = state.history.length;
1246
- let writeIndex = state.historyIndex;
1247
- for (let i = 0; i < len; i++) {
1248
- state.tempBuffer[i] = state.history[writeIndex];
1249
- writeIndex = (writeIndex + 1) % len;
1250
- }
1251
-
1252
- const minLag = 30;
1253
- const maxLag = 240;
1254
- let best = 0;
1255
- for (let lag = minLag; lag <= maxLag; lag += 2) {
1256
- let sum = 0;
1257
- let energyA = 0;
1258
- let energyB = 0;
1259
- for (let i = lag; i < len; i++) {
1260
- const a = state.tempBuffer[i];
1261
- const b = state.tempBuffer[i - lag];
1262
- sum += a * b;
1263
- energyA += a * a;
1264
- energyB += b * b;
1265
- }
1266
- const denom = Math.sqrt(energyA * energyB) + 1e-8;
1267
- const corr = Math.abs(sum) / denom;
1268
- if (corr > best) {
1269
- best = corr;
1270
- }
1271
- }
1272
-
1273
- const normalized = Math.max(
1274
- 0,
1275
- Math.min(1, (best - this.voiceSensitivity) / (1 - this.voiceSensitivity))
1276
- );
1277
- state.voiceConfidence += (normalized - state.voiceConfidence) * 0.2;
1278
- return state.voiceConfidence;
1279
- }
1280
-
1281
- process(inputs, outputs) {
1282
- const input = inputs[0];
1283
- const output = outputs[0];
1284
- if (!input || !output) {
1285
- return true;
1286
- }
1287
-
1288
- for (let channel = 0; channel < output.length; channel++) {
1289
- const inChannel = input[channel];
1290
- const outChannel = output[channel];
1291
- if (!inChannel || !outChannel) {
1292
- continue;
1293
- }
1294
-
1295
- if (!this.enabled) {
1296
- for (let i = 0; i < inChannel.length; i++) {
1297
- outChannel[i] = inChannel[i];
1298
- }
1299
- continue;
1300
- }
1301
-
1302
- const state = this._ensureState(channel);
1303
- const speechPresence = this.voiceEnhancement
1304
- ? this.voiceBoost * state.voiceConfidence
1305
- : 0;
1306
-
1307
- for (let i = 0; i < inChannel.length; i++) {
1308
- const sample = inChannel[i];
1309
- this._pushHistory(state, sample);
1310
- const magnitude = Math.abs(sample);
1311
-
1312
- state.envelope += (magnitude - state.envelope) * this.attack;
1313
-
1314
- if (speechPresence > 0.12 || state.envelope > this.threshold * 1.1) {
1315
- state.graceSamplesRemaining = this.gateGraceSamples;
1316
- state.postSpeechHold = this.postSpeechHoldSamples;
1317
- } else if (state.postSpeechHold > 0) {
1318
- state.postSpeechHold--;
1319
- }
1320
-
1321
- if (state.envelope < this.threshold) {
1322
- state.noise += (state.envelope - state.noise) * this.learnRate;
1323
- state.quietSamples++;
1324
- } else {
1325
- state.quietSamples = 0;
1326
- }
1327
-
1328
- if (state.graceSamplesRemaining > 0 || state.postSpeechHold > 0) {
1329
- state.graceSamplesRemaining--;
1330
- state.isSilenced = false;
1331
- state.silenceSamples = 0;
1332
- state.silenceReleaseCounter = 0;
1333
- } else {
1334
- const belowFloor = state.envelope < this.silenceFloor;
1335
- if (belowFloor && speechPresence < 0.15) {
1336
- state.silenceSamples++;
1337
- } else {
1338
- state.silenceSamples = Math.max(0, state.silenceSamples - 3);
1339
- }
1340
-
1341
- if (!state.isSilenced && state.silenceSamples > this.silenceHoldSamples) {
1342
- state.isSilenced = true;
1343
- state.silenceReleaseCounter = 0;
1344
- }
1345
-
1346
- if (state.isSilenced) {
1347
- const wakeFromEnergy = state.envelope > this.silenceFloor * 1.2;
1348
- const wakeFromVoice = speechPresence > 0.15;
1349
- if (wakeFromEnergy || wakeFromVoice) {
1350
- state.isSilenced = false;
1351
- state.silenceSamples = 0;
1352
- state.silenceReleaseCounter = 0;
1353
- state.postSpeechHold = this.postSpeechHoldSamples;
1354
- state.graceSamplesRemaining = this.gateGraceSamples;
1355
- } else {
1356
- state.silenceReleaseCounter++;
1357
- if (state.silenceReleaseCounter > this.silenceReleaseSamples) {
1358
- state.isSilenced = false;
1359
- state.silenceSamples = 0;
1360
- state.silenceReleaseCounter = 0;
1361
- }
1362
- }
1363
- } else {
1364
- state.silenceReleaseCounter = 0;
1365
- }
1366
- }
1367
-
1368
- const ratio = state.noise / Math.max(state.envelope, 1e-6);
1369
- let gainTarget = 1 - Math.min(0.75, Math.pow(ratio, this.expansionRatio)); // Less aggressive
1370
- gainTarget = Math.max(this.minGainFloor, Math.min(1, gainTarget)); // NEVER below minGainFloor
1371
-
1372
- if (state.quietSamples > this.holdSamples) {
1373
- // Very gentle reduction during quiet - preserve soft speech
1374
- gainTarget = Math.max(this.minGainFloor, gainTarget * (1 - this.maxReduction * 0.2));
1375
- }
1376
-
1377
- const isLowLevelNoise = state.envelope > this.silenceFloor * 2.0 &&
1378
- state.envelope < this.threshold * 0.6;
1379
- const noVoicePresent = speechPresence < 0.08; // More strict voice check
1380
-
1381
- if (isLowLevelNoise && noVoicePresent) {
1382
- state.feedbackCounter++;
1383
- state.avgLevel = state.avgLevel * 0.98 + state.envelope * 0.02; // Slower averaging
1384
- const diff = Math.abs(state.envelope - state.avgLevel);
1385
- state.levelVariance = state.levelVariance * 0.98 + diff * 0.02;
1386
-
1387
- // Only suppress if VERY consistent (feedback is very steady)
1388
- if (state.feedbackCounter > this.feedbackDetectionWindow &&
1389
- state.levelVariance < state.avgLevel * 0.15) {
1390
- state.feedbackSuppress = Math.max(0.3, state.feedbackSuppress - 0.01); // Less aggressive
1391
- }
1392
- } else {
1393
- state.feedbackCounter = Math.max(0, state.feedbackCounter - 5);
1394
- state.feedbackSuppress = Math.min(1.0, state.feedbackSuppress + 0.05);
1395
- }
1396
-
1397
- const reductionFloor = this.voiceEnhancement
1398
- ? 1 - this.maxReduction * (1 - Math.min(1, speechPresence * 0.9))
1399
- : 1 - this.maxReduction;
1400
- // CRITICAL: Never go below minGainFloor
1401
- if (gainTarget < Math.max(reductionFloor, this.minGainFloor)) {
1402
- gainTarget = Math.max(reductionFloor, this.minGainFloor);
1403
- }
1404
-
1405
- gainTarget *= state.feedbackSuppress;
1406
- // Ensure gainTarget never goes below floor after feedback suppression
1407
- gainTarget = Math.max(gainTarget, this.minGainFloor);
1408
-
1409
- const dynamicRelease = this.release *
1410
- (this.voiceEnhancement && speechPresence > 0.1 ? 0.7 : 1);
1411
- state.gain += (gainTarget - state.gain) * dynamicRelease;
1412
- // Final gain floor check
1413
- state.gain = Math.max(state.gain, this.minGainFloor * 0.8);
1414
- let processed = sample * state.gain;
1415
-
1416
- state.lpState = this.hfAlpha * state.lpState + (1 - this.hfAlpha) * processed;
1417
- const lowComponent = state.lpState;
1418
- const highComponent = processed - lowComponent;
1419
-
1420
- state.broadbandEnv += (Math.abs(processed) - state.broadbandEnv) * 0.08; // Slower tracking
1421
- const highEnvDelta = Math.abs(highComponent) - state.highBandEnv;
1422
- const highEnvCoef = highEnvDelta > 0 ? this.highBandAttack : this.highBandRelease;
1423
- state.highBandEnv += highEnvDelta * highEnvCoef;
1424
-
1425
- const hissRatio = Math.min(
1426
- 1,
1427
- Math.abs(highComponent) / (Math.abs(lowComponent) + 1e-5)
1428
- );
1429
- // Less aggressive hiss removal - preserve consonants (S, T, F, etc.)
1430
- const hissGain = 1 - hissRatio * (this.hissCut * (1 - 0.6 * speechPresence));
1431
-
1432
- const highEnvRatio = state.highBandEnv / (state.broadbandEnv + 1e-5);
1433
- const gateAmount = this.highBandGate * Math.max(0, highEnvRatio - speechPresence * 0.6);
1434
- const gatedHigh = highComponent * hissGain * (1 - gateAmount * 0.7); // Less gate
1435
-
1436
- const speechLift = 1 + this.speechBoost * speechPresence;
1437
- processed = lowComponent * speechLift + gatedHigh;
1438
-
1439
- // CRITICAL FIX: Much higher minimum muteGain - never mute below 50%
1440
- // This ensures soft speech and consonants are ALWAYS audible
1441
- const muteTarget = state.isSilenced ? 0.5 : 1;
1442
- const smoothing = state.isSilenced ? 0.03 : 0.25; // Even slower mute transition
1443
- state.muteGain += (muteTarget - state.muteGain) * smoothing;
1444
- processed *= state.muteGain;
1445
-
1446
- outChannel[i] = processed;
1447
- }
1448
-
1449
- this._updateVoiceConfidence(state);
1450
- }
1451
-
1452
- return true;
1453
- }
1454
- }
1455
-
1456
- registerProcessor('odyssey-denoise', OdysseyDenoiseProcessor);
1457
- `;
1458
- const blob = new Blob([processorSource], {
1459
- type: "application/javascript",
1460
- });
1461
- this.denoiseWorkletUrl = URL.createObjectURL(blob);
1462
- this.denoiseWorkletReady = this.audioContext.audioWorklet
1463
- .addModule(this.denoiseWorkletUrl)
1464
- .catch((error) => {
1465
- this.options.denoiser = {
1466
- ...(this.options.denoiser || {}),
1467
- enabled: false,
1468
- };
1469
- throw error;
1470
- });
1471
- return this.denoiseWorkletReady;
1472
- }
1473
- resolveOptions(options) {
1474
- const distanceDefaults = {
1475
- refDistance: 1.2,
1476
- maxDistance: 15, // Cannot hear beyond 15 meters
1477
- rolloffFactor: 1.35,
1478
- unit: "auto",
1479
- };
1480
- const denoiserDefaults = {
1481
- enabled: false, // DISABLED BY DEFAULT - test to see if denoiser causes audio cutting
1482
- // TUNED FOR MAXIMUM SPEECH CLARITY: Extremely gentle - only remove obvious noise
1483
- // These settings GUARANTEE no speech cutting - minGainFloor=0.5 means at least 50% always passes
1484
- threshold: 0.002, // Very low threshold - catch even soft speech
1485
- noiseFloor: 0.0005, // Very low noise floor
1486
- release: 0.4, // Slower release - preserve word endings completely
1487
- attack: 0.1, // Faster attack - catch speech onset instantly
1488
- holdMs: 200, // Long hold (200ms) - prevent mid-word cuts
1489
- maxReduction: 0.15, // VERY GENTLE - max 15% reduction, never cut speech
1490
- hissCut: 0.15, // Less hiss cut - preserve S, T, F consonants
1491
- expansionRatio: 1.05, // Almost no expansion
1492
- learnRate: 0.03, // Slower learning - don't adapt to speech as noise
1493
- voiceBoost: 0.35,
1494
- voiceSensitivity: 0.1, // Very sensitive to voice detection
1495
- voiceEnhancement: false,
1496
- silenceFloor: 0.0002, // Very low silence floor
1497
- silenceHoldMs: 400, // Much longer before silence gate (400ms)
1498
- silenceReleaseMs: 200, // Slower release from silence
1499
- speechBoost: 0.25,
1500
- highBandGate: 0.15, // Much less aggressive high band gate
1501
- highBandAttack: 0.08,
1502
- highBandRelease: 0.03,
1503
- };
1504
- return {
1505
- distance: {
1506
- refDistance: options?.distance?.refDistance ?? distanceDefaults.refDistance,
1507
- maxDistance: options?.distance?.maxDistance ?? distanceDefaults.maxDistance,
1508
- rolloffFactor: options?.distance?.rolloffFactor ?? distanceDefaults.rolloffFactor,
1509
- unit: options?.distance?.unit ?? distanceDefaults.unit,
1510
- },
1511
- denoiser: {
1512
- enabled: options?.denoiser?.enabled ?? denoiserDefaults.enabled,
1513
- threshold: options?.denoiser?.threshold ?? denoiserDefaults.threshold,
1514
- noiseFloor: options?.denoiser?.noiseFloor ?? denoiserDefaults.noiseFloor,
1515
- release: options?.denoiser?.release ?? denoiserDefaults.release,
1516
- attack: options?.denoiser?.attack ?? denoiserDefaults.attack,
1517
- holdMs: options?.denoiser?.holdMs ?? denoiserDefaults.holdMs,
1518
- maxReduction: options?.denoiser?.maxReduction ?? denoiserDefaults.maxReduction,
1519
- hissCut: options?.denoiser?.hissCut ?? denoiserDefaults.hissCut,
1520
- expansionRatio: options?.denoiser?.expansionRatio ?? denoiserDefaults.expansionRatio,
1521
- learnRate: options?.denoiser?.learnRate ?? denoiserDefaults.learnRate,
1522
- voiceBoost: options?.denoiser?.voiceBoost ?? denoiserDefaults.voiceBoost,
1523
- voiceSensitivity: options?.denoiser?.voiceSensitivity ?? denoiserDefaults.voiceSensitivity,
1524
- voiceEnhancement: options?.denoiser?.voiceEnhancement ?? denoiserDefaults.voiceEnhancement,
1525
- silenceFloor: options?.denoiser?.silenceFloor ?? denoiserDefaults.silenceFloor,
1526
- silenceHoldMs: options?.denoiser?.silenceHoldMs ?? denoiserDefaults.silenceHoldMs,
1527
- silenceReleaseMs: options?.denoiser?.silenceReleaseMs ?? denoiserDefaults.silenceReleaseMs,
1528
- speechBoost: options?.denoiser?.speechBoost ?? denoiserDefaults.speechBoost,
1529
- highBandGate: options?.denoiser?.highBandGate ?? denoiserDefaults.highBandGate,
1530
- highBandAttack: options?.denoiser?.highBandAttack ?? denoiserDefaults.highBandAttack,
1531
- highBandRelease: options?.denoiser?.highBandRelease ?? denoiserDefaults.highBandRelease,
1532
- },
1533
- };
1534
- }
1535
- }
1536
// CommonJS named export: consumers access this as
// `require("...").SpatialAudioManager`.
exports.SpatialAudioManager = SpatialAudioManager;
// Static build stamp on the class; identifies which SDK build is running.
SpatialAudioManager.BUILD_STAMP = "2026-01-07";