npm - @newgameplusinc/odyssey-audio-video-sdk-dev - Versions diffs - 1.0.352 → 1.0.353 - Mend

@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.352 → 1.0.353

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/dist/audio/MLNoiseSuppressor.js +16 -14
package/package.json +1 -1

package/dist/audio/MLNoiseSuppressor.js CHANGED Viewed

@@ -1122,22 +1122,24 @@ class MLNoiseSuppressor {
                 gains[k] = filtTotal[k] > 1e-8 ? gains[k] / filtTotal[k] : 1.0;
                 gains[k] = Math.max(0.0, Math.min(1.0, gains[k]));
             }
-            // ── Step 5: Wiener-style gain compression ─────────────────────────────
-            // Problem: fan noise is in the same freq bands as speech (100-500 Hz).
-            // The model gives IRM≈0.85 to speech bins AND to fan bins that overlap,
-            // so the fan rides through at 85% amplitude on speech-containing bins.
+            // ── Step 5: Speech-band protection floor ──────────────────────────────
+            // β=1.8 power law was crushing voice to 12% amplitude (mean IRM=0.12
+            // across 257 bins) — user heard noise only, no voice. Root cause:
+            // gains[k]^1.8 was computed before STFT was applied (binary gate hid the damage);
+            // now that per-bin STFT gains are real, voice must be protected.
             //
-            // Fix: apply power law  gains[k] = gains[k]^β  (β > 1).
-            // This is mathematically equivalent to Wiener spectral subtraction and
-            // is used by every production noise suppressor (RNNoise, Google, WebRTC).
+            // During speech frames: enforce a minimum gain of 0.20 on every bin k ≥ 1 (DC bin k=0 is set in Step 6).
             // Effect:
-            //   IRM=0.85 (strong speech) → 0.85^1.8 = 0.763  (−2.3dB, speech fine)
-            //   IRM=0.60 (mixed)         → 0.60^1.8 = 0.425  (−7.4dB, fan partly removed)
-            //   IRM=0.35 (fan+speech)    → 0.35^1.8 = 0.143  (−16.9dB, fan mostly gone)
-            //   IRM=0.10 (pure noise)    → 0.10^1.8 = 0.016  (−36dB, near silence)
-            const GAIN_BETA = 1.8;
-            for (let k = 1; k < bins; k++) {
-                gains[k] = Math.pow(gains[k], GAIN_BETA);
+            //   Speech fundamental (raw IRM≈0.86) → 0.86  (unchanged, full voice)
+            //   Speech formants    (raw IRM≈0.65) → 0.65  (unchanged, full voice)
+            //   Mixed fan+speech   (raw IRM≈0.30) → 0.30  (−10dB vs binary gate)
+            //   Pure fan bins      (raw IRM≈0.10) → 0.20  (−14dB vs binary gate)
+            // During noise frames: no floor — model can suppress to full silence.
+            const IRM_SPEECH_FLOOR = 0.2;
+            if (isSpeechFrame) {
+                for (let k = 1; k < bins; k++) {
+                    gains[k] = Math.max(IRM_SPEECH_FLOOR, gains[k]);
+                }
             }
             // ── Step 6: Override gains[0] as explicit gate signal ────────────────
             // The DC bin (k=0) has no mel filter coverage → filtTotal[0] ≤ 1e-8 →

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@newgameplusinc/odyssey-audio-video-sdk-dev",
-  "version": "1.0.352",
+  "version": "1.0.353",
   "description": "Odyssey Spatial Audio & Video SDK using MediaSoup for real-time communication",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",