@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.352 → 1.0.353

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1122,22 +1122,24 @@ class MLNoiseSuppressor {
1122
1122
  gains[k] = filtTotal[k] > 1e-8 ? gains[k] / filtTotal[k] : 1.0;
1123
1123
  gains[k] = Math.max(0.0, Math.min(1.0, gains[k]));
1124
1124
  }
1125
- // ── Step 5: Wiener-style gain compression ─────────────────────────────
1126
- // Problem: fan noise is in the same freq bands as speech (100-500 Hz).
1127
- // The model gives IRM≈0.85 to speech bins AND to fan bins that overlap,
1128
- // so the fan rides through at 85% amplitude on speech-containing bins.
1125
+ // ── Step 5: Speech-band protection floor ──────────────────────────────
1126
+ // β=1.8 power law was crushing voice to 12% amplitude (mean IRM=0.12
1127
+ // across 257 bins) → user heard noise only, no voice. Root cause:
1128
+ // gains^β (β=1.8) was computed before STFT was applied (binary gate hid the damage);
1129
+ // now that per-bin STFT gains are real, voice must be protected.
1129
1130
  //
1130
- // Fix: apply power law gains[k] = gains[k]^β (β > 1).
1131
- // This is mathematically equivalent to Wiener spectral subtraction and
1132
- // is used by every production noise suppressor (RNNoise, Google, WebRTC).
1131
+ // During speech frames: enforce minimum gain of 0.20 on all bins.
1133
1132
  // Effect:
1134
- // IRM=0.85 (strong speech) → 0.85^1.8 = 0.763 (−2.3dB, speech fine)
1135
- // IRM=0.60 (mixed) → 0.60^1.8 = 0.425 (−7.4dB, fan partly removed)
1136
- // IRM=0.35 (fan+speech) → 0.35^1.8 = 0.143 (−16.9dB, fan mostly gone)
1137
- // IRM=0.10 (pure noise) → 0.10^1.8 = 0.016 (−36dB, near silence)
1138
- const GAIN_BETA = 1.8;
1139
- for (let k = 1; k < bins; k++) {
1140
- gains[k] = Math.pow(gains[k], GAIN_BETA);
1133
+ // Speech fundamental (raw IRM≈0.86) → 0.86 (unchanged, full voice)
1134
+ // Speech formants (raw IRM≈0.65) → 0.65 (unchanged, full voice)
1135
+ // Mixed fan+speech (raw IRM≈0.30) → 0.30 (−10dB vs binary gate)
1136
+ // Pure fan bins (raw IRM≈0.10) → 0.20 (−14dB vs binary gate)
1137
+ // During noise frames: no floor — model can suppress to full silence.
1138
+ const IRM_SPEECH_FLOOR = 0.2;
1139
+ if (isSpeechFrame) {
1140
+ for (let k = 1; k < bins; k++) {
1141
+ gains[k] = Math.max(IRM_SPEECH_FLOOR, gains[k]);
1142
+ }
1141
1143
  }
1142
1144
  // ── Step 6: Override gains[0] as explicit gate signal ────────────────
1143
1145
  // The DC bin (k=0) has no mel filter coverage → filtTotal[0] ≤ 1e-8 →
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@newgameplusinc/odyssey-audio-video-sdk-dev",
3
- "version": "1.0.352",
3
+ "version": "1.0.353",
4
4
  "description": "Odyssey Spatial Audio & Video SDK using MediaSoup for real-time communication",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",