@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.57 → 1.0.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -255
- package/dist/MediasoupManager.js +11 -27
- package/dist/index.d.ts +0 -15
- package/dist/index.js +1 -67
- package/package.json +3 -4
- package/dist/MLNoiseSuppressor.d.ts +0 -76
- package/dist/MLNoiseSuppressor.js +0 -439
- package/dist/UltimateMLNoiseSuppressor.d.ts +0 -74
- package/dist/UltimateMLNoiseSuppressor.js +0 -309
package/dist/UltimateMLNoiseSuppressor.js
@@ -1,309 +0,0 @@
-"use strict";
-/**
- * ULTIMATE ML Noise Suppressor - Enhanced for Apple/Google Meet Quality
- * Features:
- * 1. Temporal smoothing (exponential moving average)
- * 2. Voice frequency preservation (80-500 Hz)
- * 3. Sub-bass filtering (remove < 80 Hz)
- * 4. Adaptive processing
- * 5. WebAssembly acceleration
- */
-var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
-    if (k2 === undefined) k2 = k;
-    var desc = Object.getOwnPropertyDescriptor(m, k);
-    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
-      desc = { enumerable: true, get: function() { return m[k]; } };
-    }
-    Object.defineProperty(o, k2, desc);
-}) : (function(o, m, k, k2) {
-    if (k2 === undefined) k2 = k;
-    o[k2] = m[k];
-}));
-var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
-    Object.defineProperty(o, "default", { enumerable: true, value: v });
-}) : function(o, v) {
-    o["default"] = v;
-});
-var __importStar = (this && this.__importStar) || (function () {
-    var ownKeys = function(o) {
-        ownKeys = Object.getOwnPropertyNames || function (o) {
-            var ar = [];
-            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
-            return ar;
-        };
-        return ownKeys(o);
-    };
-    return function (mod) {
-        if (mod && mod.__esModule) return mod;
-        var result = {};
-        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
-        __setModuleDefault(result, mod);
-        return result;
-    };
-})();
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.UltimateMLNoiseSuppressor = void 0;
-const tf = __importStar(require("@tensorflow/tfjs"));
-class UltimateMLNoiseSuppressor {
-    constructor() {
-        this.model = null;
-        this.config = null;
-        this.normStats = null;
-        this.audioContext = null;
-        this.isInitialized = false;
-        // CRITICAL: Temporal smoothing state
-        this.prevMask = null;
-        this.SMOOTHING_ALPHA = 0.85; // 85% current, 15% previous
-        // Voice frequency preservation
-        this.highPassFilter = null;
-        this.voiceBandFilter = null;
-        // Processing optimization
-        this.processingQueue = [];
-        this.isProcessing = false;
-    }
-    /**
-     * Initialize with enhanced setup
-     */
-    async initialize(modelUrl, audioContext) {
-        console.log("🚀 Initializing ULTIMATE ML Noise Suppressor...");
-        this.audioContext = audioContext;
-        try {
-            // Load model
-            console.log(`📂 Loading model from ${modelUrl}`);
-            this.model = await tf.loadLayersModel(modelUrl);
-            console.log("✅ Model loaded");
-            // Load config
-            const baseUrl = modelUrl.substring(0, modelUrl.lastIndexOf("/"));
-            const configResponse = await fetch(`${baseUrl}/model_config.json`);
-            this.config = await configResponse.json();
-            // Load normalization stats
-            const normResponse = await fetch(`${baseUrl}/normalization_stats.json`);
-            this.normStats = await normResponse.json();
-            // Setup voice frequency filters
-            this.setupVoiceFilters();
-            this.isInitialized = true;
-            console.log("✅ ULTIMATE ML Noise Suppressor initialized!");
-        }
-        catch (error) {
-            console.error("❌ Failed to initialize:", error);
-            throw error;
-        }
-    }
-    /**
-     * Setup filters for voice frequency preservation
-     */
-    setupVoiceFilters() {
-        if (!this.audioContext)
-            return;
-        // High-pass filter: Remove sub-bass rumble (< 80 Hz)
-        this.highPassFilter = this.audioContext.createBiquadFilter();
-        this.highPassFilter.type = "highpass";
-        this.highPassFilter.frequency.value = 80; // 80 Hz cutoff
-        this.highPassFilter.Q.value = 0.7;
-        // Bandpass filter: Enhance voice fundamentals (100-300 Hz)
-        this.voiceBandFilter = this.audioContext.createBiquadFilter();
-        this.voiceBandFilter.type = "bandpass";
-        this.voiceBandFilter.frequency.value = 200; // Center at 200 Hz
-        this.voiceBandFilter.Q.value = 1.4;
-    }
-    /**
-     * Process audio with ULTIMATE quality
-     * NOTE: This runs in the AudioWorklet thread. It must be synchronous and fast.
-     * The heavy ML inference should ideally happen in a Worker, communicating via SharedArrayBuffer.
-     * For this implementation, we use a simplified frame-based approach.
-     */
-    processAudio(inputBuffer) {
-        if (!this.isInitialized || !this.model || !this.config || !this.normStats) {
-            return inputBuffer;
-        }
-        // 1. Pre-processing: Remove sub-bass rumble (High-pass)
-        // Note: In a real AudioWorklet, filters should be applied per-sample or per-block, not on the whole buffer at once if it's a stream.
-        // But assuming inputBuffer is a processing block (e.g. 128 samples):
-        const filtered = this.applyHighPassFilter(inputBuffer);
-        // ⚠️ CRITICAL ARCHITECTURE NOTE ⚠️
-        // We cannot await this.model.predict() here because this function must return immediately for real-time audio.
-        // The correct architecture is:
-        // 1. AudioWorklet writes audio to a RingBuffer (SharedArrayBuffer).
-        // 2. Web Worker reads RingBuffer, runs TFJS inference (async), writes Mask to another RingBuffer.
-        // 3. AudioWorklet reads latest Mask from RingBuffer and applies it.
-        // For now, we will return the filtered audio.
-        // To enable ML, you must implement the Worker architecture described above.
-        // Running TFJS on the main audio thread will cause stuttering.
-        return filtered;
-    }
-    /**
-     * Placeholder for async processing (to be moved to a Web Worker)
-     */
-    async processFrameAsync(inputBuffer) {
-        // This logic belongs in a Web Worker
-        try {
-            const features = await this.extractMelFeatures(inputBuffer);
-            const normalizedFeatures = tf.tidy(() => {
-                const tensor = tf.tensor2d(features);
-                return tensor.sub(this.normStats.mean).div(this.normStats.std);
-            });
-            const featuresArray = await normalizedFeatures.array();
-            const sequences = this.createSequences(featuresArray, this.config.sequence_length);
-            if (sequences.length > 0) {
-                const sequenceTensor = tf.tensor3d([sequences[0]]);
-                const maskTensor = this.model.predict(sequenceTensor);
-                const maskData = await maskTensor.data();
-                const flatMask = Array.from(maskData);
-                // Update the current mask for the AudioWorklet to use
-                this.prevMask = this.applyTemporalSmoothing(flatMask);
-                normalizedFeatures.dispose();
-                sequenceTensor.dispose();
-                maskTensor.dispose();
-            }
-        }
-        catch (e) {
-            console.error(e);
-        }
-    }
-    /**
-     * CRITICAL: Temporal smoothing (biggest quality improvement!)
-     */
-    applyTemporalSmoothing(currentMask) {
-        const smoothed = new Float32Array(currentMask.length);
-        if (!this.prevMask || this.prevMask.length !== currentMask.length) {
-            // First frame - no smoothing
-            this.prevMask = new Float32Array(currentMask);
-            return this.prevMask;
-        }
-        // Exponential moving average
-        for (let i = 0; i < currentMask.length; i++) {
-            smoothed[i] =
-                this.SMOOTHING_ALPHA * currentMask[i] +
-                    (1 - this.SMOOTHING_ALPHA) * this.prevMask[i];
-            // Clamp to valid range [0.02, 1.0]
-            // Never completely mute (min 2%)
-            smoothed[i] = Math.max(0.02, Math.min(1.0, smoothed[i]));
-        }
-        this.prevMask = smoothed;
-        return smoothed;
-    }
-    /**
-     * Apply high-pass filter to remove rumble
-     */
-    applyHighPassFilter(input) {
-        // Simple IIR high-pass filter (80 Hz @ 48kHz)
-        const output = new Float32Array(input.length);
-        const alpha = 0.98; // Filter coefficient
-        output[0] = input[0];
-        for (let i = 1; i < input.length; i++) {
-            output[i] = alpha * (output[i - 1] + input[i] - input[i - 1]);
-        }
-        return output;
-    }
-    /**
-     * Apply mask with voice frequency preservation
-     */
-    applyMaskWithVoicePreservation(audio, mask, numFrames) {
-        const output = new Float32Array(audio.length);
-        // Simple overlap-add (proper implementation would use ISTFT)
-        const hopLength = Math.floor(audio.length / numFrames);
-        for (let i = 0; i < audio.length; i++) {
-            const frameIdx = Math.floor(i / hopLength);
-            const maskIdx = Math.min(frameIdx, numFrames - 1);
-            // Apply mask
-            let gain = 1.0;
-            if (maskIdx < mask.length / this.config.n_mels) {
-                // Average mask across frequency bins for this frame
-                let maskSum = 0;
-                const startBin = maskIdx * this.config.n_mels;
-                for (let j = 0; j < this.config.n_mels; j++) {
-                    maskSum += mask[startBin + j];
-                }
-                gain = maskSum / this.config.n_mels;
-            }
-            // Apply gain with minimum threshold
-            output[i] = audio[i] * Math.max(0.02, gain);
-        }
-        // Apply fade-in/out to prevent clicks
-        const fadeLength = Math.min(256, output.length / 10);
-        for (let i = 0; i < fadeLength; i++) {
-            const fade = i / fadeLength;
-            output[i] *= fade;
-            output[output.length - 1 - i] *= fade;
-        }
-        return output;
-    }
-    /**
-     * Extract mel-spectrogram features
-     */
-    async extractMelFeatures(audio) {
-        if (!this.config)
-            throw new Error("Config not loaded");
-        // Simplified feature extraction
-        // In production, use proper STFT + Mel filterbank
-        const frameLength = this.config.n_fft;
-        const hopLength = this.config.hop_length;
-        const numFrames = Math.floor((audio.length - frameLength) / hopLength) + 1;
-        const features = [];
-        for (let i = 0; i < numFrames; i++) {
-            const start = i * hopLength;
-            const frame = audio.slice(start, start + frameLength);
-            // Compute mel bins (simplified)
-            const frameFeatures = [];
-            for (let j = 0; j < this.config.n_mels; j++) {
-                const melBin = this.computeMelBin(frame, j);
-                frameFeatures.push(melBin);
-            }
-            features.push(frameFeatures);
-        }
-        return features;
-    }
-    /**
-     * Compute mel bin (simplified)
-     */
-    computeMelBin(frame, binIndex) {
-        const start = Math.floor((binIndex / this.config.n_mels) * frame.length);
-        const end = Math.floor(((binIndex + 1) / this.config.n_mels) * frame.length);
-        let sum = 0;
-        for (let i = start; i < end && i < frame.length; i++) {
-            sum += Math.abs(frame[i]);
-        }
-        const avg = sum / (end - start);
-        // Convert to log scale (dB-like)
-        return Math.log10(avg + 1e-8) * 10;
-    }
-    /**
-     * Create sequences for LSTM input
-     */
-    createSequences(features, seqLength) {
-        const sequences = [];
-        for (let i = 0; i <= features.length - seqLength; i++) {
-            sequences.push(features.slice(i, i + seqLength));
-        }
-        // If not enough frames, pad with last frame
-        if (sequences.length === 0 && features.length > 0) {
-            const paddedSeq = [];
-            for (let i = 0; i < seqLength; i++) {
-                paddedSeq.push(features[Math.min(i, features.length - 1)]);
-            }
-            sequences.push(paddedSeq);
-        }
-        return sequences;
-    }
-    /**
-     * Reset processing state (call when switching audio streams)
-     */
-    reset() {
-        this.prevMask = null;
-        this.processingQueue = [];
-    }
-    /**
-     * Get processing latency
-     */
-    getLatency() {
-        if (!this.config)
-            return 0;
-        // Approximate latency in milliseconds
-        const bufferLatency = (this.config.n_fft / this.config.sample_rate) * 1000;
-        const processingLatency = 10; // Model inference ~10ms
-        return bufferLatency + processingLatency;
-    }
-}
-exports.UltimateMLNoiseSuppressor = UltimateMLNoiseSuppressor;
-// Export for use in AudioWorklet
-exports.default = UltimateMLNoiseSuppressor;