@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.57 → 1.0.58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,309 +0,0 @@
1
- "use strict";
2
- /**
3
- * ULTIMATE ML Noise Suppressor - Enhanced for Apple/Google Meet Quality
4
- * Features:
5
- * 1. Temporal smoothing (exponential moving average)
6
- * 2. Voice frequency preservation (80-500 Hz)
7
- * 3. Sub-bass filtering (remove < 80 Hz)
8
- * 4. Adaptive processing
9
- * 5. WebAssembly acceleration
10
- */
11
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
12
- if (k2 === undefined) k2 = k;
13
- var desc = Object.getOwnPropertyDescriptor(m, k);
14
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
15
- desc = { enumerable: true, get: function() { return m[k]; } };
16
- }
17
- Object.defineProperty(o, k2, desc);
18
- }) : (function(o, m, k, k2) {
19
- if (k2 === undefined) k2 = k;
20
- o[k2] = m[k];
21
- }));
22
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
23
- Object.defineProperty(o, "default", { enumerable: true, value: v });
24
- }) : function(o, v) {
25
- o["default"] = v;
26
- });
27
- var __importStar = (this && this.__importStar) || (function () {
28
- var ownKeys = function(o) {
29
- ownKeys = Object.getOwnPropertyNames || function (o) {
30
- var ar = [];
31
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
32
- return ar;
33
- };
34
- return ownKeys(o);
35
- };
36
- return function (mod) {
37
- if (mod && mod.__esModule) return mod;
38
- var result = {};
39
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
40
- __setModuleDefault(result, mod);
41
- return result;
42
- };
43
- })();
44
- Object.defineProperty(exports, "__esModule", { value: true });
45
- exports.UltimateMLNoiseSuppressor = void 0;
46
- const tf = __importStar(require("@tensorflow/tfjs"));
47
/**
 * Frame-based noise suppressor built around a TensorFlow.js layers model.
 *
 * The real-time entry point (`processAudio`) only applies a first-order
 * high-pass filter; the model-driven mask is computed by `processFrameAsync`,
 * which is meant to run off the audio thread and stores its result in
 * `prevMask`.
 */
class UltimateMLNoiseSuppressor {
    constructor() {
        this.model = null;
        this.config = null;
        this.normStats = null;
        this.audioContext = null;
        this.isInitialized = false;
        // Temporal smoothing state: the most recent smoothed mask.
        this.prevMask = null;
        this.SMOOTHING_ALPHA = 0.85; // 85% current, 15% previous
        // Web Audio filter nodes created by setupVoiceFilters().
        this.highPassFilter = null;
        this.voiceBandFilter = null;
        // Processing optimization
        this.processingQueue = [];
        this.isProcessing = false;
        // State of the software high-pass filter, carried across blocks so the
        // filter is continuous over a stream instead of restarting per block.
        this.highPassPrimed = false;
        this.highPassPrevInput = 0;
        this.highPassPrevOutput = 0;
    }
    /**
     * Load the model plus its `model_config.json` and
     * `normalization_stats.json` siblings, then create the voice filters.
     *
     * @param {string} modelUrl URL of the model; the two JSON files are
     *   fetched from the same directory (everything before the last "/").
     * @param {object} audioContext Context used to create the filter nodes.
     * @returns {Promise<void>}
     * @throws Rethrows any load/fetch/parse failure after logging it.
     */
    async initialize(modelUrl, audioContext) {
        console.log("🚀 Initializing ULTIMATE ML Noise Suppressor...");
        this.audioContext = audioContext;
        try {
            // Load model
            console.log(`📂 Loading model from ${modelUrl}`);
            this.model = await tf.loadLayersModel(modelUrl);
            console.log("✅ Model loaded");
            // Config and normalization stats are independent: fetch in parallel.
            const baseUrl = modelUrl.substring(0, modelUrl.lastIndexOf("/"));
            const [config, normStats] = await Promise.all([
                this.fetchJson(`${baseUrl}/model_config.json`),
                this.fetchJson(`${baseUrl}/normalization_stats.json`),
            ]);
            this.config = config;
            this.normStats = normStats;
            // Setup voice frequency filters
            this.setupVoiceFilters();
            this.isInitialized = true;
            console.log("✅ ULTIMATE ML Noise Suppressor initialized!");
        }
        catch (error) {
            console.error("❌ Failed to initialize:", error);
            throw error;
        }
    }
    /**
     * Fetch a URL and parse the body as JSON, rejecting on HTTP error statuses
     * so an error page is never mistaken for a config object.
     *
     * @param {string} url
     * @returns {Promise<any>} Parsed JSON body.
     * @throws {Error} When the response status is not OK.
     */
    async fetchJson(url) {
        const response = await fetch(url);
        if (!response.ok) {
            throw new Error(`Failed to load ${url}: HTTP ${response.status}`);
        }
        return response.json();
    }
    /**
     * Create the biquad filter nodes on the stored audio context.
     * Does nothing when no context is set. The nodes are only created and
     * configured here; this class does not connect them to an audio graph.
     */
    setupVoiceFilters() {
        if (!this.audioContext)
            return;
        // High-pass filter: Remove sub-bass rumble (< 80 Hz)
        this.highPassFilter = this.audioContext.createBiquadFilter();
        this.highPassFilter.type = "highpass";
        this.highPassFilter.frequency.value = 80; // 80 Hz cutoff
        this.highPassFilter.Q.value = 0.7;
        // Bandpass filter centered at 200 Hz
        this.voiceBandFilter = this.audioContext.createBiquadFilter();
        this.voiceBandFilter.type = "bandpass";
        this.voiceBandFilter.frequency.value = 200; // Center at 200 Hz
        this.voiceBandFilter.Q.value = 1.4;
    }
    /**
     * Synchronous per-block processing path.
     *
     * Returns the input untouched until initialization has completed;
     * afterwards returns a high-pass filtered copy. No model inference runs
     * here: inference is asynchronous and this function must return
     * immediately. Applying the ML mask requires running
     * `processFrameAsync` elsewhere (e.g. a Web Worker) and feeding the mask
     * back to the audio path.
     *
     * @param {Float32Array} inputBuffer One block of samples.
     * @returns {Float32Array} `inputBuffer` itself, or a new filtered buffer.
     */
    processAudio(inputBuffer) {
        if (!this.isInitialized || !this.model || !this.config || !this.normStats) {
            return inputBuffer;
        }
        return this.applyHighPassFilter(inputBuffer);
    }
    /**
     * Run feature extraction and model inference for one buffer and store the
     * smoothed mask in `prevMask`. Errors are logged, not rethrown
     * (best-effort), and every tensor created here is disposed on all paths.
     *
     * @param {Float32Array} inputBuffer Audio samples to analyze.
     * @returns {Promise<void>}
     */
    async processFrameAsync(inputBuffer) {
        let normalizedFeatures = null;
        let sequenceTensor = null;
        let maskTensor = null;
        try {
            const features = await this.extractMelFeatures(inputBuffer);
            if (features.length === 0) {
                // Buffer shorter than one analysis frame: nothing to infer.
                return;
            }
            normalizedFeatures = tf.tidy(() => {
                const tensor = tf.tensor2d(features);
                return tensor.sub(this.normStats.mean).div(this.normStats.std);
            });
            const featuresArray = await normalizedFeatures.array();
            const sequences = this.createSequences(featuresArray, this.config.sequence_length);
            if (sequences.length > 0) {
                sequenceTensor = tf.tensor3d([sequences[0]]);
                maskTensor = this.model.predict(sequenceTensor);
                const maskData = await maskTensor.data();
                // Update the current mask for the audio path to use
                this.prevMask = this.applyTemporalSmoothing(maskData);
            }
        }
        catch (e) {
            console.error(e);
        }
        finally {
            // Release tensors even when inference throws or no sequence exists.
            for (const tensor of [normalizedFeatures, sequenceTensor, maskTensor]) {
                if (tensor) {
                    tensor.dispose();
                }
            }
        }
    }
    /**
     * Blend the new mask with the previous one (exponential moving average)
     * and clamp every value to [0.02, 1.0] so audio is never fully muted.
     * When there is no previous mask of the same length, the new mask is
     * used as-is but still clamped.
     *
     * @param {ArrayLike<number>} currentMask Raw mask values.
     * @returns {Float32Array} Smoothed mask; also stored in `prevMask`.
     */
    applyTemporalSmoothing(currentMask) {
        const length = currentMask.length;
        const smoothed = new Float32Array(length);
        const previous = this.prevMask && this.prevMask.length === length
            ? this.prevMask
            : null;
        const alpha = this.SMOOTHING_ALPHA;
        for (let i = 0; i < length; i++) {
            const blended = previous === null
                ? currentMask[i]
                : alpha * currentMask[i] + (1 - alpha) * previous[i];
            smoothed[i] = Math.max(0.02, Math.min(1.0, blended));
        }
        this.prevMask = smoothed;
        return smoothed;
    }
    /**
     * First-order IIR high-pass filter: y[n] = a * (y[n-1] + x[n] - x[n-1]).
     *
     * Filter state persists between calls so consecutive blocks of one stream
     * are filtered continuously; call `reset()` when switching streams. The
     * first block after construction/reset starts with y[0] = x[0].
     *
     * NOTE(review): for a first-order RC high-pass, fc = (1 - a) * fs / (2*pi*a),
     * which for a = 0.98 at 48 kHz is roughly 156 Hz rather than 80 Hz.
     * Confirm the intended cutoff before changing the coefficient.
     *
     * @param {ArrayLike<number>} input Block of samples.
     * @returns {Float32Array} New buffer with the filtered samples.
     */
    applyHighPassFilter(input) {
        const length = input.length;
        const output = new Float32Array(length);
        if (length === 0) {
            return output;
        }
        const alpha = 0.98; // Filter coefficient
        output[0] = this.highPassPrimed
            ? alpha * (this.highPassPrevOutput + input[0] - this.highPassPrevInput)
            : input[0];
        for (let i = 1; i < length; i++) {
            output[i] = alpha * (output[i - 1] + input[i] - input[i - 1]);
        }
        const lastInput = input[length - 1];
        const lastOutput = output[length - 1];
        // Do not carry NaN/Infinity forward: an IIR filter would never recover.
        this.highPassPrimed = Number.isFinite(lastInput) && Number.isFinite(lastOutput);
        this.highPassPrevInput = lastInput;
        this.highPassPrevOutput = lastOutput;
        return output;
    }
    /**
     * Scale audio by a per-frame gain (the mean of that frame's mask bins,
     * floored at 0.02) and fade both buffer ends to reduce clicks.
     *
     * Samples whose frame has no complete set of `n_mels` mask values, and all
     * samples when `numFrames` is less than 1, are passed with gain 1.0.
     *
     * @param {ArrayLike<number>} audio Samples to scale.
     * @param {ArrayLike<number>} mask Flat mask, `n_mels` values per frame.
     * @param {number} numFrames Number of frames the audio is divided into.
     * @returns {Float32Array} New buffer with the gain and fades applied.
     */
    applyMaskWithVoicePreservation(audio, mask, numFrames) {
        const output = new Float32Array(audio.length);
        const nMels = this.config.n_mels;
        const frameCount = numFrames >= 1 ? Math.floor(numFrames) : 0;
        // Only frames fully covered by the mask can be averaged.
        const maskedFrames = nMels > 0 ? Math.floor(mask.length / nMels) : 0;
        // At least one sample per frame so the frame index is always defined.
        const hopLength = frameCount > 0
            ? Math.max(1, Math.floor(audio.length / frameCount))
            : 1;
        let cachedFrame = -1;
        let cachedGain = 1.0;
        for (let i = 0; i < audio.length; i++) {
            const frameIdx = frameCount > 0
                ? Math.min(Math.floor(i / hopLength), frameCount - 1)
                : -1;
            if (frameIdx !== cachedFrame) {
                // Gain is constant within a frame: compute it once per frame.
                cachedFrame = frameIdx;
                cachedGain = 1.0;
                if (frameIdx >= 0 && frameIdx < maskedFrames) {
                    const startBin = frameIdx * nMels;
                    let maskSum = 0;
                    for (let j = 0; j < nMels; j++) {
                        maskSum += mask[startBin + j];
                    }
                    cachedGain = maskSum / nMels;
                }
            }
            // Apply gain with minimum threshold
            output[i] = audio[i] * Math.max(0.02, cachedGain);
        }
        // Apply fade-in/out to prevent clicks
        const fadeLength = Math.min(256, output.length / 10);
        for (let i = 0; i < fadeLength; i++) {
            const fade = i / fadeLength;
            output[i] *= fade;
            output[output.length - 1 - i] *= fade;
        }
        return output;
    }
    /**
     * Split audio into overlapping frames (`n_fft` long, `hop_length` apart)
     * and compute `n_mels` simplified log-energy values per frame.
     *
     * @param {Float32Array|number[]} audio Samples to analyze.
     * @returns {Promise<number[][]>} One feature row per frame; empty when the
     *   audio is shorter than one frame.
     * @throws {Error} When the config is missing or has a non-positive
     *   `n_fft` / `hop_length`.
     */
    async extractMelFeatures(audio) {
        if (!this.config)
            throw new Error("Config not loaded");
        const frameLength = this.config.n_fft;
        const hopLength = this.config.hop_length;
        if (!(frameLength > 0) || !(hopLength > 0)) {
            // A zero hop would make the frame count infinite.
            throw new Error("Invalid config: n_fft and hop_length must be positive");
        }
        const numFrames = Math.floor((audio.length - frameLength) / hopLength) + 1;
        const features = [];
        for (let i = 0; i < numFrames; i++) {
            const start = i * hopLength;
            const frame = audio.slice(start, start + frameLength);
            const frameFeatures = [];
            for (let j = 0; j < this.config.n_mels; j++) {
                frameFeatures.push(this.computeMelBin(frame, j));
            }
            features.push(frameFeatures);
        }
        return features;
    }
    /**
     * Simplified "mel bin": 10 * log10 of the mean absolute sample value over
     * the slice of the frame that corresponds to `binIndex`. An empty slice
     * (frame shorter than `n_mels`) is treated as silence.
     *
     * @param {ArrayLike<number>} frame Samples of one analysis frame.
     * @param {number} binIndex Bin to compute, in [0, n_mels).
     * @returns {number} Log-scaled value (dB-like).
     */
    computeMelBin(frame, binIndex) {
        const nMels = this.config.n_mels;
        const start = Math.floor((binIndex / nMels) * frame.length);
        const end = Math.min(frame.length, Math.floor(((binIndex + 1) / nMels) * frame.length));
        let sum = 0;
        for (let i = start; i < end; i++) {
            sum += Math.abs(frame[i]);
        }
        const count = end - start;
        // Avoid 0 / 0 = NaN for empty bins.
        const avg = count > 0 ? sum / count : 0;
        // Convert to log scale (dB-like)
        return Math.log10(avg + 1e-8) * 10;
    }
    /**
     * Build sliding windows of `seqLength` consecutive feature rows. When
     * there are fewer rows than `seqLength`, return a single window padded by
     * repeating the last row.
     *
     * @param {Array} features Feature rows.
     * @param {number} seqLength Rows per sequence.
     * @returns {Array[]} List of sequences; empty when `features` is empty.
     */
    createSequences(features, seqLength) {
        const sequences = [];
        for (let i = 0; i <= features.length - seqLength; i++) {
            sequences.push(features.slice(i, i + seqLength));
        }
        // If not enough frames, pad with last frame
        if (sequences.length === 0 && features.length > 0) {
            const paddedSeq = [];
            for (let i = 0; i < seqLength; i++) {
                paddedSeq.push(features[Math.min(i, features.length - 1)]);
            }
            sequences.push(paddedSeq);
        }
        return sequences;
    }
    /**
     * Reset processing state (call when switching audio streams): clears the
     * smoothed mask, the queue, and the high-pass filter history.
     */
    reset() {
        this.prevMask = null;
        this.processingQueue = [];
        this.highPassPrimed = false;
        this.highPassPrevInput = 0;
        this.highPassPrevOutput = 0;
    }
    /**
     * Approximate latency in milliseconds: one `n_fft` buffer at the
     * configured sample rate plus a fixed 10 ms inference estimate.
     *
     * @returns {number} Latency in ms, or 0 when no config is loaded.
     */
    getLatency() {
        if (!this.config)
            return 0;
        const bufferLatency = (this.config.n_fft / this.config.sample_rate) * 1000;
        const processingLatency = 10; // Model inference ~10ms
        return bufferLatency + processingLatency;
    }
}
307
- exports.UltimateMLNoiseSuppressor = UltimateMLNoiseSuppressor;
308
- // Export for use in AudioWorklet
309
- exports.default = UltimateMLNoiseSuppressor;