@newgameplusinc/odyssey-audio-video-sdk-dev 1.0.59 → 1.0.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/MLNoiseSuppressor.d.ts +61 -0
- package/dist/MLNoiseSuppressor.js +266 -0
- package/dist/SpatialAudioManager.d.ts +17 -0
- package/dist/SpatialAudioManager.js +83 -3
- package/dist/index.d.ts +3 -4
- package/dist/index.js +26 -13
- package/package.json +3 -2
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ML Noise Suppressor - TensorFlow.js-based Real-Time Audio Enhancement
|
|
3
|
+
* Integrates trained ML model for noise suppression with fallback to traditional DSP
|
|
4
|
+
*/
|
|
5
|
+
export declare class MLNoiseSuppressor {
    /** Loaded TensorFlow.js layers model; null before initialize() succeeds and after dispose(). */
    private model;
    /** Parsed contents of model_config.json, fetched from the directory that holds the model file. */
    private config;
    /** Normalization statistics ({ mean, std }) applied to the features before inference. */
    private normStats;
    /** Set once initialize() has completed; cleared again by dispose(). */
    private isInitialized;
    /** Mask returned by the previous smoothing step; null until a first mask has been seen. */
    private prevMask;
    /** Weight of the current mask in the exponential moving average. */
    private readonly SMOOTHING_ALPHA;
    /** Lower bound of the voice-fundamental band. */
    private readonly VOICE_FUNDAMENTAL_MIN;
    /** Upper bound of the voice-fundamental band. */
    private readonly VOICE_FUNDAMENTAL_MAX;
    /**
     * Load the model together with its config and normalization statistics.
     * The returned promise rejects when the model or its config cannot be loaded.
     * @param modelUrl URL of the model.json file; sibling JSON files are fetched from the same directory
     */
    initialize(modelUrl: string): Promise<void>;
    /**
     * Run one buffer of samples through the suppressor.
     * The input buffer is handed back as-is when the suppressor is not
     * initialized or when processing throws.
     * @param inputBuffer Audio samples to process
     * @returns Samples scaled by the predicted suppression mask
     */
    processAudio(inputBuffer: Float32Array): Promise<Float32Array>;
    /**
     * Turn raw samples into per-frame log-energy bins (a simplified
     * stand-in for a mel spectrogram).
     */
    private extractFeatures;
    /**
     * Shape the frame features into a single fixed-length sequence batch
     * for the model, zero-padding when there are too few frames.
     */
    private createSequences;
    /**
     * Blend the new mask with the previous one to avoid "musical noise".
     */
    private applyTemporalSmoothing;
    /**
     * Multiply the samples by the mask gains and fade the buffer edges.
     */
    private applyMaskToAudio;
    /**
     * Forget the smoothing history (call when switching audio streams).
     */
    reset(): void;
    /**
     * Report whether initialization finished and a model is loaded.
     */
    isReady(): boolean;
    /**
     * Snapshot of the processor state and the active TensorFlow.js backend.
     */
    getInfo(): {
        initialized: boolean;
        backend: string;
        modelLoaded: boolean;
    };
    /**
     * Dispose the model and clear the processing state.
     */
    dispose(): void;
}
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ML Noise Suppressor - TensorFlow.js-based Real-Time Audio Enhancement
|
|
4
|
+
* Integrates trained ML model for noise suppression with fallback to traditional DSP
|
|
5
|
+
*/
|
|
6
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
7
|
+
if (k2 === undefined) k2 = k;
|
|
8
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
9
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
10
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
11
|
+
}
|
|
12
|
+
Object.defineProperty(o, k2, desc);
|
|
13
|
+
}) : (function(o, m, k, k2) {
|
|
14
|
+
if (k2 === undefined) k2 = k;
|
|
15
|
+
o[k2] = m[k];
|
|
16
|
+
}));
|
|
17
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
18
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
19
|
+
}) : function(o, v) {
|
|
20
|
+
o["default"] = v;
|
|
21
|
+
});
|
|
22
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
23
|
+
var ownKeys = function(o) {
|
|
24
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
25
|
+
var ar = [];
|
|
26
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
27
|
+
return ar;
|
|
28
|
+
};
|
|
29
|
+
return ownKeys(o);
|
|
30
|
+
};
|
|
31
|
+
return function (mod) {
|
|
32
|
+
if (mod && mod.__esModule) return mod;
|
|
33
|
+
var result = {};
|
|
34
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
35
|
+
__setModuleDefault(result, mod);
|
|
36
|
+
return result;
|
|
37
|
+
};
|
|
38
|
+
})();
|
|
39
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
40
|
+
exports.MLNoiseSuppressor = void 0;
|
|
41
|
+
const tf = __importStar(require("@tensorflow/tfjs"));
|
|
42
|
+
class MLNoiseSuppressor {
    constructor() {
        this.model = null;
        this.config = null;
        this.normStats = null;
        this.isInitialized = false;
        // Temporal smoothing state (prevents "musical noise" artifacts)
        this.prevMask = null;
        this.SMOOTHING_ALPHA = 0.85; // 85% current, 15% previous
        // Voice fundamental band (presumably Hz - TODO confirm). No method in
        // this class reads these yet; they stay because the type declaration
        // exposes both fields.
        this.VOICE_FUNDAMENTAL_MIN = 80;
        this.VOICE_FUNDAMENTAL_MAX = 500;
    }
    /**
     * Initialize ML model for noise suppression.
     *
     * Loads the layers model, then `model_config.json` and (optionally)
     * `normalization_stats.json` from the same directory as the model file.
     * Missing or malformed normalization stats fall back to mean 0 / std 1.
     *
     * @param modelUrl Path to model.json file
     * @throws Rethrows the underlying error when the backend, the model or
     *         the config cannot be loaded; partially loaded state is released first.
     */
    async initialize(modelUrl) {
        console.log('🤖 [ML] Initializing ML Noise Suppressor...');
        // A repeated initialize() must not leak the previously loaded model.
        if (this.model) {
            this.model.dispose();
            this.model = null;
        }
        this.isInitialized = false;
        this.prevMask = null;
        try {
            // Set TensorFlow.js backend (WebGL for GPU acceleration)
            await tf.setBackend('webgl');
            await tf.ready();
            console.log('✅ [ML] TensorFlow.js backend ready:', tf.getBackend());
            // Load model
            console.log(`📂 [ML] Loading model from ${modelUrl}`);
            this.model = await tf.loadLayersModel(modelUrl);
            console.log('✅ [ML] Model loaded successfully');
            // Sibling files live next to model.json. A URL without any slash
            // is resolved relative to the current location, not the site root.
            const lastSlash = modelUrl.lastIndexOf('/');
            const baseUrl = lastSlash >= 0 ? modelUrl.substring(0, lastSlash) : '.';
            // Load config (required)
            const configResponse = await fetch(`${baseUrl}/model_config.json`);
            if (!configResponse.ok) {
                // Without this check an HTTP error page would be parsed as config.
                throw new Error(`Failed to load model config: HTTP ${configResponse.status}`);
            }
            this.config = await configResponse.json();
            console.log('✅ [ML] Model config loaded:', this.config);
            // Load normalization stats (optional)
            try {
                const normResponse = await fetch(`${baseUrl}/normalization_stats.json`);
                if (!normResponse.ok) {
                    throw new Error(`HTTP ${normResponse.status}`);
                }
                const stats = await normResponse.json();
                // processAudio() uses mean/std as scalars; anything else would
                // turn every feature into NaN, so treat it like a missing file.
                if (!stats || !Number.isFinite(stats.mean) || !Number.isFinite(stats.std)) {
                    throw new Error('Malformed normalization stats');
                }
                this.normStats = stats;
                console.log('✅ [ML] Normalization stats loaded');
            }
            catch (e) {
                console.warn('⚠️ [ML] No normalization stats found, using defaults');
                this.normStats = { mean: 0, std: 1 };
            }
            this.isInitialized = true;
            console.log('✅ [ML] ML Noise Suppressor fully initialized!');
        }
        catch (error) {
            console.error('❌ [ML] Failed to initialize:', error);
            // Release whatever was loaded before the failure.
            if (this.model) {
                this.model.dispose();
                this.model = null;
            }
            this.config = null;
            this.normStats = null;
            throw error;
        }
    }
    /**
     * Process audio buffer through ML model.
     *
     * Returns the input buffer itself (no copy) when the suppressor is not
     * initialized, when the buffer is empty, or when processing throws.
     *
     * @param inputBuffer Audio samples to process
     * @returns Denoised audio samples
     */
    async processAudio(inputBuffer) {
        if (!this.isInitialized || !this.model || !this.config || !this.normStats) {
            console.warn('⚠️ [ML] Not initialized, returning original audio');
            return inputBuffer;
        }
        if (inputBuffer.length === 0) {
            // Nothing to process; avoids a guaranteed shape error in the model.
            return inputBuffer;
        }
        try {
            // tf.tidy releases every intermediate tensor created in the callback.
            return tf.tidy(() => {
                // 1. Extract features (simplified mel-spectrogram approximation)
                const features = this.extractFeatures(inputBuffer);
                // 2. Normalize features
                const normalized = tf.tensor2d(features)
                    .sub(this.normStats.mean)
                    .div(Math.max(this.normStats.std, 1e-8));
                // 3. Create sequences for LSTM
                const sequences = this.createSequences(normalized);
                // 4. Run ML inference to get noise suppression mask
                const maskTensor = this.model.predict(sequences);
                // 5. Extract mask values
                const maskArray = maskTensor.dataSync();
                // 6. Apply temporal smoothing (critical for quality!)
                const smoothedMask = this.applyTemporalSmoothing(Array.from(maskArray));
                // 7. Apply mask to audio
                return this.applyMaskToAudio(inputBuffer, smoothedMask);
            });
        }
        catch (error) {
            console.error('❌ [ML] Error processing audio:', error);
            // Return original audio on error (graceful degradation)
            return inputBuffer;
        }
    }
    /**
     * Extract audio features (simplified mel-spectrogram approximation).
     *
     * Each frame is cut into `n_mels` equal time slices; the feature of a
     * slice is 10 * log10 of its RMS amplitude. A slice that contains no
     * samples (frame shorter than `n_mels`) contributes the log floor
     * instead of NaN.
     *
     * @param audio Samples to analyse
     * @returns One row of `n_mels` values per frame; `[[0]]` when the audio
     *          is shorter than one frame, `[[]]` when no config is loaded
     */
    extractFeatures(audio) {
        if (!this.config)
            return [[]];
        const frameSize = this.config.frame_size || 512;
        const hopLength = this.config.hop_length || 256;
        const nMels = this.config.n_mels || 128;
        const numFrames = Math.floor((audio.length - frameSize) / hopLength) + 1;
        const features = [];
        for (let i = 0; i < numFrames; i++) {
            const start = i * hopLength;
            const frame = audio.slice(start, Math.min(start + frameSize, audio.length));
            const melBins = [];
            for (let j = 0; j < nMels; j++) {
                const binStart = Math.floor((j / nMels) * frame.length);
                const binEnd = Math.floor(((j + 1) / nMels) * frame.length);
                const binWidth = binEnd - binStart;
                let sumSquares = 0;
                for (let k = binStart; k < binEnd; k++) {
                    sumSquares += frame[k] * frame[k];
                }
                // Guard the empty bin: 0 / 0 would otherwise produce NaN.
                const rms = binWidth > 0 ? Math.sqrt(sumSquares / binWidth) : 0;
                // Convert to log scale (dB-like); the epsilon keeps log10 finite.
                melBins.push(Math.log10(rms + 1e-10) * 10);
            }
            features.push(melBins);
        }
        return features.length > 0 ? features : [[0]];
    }
    /**
     * Create sequences for LSTM input.
     *
     * Produces a single batch of `sequence_length` frames: the most recent
     * frames when enough are available, otherwise all frames followed by
     * zero padding.
     *
     * @param featureTensor 2-D tensor of frame features (frames x features)
     * @returns 3-D tensor with a leading batch dimension of 1
     */
    createSequences(featureTensor) {
        if (!this.config)
            return tf.zeros([1, 1, 1]);
        const seqLength = this.config.sequence_length || 16;
        const [numFrames, numFeatures] = featureTensor.shape;
        if (numFrames < seqLength) {
            // Pad if needed
            const padding = tf.zeros([seqLength - numFrames, numFeatures]);
            return tf.concat([featureTensor, padding], 0).expandDims(0);
        }
        // Take the last seqLength frames
        return featureTensor
            .slice([numFrames - seqLength, 0], [seqLength, numFeatures])
            .expandDims(0);
    }
    /**
     * Apply temporal smoothing to mask (prevents "musical noise").
     *
     * Blends the new mask with the previous one (exponential moving average)
     * and clamps every gain to [0.05, 1.0] so audio is never fully muted.
     * The clamp also applies to the very first mask and to a mask whose
     * length differs from the previous one, where no blending is possible.
     * A NaN gain is replaced by 1.0 (pass-through) so that one bad model
     * output cannot poison the moving average for all later calls.
     *
     * @param currentMask Gains predicted for the current buffer
     * @returns Smoothed gains as a Float32Array (also stored as the new history)
     */
    applyTemporalSmoothing(currentMask) {
        const count = currentMask.length;
        const history = this.prevMask !== null && this.prevMask.length === count
            ? this.prevMask
            : null;
        const smoothed = new Float32Array(count);
        for (let i = 0; i < count; i++) {
            const blended = history
                ? this.SMOOTHING_ALPHA * currentMask[i] +
                    (1 - this.SMOOTHING_ALPHA) * history[i]
                : currentMask[i];
            smoothed[i] = Number.isNaN(blended)
                ? 1.0
                : Math.max(0.05, Math.min(1.0, blended));
        }
        this.prevMask = smoothed;
        return smoothed;
    }
    /**
     * Apply noise suppression mask to audio.
     *
     * Every sample is multiplied by the mask entry at the same relative
     * position in the buffer, then the first and last samples of the buffer
     * are faded. An empty mask yields an unmodified copy of the input
     * instead of NaN samples.
     *
     * @param audio Samples to scale
     * @param mask Gains to apply
     * @returns New Float32Array of the same length as `audio`
     */
    applyMaskToAudio(audio, mask) {
        if (mask.length === 0) {
            return new Float32Array(audio);
        }
        const output = new Float32Array(audio.length);
        const lastMaskIdx = mask.length - 1;
        for (let i = 0; i < audio.length; i++) {
            // Map audio sample to mask index
            const maskIdx = Math.floor((i / audio.length) * mask.length);
            output[i] = audio[i] * mask[Math.min(maskIdx, lastMaskIdx)];
        }
        // Apply fade-in/out to prevent clicks at boundaries
        // NOTE(review): this fades every processed buffer to zero at both
        // ends; confirm that callers overlap or cross-fade adjacent buffers.
        const fadeLength = Math.min(128, output.length / 20);
        for (let i = 0; i < fadeLength; i++) {
            const fade = i / fadeLength;
            output[i] *= fade;
            output[output.length - 1 - i] *= fade;
        }
        return output;
    }
    /**
     * Reset processing state (call when switching audio streams)
     */
    reset() {
        this.prevMask = null;
        console.log('🔄 [ML] Processing state reset');
    }
    /**
     * Check if ML processor is ready
     */
    isReady() {
        return this.isInitialized && this.model !== null;
    }
    /**
     * Get model info
     */
    getInfo() {
        return {
            initialized: this.isInitialized,
            backend: tf.getBackend(),
            modelLoaded: this.model !== null,
        };
    }
    /**
     * Cleanup resources: disposes the model and clears config, normalization
     * stats and smoothing history. initialize() must be called again before
     * the instance processes audio.
     */
    dispose() {
        if (this.model) {
            this.model.dispose();
            this.model = null;
        }
        this.config = null;
        this.normStats = null;
        this.prevMask = null;
        this.isInitialized = false;
        console.log('🗑️ [ML] Resources disposed');
    }
}
|
|
266
|
+
exports.MLNoiseSuppressor = MLNoiseSuppressor;
|
|
@@ -44,8 +44,19 @@ export declare class SpatialAudioManager extends EventManager {
|
|
|
44
44
|
private listenerPosition;
|
|
45
45
|
private listenerInitialized;
|
|
46
46
|
private listenerDirection;
|
|
47
|
+
private mlSuppressor;
|
|
48
|
+
private useMLProcessor;
|
|
47
49
|
constructor(options?: SpatialAudioOptions);
|
|
48
50
|
getAudioContext(): AudioContext;
|
|
51
|
+
/**
|
|
52
|
+
* Initialize ML-based noise suppression (TensorFlow.js)
|
|
53
|
+
* Falls back to AudioWorklet denoiser if ML initialization fails
|
|
54
|
+
*/
|
|
55
|
+
initializeMLNoiseSuppression(modelUrl: string): Promise<void>;
|
|
56
|
+
/**
|
|
57
|
+
* Get current noise suppression mode
|
|
58
|
+
*/
|
|
59
|
+
getNoiseSuppressionMode(): 'ml' | 'audioworklet' | 'none';
|
|
49
60
|
/**
|
|
50
61
|
* Setup spatial audio for a participant
|
|
51
62
|
*
|
|
@@ -63,6 +74,12 @@ export declare class SpatialAudioManager extends EventManager {
|
|
|
63
74
|
*/
|
|
64
75
|
setupSpatialAudioForParticipant(participantId: string, track: MediaStreamTrack, bypassSpatialization?: boolean): Promise<void>;
|
|
65
76
|
private startMonitoring;
|
|
77
|
+
/**
|
|
78
|
+
* Toggle spatialization for a participant (for huddle/spatial switching)
|
|
79
|
+
* @param participantId The participant to update
|
|
80
|
+
* @param enableSpatialization True for spatial audio, false for non-spatial (huddle)
|
|
81
|
+
*/
|
|
82
|
+
setParticipantSpatialization(participantId: string, enableSpatialization: boolean): void;
|
|
66
83
|
/**
|
|
67
84
|
* Update spatial audio position and orientation for a participant
|
|
68
85
|
*
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.SpatialAudioManager = void 0;
|
|
4
4
|
const EventManager_1 = require("./EventManager");
|
|
5
|
+
const MLNoiseSuppressor_1 = require("./MLNoiseSuppressor");
|
|
5
6
|
class SpatialAudioManager extends EventManager_1.EventManager {
|
|
6
7
|
constructor(options) {
|
|
7
8
|
super();
|
|
@@ -14,6 +15,9 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
14
15
|
forward: { x: 0, y: 1, z: 0 },
|
|
15
16
|
up: { x: 0, y: 0, z: 1 },
|
|
16
17
|
};
|
|
18
|
+
// ML Noise Suppressor (TensorFlow.js-based)
|
|
19
|
+
this.mlSuppressor = null;
|
|
20
|
+
this.useMLProcessor = false; // Flag to determine if ML is active
|
|
17
21
|
this.options = this.resolveOptions(options);
|
|
18
22
|
// Use high sample rate for best audio quality
|
|
19
23
|
this.audioContext = new AudioContext({ sampleRate: 48000 });
|
|
@@ -34,6 +38,43 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
34
38
|
getAudioContext() {
|
|
35
39
|
return this.audioContext;
|
|
36
40
|
}
|
|
41
|
+
/**
|
|
42
|
+
* Initialize ML-based noise suppression (TensorFlow.js)
|
|
43
|
+
* Falls back to AudioWorklet denoiser if ML initialization fails
|
|
44
|
+
*/
|
|
45
|
+
async initializeMLNoiseSuppression(modelUrl) {
|
|
46
|
+
console.log('🤖 [SpatialAudio] Attempting to initialize ML noise suppression...');
|
|
47
|
+
try {
|
|
48
|
+
this.mlSuppressor = new MLNoiseSuppressor_1.MLNoiseSuppressor();
|
|
49
|
+
await this.mlSuppressor.initialize(modelUrl);
|
|
50
|
+
if (this.mlSuppressor.isReady()) {
|
|
51
|
+
this.useMLProcessor = true;
|
|
52
|
+
console.log('✅ [SpatialAudio] ML noise suppression ACTIVE');
|
|
53
|
+
console.log('📊 [SpatialAudio] ML Info:', this.mlSuppressor.getInfo());
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
throw new Error('ML processor not ready after initialization');
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
catch (error) {
|
|
60
|
+
console.warn('⚠️ [SpatialAudio] ML initialization failed, using AudioWorklet fallback:', error);
|
|
61
|
+
this.useMLProcessor = false;
|
|
62
|
+
this.mlSuppressor = null;
|
|
63
|
+
// AudioWorklet denoiser will be used as fallback (existing behavior)
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Get current noise suppression mode
|
|
68
|
+
*/
|
|
69
|
+
getNoiseSuppressionMode() {
|
|
70
|
+
if (this.useMLProcessor && this.mlSuppressor?.isReady()) {
|
|
71
|
+
return 'ml';
|
|
72
|
+
}
|
|
73
|
+
if (this.isDenoiserEnabled()) {
|
|
74
|
+
return 'audioworklet';
|
|
75
|
+
}
|
|
76
|
+
return 'none';
|
|
77
|
+
}
|
|
37
78
|
/**
|
|
38
79
|
* Setup spatial audio for a participant
|
|
39
80
|
*
|
|
@@ -62,6 +103,9 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
62
103
|
const gain = this.audioContext.createGain();
|
|
63
104
|
const proximityGain = this.audioContext.createGain();
|
|
64
105
|
let denoiseNode;
|
|
106
|
+
// ML-based denoising: Currently prepared but requires AudioWorklet integration
|
|
107
|
+
// For now, using traditional AudioWorklet denoiser as it provides real-time processing
|
|
108
|
+
// TODO: Integrate ML processor into AudioWorklet for hybrid ML+DSP approach
|
|
65
109
|
if (this.isDenoiserEnabled() && typeof this.audioContext.audioWorklet !== "undefined") {
|
|
66
110
|
try {
|
|
67
111
|
await this.ensureDenoiseWorklet();
|
|
@@ -181,6 +225,40 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
181
225
|
}, 2000); // Check every 2 seconds
|
|
182
226
|
this.monitoringIntervals.set(participantId, interval);
|
|
183
227
|
}
|
|
228
|
+
/**
|
|
229
|
+
* Toggle spatialization for a participant (for huddle/spatial switching)
|
|
230
|
+
* @param participantId The participant to update
|
|
231
|
+
* @param enableSpatialization True for spatial audio, false for non-spatial (huddle)
|
|
232
|
+
*/
|
|
233
|
+
setParticipantSpatialization(participantId, enableSpatialization) {
|
|
234
|
+
const nodes = this.participantNodes.get(participantId);
|
|
235
|
+
if (!nodes) {
|
|
236
|
+
console.warn(`[SpatialAudio] No nodes found for participant ${participantId}`);
|
|
237
|
+
return;
|
|
238
|
+
}
|
|
239
|
+
// Disconnect and reconnect audio chain
|
|
240
|
+
try {
|
|
241
|
+
// Disconnect from current destination
|
|
242
|
+
nodes.proximityGain.disconnect();
|
|
243
|
+
if (enableSpatialization) {
|
|
244
|
+
// Connect through panner for 3D spatial audio
|
|
245
|
+
nodes.proximityGain.connect(nodes.panner);
|
|
246
|
+
nodes.panner.connect(nodes.analyser);
|
|
247
|
+
console.log(`🎯 [SpatialAudio] Enabled spatialization for ${participantId.substring(0, 8)}`);
|
|
248
|
+
}
|
|
249
|
+
else {
|
|
250
|
+
// Bypass panner for non-spatial (huddle) audio
|
|
251
|
+
nodes.proximityGain.connect(nodes.analyser);
|
|
252
|
+
console.log(`🔊 [SpatialAudio] Disabled spatialization (huddle mode) for ${participantId.substring(0, 8)}`);
|
|
253
|
+
}
|
|
254
|
+
// Rest of the chain remains the same
|
|
255
|
+
nodes.analyser.connect(nodes.gain);
|
|
256
|
+
nodes.gain.connect(this.masterGainNode);
|
|
257
|
+
}
|
|
258
|
+
catch (error) {
|
|
259
|
+
console.error(`[SpatialAudio] Error toggling spatialization for ${participantId}:`, error);
|
|
260
|
+
}
|
|
261
|
+
}
|
|
184
262
|
/**
|
|
185
263
|
* Update spatial audio position and orientation for a participant
|
|
186
264
|
*
|
|
@@ -278,7 +356,9 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
278
356
|
const fwdY = forwardY / forwardLen;
|
|
279
357
|
const fwdZ = forwardZ / forwardLen;
|
|
280
358
|
// Calculate right vector (cross product of world up and forward)
|
|
281
|
-
|
|
359
|
+
// Web Audio API uses Y-up coordinate system, Unreal uses Z-up
|
|
360
|
+
// We need to transform: Unreal (X,Y,Z) -> WebAudio (X,Z,-Y)
|
|
361
|
+
const worldUp = { x: 0, y: 1, z: 0 }; // Web Audio Y-up
|
|
282
362
|
const rightX = worldUp.y * fwdZ - worldUp.z * fwdY;
|
|
283
363
|
const rightY = worldUp.z * fwdX - worldUp.x * fwdZ;
|
|
284
364
|
const rightZ = worldUp.x * fwdY - worldUp.y * fwdX;
|
|
@@ -290,8 +370,8 @@ class SpatialAudioManager extends EventManager_1.EventManager {
|
|
|
290
370
|
forwardY: fwdY,
|
|
291
371
|
forwardZ: fwdZ,
|
|
292
372
|
upX: 0,
|
|
293
|
-
upY:
|
|
294
|
-
upZ:
|
|
373
|
+
upY: 1,
|
|
374
|
+
upZ: 0,
|
|
295
375
|
});
|
|
296
376
|
return;
|
|
297
377
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -15,10 +15,9 @@ export declare class OdysseySpatialComms extends EventManager {
|
|
|
15
15
|
emit(event: OdysseyEvent, ...args: any[]): boolean;
|
|
16
16
|
/**
|
|
17
17
|
* Initialize ML-based noise suppression
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
* @param modelPath Path to the ML model (currently not used)
|
|
18
|
+
* This method loads the TensorFlow.js model and enables ML-based denoising.
|
|
19
|
+
* Falls back to AudioWorklet denoiser if ML initialization fails.
|
|
20
|
+
* @param modelPath Path to the ML model (e.g., "/odyssey_noise_suppressor_v1/model.json")
|
|
22
21
|
*/
|
|
23
22
|
initializeMLNoiseSuppression(modelPath: string): Promise<void>;
|
|
24
23
|
joinRoom(data: {
|
package/dist/index.js
CHANGED
|
@@ -32,17 +32,22 @@ class OdysseySpatialComms extends EventManager_1.EventManager {
|
|
|
32
32
|
}
|
|
33
33
|
/**
|
|
34
34
|
* Initialize ML-based noise suppression
|
|
35
|
-
*
|
|
36
|
-
*
|
|
37
|
-
*
|
|
38
|
-
* @param modelPath Path to the ML model (currently not used)
|
|
35
|
+
* This method loads the TensorFlow.js model and enables ML-based denoising.
|
|
36
|
+
* Falls back to AudioWorklet denoiser if ML initialization fails.
|
|
37
|
+
* @param modelPath Path to the ML model (e.g., "/odyssey_noise_suppressor_v1/model.json")
|
|
39
38
|
*/
|
|
40
39
|
async initializeMLNoiseSuppression(modelPath) {
|
|
41
|
-
console.log(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
40
|
+
console.log(`🤖 [OdysseySpatialComms] Initializing ML Noise Suppression with model: ${modelPath}`);
|
|
41
|
+
try {
|
|
42
|
+
await this.spatialAudioManager.initializeMLNoiseSuppression(modelPath);
|
|
43
|
+
const mode = this.spatialAudioManager.getNoiseSuppressionMode();
|
|
44
|
+
console.log(`✅ [OdysseySpatialComms] Noise suppression mode: ${mode.toUpperCase()}`);
|
|
45
|
+
}
|
|
46
|
+
catch (error) {
|
|
47
|
+
console.error('❌ [OdysseySpatialComms] ML initialization failed:', error);
|
|
48
|
+
console.log('🔄 [OdysseySpatialComms] Falling back to AudioWorklet denoiser');
|
|
49
|
+
// Graceful degradation - AudioWorklet will be used as fallback
|
|
50
|
+
}
|
|
46
51
|
}
|
|
47
52
|
async joinRoom(data) {
|
|
48
53
|
return new Promise((resolve, reject) => {
|
|
@@ -378,11 +383,16 @@ class OdysseySpatialComms extends EventManager_1.EventManager {
|
|
|
378
383
|
return; // Exit early to prevent any audio processing
|
|
379
384
|
}
|
|
380
385
|
else {
|
|
381
|
-
//
|
|
382
|
-
|
|
386
|
+
// Check if participant is in a huddle (non-spatial channel)
|
|
387
|
+
const participantChannel = participant.currentChannel || "spatial";
|
|
388
|
+
const isInHuddle = participantChannel !== "spatial";
|
|
389
|
+
// Setup spatial audio - bypass 3D positioning for huddle members
|
|
390
|
+
await this.spatialAudioManager.setupSpatialAudioForParticipant(participant.participantId, track, isInHuddle // Bypass spatialization if in huddle
|
|
383
391
|
);
|
|
384
|
-
//
|
|
385
|
-
|
|
392
|
+
// Only update spatial position if in spatial channel
|
|
393
|
+
if (!isInHuddle) {
|
|
394
|
+
this.spatialAudioManager.updateSpatialAudio(participant.participantId, data.position);
|
|
395
|
+
}
|
|
386
396
|
}
|
|
387
397
|
// NOW resume the consumer after audio pipeline is ready
|
|
388
398
|
this.mediasoupManager
|
|
@@ -491,6 +501,9 @@ class OdysseySpatialComms extends EventManager_1.EventManager {
|
|
|
491
501
|
const participant = this.room?.participants.get(data.participantId);
|
|
492
502
|
if (participant) {
|
|
493
503
|
participant.currentChannel = data.channelId;
|
|
504
|
+
// Update spatialization based on channel
|
|
505
|
+
const isInSpatialChannel = data.channelId === "spatial";
|
|
506
|
+
this.spatialAudioManager.setParticipantSpatialization(data.participantId, isInSpatialChannel);
|
|
494
507
|
// If this participant is now in a different channel from us, clear their screenshare
|
|
495
508
|
const myChannel = this.localParticipant?.currentChannel || "spatial";
|
|
496
509
|
const theirChannel = data.channelId || "spatial";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@newgameplusinc/odyssey-audio-video-sdk-dev",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.61",
|
|
4
4
|
"description": "Odyssey Spatial Audio & Video SDK using MediaSoup for real-time communication",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -31,7 +31,8 @@
|
|
|
31
31
|
"socket.io-client": "^4.7.2",
|
|
32
32
|
"webrtc-adapter": "^8.2.3",
|
|
33
33
|
"mediasoup-client": "^3.6.90",
|
|
34
|
-
"events": "^3.3.0"
|
|
34
|
+
"events": "^3.3.0",
|
|
35
|
+
"@tensorflow/tfjs": "^4.11.0"
|
|
35
36
|
},
|
|
36
37
|
"devDependencies": {
|
|
37
38
|
"@types/node": "^20.0.0",
|