@newgameplusinc/alpha-spatial-comms 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,1092 @@
1
+ # odyssey-spatial-comms
2
+
3
+ > WebRTC spatial audio & video SDK — built on mediasoup, Web Audio API, and TensorFlow noise suppression.
4
+
5
+ [![npm version](https://img.shields.io/npm/v/odyssey-spatial-comms)](https://www.npmjs.com/package/odyssey-spatial-comms)
6
+ [![license](https://img.shields.io/npm/l/odyssey-spatial-comms)](LICENSE)
7
+
8
+ ---
9
+
10
+ ## Installation
11
+
12
+ **npm**
13
+ ```bash
14
+ npm install odyssey-spatial-comms
15
+ ```
16
+
17
+ **yarn**
18
+ ```bash
19
+ yarn add odyssey-spatial-comms
20
+ ```
21
+
22
+ **pnpm**
23
+ ```bash
24
+ pnpm add odyssey-spatial-comms
25
+ ```
26
+
27
+ ---
28
+
29
+ ## Quick Start
30
+
31
+ ### React / Next.js
32
+ ```tsx
33
+ import { SpatialCommsSDK } from 'odyssey-spatial-comms';
34
+
35
+ // Create once when the user enters a space — type is inferred automatically
36
+ const client = SpatialCommsSDK.create('https://your-mediasoup-server.com');
37
+
38
+ // Resume audio context on first user gesture (required by all browsers)
39
+ await client.resumeAudio();
40
+
41
+ // Listen for events
42
+ client.on('room-joined', (data) => console.log('joined', data));
43
+ client.on('participant-joined', (p) => console.log('user joined', p.userId));
44
+
45
+ // Join a room
46
+ await client.joinRoom({
47
+ roomId: 'my-room',
48
+ userId: 'user-123',
49
+ deviceId: 'device-123',
50
+ position: { x: 0, y: 0, z: 0 },
51
+ direction: { x: 0, y: 0, z: 1 },
52
+ userName: 'Alice', // optional — shown to other participants
53
+ userEmail: 'alice@co.com', // optional
54
+ });
55
+
56
+ // Publish microphone
57
+ const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
58
+ await client.produceTrack(stream.getAudioTracks()[0]);
59
+
60
+ // Update 3D position every time the user moves (~10Hz)
61
+ client.updatePosition(
62
+ { x: 1.0, y: 0.0, z: 5.0 }, // world position in meters
63
+ { x: 0, y: 0, z: 1 }, // forward direction (unit vector)
64
+ { rot: { x: 0, y: 45, z: 0 } } // rotation: pitch, yaw, roll (degrees)
65
+ );
66
+
67
+ // Leave
68
+ client.leaveRoom();
69
+ ```
70
+
71
+ ### Vue 3 (Composition API)
72
+ ```ts
73
+ import { SpatialCommsSDK } from 'odyssey-spatial-comms';
74
+ import type { OdysseySpatialCommsHandle } from 'odyssey-spatial-comms';
75
+ import { onMounted, onUnmounted, ref } from 'vue';
76
+
77
+ const client = ref<OdysseySpatialCommsHandle | null>(null);
78
+
79
+ onMounted(() => {
80
+ client.value = SpatialCommsSDK.create('https://your-mediasoup-server.com');
81
+
82
+ client.value.on('room-joined', () => console.log('connected!'));
83
+ client.value.on('participant-joined', (p) => console.log('user joined', p.userId));
84
+
85
+ client.value.joinRoom({
86
+ roomId: 'my-room',
87
+ userId: 'user-123',
88
+ deviceId: 'device-123',
89
+ position: { x: 0, y: 0, z: 0 },
90
+ direction: { x: 0, y: 0, z: 1 },
91
+ });
92
+ });
93
+
94
+ onUnmounted(() => client.value?.leaveRoom());
95
+ ```
96
+
97
+ ### Plain TypeScript / Vanilla JS
98
+ ```ts
99
+ import { SpatialCommsSDK } from 'odyssey-spatial-comms';
100
+
101
+ const client = SpatialCommsSDK.create('https://your-mediasoup-server.com');
102
+
103
+ client.on('room-joined', () => console.log('connected!'));
104
+
105
+ await client.joinRoom({
106
+ roomId: 'lobby',
107
+ userId: 'user-1',
108
+ deviceId: 'device-1',
109
+ position: { x: 0, y: 0, z: 0 },
110
+ direction: { x: 0, y: 0, z: 1 },
111
+ });
112
+ ```
113
+
114
+ ### Next.js — dynamic import (browser-only SDK)
115
+ ```ts
116
+ // pages/space.tsx or app/space/page.tsx
117
+ import dynamic from 'next/dynamic';
118
+
119
+ // Must be dynamic — SDK uses Web Audio API (browser only)
120
+ const SpaceRoom = dynamic(() => import('../components/SpaceRoom'), { ssr: false });
121
+ ```
122
+
123
+ ```tsx
124
+ // components/SpaceRoom.tsx
125
+ import { useEffect, useRef } from 'react';
126
+ import { SpatialCommsSDK } from 'odyssey-spatial-comms';
127
+ import type { OdysseySpatialCommsHandle } from 'odyssey-spatial-comms';
128
+
129
+ export default function SpaceRoom() {
130
+ const clientRef = useRef<OdysseySpatialCommsHandle | null>(null);
131
+
132
+ useEffect(() => {
133
+ const client = SpatialCommsSDK.create('https://your-server.com');
134
+ clientRef.current = client;
135
+
136
+ client.on('room-joined', async () => {
137
+ await client.resumeAudio();
138
+ const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
139
+ await client.produceTrack(stream.getAudioTracks()[0]);
140
+ });
141
+
142
+ client.joinRoom({
143
+ roomId: 'space-1',
144
+ userId: 'user-1',
145
+ deviceId: 'dev-1',
146
+ position: { x: 0, y: 0, z: 0 },
147
+ direction: { x: 0, y: 0, z: 1 },
148
+ });
149
+
150
+ return () => { client.leaveRoom(); };
151
+ }, []);
152
+
153
+ return <div>Space loaded</div>;
154
+ }
155
+ ```
156
+
157
+ ### TypeScript — if you need the type explicitly
158
+ ```ts
159
+ import { SpatialCommsSDK } from 'odyssey-spatial-comms';
160
+ import type { OdysseySpatialCommsHandle } from 'odyssey-spatial-comms';
161
+
162
+ // Option A: let TypeScript infer it (recommended — no import needed)
163
+ const client = SpatialCommsSDK.create('https://your-server.com');
164
+
165
+ // Option B: explicit type annotation (only needed for refs/stores)
166
+ let clientRef: OdysseySpatialCommsHandle | null = null;
167
+
168
+ // Option C: derive the type from the factory (no import needed)
169
+ type OdysseyClient = ReturnType<typeof SpatialCommsSDK.create>;
170
+ ```
171
+
172
+ ---
173
+
174
+ ## Peer Dependencies
175
+
176
+ These are **not bundled** — install them in your own project:
177
+
178
+ ```bash
179
+ npm install socket.io-client mediasoup-client @tensorflow/tfjs webrtc-adapter
180
+ ```
181
+
182
+ ---
183
+
184
+ ## Key API
185
+
186
+ | Method | Description |
187
+ |---|---|
188
+ | `SpatialCommsSDK.create(serverUrl)` | Create a new SDK client |
189
+ | `client.joinRoom({ roomId, userId, deviceId, position, direction })` | Join a room |
190
+ | `client.leaveRoom()` | Leave the current room |
191
+ | `client.updatePosition(position, direction)` | Update 3D position in space |
192
+ | `client.produceTrack(track, appData?)` | Publish a local audio/video track |
193
+ | `client.setMasterMuted(muted)` | Mute/unmute all incoming audio |
194
+ | `client.initializeMLNoiseSuppression(modelPath)` | Enable TF.js noise suppression |
195
+ | `client.setListenerPosition(pos, orientation)` | Set spatial audio listener |
196
+ | `client.on(event, handler)` | Subscribe to SDK events |
197
+ | `client.off(event, handler)` | Unsubscribe from SDK events |
198
+
199
+ ---
200
+
201
+ ## Events
202
+
203
+ ```ts
204
+ client.on('room-joined', (data: RoomJoinedData) => { ... });
205
+ client.on('participant-joined', (p: Participant) => { ... });
206
+ client.on('participant-left', (p: Participant) => { ... });
207
+ client.on('participant-updated', (p: Participant) => { ... });
208
+ client.on('track-added', ({ participantId, track, kind }) => { ... });
209
+ client.on('huddle-invite', ({ from }) => { ... });
210
+ client.on('space-live-started', (data) => { ... });
211
+ client.on('space-live-stopped', () => { ... });
212
+ ```
213
+
214
+ ---
215
+
216
+ ## Complete Flow: Frontend → Server → SDK
217
+
218
+ ```
219
+ [1] UNREAL ENGINE
220
+ Sends: pos=(4130, 220, 700) cm (X=forward, Y=right, Z=up)
221
+
222
+
223
+
224
+
225
+ [2] FRONTEND (Vue / React)
226
+ Transform Unreal coords → Standard + cm → meters:
227
+
228
+ position = {
229
+ x: unrealPos.y / 100, // UE Y (right) → X (right) = 2.2m
230
+ y: unrealPos.z / 100, // UE Z (up) → Y (up) = 7.0m
231
+ z: unrealPos.x / 100 // UE X (forward) → Z (forward) = 41.3m
232
+ }
233
+
234
+ Calls:
235
+ sdk.updatePosition(position, direction, { rot })
236
+ sdk.setListenerFromLSD(position, cameraPos, lookAtPos, rot)
237
+
238
+
239
+
240
+
241
+ [3] SDK → SERVER (socket.emit "update-position")
242
+ Sends: { participantId, position, direction, rot }
243
+
244
+
245
+
246
+
247
+ [4] SERVER (pass-through mode)
248
+ 1. Receive position from client
249
+ 2. Auto-detect units: if maxAxis > 50 → divide by 100 (cm→m safety net)
250
+ 3. No smoothing: pass real-time position straight through
251
+ 4. Broadcast normalized position (meters) to all other clients
252
+
253
+
254
+
255
+
256
+ [5] SDK — receiving remote participant positions
257
+ socket.on("participant-position-updated") triggers:
258
+ 1. normalizePositionUnits() — backup unit check (maxAxis > 50 → /100)
259
+ 2. snapPosition() — ignore movements < 15cm (anti-jitter)
260
+ 3. computeHeadPosition() — add +1.6m Y for head height
261
+ 4. calculateLogarithmicGain()— cubic falloff 100%→0% over 0.5m→15m
262
+ 5. calculatePanning() — sin(atan2) projection onto listener right-ear axis
263
+ 6. Web Audio nodes updated — GainNode + StereoPannerNode ramped smoothly
264
+ ```
265
+
266
+ ---
267
+
268
+ ## What Happens on a Sudden 5m Position Jump
269
+
270
+ ```
271
+ SCENARIO: Person teleports from 2m away → 7m away instantly
272
+
273
+ Step 1 — SERVER detects jump > 5m
274
+ Lerps 30% toward new position each frame:
275
+
276
+ Frame 1: 2.0m → 3.5m (30% of 5m gap)
277
+ Frame 2: 3.5m → 4.55m
278
+ Frame 3: 4.55m → 5.29m
279
+ Frame 4: 5.29m → 5.80m
280
+ Frame N: ... converges to 7m
281
+
282
+ Step 2 — SDK receives the smoothed intermediate positions
283
+ Recalculates cubic gain for each step:
284
+ 3.5m → ~51% gain
285
+ 4.55m → ~38% gain
286
+ 5.29m → ~29% gain
287
+
288
+ Step 3 — Web Audio smooths each gain change
289
+ gainNode.gain.setTargetAtTime(newGain, now, 0.05)
290
+ ≈ 150ms smooth ramp per step → zero clicks or pops
291
+ ```
292
+
293
+ ## Coordinate System (World Space)
294
+
295
+ All spatial calculations are performed relative to a **world origin (datum)** at `(0, 0, 0)`:
296
+
297
+ ```
298
+ +Z (Forward/North)
299
+
300
+ 10 |
301
+ | B (15, 8) ← Speaker
302
+ 8 | /
303
+ | / 5.83m distance
304
+ 6 | /
305
+ | /
306
+ 5 | A (10, 5) ← YOU (Listener, facing 0°)
307
+ | ↑
308
+ 3 | | Your right ear →
309
+ | |
310
+ 1 |
311
+ |
312
+ 0 +--+--+--+--+--+--+--+--+--→ +X (Right/East)
313
+ 0 2 4 6 8 10 12 14 16
314
+
315
+ ↙ (into page)
316
+ +Y (Up/Height)
317
+ ```
318
+
319
+ **Key Points:**
320
+ - **Datum (0,0,0)**: World origin - all positions measured from here
321
+ - **X-axis**: Right/Left (positive = right, negative = left)
322
+ - **Y-axis**: Up/Down (height above ground)
323
+ - **Z-axis**: Forward/Back (positive = forward/north, negative = back/south)
324
+ - **Distance**: 3D Euclidean distance = `√(Δx² + Δy² + Δz²)`
325
+ - **Panning**: Calculated from X-Z plane position relative to listener rotation
326
+
327
+ **Coordinate Transform (Unreal → Standard):**
328
+ ```javascript
329
+ // Unreal: X=forward, Y=right, Z=up
330
+ // Standard SDK: X=right, Y=up, Z=forward
331
+ position = {
332
+ x: unrealPos.y / 100, // UE Y (right) → X (right)
333
+ y: unrealPos.z / 100, // UE Z (up) → Y (up)
334
+ z: unrealPos.x / 100 // UE X (forward) → Z (forward)
335
+ }
336
+ ```
337
+
338
+ ---
339
+
340
+ ## Feature Highlights
341
+
342
+ - 🔌 **One factory to rule it all** – `SpatialCommsSDK.create(serverUrl)` wires transports, producers, consumers, and room state.
343
+ - 🧭 **Accurate pose propagation** – `updatePosition()` streams listener pose to the SFU while `participant-position-updated` keeps the local store in sync.
344
+ - 🎧 **Studio-grade spatial audio** – each remote participant gets a dedicated Web Audio graph: `ML denoiser (ScriptProcessorNode) → limiter → high-pass → low-pass → stereo panner → adaptive gain → master compressor`. ML denoiser is a trained 3-layer GRU model (872K params, val_loss=0.1636) running fully client-side via TensorFlow.js.
345
+ - 🎚️ **Crystal-Clear Audio Processing** – A finely-tuned audio pipeline featuring a gentle compressor, multi-stage filtering, and a smart denoiser prevents audio dropouts and echo. The result is a more natural, continuous voice without distracting artifacts.
346
+ - 🧭 **Position-based spatial panning** – `updatePosition` forwards positions to Web Audio which calculates panning based on WHERE the speaker is relative to the listener (not which way they face). Uses listener's right-vector projection with 5m pan radius for natural left/right placement.
347
+ - 🤖 **ML Noise Suppression (Active)** – TensorFlow.js GRU model (`odyssey_adaptive_denoiser`) runs as a `ScriptProcessorNode` wired as the **first node** in every participant's audio chain. Loads non-blocking in the background; operates in pass-through mode until the model is ready, then switches to ML denoising automatically. No fallback — if it fails, the error is logged to console.
348
+ - 🔄 **ICE Connection Stability** – Automatic ICE restart on transport disconnect for robust connections. SDK requests ICE restart from server when transport enters `disconnected` state, enabling faster recovery from network issues without full reconnection.
349
+
350
+ ---
351
+
352
+ ## Audio Flow (Server ↔ Browser)
353
+
354
+ ```
355
+ ┌──────────────┐ update-position ┌──────────────┐ pose + tracks ┌──────────────────┐
356
+ │ Browser LSD │ ──────────────────▶ │ MediaSoup SFU│ ────────────────▶ │ SDK Event Bus │
357
+ │ (Unreal data)│ │ + Socket.IO │ │ (EventManager) │
358
+ └──────┬───────┘ └──────┬───────┘ └──────────┬────────┘
359
+ │ │ track + pose
360
+ │ │ ▼
361
+ │ ┌────────▼────────┐ ┌──────────────────┐
362
+ │ audio RTP │ consumer-created│ │ SpatialAudioMgr │
363
+ └──────────────────────────▶│ setup per-user │◀──────────────────────│ (Web Audio API) │
364
+ └────────┬────────┘ │ - Denoiser │
365
+ │ │ - HP / LP │
366
+ │ │ - StereoPanner │
367
+ ▼ │ - Gain + Comp │
368
+ Web Audio Graph └──────────┬───────┘
369
+ │ │
370
+ ▼ ▼
371
+ Listener ears (Left/Right) System Output
372
+ ```
373
+
374
+ ---
375
+
376
+ ## Video Flow (Capture ↔ Rendering)
377
+
378
+ ```
379
+ ┌──────────────┐ produceTrack ┌──────────────┐ RTP ┌──────────────┐
380
+ │ getUserMedia │ ───────────────▶ │ MediaSoup SDK│ ──────▶ │ MediaSoup SFU│
381
+ └──────┬───────┘ │ (Odyssey) │ └──────┬───────┘
382
+ │ └──────┬───────┘ │
383
+ │ consumer-created │ track │
384
+ ▼ ▼ │
385
+ ┌──────────────┐ ┌──────────────┐ │
386
+ │ Vue/React UI │ ◀─────────────── │ SDK Event Bus │ ◀──────────────┘
387
+ │ (muted video │ │ exposes media │
388
+ │ elements) │ │ tracks │
389
+ └──────────────┘ └──────────────┘
390
+ ```
391
+
392
+ **Video Track Flow:**
393
+ 1. **Capture**: `getUserMedia()` captures video from camera or screen
394
+ 2. **Produce**: `sdk.produceTrack(track, { isScreenshare: true })` sends to SFU
395
+ 3. **Route**: MediaSoup SFU routes video RTP to other participants
396
+ 4. **Consume**: SDK receives `consumer-created` event with video track
397
+ 5. **Render**: UI attaches track to muted `<video>` element (audio handled separately)
398
+
399
+ ---
400
+
401
+ ## Web Audio Algorithms
402
+
403
+ **Coordinate normalization** – Unreal sends centimeters; SpatialAudioManager auto-detects large values and converts to meters once.
404
+
405
+ **360° angle-based stereo panning** – `setListenerFromLSD()` calculates the listener's right-ear vector from their yaw (rot.y). When `updateSpatialAudio()` runs, it uses `atan2` to calculate the angle from listener to speaker, then applies `sin(angle)` for natural panning. This gives **full left/right separation** at ±90° angles. Speaker's rotation is ignored – only their position relative to listener matters.
406
+
407
+ **Dynamic distance gain** – `updateSpatialAudio()` measures distance from listener → source and applies a **CUBIC EXPONENTIAL** falloff (0.5m-15m range). Voices gradually fade from 100% (0.5m) to **complete silence at 15m+** (hard cutoff). The cubic `(1-normalized)³` formula creates clearly noticeable volume changes as you move. Distance calculated from listener's HEAD position to participant's HEAD position (body + 1.6m height). Master compressor is **DISABLED** to ensure gain changes are audible.
408
+
409
+ **Noise handling** – a TensorFlow.js GRU model (`odyssey_adaptive_denoiser`, 872K params, val_loss=0.1636) runs in a ScriptProcessorNode as the FIRST node in every participant's chain, applying a learned spectral mask before the high/low-pass filters. Audio passes through unchanged until the model finishes loading, then ML denoising becomes active automatically with no user action required.
410
+
411
+ ---
412
+
413
+ ## Spatial Audio System (CLOCKWISE Rotation)
414
+
415
+ ### Core Algorithm (Full 360° Support)
416
+
417
+ The panning calculation uses **position-based projection** onto the listener's right-ear axis:
418
+
419
+ ```typescript
420
+ // Step 1: Calculate listener's right vector from yaw (CLOCKWISE rotation)
421
+ const yawRad = (rot.y * Math.PI) / 180;
422
+ listenerRight = {
423
+ x: Math.cos(yawRad),
424
+ z: -Math.sin(yawRad) // NEGATIVE sine for CLOCKWISE rotation
425
+ };
426
+
427
+ // Step 2: Vector from listener to speaker
428
+ vecToSource = {
429
+ x: speakerPos.x - listenerPos.x,
430
+ z: speakerPos.z - listenerPos.z
431
+ };
432
+
433
+ // Step 3: Calculate forward vector (90° CW from right)
434
+ listenerForward = { x: -listenerRight.z, z: listenerRight.x };
435
+
436
+ // Step 4: Project onto both axes
437
+ dxLocal = vecToSource.x * listenerRight.x + vecToSource.z * listenerRight.z; // Right/Left
438
+ dzLocal = vecToSource.x * listenerForward.x + vecToSource.z * listenerForward.z; // Front/Back
439
+
440
+ // Step 5: Calculate angle using atan2 (gives -π to +π radians)
441
+ angleToSource = Math.atan2(dxLocal, dzLocal);
442
+
443
+ // Step 6: Convert to pan value using sine (-1 to +1)
444
+ // 90° (right) = +1.0, 270° (left) = -1.0, 0°/180° (front/back) = 0.0
445
+ rawPan = Math.sin(angleToSource);
446
+
447
+ // Step 7: Apply smoothing to prevent jitter
448
+ smoothedPan = smoothPanValue(participantId, rawPan);
449
+ ```
450
+
451
+ ### Key Principles
452
+
453
+ | Principle | Description |
454
+ |----------------------------|-------------------------------------------------------------|
455
+ | **Position-based** | Panning based on WHERE speaker is, NOT where they're looking |
456
+ | **Listener yaw matters** | Your `rot.y` determines which direction is "right" |
457
+ | **Speaker rotation ignored** | Their facing direction does NOT affect panning |
458
+ | **Full 360° support** | cos/sin trigonometry handles any angle automatically |
459
+
460
+ ### Listener Right Vector by Yaw (CLOCKWISE Rotation)
461
+
462
+ | Yaw | Facing | listenerRight (x, z) | Right Ear Faces | Left Ear Faces |
463
+ |-------|-----------|----------------------|-----------------|----------------|
464
+ | 0° | +Z (fwd) | (1.0, 0.0) | +X | -X |
465
+ | 90° | +X (right)| (0.0, -1.0) | -Z | +Z |
466
+ | 180° | -Z (back) | (-1.0, 0.0) | -X | +X |
467
+ | 270° | -X (left) | (0.0, 1.0) | +Z | -Z |
468
+
469
+ ### Pan Value to Left/Right Gain
470
+
471
+ | panValue | Left Ear | Right Ear | Angle | Description |
472
+ |----------|----------|-----------|--------------|----------------|
473
+ | -1.0 | 100% | 0% | 270° (left) | Full LEFT |
474
+ | -0.71 | 85% | 15% | 315°/225° | Diagonal LEFT |
475
+ | 0.0 | 50% | 50% | 0°/180° | CENTER |
476
+ | +0.71 | 15% | 85% | 45°/135° | Diagonal RIGHT |
477
+ | +1.0 | 0% | 100% | 90° (right) | Full RIGHT |
478
+
479
+ ---
480
+
481
+ ## Anti-Jitter Smoothing (3 Layers)
482
+
483
+ ### Layer 1: Gain Change Threshold Filter (2.5%)
484
+ ```typescript
485
+ const GAIN_CHANGE_THRESHOLD = 0.025; // 2.5%
486
+ if (Math.abs(newGain - currentGain) / 100 < GAIN_CHANGE_THRESHOLD) {
487
+ return currentGain; // Ignore micro-jitter (movements ≤40cm)
488
+ }
489
+ ```
490
+
491
+ ### Layer 2: Adaptive EMA for Pan
492
+ ```typescript
493
+ // Normal: 70% smoothing for stability
494
+ smoothedPan = previousPan * 0.7 + newPan * 0.3;
495
+
496
+ // Near center: 50% smoothing for moderate response
497
+ if (bothNearCenter) {
498
+ smoothedPan = previousPan * 0.5 + newPan * 0.5;
499
+ }
500
+
501
+ // Full flip (likely jitter): 85% HEAVY smoothing
502
+ if (signFlipped && panChange > 1.0) {
503
+ smoothedPan = previousPan * 0.85 + newPan * 0.15;
504
+ }
505
+ ```
506
+
507
+ ### Layer 3: Audio API Ramp Time
508
+ ```typescript
509
+ stereoPanner.pan.setTargetAtTime(panValue, currentTime, 0.08); // 80ms pan
510
+ gainNode.gain.setTargetAtTime(gainValue, currentTime, 0.05); // 50ms gain
511
+ ```
512
+
513
+ ---
514
+
515
+ ## Distance-Based Gain: CUBIC EXPONENTIAL Falloff (HARD CUTOFF at 15m)
516
+
517
+ | Distance | Gain | Description |
518
+ |:---------------|:------:|:---------------------------------|
519
+ | `0.0 - 0.5m` | 100% | Full volume (intimate) |
520
+ | `1.0m` | ~90% | Very close - still loud |
521
+ | `2.0m` | ~72% | Normal talking - NOTICEABLE |
522
+ | `3.0m` | ~57% | Across table - CLEARLY QUIETER |
523
+ | `5.0m` | ~33% | Across room - MUCH QUIETER |
524
+ | `7.0m` | ~17% | Far end of room - very faint |
525
+ | `10.0m` | ~4% | Barely audible |
526
+ | `≥15.0m` | **0%** | **Silent (HARD CUTOFF)** |
527
+
528
+ **CUBIC EXPONENTIAL falloff formula:**
529
+ ```typescript
530
+ private calculateLogarithmicGain(distance: number): number {
531
+ const minDistance = 0.5; // Full volume at 0.5m or closer
532
+ const maxDistance = 15.0; // Silent at 15m or farther - HARD CUTOFF
533
+
534
+ if (distance <= minDistance) return 100; // Full volume
535
+ if (distance >= maxDistance) return 0; // Silent - HARD CUTOFF
536
+
537
+ // CUBIC: (1 - normalized)³ for NOTICEABLE volume changes
538
+ const range = maxDistance - minDistance; // 14.5m
539
+ const normalizedDistance = (distance - minDistance) / range;
540
+ const remainingRatio = 1 - normalizedDistance;
541
+
542
+ return 100 * remainingRatio * remainingRatio * remainingRatio;
543
+ }
544
+ ```
545
+
546
+ **Why Cubic (not Linear or Quadratic)?**
547
+ - **Linear**: Too gradual - hard to notice volume changes
548
+ - **Quadratic**: Not steep enough for 15m range
549
+ - **Cubic**: Perfect balance - clearly noticeable with proper 15m silence
550
+
551
+ **Smoothing:** Web Audio's `setTargetAtTime()` handles all smoothing:
552
+ ```typescript
553
+ // Time constant 0.05 = ~150ms smooth transition (no clicks)
554
+ gainNode.gain.setTargetAtTime(gainValue, currentTime, 0.05);
555
+ ```
556
+
557
+ **Note:** Master compressor is **DISABLED** to ensure gain changes are clearly audible.
558
+
559
+ ---
560
+
561
+ ## Audio Stability System
562
+
563
+ ### Layer 1: Gain Change Threshold Filter (2.5%)
564
+ ```typescript
565
+ const GAIN_CHANGE_THRESHOLD = 0.025; // 2.5%
566
+ if (Math.abs(newGain - currentGain) / 100 < GAIN_CHANGE_THRESHOLD) {
567
+ return currentGain; // Ignore micro-jitter (movements ≤40cm)
568
+ }
569
+ ```
570
+
571
+ ### Layer 2: SDK Position Snapping
572
+ ```
573
+ positionSnapThreshold = 40cm
574
+ If movement < 40cm → use cached position (ignores pixel streaming jitter)
575
+ ```
576
+
577
+ ### Layer 3: Web Audio Smoothing
578
+ ```typescript
579
+ // Gain changes are smoothed by Web Audio API directly
580
+ // 50ms time constant for smooth transitions
581
+ gainNode.gain.setTargetAtTime(gainValue, currentTime, 0.05); // 150ms smooth
582
+ stereoPanner.pan.setTargetAtTime(panValue, currentTime, 0.08); // 240ms smooth
583
+ ```
584
+
585
+ **Why simplified?** Previous rate-limiting was causing gain to get stuck at low values. Web Audio's built-in smoothing is sufficient and more reliable.
586
+
587
+ ---
588
+
589
+ ## Enterprise-Grade Gain Smoothing
590
+
591
+ **Problem Solved:** With 40+ people in a room, rapid position updates (60+ Hz) caused **instantaneous gain changes** that created audible clicks, pops, and "pit pit" crackling noise. This was caused by `setValueAtTime()` creating waveform discontinuities.
592
+ ```
593
+ Throttling = "Wait a bit before doing the same thing again" to prevent overload! 🎯
594
+ Person walking 10 meters:
595
+
596
+ Without Throttling:
597
+ ||||||||||||||||||||||||||||||||||||| 600 updates
598
+ ↑ Every single tiny movement = update
599
+ Result: Clicks, pops, CPU overload 😖
600
+
601
+ With Throttling (16ms):
602
+ | | | | | | | | | | 60 updates
603
+ ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑
604
+ 16ms gaps between updates
605
+ Result: Smooth, efficient, perfect 😎
606
+ ```
607
+ ### The Solution: Intelligent Throttling + Adaptive Ramping
608
+
609
+ ```typescript
610
+ // OLD (Causes Clicks):
611
+ nodes.gain.gain.setValueAtTime(gainValue, currentTime); // ❌ Instant jump
612
+
613
+ // NEW (Butter Smooth):
614
+ nodes.gain.gain.cancelScheduledValues(currentTime);
615
+ nodes.gain.gain.setValueAtTime(lastGain, currentTime);
616
+ nodes.gain.gain.linearRampToValueAtTime(gainValue, currentTime + rampTime); // ✅ Smooth transition
617
+ ```
618
+
619
+ ### Performance Characteristics
620
+
621
+ | Participant Count | Position Updates/sec | Throttled Updates/sec | CPU Impact | Audio Quality |
622
+ |:-----------------:|:--------------------:|:---------------------:|:----------:|:-------------:|
623
+ | **2-5** | ~300 | ~60 | Low | Perfect ✅ |
624
+ | **10** | ~600 | ~120 | Low | Perfect ✅ |
625
+ | **20** | ~1,200 | ~240 | Medium | Perfect ✅ |
626
+ | **40** | ~2,400 | ~480 | Medium | Perfect ✅ |
627
+ | **100** | ~6,000 | ~600 | Medium | Perfect ✅ |
628
+
629
+ ### Intelligent Throttling Logic
630
+
631
+ ```typescript
632
+ // Throttle: Skip update if too recent AND gain change is small
633
+ const isSignificantChange = gainDelta > 0.1; // >10% change
634
+ if (timeSinceLastUpdate < 16 /* ms */ && !isSignificantChange) {
635
+ return; // Skip this update, wait for next frame
636
+ }
637
+ ```
638
+
639
+ **Key Features:**
640
+ - ✅ **Time-based throttling**: Maximum 60Hz per participant (16ms interval)
641
+ - ✅ **Significance bypass**: Large changes (>10%) bypass throttle immediately
642
+ - ✅ **Per-participant tracking**: Each person has independent throttle state
643
+ - ✅ **Standing participants**: Minimal updates when not moving (saves CPU)
644
+
645
+ ### Adaptive Ramp Time
646
+
647
+ The system automatically adjusts ramp time based on gain change magnitude:
648
+
649
+ | Gain Change | Ramp Time | User Experience |
650
+ |:-----------:|:---------:|:----------------|
651
+ | **< 5%** | 15ms | Instant feel, imperceptible smoothing |
652
+ | **5-20%** | 20-35ms | Smooth transition, natural |
653
+ | **20-30%** | 35-45ms | Very smooth, no artifacts |
654
+ | **> 30%** | 50ms | Ultra smooth, prevents any clicking |
655
+
656
+ **Formula:**
657
+ ```typescript
658
+ rampTime = Math.min(
659
+ 0.015 + (gainDelta * 0.1), // Base 15ms + scaled by change
660
+ 0.050 // Max 50ms cap
661
+ );
662
+ ```
663
+
664
+ ### Real-World Scenarios
665
+
666
+ | Scenario | Behavior | Result |
667
+ |----------|----------|--------|
668
+ | **Person walking nearby** | Small gain changes → 15-25ms ramps | Feels instant, zero clicks |
669
+ | **Person runs past you** | Large gain changes → 40-50ms ramps | Smooth volume sweep |
670
+ | **40 people, 20 moving** | ~1200 updates → throttled to ~240 | Perfect audio, low CPU |
671
+ | **Person stands still** | Updates skipped entirely | Zero CPU usage |
672
+ | **Person teleports close** | >10% change bypasses throttle | Immediate volume update |
673
+
674
+ ### Error Handling & Fallback
675
+
676
+ ```typescript
677
+ try {
678
+ // Smooth ramping
679
+ nodes.gain.gain.linearRampToValueAtTime(gainValue, currentTime + rampTime);
680
+ } catch (err) {
681
+ // Fallback: Direct value setting (rare edge case)
682
+ console.warn(`Gain scheduling failed, using instant set:`, err);
683
+ nodes.gain.gain.value = gainValue;
684
+ }
685
+ ```
686
+
687
+ ### Why This Works
688
+
689
+ **Root Cause:** Instantaneous gain changes create **waveform discontinuities**:
690
+ ```
691
+ Old Method: New Method:
692
+ Volume Volume
693
+ ↑ ↑
694
+ │ ╱╲ ╱╲ │ ╱╲ ╱╲
695
+ │ ╱ ╲ ╱ ╲ │ ╱ ╲ ╱ ╲
696
+ │ ╱ ╲╱ ╲ │ ╱ ╲╱ ╲
697
+ │ ╱ ╲ │ ╱ ╲
698
+ │ ╱ ╲ │ ╱ ╲
699
+ ──┼────────────────────→ Time ──┼────────────────────→ Time
700
+ 0 ← JUMP! Click here! 0 ← Smooth ramp here!
701
+ ```
702
+
703
+ **Technical Details:**
704
+ - Rapid gain jumps = discontinuous waveform = audible click
705
+ - With 60Hz position updates × 40 people = 2400 potential clicks/sec
706
+ - Linear ramping = continuous waveform = zero artifacts
707
+ - Throttling reduces update frequency by ~60% (saves CPU + audio thread)
708
+
709
+ ### Network Resilience
710
+
711
+ **Server-Side:**
712
+ ```typescript
713
+ // Opus codec with Forward Error Correction
714
+ useinbandfec: 1 // Automatically recovers lost packets
715
+ ptime: 20 // 20ms frames for low latency
716
+ ```
717
+
718
+ **Why Non-Spatial Audio Worked Fine:**
719
+ - Non-spatial audio: Single static gain value, rarely changes
720
+ - Spatial audio: Per-frame position updates = rapid gain changes
721
+ - **The issue wasn't network** - it was rapid gain value changes in Web Audio API
722
+
723
+ ---
724
+
725
+ ## 🎛️ Audio Processing Settings
726
+
727
+ > **Design Goal:** Crystal clear voice with no echo, pumping, or bathroom effect.
728
+
729
+ ### 🔊 Master Compressor
730
+
731
+ | Setting | Value | Purpose |
732
+ |:--------------|:---------:|:-------------------------------------|
733
+ | Threshold | `-18 dB` | Only compress loud peaks |
734
+ | Knee | `40 dB` | Soft knee for natural sound |
735
+ | Ratio | `3:1` | Gentle compression, no pumping |
736
+ | Attack | `10 ms` | Fast enough to catch peaks |
737
+ | Release | `150 ms` | Fast release prevents echo tail |
738
+ | Master Gain | `1.0` | Unity gain for clean signal |
739
+
740
+ ### 🎚️ Filter Chain
741
+
742
+ | Filter | Frequency | Q Value | Purpose |
743
+ |:----------------|:-----------:|:-------:|:--------------------------------|
744
+ | Highpass | `100 Hz` | `0.5` | Remove room boom/rumble |
745
+ | Lowpass | `10 kHz` | `0.5` | Open sound, no ringing |
746
+ | Voice Boost | `180 Hz` | `0.5` | ❌ **Disabled** (prevents echo) |
747
+ | Dynamic Lowpass | `12 kHz` | `0.5` | Natural treble preservation |
748
+
749
+ ### 🛡️ Per-Participant Limiter
750
+
751
+ | Setting | Value | Purpose |
752
+ |:-----------|:---------:|:--------------------------------------|
753
+ | Threshold | `-6 dB` | Only activate near clipping |
754
+ | Knee | `3 dB` | Hard knee = true limiter |
755
+ | Ratio | `20:1` | High ratio catches peaks cleanly |
756
+ | Attack | `1 ms` | Ultra-fast peak catching |
757
+ | Release | `50 ms` | Fast release = no pumping |
758
+
759
+ ### 🎤 Denoiser (ML — GRU ScriptProcessorNode)
760
+
761
+ | Parameter | Value | Purpose |
762
+ |:------------------|:-------------------------:|:--------------------------------------------------|
763
+ | Model | `odyssey_adaptive_denoiser` | 3-layer GRU, UINT8 quantized TF.js |
764
+ | Params | 872,448 | Trained 100 epochs, val_loss=0.1636 |
765
+ | Buffer size | 4096 samples (~85ms) | ScriptProcessorNode synchronous processing |
766
+ | Backend | WebGL (GPU) | Reported at load: `[MLNoiseSuppressor] TF.js backend ready: webgl` |
767
+ | Pass-through | Yes (while loading) | Audio unaffected until model is ready |
768
+ | Normalization | mean=0.3953, std=0.1442 | Stats loaded from `normalization_stats.json` |
769
+
770
+ ---
771
+
772
+ ## 🔗 Audio Chain
773
+
774
+ ```
775
+ ┌──────────────────────────────────────────────────────────────────────────────────────────┐
776
+ │ AUDIO PROCESSING CHAIN │
777
+ ├──────────────────────────────────────────────────────────────────────────────────────────┤
778
+ │ │
779
+ │ MediaStream ML Denoiser Per-Participant Spatial Master │
780
+ │ Source → (GRU model) → Limiter → Filters → Panner → Compressor │
781
+ │ │ │ │ │ │ │ │
782
+ │ ▼ ▼ ▼ ▼ ▼ ▼ │
783
+ │ [WebRTC] [ScriptProcessor [Peak Catch] [HP 100Hz] [Stereo [3:1 Ratio] │
784
+ │ Track 872K GRU model] [-6dB] LP 10kHz] L/R Pan] Output │
785
+ │ (pass-through │
786
+ │ while loading) │
787
+ └──────────────────────────────────────────────────────────────────────────────────────────┘
788
+ ```
789
+
790
+ **Detailed Chain:**
791
+ ```
792
+ Source → MLScriptProcessor (GRU denoiser) → Limiter → HighPass(100Hz) → VoiceBand → LowPass(10kHz) →
793
+ DynamicLP(12kHz) → MonoDownmix → StereoUpmix → StereoPanner → Gain → MasterCompressor → Output
794
+ ```
795
+
796
+ ---
797
+
798
+ ## Spatial Audio Flowchart
799
+
800
+ ```
801
+ ┌─────────────────────────────────────────────────────────────────────────────┐
802
+ │ SPATIAL AUDIO PIPELINE │
803
+ └─────────────────────────────────────────────────────────────────────────────┘
804
+
805
+ ┌──────────────────┐ ┌──────────────────┐
806
+ │ LISTENER DATA │ │ SPEAKER DATA │
807
+ │ pos, rot (yaw) │ │ pos (x, y, z) │
808
+ └────────┬─────────┘ └────────┬─────────┘
809
+ │ │
810
+ ▼ │
811
+ ┌──────────────────┐ │
812
+ │ Calculate Right │ │
813
+ │ Vector │ │
814
+ │ cos(yaw), -sin() │ │
815
+ └────────┬─────────┘ │
816
+ │ │
817
+ ▼ ▼
818
+ ┌─────────────────────────────────────────────┐
819
+ │ VECTOR TO SPEAKER │
820
+ │ vecToSource = speakerPos - listenerPos │
821
+ └──────────────────────┬──────────────────────┘
822
+                        │
823
+                        ▼
824
+ ┌─────────────────────────────────────────────┐
825
+ │ DOT PRODUCT (Projection) │
826
+ │ dxLocal = vecToSource · listenerRight │
827
+ │ (positive = RIGHT, negative = LEFT) │
828
+ └──────────────────────┬──────────────────────┘
829
+                        │
830
+ ┌────────────┴────────────┐
831
+ ▼ ▼
832
+ ┌──────────────────┐ ┌──────────────────┐
833
+ │ NORMALIZE PAN │ │ CALCULATE DIST │
834
+ │ sin(atan2(dx,dz))│ │ dist = |vecTo| │
835
+ │ range: -1 to +1 │ │ 0.5m → 15m range │
836
+ └────────┬─────────┘ └────────┬─────────┘
837
+ │ │
838
+ ▼ ▼
839
+ ┌──────────────────┐ ┌──────────────────┐
840
+ │ ANTI-JITTER │ │ EXPONENTIAL GAIN │
841
+ │ - Threshold 2.5% │ │ gain = (1-norm)² │
842
+ │ - EMA 70% │ │ × 100% │
843
+ │ - Ramp 80ms │ │ 0% at 15m │
844
+ └────────┬─────────┘ └────────┬─────────┘
845
+ │ │
846
+ ▼ ▼
847
+ ┌──────────────────┐ ┌──────────────────┐
848
+ │ StereoPanner │ │ GainNode │
849
+ │ L/R balance │ │ volume │
850
+ └────────┬─────────┘ └────────┬─────────┘
851
+ │ │
852
+ └──────────┬──────────────┘
853
+            ▼
854
+ ┌──────────────────┐
855
+ │ AUDIO OUTPUT │
856
+ │ (headphones) │
857
+ └──────────────────┘
858
+ ```
859
+
860
+ ---
861
+
862
+ ## 360° Spatial Audio Diagram (Top View)
863
+
864
+ ```
865
+ 0° (Front)
866
+ L100% R100%
867
+
868
+
869
+ 315° │ 45°
870
+ L100% R58% │ L58% R100%
871
+ ↖ │ ↗
872
+ ↖ │ ↗
873
+ ↖ │ ↗
874
+ ↖ │ ↗
875
+ 270° ←─────────────── 🎧 ───────────────→ 90°
876
+ (Left) Listener (Right)
877
+ L100% R40% yaw=0° L40% R100%
878
+ ↙ │ ↘
879
+ ↙ │ ↘
880
+ ↙ │ ↘
881
+ ↙ │ ↘
882
+ L100% R58% │ L58% R100%
883
+ 225° │ 135°
884
+
885
+
886
+ L100% R100%
887
+ 180° (Behind)
888
+
889
+ Legend:
890
+ - 🎧 = Listener at origin, facing 0° (forward)
891
+ - Angles = Speaker position around listener
892
+ - L/R % = Left/Right ear volume for speaker at that position
893
+ ```
894
+
895
+ ---
896
+
897
+ ## Configuration
898
+
899
+ | Parameter | Value | Description |
900
+ |------------------------|--------|-----------------------------|
901
+ | `positionPanRadius` | 5.0m | Distance for full L/R pan |
902
+ | `nearDistance` | 0.5m | Full gain threshold |
903
+ | `farDistance` | 10.0m | Silence threshold |
904
+ | `panSmoothingFactor` | 0.5 | Normal smoothing |
905
+ | `panChangeThreshold` | 0.02 | Jitter ignore threshold |
906
+ | `panRampTime` | 0.15s | Audio transition time |
907
+ | `headHeight` | 1.6m | Added to body Y |
908
+
909
+ ---
910
+
911
+ ## Console Logs Reference
912
+
913
+ ```javascript
914
+ // Mediasoup server URL being used
915
+ [Odyssey] Connecting to MediaSoup server: https://...
916
+
917
+ // ML model loading
918
+ [MLNoiseSuppressor] Initializing TF.js backend: webgl
919
+ [MLNoiseSuppressor] Model loaded — 872,448 params | backend: webgl
920
+ [Odyssey] ML Noise Suppression loaded and active
921
+
922
+ // ML model failure (audio still works — pass-through mode)
923
+ [Odyssey] ML Noise Suppression failed to load: <error>
924
+
925
+ // ML active per participant
926
+ [SpatialAudioChannel] ML noise suppression ACTIVE — model loaded from <url>
927
+
928
+ // Listener position update
929
+ 📍 [SDK Listener] pos=(x, y, z) rot=(pitch, yaw, roll)
930
+
931
+ // Speaker position received
932
+ 🎧 [SDK Rx] <id> bodyPos=(x, y, z) rot=(pitch, yaw, roll)
933
+
934
+ // Spatial audio calculation
935
+ 🎧 SPATIAL AUDIO [<id>] dist=Xm dxLocal=Xm rawPan=X smoothPan=X pan(L=X%,R=X%) gain=X% listenerRight=(x,z) vecToSrc=(x,z)
936
+ ```
937
+
938
+ ---
939
+
940
+ ## Server Contract (Socket.IO Events)
941
+
942
+ | Event | Direction | Payload |
943
+ |----------------------------------|------------------|-----------------------------------------------------------------------------|
944
+ | `join-room` | client → server | `{roomId, userId, deviceId, position, direction}` |
945
+ | `room-joined` | server → client | `RoomJoinedData` (router caps, participants snapshot) |
946
+ | `update-position` | client → server | `{participantId, conferenceId, position, direction, rot, cameraDistance}` |
947
+ | `participant-position-updated` | server → client | `{participantId, position, direction, rot, mediaState, pan}` |
948
+ | `consumer-created` | server → client | `{participantId, track(kind), position, direction, appData}` |
949
+ | `participant-media-state-updated`| server → client | `{participantId, mediaState}` |
950
+ | `all-participants-update` | server → client | `{roomId, participants[]}` |
951
+ | `new-participant` | server → client | `{participantId, userId, position, direction}` |
952
+ | `participant-left` | server → client | `{participantId}` |
953
+
954
+ ### Position Data Types (Critical for Spatial Audio)
955
+
956
+ The SDK sends **three separate data types** to the server for accurate spatial audio:
957
+
958
+ | Data Type | Structure | Description |
959
+ |--------------|----------------------------------|---------------------------------------------------------------|
960
+ | `position` | `{x, y, z}` in meters | World coordinates - WHERE the player is located |
961
+ | `direction` | `{x, y, z}` normalized vector | Forward direction - which way the player is LOOKING (unit vector) |
962
+ | `rot` | `{x, y, z}` in degrees | Euler rotation angles - pitch(x), **yaw(y)**, roll(z) |
963
+
964
+ **IMPORTANT**: `rot.y` (yaw) is **critical** for spatial audio left/right ear calculation:
965
+ - The listener's yaw determines their ear orientation
966
+ - `listenerRight = { x: cos(yaw), z: -sin(yaw) }`
967
+ - Speakers are panned based on their position projected onto listener's right axis
968
+
969
+ ```typescript
970
+ // Frontend sends all 3 data types:
971
+ sdk.updatePosition(position, direction, {
972
+ rot, // Rotation angles (pitch, yaw, roll) in degrees
973
+ cameraDistance,
974
+ screenPos,
975
+ });
976
+
977
+ // Server broadcasts to other clients:
978
+ socket.emit("participant-position-updated", {
979
+ position, // World coordinates
980
+ direction, // Forward vector
981
+ rot, // Rotation angles - yaw used for L/R audio
982
+ ...
983
+ });
984
+ ```
985
+
986
+ ---
987
+
988
+ ## Noise-Cancellation Stack (What's Included)
989
+
990
+ | Layer | Purpose |
991
+ |-------------------------------|--------------------------------------------------------------------------------------------------------------------------------|
992
+ | **Adaptive denoiser worklet** | Learns each participant's noise floor in real time, then applies a multi-band downward expander plus dynamic low/high-pass shaping |
993
+ | **speechBoost** | Lifts the low/mid band only when speech confidence is high, keeping consonants bright without reintroducing floor noise |
994
+ | **highBandGate** | Clamps constant fan hiss in the 4–12 kHz band whenever speechPresence is low |
995
+ | **Silence gate**              | If energy stays below `silenceFloor` for a configurable hold window, the track ramps to true silence and wakes instantly when voice returns |
996
+ | **Classic filters**           | Fixed high-pass (100 Hz) / low-pass (10 kHz) shave off rumble and hiss before signals reach the panner |
997
+
998
+ **Configuration example:**
999
+ ```typescript
1000
+ const sdk = SpatialCommsSDK.create(serverUrl, {
1001
+ denoiser: {
1002
+ threshold: 0.008,
1003
+ maxReduction: 0.88,
1004
+ hissCut: 0.52,
1005
+ holdMs: 260,
1006
+ voiceBoost: 0.65,
1007
+ voiceSensitivity: 0.33,
1008
+ voiceEnhancement: true,
1009
+ silenceFloor: 0.00075,
1010
+ silenceHoldMs: 520,
1011
+ silenceReleaseMs: 160,
1012
+ speechBoost: 0.35,
1013
+ highBandGate: 0.7,
1014
+ highBandAttack: 0.25,
1015
+ highBandRelease: 0.12,
1016
+ },
1017
+ });
1018
+ ```
1019
+
1020
+ ---
1021
+
1022
+ ## How Spatial Audio Is Built
1023
+
1024
+ | Step | Description |
1025
+ |-------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
1026
+ | **1. Telemetry ingestion** | Each LSD packet is passed through `setListenerFromLSD(listenerPos, cameraPos, lookAtPos, rot)` so the Web Audio listener matches the player's real head/camera pose |
1027
+ | **2. Per-participant graph** | When `consumer-created` yields a remote audio track, `setupSpatialAudioForParticipant()` spins up: `Source → Compressor → Denoiser → HP → LP → StereoPanner → Gain` |
1028
+ | **3. Position updates** | Every `participant-position-updated` event calls `updateSpatialAudio(participantId, position, rot)`. Position feeds panning, rot provides listener's yaw |
1029
+ | **4. Distance-aware gain** | The manager computes Euclidean distance to each remote participant and applies inverse distance law with exponential falloff (0.5m–15m range) for more perceptible volume changes |
1030
+ | **5. Anti-jitter smoothing** | 3-layer system: threshold filter (0.02), EMA smoothing (0.5), SNAP behavior (0.2 for direction changes) |
1031
+ | **6. Left/right rendering** | StereoPannerNode outputs processed signal with accurate L/R separation based on position projection |
1032
+
1033
+ ---
1034
+
1035
+ ## Integration Checklist
1036
+
1037
+ - [ ] **Instantiate the SDK once per page/tab** and keep the instance in a store (Vuex, Redux, Zustand, etc.)
1038
+ - [ ] **Pipe LSD/Lap data** from your rendering engine into `updatePosition()` + `setListenerFromLSD()` at ~10 Hz
1039
+ - [ ] **Render videos muted** – never attach remote audio tracks straight to the DOM; let `SpatialAudioManager` own playback
1040
+ - [ ] **Resume audio context** – call `sdk.resumeAudio()` on first user interaction (required by browsers)
1041
+ - [ ] **Handle consumer-created** – attach video tracks to UI, audio is handled automatically by spatial audio
1042
+ - [ ] **Monitor logs** – browser console shows `🎧 SDK`, `📍 SDK`, and `🎚️ [Spatial Audio]` statements for every critical hop
1043
+ - [ ] **Push avatar telemetry** back to Unreal so `remoteSpatialData` can render minimaps/circles
1044
+
1045
+ ---
1046
+
1047
+ ## Core Modules
1048
+
1049
+ | File | Purpose |
1050
+ |-----------------------------|--------------------------------------------------------------------------------------|
1051
+ | `src/index.ts` | `SpatialCommsSDK.create()` – socket lifecycle, producers/consumers, event surface |
1052
+ | `src/core/MediasoupManager.ts` | Transport helpers for produce/consume/resume |
1053
+ | `src/channels/spatial/SpatialAudioChannel.ts` | Web Audio orchestration (listener transforms, per-participant chains, ML denoiser node) |
1054
+ | `src/audio/MLNoiseSuppressor.ts` | TensorFlow.js GRU denoiser — `odyssey_adaptive_denoiser` model, 872K params, val_loss=0.1636 |
1055
+ | `src/core/EventManager.ts` | Lightweight EventEmitter used by the entire SDK |
1056
+ | `src/types/index.ts` | TypeScript interfaces for Position, Direction, Participant, MediaState, etc. |
1057
+
1058
+ ---
1059
+
1060
+ ## Development Tips
1061
+
1062
+ - Run `npm install && npm run build` inside `odyssey-mediasoup-sdk` to produce a fresh build
1063
+ - Use `npm run dev` (watch mode) while iterating so TypeScript is recompiled into `dist/` on every change
1064
+ - The SDK targets evergreen browsers. TF.js requires WebGL, so verify WebGL is available when supporting Safari <16.4 (all modern Safari versions have it)
1065
+ - Have questions or want to extend the SDK? Start with `SpatialAudioManager` – that's where most of the "real-world" behavior (distance feel, stereo cues, denoiser) lives
1066
+ - **ML Noise Suppression**: Initialized automatically at SDK startup using the `odyssey_adaptive_denoiser` model from `public/odyssey_adaptive_denoiser/model.json`; no manual call is needed. Watch the browser console for `[Odyssey] ML Noise Suppression loaded and active`.
1067
+
1068
+ ---
1069
+
1070
+ ## Development
1071
+
1072
+ ```bash
1073
+ # Install dependencies
1074
+ npm install
1075
+
1076
+ # Build
1077
+ npm run build
1078
+
1079
+ # Watch mode
1080
+ npm run dev
1081
+
1082
+ # Type check only
1083
+ npm run typecheck
1084
+ ```
1085
+
1086
+ ---
1087
+
1088
+ ## Related Documentation
1089
+
1090
+ - [HEAD_POSITION_DATA_FLOW.md](HEAD_POSITION_DATA_FLOW.md) – Detailed panning algorithm with 360° tables
1091
+ - [SPATIAL_AUDIO_IMPLEMENTATION.md](SPATIAL_AUDIO_IMPLEMENTATION.md) – Implementation summary with examples
1092
+ - [audio-position-wisecalulation.md](audio-position-wisecalulation.md) – Position-wise calculation reference