npm - @stream-io/video-client - Versions diffs - 1.15.6 → 1.15.7 - Mend

@stream-io/video-client 1.15.6 → 1.15.7

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (12)

package/CHANGELOG.md +7 -0
package/dist/index.browser.es.js +93 -41
package/dist/index.browser.es.js.map +1 -1
package/dist/index.cjs.js +93 -41
package/dist/index.cjs.js.map +1 -1
package/dist/index.es.js +93 -41
package/dist/index.es.js.map +1 -1
package/dist/src/helpers/RNSpeechDetector.d.ts +3 -4
package/package.json +1 -1
package/src/devices/MicrophoneManager.ts +4 -12
package/src/devices/__tests__/MicrophoneManagerRN.test.ts +6 -4
package/src/helpers/RNSpeechDetector.ts +104 -40

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,13 @@
 This file was generated using [@jscutlery/semver](https://github.com/jscutlery/semver).
+## [1.15.7](https://github.com/GetStream/stream-video-js/compare/@stream-io/video-client-1.15.6...@stream-io/video-client-1.15.7) (2025-01-29)
+### Bug Fixes
+* speech detection and align mic disable with web ([#1658](https://github.com/GetStream/stream-video-js/issues/1658)) ([fd908fb](https://github.com/GetStream/stream-video-js/commit/fd908fb2b70e6bade595f44107ca2f85aa4d5631))
 ## [1.15.6](https://github.com/GetStream/stream-video-js/compare/@stream-io/video-client-1.15.5...@stream-io/video-client-1.15.6) (2025-01-29)

package/dist/index.browser.es.js CHANGED Viewed

@@ -7356,7 +7356,7 @@ const aggregate = (stats) => {
     return report;
 };
-const version = "1.15.6";
+const version = "1.15.7";
 const [major, minor, patch] = version.split('.');
 let sdkInfo = {
     type: SdkType.PLAIN_JAVASCRIPT,
@@ -9337,7 +9337,7 @@ class MicrophoneManagerState extends InputMediaDeviceManagerState {
 }
 const DETECTION_FREQUENCY_IN_MS = 500;
-const AUDIO_LEVEL_THRESHOLD$1 = 150;
+const AUDIO_LEVEL_THRESHOLD = 150;
 const FFT_SIZE = 128;
 /**
  * Creates a new sound detector.
@@ -9348,7 +9348,7 @@ const FFT_SIZE = 128;
  * @returns a clean-up function which once invoked stops the sound detector.
  */
 const createSoundDetector = (audioStream, onSoundDetectedStateChanged, options = {}) => {
-    const { detectionFrequencyInMs = DETECTION_FREQUENCY_IN_MS, audioLevelThreshold = AUDIO_LEVEL_THRESHOLD$1, fftSize = FFT_SIZE, destroyStreamOnStop = true, } = options;
+    const { detectionFrequencyInMs = DETECTION_FREQUENCY_IN_MS, audioLevelThreshold = AUDIO_LEVEL_THRESHOLD, fftSize = FFT_SIZE, destroyStreamOnStop = true, } = options;
     const audioContext = new AudioContext();
     const analyser = audioContext.createAnalyser();
     analyser.fftSize = fftSize;
@@ -9389,7 +9389,6 @@ const createSoundDetector = (audioStream, onSoundDetectedStateChanged, options =
     };
 };
-const AUDIO_LEVEL_THRESHOLD = 0.2;
 class RNSpeechDetector {
     constructor() {
         this.pc1 = new RTCPeerConnection({});
@@ -9398,7 +9397,7 @@ class RNSpeechDetector {
     /**
      * Starts the speech detection.
      */
-    async start() {
+    async start(onSoundDetectedStateChanged) {
         try {
             this.cleanupAudioStream();
             const audioStream = await navigator.mediaDevices.getUserMedia({
@@ -9411,6 +9410,14 @@ class RNSpeechDetector {
             this.pc2.addEventListener('icecandidate', async (e) => {
                 await this.pc1.addIceCandidate(e.candidate);
             });
+            this.pc2.addEventListener('track', (e) => {
+                e.streams[0].getTracks().forEach((track) => {
+                    // In RN, the remote track is automatically added to the audio output device
+                    // so we need to mute it to avoid hearing the audio back
+                    // @ts-ignore _setVolume is a private method in react-native-webrtc
+                    track._setVolume(0);
+                });
+            });
             audioStream
                 .getTracks()
                 .forEach((track) => this.pc1.addTrack(track, audioStream));
@@ -9420,12 +9427,16 @@ class RNSpeechDetector {
             const answer = await this.pc2.createAnswer();
             await this.pc1.setRemoteDescription(answer);
             await this.pc2.setLocalDescription(answer);
-            const audioTracks = audioStream.getAudioTracks();
-            // We need to mute the audio track for this temporary stream, or else you will hear yourself twice while in the call.
-            audioTracks.forEach((track) => (track.enabled = false));
+            const unsub = this.onSpeakingDetectedStateChange(onSoundDetectedStateChanged);
+            return () => {
+                unsub();
+                this.stop();
+            };
         }
         catch (error) {
-            console.error('Error connecting and negotiating between PeerConnections:', error);
+            const logger = getLogger(['RNSpeechDetector']);
+            logger('error', 'error handling permissions: ', error);
+            return () => { };
         }
     }
     /**
@@ -9435,40 +9446,85 @@ class RNSpeechDetector {
         this.pc1.close();
         this.pc2.close();
         this.cleanupAudioStream();
-        if (this.intervalId) {
-            clearInterval(this.intervalId);
-        }
     }
     /**
      * Public method that detects the audio levels and returns the status.
      */
     onSpeakingDetectedStateChange(onSoundDetectedStateChanged) {
-        this.intervalId = setInterval(async () => {
-            const stats = (await this.pc1.getStats());
-            const report = flatten(stats);
-            // Audio levels are present inside stats of type `media-source` and of kind `audio`
-            const audioMediaSourceStats = report.find((stat) => stat.type === 'media-source' &&
-                stat.kind === 'audio');
-            if (audioMediaSourceStats) {
-                const { audioLevel } = audioMediaSourceStats;
-                if (audioLevel) {
-                    if (audioLevel >= AUDIO_LEVEL_THRESHOLD) {
-                        onSoundDetectedStateChanged({
-                            isSoundDetected: true,
-                            audioLevel,
-                        });
-                    }
-                    else {
-                        onSoundDetectedStateChanged({
-                            isSoundDetected: false,
-                            audioLevel: 0,
-                        });
+        const initialBaselineNoiseLevel = 0.13;
+        let baselineNoiseLevel = initialBaselineNoiseLevel;
+        let speechDetected = false;
+        let intervalId;
+        let speechTimer;
+        let silenceTimer;
+        let audioLevelHistory = []; // Store recent audio levels for smoother detection
+        const historyLength = 10;
+        const silenceThreshold = 1.1;
+        const resetThreshold = 0.9;
+        const speechTimeout = 500; // Speech is set to true after 500ms of audio detection
+        const silenceTimeout = 5000; // Reset baseline after 5 seconds of silence
+        const checkAudioLevel = async () => {
+            try {
+                const stats = (await this.pc1.getStats());
+                const report = flatten(stats);
+                // Audio levels are present inside stats of type `media-source` and of kind `audio`
+                const audioMediaSourceStats = report.find((stat) => stat.type === 'media-source' &&
+                    stat.kind === 'audio');
+                if (audioMediaSourceStats) {
+                    const { audioLevel } = audioMediaSourceStats;
+                    if (audioLevel) {
+                        // Update audio level history (with max historyLength sized array)
+                        audioLevelHistory.push(audioLevel);
+                        if (audioLevelHistory.length > historyLength) {
+                            audioLevelHistory.shift();
+                        }
+                        // Calculate average audio level
+                        const avgAudioLevel = audioLevelHistory.reduce((a, b) => a + b, 0) /
+                            audioLevelHistory.length;
+                        // Update baseline (if necessary) based on silence detection
+                        if (avgAudioLevel < baselineNoiseLevel * silenceThreshold) {
+                            if (!silenceTimer) {
+                                silenceTimer = setTimeout(() => {
+                                    baselineNoiseLevel = Math.min(avgAudioLevel * resetThreshold, initialBaselineNoiseLevel);
+                                }, silenceTimeout);
+                            }
+                        }
+                        else {
+                            clearTimeout(silenceTimer);
+                            silenceTimer = undefined;
+                        }
+                        // Speech detection with hysteresis
+                        if (avgAudioLevel > baselineNoiseLevel * 1.5) {
+                            if (!speechDetected) {
+                                speechDetected = true;
+                                onSoundDetectedStateChanged({
+                                    isSoundDetected: true,
+                                    audioLevel,
+                                });
+                            }
+                            clearTimeout(speechTimer);
+                            speechTimer = setTimeout(() => {
+                                speechDetected = false;
+                                onSoundDetectedStateChanged({
+                                    isSoundDetected: false,
+                                    audioLevel: 0,
+                                });
+                            }, speechTimeout);
+                        }
                     }
                 }
             }
-        }, 1000);
+            catch (error) {
+                const logger = getLogger(['RNSpeechDetector']);
+                logger('error', 'error checking audio level from stats', error);
+            }
+        };
+        // Call checkAudioLevel periodically (every 100ms)
+        intervalId = setInterval(checkAudioLevel, 100);
         return () => {
-            clearInterval(this.intervalId);
+            clearInterval(intervalId);
+            clearTimeout(speechTimer);
+            clearTimeout(silenceTimer);
         };
     }
     cleanupAudioStream() {
@@ -9486,9 +9542,7 @@ class RNSpeechDetector {
 }
 class MicrophoneManager extends InputMediaDeviceManager {
-    constructor(call, disableMode = isReactNative()
-        ? 'disable-tracks'
-        : 'stop-tracks') {
+    constructor(call, disableMode = 'stop-tracks') {
         super(call, new MicrophoneManagerState(disableMode), TrackType.AUDIO);
         this.speakingWhileMutedNotificationEnabled = true;
         this.soundDetectorConcurrencyTag = Symbol('soundDetectorConcurrencyTag');
@@ -9669,13 +9723,11 @@ class MicrophoneManager extends InputMediaDeviceManager {
             await this.stopSpeakingWhileMutedDetection();
             if (isReactNative()) {
                 this.rnSpeechDetector = new RNSpeechDetector();
-                await this.rnSpeechDetector.start();
-                const unsubscribe = this.rnSpeechDetector?.onSpeakingDetectedStateChange((event) => {
+                const unsubscribe = await this.rnSpeechDetector.start((event) => {
                     this.state.setSpeakingWhileMuted(event.isSoundDetected);
                 });
                 this.soundDetectorCleanup = () => {
                     unsubscribe();
-                    this.rnSpeechDetector?.stop();
                     this.rnSpeechDetector = undefined;
                 };
             }
@@ -12863,7 +12915,7 @@ class StreamClient {
             return await this.wsConnection.connect(this.defaultWSTimeout);
         };
         this.getUserAgent = () => {
-            const version = "1.15.6";
+            const version = "1.15.7";
             return (this.userAgent ||
                 `stream-video-javascript-client-${this.node ? 'node' : 'browser'}-${version}`);
         };