@stream-io/video-client 1.15.6 → 1.15.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,13 @@
2
2
 
3
3
  This file was generated using [@jscutlery/semver](https://github.com/jscutlery/semver).
4
4
 
5
+ ## [1.15.7](https://github.com/GetStream/stream-video-js/compare/@stream-io/video-client-1.15.6...@stream-io/video-client-1.15.7) (2025-01-29)
6
+
7
+
8
+ ### Bug Fixes
9
+
10
+ * speech detection and align mic disable with web ([#1658](https://github.com/GetStream/stream-video-js/issues/1658)) ([fd908fb](https://github.com/GetStream/stream-video-js/commit/fd908fb2b70e6bade595f44107ca2f85aa4d5631))
11
+
5
12
  ## [1.15.6](https://github.com/GetStream/stream-video-js/compare/@stream-io/video-client-1.15.5...@stream-io/video-client-1.15.6) (2025-01-29)
6
13
 
7
14
 
@@ -7356,7 +7356,7 @@ const aggregate = (stats) => {
7356
7356
  return report;
7357
7357
  };
7358
7358
 
7359
- const version = "1.15.6";
7359
+ const version = "1.15.7";
7360
7360
  const [major, minor, patch] = version.split('.');
7361
7361
  let sdkInfo = {
7362
7362
  type: SdkType.PLAIN_JAVASCRIPT,
@@ -9337,7 +9337,7 @@ class MicrophoneManagerState extends InputMediaDeviceManagerState {
9337
9337
  }
9338
9338
 
9339
9339
  const DETECTION_FREQUENCY_IN_MS = 500;
9340
- const AUDIO_LEVEL_THRESHOLD$1 = 150;
9340
+ const AUDIO_LEVEL_THRESHOLD = 150;
9341
9341
  const FFT_SIZE = 128;
9342
9342
  /**
9343
9343
  * Creates a new sound detector.
@@ -9348,7 +9348,7 @@ const FFT_SIZE = 128;
9348
9348
  * @returns a clean-up function which once invoked stops the sound detector.
9349
9349
  */
9350
9350
  const createSoundDetector = (audioStream, onSoundDetectedStateChanged, options = {}) => {
9351
- const { detectionFrequencyInMs = DETECTION_FREQUENCY_IN_MS, audioLevelThreshold = AUDIO_LEVEL_THRESHOLD$1, fftSize = FFT_SIZE, destroyStreamOnStop = true, } = options;
9351
+ const { detectionFrequencyInMs = DETECTION_FREQUENCY_IN_MS, audioLevelThreshold = AUDIO_LEVEL_THRESHOLD, fftSize = FFT_SIZE, destroyStreamOnStop = true, } = options;
9352
9352
  const audioContext = new AudioContext();
9353
9353
  const analyser = audioContext.createAnalyser();
9354
9354
  analyser.fftSize = fftSize;
@@ -9389,7 +9389,6 @@ const createSoundDetector = (audioStream, onSoundDetectedStateChanged, options =
9389
9389
  };
9390
9390
  };
9391
9391
 
9392
- const AUDIO_LEVEL_THRESHOLD = 0.2;
9393
9392
  class RNSpeechDetector {
9394
9393
  constructor() {
9395
9394
  this.pc1 = new RTCPeerConnection({});
@@ -9398,7 +9397,7 @@ class RNSpeechDetector {
9398
9397
  /**
9399
9398
  * Starts the speech detection.
9400
9399
  */
9401
- async start() {
9400
+ async start(onSoundDetectedStateChanged) {
9402
9401
  try {
9403
9402
  this.cleanupAudioStream();
9404
9403
  const audioStream = await navigator.mediaDevices.getUserMedia({
@@ -9411,6 +9410,14 @@ class RNSpeechDetector {
9411
9410
  this.pc2.addEventListener('icecandidate', async (e) => {
9412
9411
  await this.pc1.addIceCandidate(e.candidate);
9413
9412
  });
9413
+ this.pc2.addEventListener('track', (e) => {
9414
+ e.streams[0].getTracks().forEach((track) => {
9415
+ // In RN, the remote track is automatically added to the audio output device
9416
+ // so we need to mute it to avoid hearing the audio back
9417
+ // @ts-ignore _setVolume is a private method in react-native-webrtc
9418
+ track._setVolume(0);
9419
+ });
9420
+ });
9414
9421
  audioStream
9415
9422
  .getTracks()
9416
9423
  .forEach((track) => this.pc1.addTrack(track, audioStream));
@@ -9420,12 +9427,16 @@ class RNSpeechDetector {
9420
9427
  const answer = await this.pc2.createAnswer();
9421
9428
  await this.pc1.setRemoteDescription(answer);
9422
9429
  await this.pc2.setLocalDescription(answer);
9423
- const audioTracks = audioStream.getAudioTracks();
9424
- // We need to mute the audio track for this temporary stream, or else you will hear yourself twice while in the call.
9425
- audioTracks.forEach((track) => (track.enabled = false));
9430
+ const unsub = this.onSpeakingDetectedStateChange(onSoundDetectedStateChanged);
9431
+ return () => {
9432
+ unsub();
9433
+ this.stop();
9434
+ };
9426
9435
  }
9427
9436
  catch (error) {
9428
- console.error('Error connecting and negotiating between PeerConnections:', error);
9437
+ const logger = getLogger(['RNSpeechDetector']);
9438
+ logger('error', 'error handling permissions: ', error);
9439
+ return () => { };
9429
9440
  }
9430
9441
  }
9431
9442
  /**
@@ -9435,40 +9446,85 @@ class RNSpeechDetector {
9435
9446
  this.pc1.close();
9436
9447
  this.pc2.close();
9437
9448
  this.cleanupAudioStream();
9438
- if (this.intervalId) {
9439
- clearInterval(this.intervalId);
9440
- }
9441
9449
  }
9442
9450
  /**
9443
9451
  * Public method that periodically polls the audio level and reports speech state changes through the given callback; returns a function that stops the polling.
9444
9452
  */
9445
9453
  onSpeakingDetectedStateChange(onSoundDetectedStateChanged) {
9446
- this.intervalId = setInterval(async () => {
9447
- const stats = (await this.pc1.getStats());
9448
- const report = flatten(stats);
9449
- // Audio levels are present inside stats of type `media-source` and of kind `audio`
9450
- const audioMediaSourceStats = report.find((stat) => stat.type === 'media-source' &&
9451
- stat.kind === 'audio');
9452
- if (audioMediaSourceStats) {
9453
- const { audioLevel } = audioMediaSourceStats;
9454
- if (audioLevel) {
9455
- if (audioLevel >= AUDIO_LEVEL_THRESHOLD) {
9456
- onSoundDetectedStateChanged({
9457
- isSoundDetected: true,
9458
- audioLevel,
9459
- });
9460
- }
9461
- else {
9462
- onSoundDetectedStateChanged({
9463
- isSoundDetected: false,
9464
- audioLevel: 0,
9465
- });
9454
+ const initialBaselineNoiseLevel = 0.13;
9455
+ let baselineNoiseLevel = initialBaselineNoiseLevel;
9456
+ let speechDetected = false;
9457
+ let intervalId;
9458
+ let speechTimer;
9459
+ let silenceTimer;
9460
+ let audioLevelHistory = []; // Store recent audio levels for smoother detection
9461
+ const historyLength = 10;
9462
+ const silenceThreshold = 1.1;
9463
+ const resetThreshold = 0.9;
9464
+ const speechTimeout = 500; // Speech is reset to false after 500ms without audio above the speech threshold
9465
+ const silenceTimeout = 5000; // Reset baseline after 5 seconds of silence
9466
+ const checkAudioLevel = async () => {
9467
+ try {
9468
+ const stats = (await this.pc1.getStats());
9469
+ const report = flatten(stats);
9470
+ // Audio levels are present inside stats of type `media-source` and of kind `audio`
9471
+ const audioMediaSourceStats = report.find((stat) => stat.type === 'media-source' &&
9472
+ stat.kind === 'audio');
9473
+ if (audioMediaSourceStats) {
9474
+ const { audioLevel } = audioMediaSourceStats;
9475
+ if (audioLevel) {
9476
+ // Update audio level history (with max historyLength sized array)
9477
+ audioLevelHistory.push(audioLevel);
9478
+ if (audioLevelHistory.length > historyLength) {
9479
+ audioLevelHistory.shift();
9480
+ }
9481
+ // Calculate average audio level
9482
+ const avgAudioLevel = audioLevelHistory.reduce((a, b) => a + b, 0) /
9483
+ audioLevelHistory.length;
9484
+ // Update baseline (if necessary) based on silence detection
9485
+ if (avgAudioLevel < baselineNoiseLevel * silenceThreshold) {
9486
+ if (!silenceTimer) {
9487
+ silenceTimer = setTimeout(() => {
9488
+ baselineNoiseLevel = Math.min(avgAudioLevel * resetThreshold, initialBaselineNoiseLevel);
9489
+ }, silenceTimeout);
9490
+ }
9491
+ }
9492
+ else {
9493
+ clearTimeout(silenceTimer);
9494
+ silenceTimer = undefined;
9495
+ }
9496
+ // Speech detection with hysteresis
9497
+ if (avgAudioLevel > baselineNoiseLevel * 1.5) {
9498
+ if (!speechDetected) {
9499
+ speechDetected = true;
9500
+ onSoundDetectedStateChanged({
9501
+ isSoundDetected: true,
9502
+ audioLevel,
9503
+ });
9504
+ }
9505
+ clearTimeout(speechTimer);
9506
+ speechTimer = setTimeout(() => {
9507
+ speechDetected = false;
9508
+ onSoundDetectedStateChanged({
9509
+ isSoundDetected: false,
9510
+ audioLevel: 0,
9511
+ });
9512
+ }, speechTimeout);
9513
+ }
9466
9514
  }
9467
9515
  }
9468
9516
  }
9469
- }, 1000);
9517
+ catch (error) {
9518
+ const logger = getLogger(['RNSpeechDetector']);
9519
+ logger('error', 'error checking audio level from stats', error);
9520
+ }
9521
+ };
9522
+ // Call checkAudioLevel periodically (every 100ms)
9523
+ intervalId = setInterval(checkAudioLevel, 100);
9470
9524
  return () => {
9471
- clearInterval(this.intervalId);
9525
+ clearInterval(intervalId);
9526
+ clearTimeout(speechTimer);
9527
+ clearTimeout(silenceTimer);
9472
9528
  };
9473
9529
  }
9474
9530
  cleanupAudioStream() {
@@ -9486,9 +9542,7 @@ class RNSpeechDetector {
9486
9542
  }
9487
9543
 
9488
9544
  class MicrophoneManager extends InputMediaDeviceManager {
9489
- constructor(call, disableMode = isReactNative()
9490
- ? 'disable-tracks'
9491
- : 'stop-tracks') {
9545
+ constructor(call, disableMode = 'stop-tracks') {
9492
9546
  super(call, new MicrophoneManagerState(disableMode), TrackType.AUDIO);
9493
9547
  this.speakingWhileMutedNotificationEnabled = true;
9494
9548
  this.soundDetectorConcurrencyTag = Symbol('soundDetectorConcurrencyTag');
@@ -9669,13 +9723,11 @@ class MicrophoneManager extends InputMediaDeviceManager {
9669
9723
  await this.stopSpeakingWhileMutedDetection();
9670
9724
  if (isReactNative()) {
9671
9725
  this.rnSpeechDetector = new RNSpeechDetector();
9672
- await this.rnSpeechDetector.start();
9673
- const unsubscribe = this.rnSpeechDetector?.onSpeakingDetectedStateChange((event) => {
9726
+ const unsubscribe = await this.rnSpeechDetector.start((event) => {
9674
9727
  this.state.setSpeakingWhileMuted(event.isSoundDetected);
9675
9728
  });
9676
9729
  this.soundDetectorCleanup = () => {
9677
9730
  unsubscribe();
9678
- this.rnSpeechDetector?.stop();
9679
9731
  this.rnSpeechDetector = undefined;
9680
9732
  };
9681
9733
  }
@@ -12863,7 +12915,7 @@ class StreamClient {
12863
12915
  return await this.wsConnection.connect(this.defaultWSTimeout);
12864
12916
  };
12865
12917
  this.getUserAgent = () => {
12866
- const version = "1.15.6";
12918
+ const version = "1.15.7";
12867
12919
  return (this.userAgent ||
12868
12920
  `stream-video-javascript-client-${this.node ? 'node' : 'browser'}-${version}`);
12869
12921
  };