@tensamin/audio 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,6 +9,10 @@ DeepFilterNet3-based noise suppression and realtime speaking detection for LiveK
  - Automatic mute/unmute for LiveKit tracks
  - Simple min/max dB speaking thresholds
 
+ > Noise suppression is provided via the [`deepfilternet3-noise-filter`](https://www.npmjs.com/package/deepfilternet3-noise-filter) package,
+ > which is based on [DeepFilterNet](https://github.com/Rikorose/DeepFilterNet) by Rikorose.
+
+
  ## Installation
 
  ```bash
package/dist/chunk-DTIMONGP.mjs ADDED
@@ -0,0 +1,92 @@
+ import {
+   LevelBasedVAD
+ } from "./chunk-AQ5RVY33.mjs";
+ import {
+   getAudioContext,
+   registerPipeline,
+   unregisterPipeline
+ } from "./chunk-OZ7KMC4S.mjs";
+ import {
+   createLevelDetectorNode
+ } from "./chunk-QNQK6QFB.mjs";
+
+ // src/pipeline/remote-audio-monitor.ts
+ import mitt from "mitt";
+ async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+   const context = getAudioContext();
+   registerPipeline();
+   const fullConfig = {
+     speaking: {
+       minDb: config.speaking?.minDb ?? -60,
+       maxDb: config.speaking?.maxDb ?? -20,
+       speakOnRatio: config.speaking?.speakOnRatio ?? 0.6,
+       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+       hangoverMs: config.speaking?.hangoverMs ?? 350,
+       attackMs: config.speaking?.attackMs ?? 50,
+       releaseMs: config.speaking?.releaseMs ?? 120
+     }
+   };
+   if (!sourceTrack || sourceTrack.kind !== "audio") {
+     throw new Error(
+       "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+     );
+   }
+   if (sourceTrack.readyState === "ended") {
+     throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+   }
+   const sourceStream = new MediaStream([sourceTrack]);
+   const sourceNode = context.createMediaStreamSource(sourceStream);
+   const emitter = mitt();
+   const vad = new LevelBasedVAD(fullConfig.speaking);
+   let lastState = { speaking: false, levelDb: -Infinity };
+   const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+     try {
+       const timestamp = context.currentTime * 1e3;
+       const nextState = vad.process(levelDb, timestamp);
+       const speakingChanged = nextState.speaking !== lastState.speaking;
+       const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+       if (speakingChanged || levelChanged) {
+         lastState = nextState;
+         emitter.emit("speakingChange", nextState);
+       }
+     } catch (error) {
+       const err = error instanceof Error ? error : new Error(String(error));
+       emitter.emit("error", err);
+     }
+   });
+   sourceNode.connect(levelHandle.node);
+   function dispose() {
+     try {
+       sourceNode.disconnect();
+       levelHandle.node.disconnect();
+       levelHandle.dispose();
+     } catch (error) {
+       console.error("Error during remote monitor disposal", error);
+     } finally {
+       unregisterPipeline();
+     }
+   }
+   const handle = {
+     events: emitter,
+     get state() {
+       return lastState;
+     },
+     setConfig: (next) => {
+       try {
+         if (next.speaking) {
+           vad.updateConfig(next.speaking);
+           fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+         }
+       } catch (error) {
+         const err = error instanceof Error ? error : new Error(String(error));
+         emitter.emit("error", err);
+       }
+     },
+     dispose
+   };
+   return handle;
+ }
+
+ export {
+   createRemoteAudioMonitor
+ };
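For orientation, here is a minimal sketch of driving this new chunk's `createRemoteAudioMonitor` directly. The subpath import and the `remoteTrack` variable are assumptions; the handle's `events`/`state`/`dispose` shape matches the bundled code above.

```ts
// Assumed subpath import; the package root only re-exports the attach helpers.
import { createRemoteAudioMonitor } from "@tensamin/audio/pipeline/remote-audio-monitor";

declare const remoteTrack: MediaStreamTrack; // assumed live remote audio track

const monitor = await createRemoteAudioMonitor(remoteTrack, {
  speaking: { minDb: -60, maxDb: -20 }, // same defaults the bundle applies
});

// mitt emitter from the handle: debounced speaking-state updates.
monitor.events.on("speakingChange", (state) => {
  console.log(`speaking=${state.speaking}, level=${state.levelDb.toFixed(1)} dB`);
});
monitor.events.on("error", (err) => console.error("monitor error", err));

// Tear down the worklet node and unregister the pipeline when done.
monitor.dispose();
```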
package/dist/chunk-JBGGED5Q.mjs CHANGED (renamed from chunk-6BJ4XGSA.mjs)
@@ -1,9 +1,11 @@
  import {
    createAudioPipeline
- } from "./chunk-K4J3UUOR.mjs";
+ } from "./chunk-BSYE2MWZ.mjs";
+ import {
+   createRemoteAudioMonitor
+ } from "./chunk-DTIMONGP.mjs";
 
  // src/livekit/integration.ts
- import "mitt";
  async function attachSpeakingDetectionToTrack(track, options = {}) {
    if (!track) {
      throw new Error(
@@ -74,7 +76,54 @@ async function attachSpeakingDetectionToTrack(track, options = {}) {
    };
    return controller;
  }
+ async function attachSpeakingDetectionToRemoteTrack(track, options = {}) {
+   if (!track) {
+     throw new Error(
+       "attachSpeakingDetectionToRemoteTrack requires a valid RemoteAudioTrack"
+     );
+   }
+   const mediaTrack = track.mediaStreamTrack;
+   if (!mediaTrack || mediaTrack.readyState === "ended") {
+     throw new Error("RemoteAudioTrack has no live MediaStreamTrack to monitor");
+   }
+   const monitor = await createRemoteAudioMonitor(mediaTrack, options);
+   const listeners = /* @__PURE__ */ new Set();
+   let currentState = monitor.state;
+   const speakingHandler = (state) => {
+     currentState = state;
+     listeners.forEach((listener) => listener(state));
+   };
+   monitor.events.on("speakingChange", speakingHandler);
+   const errorHandler = (error) => {
+     console.error("Remote audio monitor error", error);
+   };
+   monitor.events.on("error", errorHandler);
+   const controller = {
+     get speaking() {
+       return currentState.speaking;
+     },
+     get levelDb() {
+       return currentState.levelDb;
+     },
+     onChange: (listener) => {
+       listeners.add(listener);
+       listener(currentState);
+       return () => listeners.delete(listener);
+     },
+     setConfig: (config) => {
+       monitor.setConfig(config);
+     },
+     dispose: () => {
+       monitor.events.off("speakingChange", speakingHandler);
+       monitor.events.off("error", errorHandler);
+       listeners.clear();
+       monitor.dispose();
+     }
+   };
+   return controller;
+ }
 
  export {
-   attachSpeakingDetectionToTrack
+   attachSpeakingDetectionToTrack,
+   attachSpeakingDetectionToRemoteTrack
  };
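In application code, the new entry point can be wired to LiveKit subscriptions roughly as follows. This is a sketch: the room setup and event plumbing are assumptions based on typical livekit-client usage, while the controller shape comes from the chunk above.

```ts
import {
  Room,
  RoomEvent,
  Track,
  type RemoteTrack,
  type RemoteAudioTrack,
} from "livekit-client";
import { attachSpeakingDetectionToRemoteTrack } from "@tensamin/audio";

const room = new Room();
room.on(RoomEvent.TrackSubscribed, async (track: RemoteTrack) => {
  if (track.kind !== Track.Kind.Audio) return;

  const controller = await attachSpeakingDetectionToRemoteTrack(
    track as RemoteAudioTrack
  );
  // onChange invokes the listener immediately with the current state, then on
  // every speaking/level change; it returns an unsubscribe function.
  const unsubscribe = controller.onChange((state) => {
    console.log(`remote speaking=${state.speaking} (${state.levelDb.toFixed(1)} dB)`);
  });

  // On teardown: unsubscribe(); controller.dispose();
});
```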
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
- export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.mjs';
- export { attachSpeakingDetectionToTrack } from './livekit/integration.mjs';
+ export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.mjs';
+ export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack } from './livekit/integration.mjs';
  import 'mitt';
  import 'livekit-client';
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
- export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.js';
- export { attachSpeakingDetectionToTrack } from './livekit/integration.js';
+ export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.js';
+ export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack } from './livekit/integration.js';
  import 'mitt';
  import 'livekit-client';
package/dist/index.js CHANGED
@@ -30,13 +30,11 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
  // src/index.ts
  var index_exports = {};
  __export(index_exports, {
+   attachSpeakingDetectionToRemoteTrack: () => attachSpeakingDetectionToRemoteTrack,
    attachSpeakingDetectionToTrack: () => attachSpeakingDetectionToTrack
  });
  module.exports = __toCommonJS(index_exports);
 
- // src/livekit/integration.ts
- var import_mitt2 = require("mitt");
-
  // src/pipeline/audio-pipeline.ts
  var import_mitt = __toESM(require("mitt"));
 
@@ -389,6 +387,83 @@ async function createAudioPipeline(sourceTrack, config = {}) {
    return handle;
  }
 
+ // src/pipeline/remote-audio-monitor.ts
+ var import_mitt2 = __toESM(require("mitt"));
+ async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+   const context = getAudioContext();
+   registerPipeline();
+   const fullConfig = {
+     speaking: {
+       minDb: config.speaking?.minDb ?? -60,
+       maxDb: config.speaking?.maxDb ?? -20,
+       speakOnRatio: config.speaking?.speakOnRatio ?? 0.6,
+       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+       hangoverMs: config.speaking?.hangoverMs ?? 350,
+       attackMs: config.speaking?.attackMs ?? 50,
+       releaseMs: config.speaking?.releaseMs ?? 120
+     }
+   };
+   if (!sourceTrack || sourceTrack.kind !== "audio") {
+     throw new Error(
+       "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+     );
+   }
+   if (sourceTrack.readyState === "ended") {
+     throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+   }
+   const sourceStream = new MediaStream([sourceTrack]);
+   const sourceNode = context.createMediaStreamSource(sourceStream);
+   const emitter = (0, import_mitt2.default)();
+   const vad = new LevelBasedVAD(fullConfig.speaking);
+   let lastState = { speaking: false, levelDb: -Infinity };
+   const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+     try {
+       const timestamp = context.currentTime * 1e3;
+       const nextState = vad.process(levelDb, timestamp);
+       const speakingChanged = nextState.speaking !== lastState.speaking;
+       const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+       if (speakingChanged || levelChanged) {
+         lastState = nextState;
+         emitter.emit("speakingChange", nextState);
+       }
+     } catch (error) {
+       const err = error instanceof Error ? error : new Error(String(error));
+       emitter.emit("error", err);
+     }
+   });
+   sourceNode.connect(levelHandle.node);
+   function dispose() {
+     try {
+       sourceNode.disconnect();
+       levelHandle.node.disconnect();
+       levelHandle.dispose();
+     } catch (error) {
+       console.error("Error during remote monitor disposal", error);
+     } finally {
+       unregisterPipeline();
+     }
+   }
+   const handle = {
+     events: emitter,
+     get state() {
+       return lastState;
+     },
+     setConfig: (next) => {
+       try {
+         if (next.speaking) {
+           vad.updateConfig(next.speaking);
+           fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+         }
+       } catch (error) {
+         const err = error instanceof Error ? error : new Error(String(error));
+         emitter.emit("error", err);
+       }
+     },
+     dispose
+   };
+   return handle;
+ }
+
  // src/livekit/integration.ts
  async function attachSpeakingDetectionToTrack(track, options = {}) {
    if (!track) {
@@ -460,7 +535,54 @@ async function attachSpeakingDetectionToTrack(track, options = {}) {
    };
    return controller;
  }
+ async function attachSpeakingDetectionToRemoteTrack(track, options = {}) {
+   if (!track) {
+     throw new Error(
+       "attachSpeakingDetectionToRemoteTrack requires a valid RemoteAudioTrack"
+     );
+   }
+   const mediaTrack = track.mediaStreamTrack;
+   if (!mediaTrack || mediaTrack.readyState === "ended") {
+     throw new Error("RemoteAudioTrack has no live MediaStreamTrack to monitor");
+   }
+   const monitor = await createRemoteAudioMonitor(mediaTrack, options);
+   const listeners = /* @__PURE__ */ new Set();
+   let currentState = monitor.state;
+   const speakingHandler = (state) => {
+     currentState = state;
+     listeners.forEach((listener) => listener(state));
+   };
+   monitor.events.on("speakingChange", speakingHandler);
+   const errorHandler = (error) => {
+     console.error("Remote audio monitor error", error);
+   };
+   monitor.events.on("error", errorHandler);
+   const controller = {
+     get speaking() {
+       return currentState.speaking;
+     },
+     get levelDb() {
+       return currentState.levelDb;
+     },
+     onChange: (listener) => {
+       listeners.add(listener);
+       listener(currentState);
+       return () => listeners.delete(listener);
+     },
+     setConfig: (config) => {
+       monitor.setConfig(config);
+     },
+     dispose: () => {
+       monitor.events.off("speakingChange", speakingHandler);
+       monitor.events.off("error", errorHandler);
+       listeners.clear();
+       monitor.dispose();
+     }
+   };
+   return controller;
+ }
  // Annotate the CommonJS export names for ESM import in node:
  0 && (module.exports = {
+   attachSpeakingDetectionToRemoteTrack,
    attachSpeakingDetectionToTrack
  });
package/dist/index.mjs CHANGED
@@ -1,12 +1,15 @@
  import "./chunk-WBQAMGXK.mjs";
  import {
+   attachSpeakingDetectionToRemoteTrack,
    attachSpeakingDetectionToTrack
- } from "./chunk-6BJ4XGSA.mjs";
- import "./chunk-K4J3UUOR.mjs";
- import "./chunk-OZ7KMC4S.mjs";
+ } from "./chunk-JBGGED5Q.mjs";
+ import "./chunk-BSYE2MWZ.mjs";
  import "./chunk-IS37FHDN.mjs";
- import "./chunk-QNQK6QFB.mjs";
+ import "./chunk-DTIMONGP.mjs";
  import "./chunk-AQ5RVY33.mjs";
+ import "./chunk-OZ7KMC4S.mjs";
+ import "./chunk-QNQK6QFB.mjs";
  export {
+   attachSpeakingDetectionToRemoteTrack,
    attachSpeakingDetectionToTrack
  };
package/dist/livekit/integration.d.mts CHANGED
@@ -1,7 +1,8 @@
- import { LivekitSpeakingOptions, SpeakingController } from '../types.mjs';
- import { LocalAudioTrack } from 'livekit-client';
+ import { LivekitSpeakingOptions, SpeakingController, RemoteSpeakingOptions } from '../types.mjs';
+ import { LocalAudioTrack, RemoteAudioTrack } from 'livekit-client';
  import 'mitt';
 
  declare function attachSpeakingDetectionToTrack(track: LocalAudioTrack, options?: LivekitSpeakingOptions): Promise<SpeakingController>;
+ declare function attachSpeakingDetectionToRemoteTrack(track: RemoteAudioTrack, options?: RemoteSpeakingOptions): Promise<SpeakingController>;
 
- export { attachSpeakingDetectionToTrack };
+ export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack };
package/dist/livekit/integration.d.ts CHANGED
@@ -1,7 +1,8 @@
- import { LivekitSpeakingOptions, SpeakingController } from '../types.js';
- import { LocalAudioTrack } from 'livekit-client';
+ import { LivekitSpeakingOptions, SpeakingController, RemoteSpeakingOptions } from '../types.js';
+ import { LocalAudioTrack, RemoteAudioTrack } from 'livekit-client';
  import 'mitt';
 
  declare function attachSpeakingDetectionToTrack(track: LocalAudioTrack, options?: LivekitSpeakingOptions): Promise<SpeakingController>;
+ declare function attachSpeakingDetectionToRemoteTrack(track: RemoteAudioTrack, options?: RemoteSpeakingOptions): Promise<SpeakingController>;
 
- export { attachSpeakingDetectionToTrack };
+ export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack };
package/dist/livekit/integration.js CHANGED
@@ -30,10 +30,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
  // src/livekit/integration.ts
  var integration_exports = {};
  __export(integration_exports, {
+   attachSpeakingDetectionToRemoteTrack: () => attachSpeakingDetectionToRemoteTrack,
    attachSpeakingDetectionToTrack: () => attachSpeakingDetectionToTrack
  });
  module.exports = __toCommonJS(integration_exports);
- var import_mitt2 = require("mitt");
 
  // src/pipeline/audio-pipeline.ts
  var import_mitt = __toESM(require("mitt"));
@@ -387,6 +387,83 @@ async function createAudioPipeline(sourceTrack, config = {}) {
    return handle;
  }
 
+ // src/pipeline/remote-audio-monitor.ts
+ var import_mitt2 = __toESM(require("mitt"));
+ async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+   const context = getAudioContext();
+   registerPipeline();
+   const fullConfig = {
+     speaking: {
+       minDb: config.speaking?.minDb ?? -60,
+       maxDb: config.speaking?.maxDb ?? -20,
+       speakOnRatio: config.speaking?.speakOnRatio ?? 0.6,
+       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+       hangoverMs: config.speaking?.hangoverMs ?? 350,
+       attackMs: config.speaking?.attackMs ?? 50,
+       releaseMs: config.speaking?.releaseMs ?? 120
+     }
+   };
+   if (!sourceTrack || sourceTrack.kind !== "audio") {
+     throw new Error(
+       "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+     );
+   }
+   if (sourceTrack.readyState === "ended") {
+     throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+   }
+   const sourceStream = new MediaStream([sourceTrack]);
+   const sourceNode = context.createMediaStreamSource(sourceStream);
+   const emitter = (0, import_mitt2.default)();
+   const vad = new LevelBasedVAD(fullConfig.speaking);
+   let lastState = { speaking: false, levelDb: -Infinity };
+   const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+     try {
+       const timestamp = context.currentTime * 1e3;
+       const nextState = vad.process(levelDb, timestamp);
+       const speakingChanged = nextState.speaking !== lastState.speaking;
+       const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+       if (speakingChanged || levelChanged) {
+         lastState = nextState;
+         emitter.emit("speakingChange", nextState);
+       }
+     } catch (error) {
+       const err = error instanceof Error ? error : new Error(String(error));
+       emitter.emit("error", err);
+     }
+   });
+   sourceNode.connect(levelHandle.node);
+   function dispose() {
+     try {
+       sourceNode.disconnect();
+       levelHandle.node.disconnect();
+       levelHandle.dispose();
+     } catch (error) {
+       console.error("Error during remote monitor disposal", error);
+     } finally {
+       unregisterPipeline();
+     }
+   }
+   const handle = {
+     events: emitter,
+     get state() {
+       return lastState;
+     },
+     setConfig: (next) => {
+       try {
+         if (next.speaking) {
+           vad.updateConfig(next.speaking);
+           fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+         }
+       } catch (error) {
+         const err = error instanceof Error ? error : new Error(String(error));
+         emitter.emit("error", err);
+       }
+     },
+     dispose
+   };
+   return handle;
+ }
+
  // src/livekit/integration.ts
  async function attachSpeakingDetectionToTrack(track, options = {}) {
    if (!track) {
@@ -458,7 +535,54 @@ async function attachSpeakingDetectionToTrack(track, options = {}) {
    };
    return controller;
  }
+ async function attachSpeakingDetectionToRemoteTrack(track, options = {}) {
+   if (!track) {
+     throw new Error(
+       "attachSpeakingDetectionToRemoteTrack requires a valid RemoteAudioTrack"
+     );
+   }
+   const mediaTrack = track.mediaStreamTrack;
+   if (!mediaTrack || mediaTrack.readyState === "ended") {
+     throw new Error("RemoteAudioTrack has no live MediaStreamTrack to monitor");
+   }
+   const monitor = await createRemoteAudioMonitor(mediaTrack, options);
+   const listeners = /* @__PURE__ */ new Set();
+   let currentState = monitor.state;
+   const speakingHandler = (state) => {
+     currentState = state;
+     listeners.forEach((listener) => listener(state));
+   };
+   monitor.events.on("speakingChange", speakingHandler);
+   const errorHandler = (error) => {
+     console.error("Remote audio monitor error", error);
+   };
+   monitor.events.on("error", errorHandler);
+   const controller = {
+     get speaking() {
+       return currentState.speaking;
+     },
+     get levelDb() {
+       return currentState.levelDb;
+     },
+     onChange: (listener) => {
+       listeners.add(listener);
+       listener(currentState);
+       return () => listeners.delete(listener);
+     },
+     setConfig: (config) => {
+       monitor.setConfig(config);
+     },
+     dispose: () => {
+       monitor.events.off("speakingChange", speakingHandler);
+       monitor.events.off("error", errorHandler);
+       listeners.clear();
+       monitor.dispose();
+     }
+   };
+   return controller;
+ }
  // Annotate the CommonJS export names for ESM import in node:
  0 && (module.exports = {
+   attachSpeakingDetectionToRemoteTrack,
    attachSpeakingDetectionToTrack
  });
package/dist/livekit/integration.mjs CHANGED
@@ -1,11 +1,14 @@
  import {
+   attachSpeakingDetectionToRemoteTrack,
    attachSpeakingDetectionToTrack
- } from "../chunk-6BJ4XGSA.mjs";
- import "../chunk-K4J3UUOR.mjs";
- import "../chunk-OZ7KMC4S.mjs";
+ } from "../chunk-JBGGED5Q.mjs";
+ import "../chunk-BSYE2MWZ.mjs";
  import "../chunk-IS37FHDN.mjs";
- import "../chunk-QNQK6QFB.mjs";
+ import "../chunk-DTIMONGP.mjs";
  import "../chunk-AQ5RVY33.mjs";
+ import "../chunk-OZ7KMC4S.mjs";
+ import "../chunk-QNQK6QFB.mjs";
  export {
+   attachSpeakingDetectionToRemoteTrack,
    attachSpeakingDetectionToTrack
  };
package/dist/pipeline/audio-pipeline.mjs CHANGED
@@ -1,10 +1,10 @@
  import {
    createAudioPipeline
- } from "../chunk-K4J3UUOR.mjs";
- import "../chunk-OZ7KMC4S.mjs";
+ } from "../chunk-BSYE2MWZ.mjs";
  import "../chunk-IS37FHDN.mjs";
- import "../chunk-QNQK6QFB.mjs";
  import "../chunk-AQ5RVY33.mjs";
+ import "../chunk-OZ7KMC4S.mjs";
+ import "../chunk-QNQK6QFB.mjs";
  export {
    createAudioPipeline
  };
package/dist/pipeline/remote-audio-monitor.d.mts ADDED
@@ -0,0 +1,12 @@
+ import { Emitter } from 'mitt';
+ import { SpeakingEvents, SpeakingState, RemoteSpeakingOptions } from '../types.mjs';
+
+ interface RemoteAudioMonitorHandle {
+   readonly events: Emitter<SpeakingEvents>;
+   readonly state: SpeakingState;
+   setConfig(config: Partial<RemoteSpeakingOptions>): void;
+   dispose(): void;
+ }
+ declare function createRemoteAudioMonitor(sourceTrack: MediaStreamTrack, config?: RemoteSpeakingOptions): Promise<RemoteAudioMonitorHandle>;
+
+ export { type RemoteAudioMonitorHandle, createRemoteAudioMonitor };
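Since `setConfig` takes a `Partial<RemoteSpeakingOptions>`, thresholds can be retuned on a live monitor without rebuilding the audio graph. A sketch, assuming `monitor` is a `RemoteAudioMonitorHandle` obtained from `createRemoteAudioMonitor`:

```ts
// Raise both hysteresis thresholds on an existing monitor; fields left out
// keep their current values, per updateConfig in the bundled VAD.
monitor.setConfig({
  speaking: { speakOnRatio: 0.7, speakOffRatio: 0.4 },
});
```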
package/dist/pipeline/remote-audio-monitor.d.ts ADDED
@@ -0,0 +1,12 @@
+ import { Emitter } from 'mitt';
+ import { SpeakingEvents, SpeakingState, RemoteSpeakingOptions } from '../types.js';
+
+ interface RemoteAudioMonitorHandle {
+   readonly events: Emitter<SpeakingEvents>;
+   readonly state: SpeakingState;
+   setConfig(config: Partial<RemoteSpeakingOptions>): void;
+   dispose(): void;
+ }
+ declare function createRemoteAudioMonitor(sourceTrack: MediaStreamTrack, config?: RemoteSpeakingOptions): Promise<RemoteAudioMonitorHandle>;
+
+ export { type RemoteAudioMonitorHandle, createRemoteAudioMonitor };
package/dist/pipeline/remote-audio-monitor.js ADDED
@@ -0,0 +1,276 @@
+ "use strict";
+ var __create = Object.create;
+ var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __getProtoOf = Object.getPrototypeOf;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
+ var __export = (target, all) => {
+   for (var name in all)
+     __defProp(target, name, { get: all[name], enumerable: true });
+ };
+ var __copyProps = (to, from, except, desc) => {
+   if (from && typeof from === "object" || typeof from === "function") {
+     for (let key of __getOwnPropNames(from))
+       if (!__hasOwnProp.call(to, key) && key !== except)
+         __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+   }
+   return to;
+ };
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+   // If the importer is in node compatibility mode or this is not an ESM
+   // file that has been converted to a CommonJS file using a Babel-
+   // compatible transform (i.e. "__esModule" has not been set), then set
+   // "default" to the CommonJS "module.exports" for node compatibility.
+   isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+   mod
+ ));
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+ // src/pipeline/remote-audio-monitor.ts
+ var remote_audio_monitor_exports = {};
+ __export(remote_audio_monitor_exports, {
+   createRemoteAudioMonitor: () => createRemoteAudioMonitor
+ });
+ module.exports = __toCommonJS(remote_audio_monitor_exports);
+ var import_mitt = __toESM(require("mitt"));
+
+ // src/context/audio-context.ts
+ var sharedContext = null;
+ var activePipelines = 0;
+ function getAudioContext(options) {
+   if (typeof window === "undefined" || typeof AudioContext === "undefined") {
+     throw new Error(
+       "AudioContext is not supported in this environment (browser only)."
+     );
+   }
+   if (!sharedContext || sharedContext.state === "closed") {
+     sharedContext = new AudioContext(options);
+   }
+   return sharedContext;
+ }
+ function registerPipeline() {
+   activePipelines++;
+ }
+ function unregisterPipeline() {
+   activePipelines = Math.max(0, activePipelines - 1);
+ }
+
+ // src/vad/vad-node.ts
+ function createLevelDetectorWorkletCode(smoothing) {
+   return `
+     class LevelDetectorProcessor extends AudioWorkletProcessor {
+       constructor() {
+         super();
+         this.smoothed = 0;
+         this.smoothing = ${smoothing};
+       }
+
+       process(inputs) {
+         const input = inputs[0];
+         if (!input || input.length === 0) return true;
+         const channel = input[0];
+         if (!channel || channel.length === 0) return true;
+
+         let sum = 0;
+         for (let i = 0; i < channel.length; i++) {
+           const sample = channel[i];
+           sum += sample * sample;
+         }
+         const rms = Math.sqrt(sum / channel.length);
+         this.smoothed = this.smoothed * this.smoothing + rms * (1 - this.smoothing);
+         const levelDb = 20 * Math.log10(Math.max(1e-8, this.smoothed));
+         this.port.postMessage({ levelDb });
+         return true;
+       }
+     }
+
+     registerProcessor('level-detector-processor', LevelDetectorProcessor);
+   `;
+ }
+ async function createLevelDetectorNode(context, onLevel, options) {
+   const smoothing = options?.smoothing ?? 0.9;
+   const workletCode = createLevelDetectorWorkletCode(smoothing);
+   const blob = new Blob([workletCode], { type: "application/javascript" });
+   const url = URL.createObjectURL(blob);
+   try {
+     await context.audioWorklet.addModule(url);
+   } finally {
+     URL.revokeObjectURL(url);
+   }
+   const node = new AudioWorkletNode(context, "level-detector-processor", {
+     numberOfInputs: 1,
+     numberOfOutputs: 0
+   });
+   node.port.onmessage = (event) => {
+     const { levelDb } = event.data ?? {};
+     if (typeof levelDb === "number" && !Number.isNaN(levelDb)) {
+       onLevel(levelDb);
+     }
+   };
+   node.port.onmessageerror = (event) => {
+     console.error("Level detector port error", event);
+   };
+   return {
+     node,
+     dispose: () => {
+       try {
+         node.port.onmessage = null;
+         node.port.close();
+       } catch (error) {
+         console.error("Failed to dispose level detector node", error);
+       }
+     }
+   };
+ }
+
+ // src/vad/vad-state.ts
+ var LevelBasedVAD = class {
+   config;
+   speaking = false;
+   pendingSpeechSince = null;
+   pendingSilenceSince = null;
+   constructor(config) {
+     this.config = {
+       minDb: config.minDb,
+       maxDb: config.maxDb,
+       speakOnRatio: config.speakOnRatio ?? 0.6,
+       speakOffRatio: config.speakOffRatio ?? 0.3,
+       hangoverMs: config.hangoverMs ?? 350,
+       attackMs: config.attackMs ?? 50,
+       releaseMs: config.releaseMs ?? 120
+     };
+   }
+   updateConfig(config) {
+     this.config = {
+       ...this.config,
+       ...config,
+       speakOnRatio: config.speakOnRatio ?? this.config.speakOnRatio,
+       speakOffRatio: config.speakOffRatio ?? this.config.speakOffRatio,
+       hangoverMs: config.hangoverMs ?? this.config.hangoverMs,
+       attackMs: config.attackMs ?? this.config.attackMs,
+       releaseMs: config.releaseMs ?? this.config.releaseMs
+     };
+   }
+   process(levelDb, timestampMs) {
+     const {
+       minDb,
+       maxDb,
+       speakOnRatio,
+       speakOffRatio,
+       hangoverMs,
+       attackMs,
+       releaseMs
+     } = this.config;
+     const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
+     const norm = (clamped - minDb) / Math.max(1, maxDb - minDb);
+     if (!this.speaking) {
+       if (norm >= speakOnRatio) {
+         this.pendingSpeechSince = this.pendingSpeechSince ?? timestampMs;
+         if (timestampMs - this.pendingSpeechSince >= attackMs) {
+           this.speaking = true;
+           this.pendingSpeechSince = null;
+           this.pendingSilenceSince = null;
+         }
+       } else {
+         this.pendingSpeechSince = null;
+       }
+     } else {
+       if (norm <= speakOffRatio) {
+         this.pendingSilenceSince = this.pendingSilenceSince ?? timestampMs;
+         const releaseWindow = Math.max(releaseMs, hangoverMs);
+         if (timestampMs - this.pendingSilenceSince >= releaseWindow) {
+           this.speaking = false;
+           this.pendingSilenceSince = null;
+           this.pendingSpeechSince = null;
+         }
+       } else {
+         this.pendingSilenceSince = null;
+       }
+     }
+     return {
+       speaking: this.speaking,
+       levelDb: clamped
+     };
+   }
+ };
+
+ // src/pipeline/remote-audio-monitor.ts
+ async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+   const context = getAudioContext();
+   registerPipeline();
+   const fullConfig = {
+     speaking: {
+       minDb: config.speaking?.minDb ?? -60,
+       maxDb: config.speaking?.maxDb ?? -20,
+       speakOnRatio: config.speaking?.speakOnRatio ?? 0.6,
+       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+       hangoverMs: config.speaking?.hangoverMs ?? 350,
+       attackMs: config.speaking?.attackMs ?? 50,
+       releaseMs: config.speaking?.releaseMs ?? 120
+     }
+   };
+   if (!sourceTrack || sourceTrack.kind !== "audio") {
+     throw new Error(
+       "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+     );
+   }
+   if (sourceTrack.readyState === "ended") {
+     throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+   }
+   const sourceStream = new MediaStream([sourceTrack]);
+   const sourceNode = context.createMediaStreamSource(sourceStream);
+   const emitter = (0, import_mitt.default)();
+   const vad = new LevelBasedVAD(fullConfig.speaking);
+   let lastState = { speaking: false, levelDb: -Infinity };
+   const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+     try {
+       const timestamp = context.currentTime * 1e3;
+       const nextState = vad.process(levelDb, timestamp);
+       const speakingChanged = nextState.speaking !== lastState.speaking;
+       const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+       if (speakingChanged || levelChanged) {
+         lastState = nextState;
+         emitter.emit("speakingChange", nextState);
+       }
+     } catch (error) {
+       const err = error instanceof Error ? error : new Error(String(error));
+       emitter.emit("error", err);
+     }
+   });
+   sourceNode.connect(levelHandle.node);
+   function dispose() {
+     try {
+       sourceNode.disconnect();
+       levelHandle.node.disconnect();
+       levelHandle.dispose();
+     } catch (error) {
+       console.error("Error during remote monitor disposal", error);
+     } finally {
+       unregisterPipeline();
+     }
+   }
+   const handle = {
+     events: emitter,
+     get state() {
+       return lastState;
+     },
+     setConfig: (next) => {
+       try {
+         if (next.speaking) {
+           vad.updateConfig(next.speaking);
+           fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+         }
+       } catch (error) {
+         const err = error instanceof Error ? error : new Error(String(error));
+         emitter.emit("error", err);
+       }
+     },
+     dispose
+   };
+   return handle;
+ }
+ // Annotate the CommonJS export names for ESM import in node:
+ 0 && (module.exports = {
+   createRemoteAudioMonitor
+ });
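The `LevelBasedVAD` above normalizes the clamped level into a 0..1 ratio and applies attack/release timing with hysteresis. A worked example using the defaults from the bundle:

```ts
// Defaults from the bundle: minDb -60, maxDb -20, speakOnRatio 0.6,
// speakOffRatio 0.3, attackMs 50, releaseMs 120, hangoverMs 350.
const minDb = -60;
const maxDb = -20;
const norm = (levelDb: number) =>
  (Math.min(maxDb, Math.max(minDb, levelDb)) - minDb) / Math.max(1, maxDb - minDb);

norm(-35); // (−35 + 60) / 40 = 0.625 >= 0.6 -> counts toward speech onset
norm(-50); // (−50 + 60) / 40 = 0.25 <= 0.3 -> counts toward speech offset

// Onset requires the ratio to stay >= speakOnRatio for attackMs (50 ms);
// offset requires it to stay <= speakOffRatio for max(releaseMs, hangoverMs),
// i.e. 350 ms with these defaults.
```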
package/dist/pipeline/remote-audio-monitor.mjs ADDED
@@ -0,0 +1,9 @@
+ import {
+   createRemoteAudioMonitor
+ } from "../chunk-DTIMONGP.mjs";
+ import "../chunk-AQ5RVY33.mjs";
+ import "../chunk-OZ7KMC4S.mjs";
+ import "../chunk-QNQK6QFB.mjs";
+ export {
+   createRemoteAudioMonitor
+ };
package/dist/types.d.mts CHANGED
@@ -29,6 +29,9 @@ interface LivekitSpeakingOptions {
    output?: OutputGainConfig;
    muteWhenSilent?: boolean;
  }
+ interface RemoteSpeakingOptions {
+   speaking?: SpeakingDetectionConfig;
+ }
  interface SpeakingState {
    speaking: boolean;
    levelDb: number;
@@ -52,4 +55,4 @@ interface SpeakingController {
    dispose(): void;
  }
 
- export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
+ export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
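The new `RemoteSpeakingOptions` carries only the speaking-detection config. A typed sketch with illustrative values; the field set matches the defaults applied in the bundle, and the optionality of individual fields is assumed from the `??` fallbacks:

```ts
import type { RemoteSpeakingOptions } from "@tensamin/audio";

const options: RemoteSpeakingOptions = {
  speaking: {
    minDb: -55,      // floor of the dB normalization range
    maxDb: -15,      // ceiling of the dB normalization range
    hangoverMs: 500, // hold `speaking` true across short pauses
  },
};
```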
package/dist/types.d.ts CHANGED
@@ -29,6 +29,9 @@ interface LivekitSpeakingOptions {
    output?: OutputGainConfig;
    muteWhenSilent?: boolean;
  }
+ interface RemoteSpeakingOptions {
+   speaking?: SpeakingDetectionConfig;
+ }
  interface SpeakingState {
    speaking: boolean;
    levelDb: number;
@@ -52,4 +55,4 @@ interface SpeakingController {
    dispose(): void;
  }
 
- export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
+ export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@tensamin/audio",
-   "version": "0.2.0",
+   "version": "0.2.1",
    "author": {
      "email": "aloisianer@proton.me",
      "name": "Alois"
package/dist/chunk-BSYE2MWZ.mjs CHANGED (renamed from chunk-K4J3UUOR.mjs)
@@ -1,17 +1,17 @@
+ import {
+   createDeepFilterNet3Node
+ } from "./chunk-IS37FHDN.mjs";
+ import {
+   LevelBasedVAD
+ } from "./chunk-AQ5RVY33.mjs";
  import {
    getAudioContext,
    registerPipeline,
    unregisterPipeline
  } from "./chunk-OZ7KMC4S.mjs";
- import {
-   createDeepFilterNet3Node
- } from "./chunk-IS37FHDN.mjs";
  import {
    createLevelDetectorNode
  } from "./chunk-QNQK6QFB.mjs";
- import {
-   LevelBasedVAD
- } from "./chunk-AQ5RVY33.mjs";
 
  // src/pipeline/audio-pipeline.ts
  import mitt from "mitt";