@tensamin/audio 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/dist/chunk-7IKKNKM7.mjs +92 -0
- package/dist/{chunk-K4J3UUOR.mjs → chunk-BAUJY4Q2.mjs} +10 -10
- package/dist/{chunk-6BJ4XGSA.mjs → chunk-YQPL2O7D.mjs} +52 -3
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +129 -7
- package/dist/index.mjs +7 -4
- package/dist/livekit/integration.d.mts +4 -3
- package/dist/livekit/integration.d.ts +4 -3
- package/dist/livekit/integration.js +129 -5
- package/dist/livekit/integration.mjs +7 -4
- package/dist/pipeline/audio-pipeline.js +4 -4
- package/dist/pipeline/audio-pipeline.mjs +3 -3
- package/dist/pipeline/remote-audio-monitor.d.mts +12 -0
- package/dist/pipeline/remote-audio-monitor.d.ts +12 -0
- package/dist/pipeline/remote-audio-monitor.js +276 -0
- package/dist/pipeline/remote-audio-monitor.mjs +9 -0
- package/dist/types.d.mts +4 -1
- package/dist/types.d.ts +4 -1
- package/package.json +1 -1
package/README.md
CHANGED
````diff
@@ -9,6 +9,9 @@ DeepFilterNet3-based noise suppression and realtime speaking detection for LiveK
 - Automatic mute/unmute for LiveKit tracks
 - Simple min/max dB speaking thresholds
 
+> [Noise suppression is provided via the `deepfilternet3-noise-filter` package.](https://www.npmjs.com/package/deepfilternet3-noise-filter)
+> [That package is based on DeepFilterNet by Rikorose.](https://github.com/Rikorose/DeepFilterNet)
+
 ## Installation
 
 ```bash
````
package/dist/chunk-7IKKNKM7.mjs
ADDED

```diff
@@ -0,0 +1,92 @@
+import {
+  LevelBasedVAD
+} from "./chunk-AQ5RVY33.mjs";
+import {
+  getAudioContext,
+  registerPipeline,
+  unregisterPipeline
+} from "./chunk-OZ7KMC4S.mjs";
+import {
+  createLevelDetectorNode
+} from "./chunk-QNQK6QFB.mjs";
+
+// src/pipeline/remote-audio-monitor.ts
+import mitt from "mitt";
+async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+  const context = getAudioContext();
+  registerPipeline();
+  const fullConfig = {
+    speaking: {
+      minDb: config.speaking?.minDb ?? -55,
+      maxDb: config.speaking?.maxDb ?? -20,
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
+      speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
+      releaseMs: config.speaking?.releaseMs ?? 120
+    }
+  };
+  if (!sourceTrack || sourceTrack.kind !== "audio") {
+    throw new Error(
+      "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+    );
+  }
+  if (sourceTrack.readyState === "ended") {
+    throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+  }
+  const sourceStream = new MediaStream([sourceTrack]);
+  const sourceNode = context.createMediaStreamSource(sourceStream);
+  const emitter = mitt();
+  const vad = new LevelBasedVAD(fullConfig.speaking);
+  let lastState = { speaking: false, levelDb: -Infinity };
+  const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+    try {
+      const timestamp = context.currentTime * 1e3;
+      const nextState = vad.process(levelDb, timestamp);
+      const speakingChanged = nextState.speaking !== lastState.speaking;
+      const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+      if (speakingChanged || levelChanged) {
+        lastState = nextState;
+        emitter.emit("speakingChange", nextState);
+      }
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      emitter.emit("error", err);
+    }
+  });
+  sourceNode.connect(levelHandle.node);
+  function dispose() {
+    try {
+      sourceNode.disconnect();
+      levelHandle.node.disconnect();
+      levelHandle.dispose();
+    } catch (error) {
+      console.error("Error during remote monitor disposal", error);
+    } finally {
+      unregisterPipeline();
+    }
+  }
+  const handle = {
+    events: emitter,
+    get state() {
+      return lastState;
+    },
+    setConfig: (next) => {
+      try {
+        if (next.speaking) {
+          vad.updateConfig(next.speaking);
+          fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+        }
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error));
+        emitter.emit("error", err);
+      }
+    },
+    dispose
+  };
+  return handle;
+}
+
+export {
+  createRemoteAudioMonitor
+};
```
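For orientation, a minimal sketch of driving the new monitor directly with a raw remote `MediaStreamTrack`. Assumptions: a browser environment, and that the `@tensamin/audio/pipeline/remote-audio-monitor` subpath is importable (the dist layout suggests it, but the package.json exports map is not shown in this diff).

```ts
// Hedged sketch: drive createRemoteAudioMonitor with a raw MediaStreamTrack.
// The import subpath below is an assumption based on the dist layout.
import { createRemoteAudioMonitor } from "@tensamin/audio/pipeline/remote-audio-monitor";

async function watchRemoteTrack(track: MediaStreamTrack): Promise<() => void> {
  const monitor = await createRemoteAudioMonitor(track, {
    // Any omitted field falls back to the defaults visible above
    // (minDb -55, maxDb -20, speakOnRatio 0.5, speakOffRatio 0.3, ...).
    speaking: { minDb: -55, maxDb: -20 },
  });
  monitor.events.on("speakingChange", (state) => {
    console.log(state.speaking ? "speaking" : "silent", `${state.levelDb.toFixed(1)} dB`);
  });
  monitor.events.on("error", (err) => console.error("monitor error", err));
  // The caller owns teardown; dispose() disconnects the Web Audio nodes
  // and unregisters the shared pipeline.
  return () => monitor.dispose();
}
```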
package/dist/{chunk-K4J3UUOR.mjs → chunk-BAUJY4Q2.mjs}
RENAMED

```diff
@@ -1,17 +1,17 @@
+import {
+  createDeepFilterNet3Node
+} from "./chunk-IS37FHDN.mjs";
+import {
+  LevelBasedVAD
+} from "./chunk-AQ5RVY33.mjs";
 import {
   getAudioContext,
   registerPipeline,
   unregisterPipeline
 } from "./chunk-OZ7KMC4S.mjs";
-import {
-  createDeepFilterNet3Node
-} from "./chunk-IS37FHDN.mjs";
 import {
   createLevelDetectorNode
 } from "./chunk-QNQK6QFB.mjs";
-import {
-  LevelBasedVAD
-} from "./chunk-AQ5RVY33.mjs";
 
 // src/pipeline/audio-pipeline.ts
 import mitt from "mitt";
@@ -28,12 +28,12 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   const fullConfig = {
     noiseSuppression: nsConfig,
     speaking: {
-      minDb: config.speaking?.minDb ?? -
+      minDb: config.speaking?.minDb ?? -55,
       maxDb: config.speaking?.maxDb ?? -20,
-      speakOnRatio: config.speaking?.speakOnRatio ?? 0.
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
-      hangoverMs: config.speaking?.hangoverMs ??
-      attackMs: config.speaking?.attackMs ??
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
       releaseMs: config.speaking?.releaseMs ?? 120
     },
     output: {
```
package/dist/{chunk-6BJ4XGSA.mjs → chunk-YQPL2O7D.mjs}
RENAMED

```diff
@@ -1,9 +1,11 @@
 import {
   createAudioPipeline
-} from "./chunk-
+} from "./chunk-BAUJY4Q2.mjs";
+import {
+  createRemoteAudioMonitor
+} from "./chunk-7IKKNKM7.mjs";
 
 // src/livekit/integration.ts
-import "mitt";
 async function attachSpeakingDetectionToTrack(track, options = {}) {
   if (!track) {
     throw new Error(
@@ -74,7 +76,54 @@ async function attachSpeakingDetectionToTrack(track, options = {}) {
   };
   return controller;
 }
+async function attachSpeakingDetectionToRemoteTrack(track, options = {}) {
+  if (!track) {
+    throw new Error(
+      "attachSpeakingDetectionToRemoteTrack requires a valid RemoteAudioTrack"
+    );
+  }
+  const mediaTrack = track.mediaStreamTrack;
+  if (!mediaTrack || mediaTrack.readyState === "ended") {
+    throw new Error("RemoteAudioTrack has no live MediaStreamTrack to monitor");
+  }
+  const monitor = await createRemoteAudioMonitor(mediaTrack, options);
+  const listeners = /* @__PURE__ */ new Set();
+  let currentState = monitor.state;
+  const speakingHandler = (state) => {
+    currentState = state;
+    listeners.forEach((listener) => listener(state));
+  };
+  monitor.events.on("speakingChange", speakingHandler);
+  const errorHandler = (error) => {
+    console.error("Remote audio monitor error", error);
+  };
+  monitor.events.on("error", errorHandler);
+  const controller = {
+    get speaking() {
+      return currentState.speaking;
+    },
+    get levelDb() {
+      return currentState.levelDb;
+    },
+    onChange: (listener) => {
+      listeners.add(listener);
+      listener(currentState);
+      return () => listeners.delete(listener);
+    },
+    setConfig: (config) => {
+      monitor.setConfig(config);
+    },
+    dispose: () => {
+      monitor.events.off("speakingChange", speakingHandler);
+      monitor.events.off("error", errorHandler);
+      listeners.clear();
+      monitor.dispose();
+    }
+  };
+  return controller;
+}
 
 export {
-  attachSpeakingDetectionToTrack
+  attachSpeakingDetectionToTrack,
+  attachSpeakingDetectionToRemoteTrack
 };
```
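The headline addition here is `attachSpeakingDetectionToRemoteTrack`. A hedged sketch of wiring it into a LiveKit room follows; the `RoomEvent.TrackSubscribed` signature is standard livekit-client API, while the controller methods (`onChange`, `dispose`) come from the code added above.

```ts
import {
  Room, RoomEvent, Track,
  type RemoteAudioTrack, type RemoteTrack,
} from "livekit-client";
import { attachSpeakingDetectionToRemoteTrack } from "@tensamin/audio";

// Sketch: flag remote participants as speaking/silent as their audio
// tracks are subscribed. Cleanup wiring is simplified for brevity.
function monitorRemoteSpeakers(room: Room): void {
  room.on(RoomEvent.TrackSubscribed, async (track, _publication, participant) => {
    if (track.kind !== Track.Kind.Audio) return;
    const controller = await attachSpeakingDetectionToRemoteTrack(track as RemoteAudioTrack);
    const unsubscribe = controller.onChange((state) => {
      console.log(participant.identity, state.speaking ? "is speaking" : "is silent");
    });
    const onUnsub = (ended: RemoteTrack) => {
      if (ended !== track) return;
      room.off(RoomEvent.TrackUnsubscribed, onUnsub);
      unsubscribe();
      controller.dispose(); // detaches the monitor and its audio nodes
    };
    room.on(RoomEvent.TrackUnsubscribed, onUnsub);
  });
}
```

Each controller owns one small Web Audio graph, so disposing it on `TrackUnsubscribed` keeps the shared pipeline count balanced (see `registerPipeline`/`unregisterPipeline` in the monitor above).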
package/dist/index.d.mts
CHANGED
```diff
@@ -1,4 +1,4 @@
-export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.mjs';
-export { attachSpeakingDetectionToTrack } from './livekit/integration.mjs';
+export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.mjs';
+export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack } from './livekit/integration.mjs';
 import 'mitt';
 import 'livekit-client';
```
package/dist/index.d.ts
CHANGED
```diff
@@ -1,4 +1,4 @@
-export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.js';
-export { attachSpeakingDetectionToTrack } from './livekit/integration.js';
+export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.js';
+export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack } from './livekit/integration.js';
 import 'mitt';
 import 'livekit-client';
```
package/dist/index.js
CHANGED
```diff
@@ -30,13 +30,11 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var index_exports = {};
 __export(index_exports, {
+  attachSpeakingDetectionToRemoteTrack: () => attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack: () => attachSpeakingDetectionToTrack
 });
 module.exports = __toCommonJS(index_exports);
 
-// src/livekit/integration.ts
-var import_mitt2 = require("mitt");
-
 // src/pipeline/audio-pipeline.ts
 var import_mitt = __toESM(require("mitt"));
 
@@ -244,12 +242,12 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   const fullConfig = {
     noiseSuppression: nsConfig,
     speaking: {
-      minDb: config.speaking?.minDb ?? -
+      minDb: config.speaking?.minDb ?? -55,
       maxDb: config.speaking?.maxDb ?? -20,
-      speakOnRatio: config.speaking?.speakOnRatio ?? 0.
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
-      hangoverMs: config.speaking?.hangoverMs ??
-      attackMs: config.speaking?.attackMs ??
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
       releaseMs: config.speaking?.releaseMs ?? 120
     },
     output: {
@@ -389,6 +387,83 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   return handle;
 }
 
+// src/pipeline/remote-audio-monitor.ts
+var import_mitt2 = __toESM(require("mitt"));
+async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+  const context = getAudioContext();
+  registerPipeline();
+  const fullConfig = {
+    speaking: {
+      minDb: config.speaking?.minDb ?? -55,
+      maxDb: config.speaking?.maxDb ?? -20,
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
+      speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
+      releaseMs: config.speaking?.releaseMs ?? 120
+    }
+  };
+  if (!sourceTrack || sourceTrack.kind !== "audio") {
+    throw new Error(
+      "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+    );
+  }
+  if (sourceTrack.readyState === "ended") {
+    throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+  }
+  const sourceStream = new MediaStream([sourceTrack]);
+  const sourceNode = context.createMediaStreamSource(sourceStream);
+  const emitter = (0, import_mitt2.default)();
+  const vad = new LevelBasedVAD(fullConfig.speaking);
+  let lastState = { speaking: false, levelDb: -Infinity };
+  const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+    try {
+      const timestamp = context.currentTime * 1e3;
+      const nextState = vad.process(levelDb, timestamp);
+      const speakingChanged = nextState.speaking !== lastState.speaking;
+      const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+      if (speakingChanged || levelChanged) {
+        lastState = nextState;
+        emitter.emit("speakingChange", nextState);
+      }
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      emitter.emit("error", err);
+    }
+  });
+  sourceNode.connect(levelHandle.node);
+  function dispose() {
+    try {
+      sourceNode.disconnect();
+      levelHandle.node.disconnect();
+      levelHandle.dispose();
+    } catch (error) {
+      console.error("Error during remote monitor disposal", error);
+    } finally {
+      unregisterPipeline();
+    }
+  }
+  const handle = {
+    events: emitter,
+    get state() {
+      return lastState;
+    },
+    setConfig: (next) => {
+      try {
+        if (next.speaking) {
+          vad.updateConfig(next.speaking);
+          fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+        }
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error));
+        emitter.emit("error", err);
+      }
+    },
+    dispose
+  };
+  return handle;
+}
+
 // src/livekit/integration.ts
 async function attachSpeakingDetectionToTrack(track, options = {}) {
   if (!track) {
@@ -460,7 +535,54 @@ async function attachSpeakingDetectionToTrack(track, options = {}) {
   };
   return controller;
 }
+async function attachSpeakingDetectionToRemoteTrack(track, options = {}) {
+  if (!track) {
+    throw new Error(
+      "attachSpeakingDetectionToRemoteTrack requires a valid RemoteAudioTrack"
+    );
+  }
+  const mediaTrack = track.mediaStreamTrack;
+  if (!mediaTrack || mediaTrack.readyState === "ended") {
+    throw new Error("RemoteAudioTrack has no live MediaStreamTrack to monitor");
+  }
+  const monitor = await createRemoteAudioMonitor(mediaTrack, options);
+  const listeners = /* @__PURE__ */ new Set();
+  let currentState = monitor.state;
+  const speakingHandler = (state) => {
+    currentState = state;
+    listeners.forEach((listener) => listener(state));
+  };
+  monitor.events.on("speakingChange", speakingHandler);
+  const errorHandler = (error) => {
+    console.error("Remote audio monitor error", error);
+  };
+  monitor.events.on("error", errorHandler);
+  const controller = {
+    get speaking() {
+      return currentState.speaking;
+    },
+    get levelDb() {
+      return currentState.levelDb;
+    },
+    onChange: (listener) => {
+      listeners.add(listener);
+      listener(currentState);
+      return () => listeners.delete(listener);
+    },
+    setConfig: (config) => {
+      monitor.setConfig(config);
+    },
+    dispose: () => {
+      monitor.events.off("speakingChange", speakingHandler);
+      monitor.events.off("error", errorHandler);
+      listeners.clear();
+      monitor.dispose();
+    }
+  };
+  return controller;
+}
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
 });
```
package/dist/index.mjs
CHANGED
```diff
@@ -1,12 +1,15 @@
 import "./chunk-WBQAMGXK.mjs";
 import {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
-} from "./chunk-
-import "./chunk-
-import "./chunk-OZ7KMC4S.mjs";
+} from "./chunk-YQPL2O7D.mjs";
+import "./chunk-BAUJY4Q2.mjs";
 import "./chunk-IS37FHDN.mjs";
-import "./chunk-
+import "./chunk-7IKKNKM7.mjs";
 import "./chunk-AQ5RVY33.mjs";
+import "./chunk-OZ7KMC4S.mjs";
+import "./chunk-QNQK6QFB.mjs";
 export {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
 };
```
package/dist/livekit/integration.d.mts
CHANGED

```diff
@@ -1,7 +1,8 @@
-import { LivekitSpeakingOptions, SpeakingController } from '../types.mjs';
-import { LocalAudioTrack } from 'livekit-client';
+import { LivekitSpeakingOptions, SpeakingController, RemoteSpeakingOptions } from '../types.mjs';
+import { LocalAudioTrack, RemoteAudioTrack } from 'livekit-client';
 import 'mitt';
 
 declare function attachSpeakingDetectionToTrack(track: LocalAudioTrack, options?: LivekitSpeakingOptions): Promise<SpeakingController>;
+declare function attachSpeakingDetectionToRemoteTrack(track: RemoteAudioTrack, options?: RemoteSpeakingOptions): Promise<SpeakingController>;
 
-export { attachSpeakingDetectionToTrack };
+export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack };
```
package/dist/livekit/integration.d.ts
CHANGED

```diff
@@ -1,7 +1,8 @@
-import { LivekitSpeakingOptions, SpeakingController } from '../types.js';
-import { LocalAudioTrack } from 'livekit-client';
+import { LivekitSpeakingOptions, SpeakingController, RemoteSpeakingOptions } from '../types.js';
+import { LocalAudioTrack, RemoteAudioTrack } from 'livekit-client';
 import 'mitt';
 
 declare function attachSpeakingDetectionToTrack(track: LocalAudioTrack, options?: LivekitSpeakingOptions): Promise<SpeakingController>;
+declare function attachSpeakingDetectionToRemoteTrack(track: RemoteAudioTrack, options?: RemoteSpeakingOptions): Promise<SpeakingController>;
 
-export { attachSpeakingDetectionToTrack };
+export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack };
```
package/dist/livekit/integration.js
CHANGED

```diff
@@ -30,10 +30,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/livekit/integration.ts
 var integration_exports = {};
 __export(integration_exports, {
+  attachSpeakingDetectionToRemoteTrack: () => attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack: () => attachSpeakingDetectionToTrack
 });
 module.exports = __toCommonJS(integration_exports);
-var import_mitt2 = require("mitt");
 
 // src/pipeline/audio-pipeline.ts
 var import_mitt = __toESM(require("mitt"));
@@ -242,12 +242,12 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   const fullConfig = {
     noiseSuppression: nsConfig,
     speaking: {
-      minDb: config.speaking?.minDb ?? -
+      minDb: config.speaking?.minDb ?? -55,
       maxDb: config.speaking?.maxDb ?? -20,
-      speakOnRatio: config.speaking?.speakOnRatio ?? 0.
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
-      hangoverMs: config.speaking?.hangoverMs ??
-      attackMs: config.speaking?.attackMs ??
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
       releaseMs: config.speaking?.releaseMs ?? 120
     },
     output: {
@@ -387,6 +387,83 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   return handle;
 }
 
+// src/pipeline/remote-audio-monitor.ts
+var import_mitt2 = __toESM(require("mitt"));
+async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+  const context = getAudioContext();
+  registerPipeline();
+  const fullConfig = {
+    speaking: {
+      minDb: config.speaking?.minDb ?? -55,
+      maxDb: config.speaking?.maxDb ?? -20,
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
+      speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
+      releaseMs: config.speaking?.releaseMs ?? 120
+    }
+  };
+  if (!sourceTrack || sourceTrack.kind !== "audio") {
+    throw new Error(
+      "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+    );
+  }
+  if (sourceTrack.readyState === "ended") {
+    throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+  }
+  const sourceStream = new MediaStream([sourceTrack]);
+  const sourceNode = context.createMediaStreamSource(sourceStream);
+  const emitter = (0, import_mitt2.default)();
+  const vad = new LevelBasedVAD(fullConfig.speaking);
+  let lastState = { speaking: false, levelDb: -Infinity };
+  const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+    try {
+      const timestamp = context.currentTime * 1e3;
+      const nextState = vad.process(levelDb, timestamp);
+      const speakingChanged = nextState.speaking !== lastState.speaking;
+      const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+      if (speakingChanged || levelChanged) {
+        lastState = nextState;
+        emitter.emit("speakingChange", nextState);
+      }
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      emitter.emit("error", err);
+    }
+  });
+  sourceNode.connect(levelHandle.node);
+  function dispose() {
+    try {
+      sourceNode.disconnect();
+      levelHandle.node.disconnect();
+      levelHandle.dispose();
+    } catch (error) {
+      console.error("Error during remote monitor disposal", error);
+    } finally {
+      unregisterPipeline();
+    }
+  }
+  const handle = {
+    events: emitter,
+    get state() {
+      return lastState;
+    },
+    setConfig: (next) => {
+      try {
+        if (next.speaking) {
+          vad.updateConfig(next.speaking);
+          fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+        }
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error));
+        emitter.emit("error", err);
+      }
+    },
+    dispose
+  };
+  return handle;
+}
+
 // src/livekit/integration.ts
 async function attachSpeakingDetectionToTrack(track, options = {}) {
   if (!track) {
@@ -458,7 +535,54 @@ async function attachSpeakingDetectionToTrack(track, options = {}) {
   };
   return controller;
 }
+async function attachSpeakingDetectionToRemoteTrack(track, options = {}) {
+  if (!track) {
+    throw new Error(
+      "attachSpeakingDetectionToRemoteTrack requires a valid RemoteAudioTrack"
+    );
+  }
+  const mediaTrack = track.mediaStreamTrack;
+  if (!mediaTrack || mediaTrack.readyState === "ended") {
+    throw new Error("RemoteAudioTrack has no live MediaStreamTrack to monitor");
+  }
+  const monitor = await createRemoteAudioMonitor(mediaTrack, options);
+  const listeners = /* @__PURE__ */ new Set();
+  let currentState = monitor.state;
+  const speakingHandler = (state) => {
+    currentState = state;
+    listeners.forEach((listener) => listener(state));
+  };
+  monitor.events.on("speakingChange", speakingHandler);
+  const errorHandler = (error) => {
+    console.error("Remote audio monitor error", error);
+  };
+  monitor.events.on("error", errorHandler);
+  const controller = {
+    get speaking() {
+      return currentState.speaking;
+    },
+    get levelDb() {
+      return currentState.levelDb;
+    },
+    onChange: (listener) => {
+      listeners.add(listener);
+      listener(currentState);
+      return () => listeners.delete(listener);
+    },
+    setConfig: (config) => {
+      monitor.setConfig(config);
+    },
+    dispose: () => {
+      monitor.events.off("speakingChange", speakingHandler);
+      monitor.events.off("error", errorHandler);
+      listeners.clear();
+      monitor.dispose();
+    }
+  };
+  return controller;
+}
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
 });
```
package/dist/livekit/integration.mjs
CHANGED

```diff
@@ -1,11 +1,14 @@
 import {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
-} from "../chunk-
-import "../chunk-
-import "../chunk-OZ7KMC4S.mjs";
+} from "../chunk-YQPL2O7D.mjs";
+import "../chunk-BAUJY4Q2.mjs";
 import "../chunk-IS37FHDN.mjs";
-import "../chunk-
+import "../chunk-7IKKNKM7.mjs";
 import "../chunk-AQ5RVY33.mjs";
+import "../chunk-OZ7KMC4S.mjs";
+import "../chunk-QNQK6QFB.mjs";
 export {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
 };
```
package/dist/pipeline/audio-pipeline.js
CHANGED

```diff
@@ -239,12 +239,12 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   const fullConfig = {
     noiseSuppression: nsConfig,
    speaking: {
-      minDb: config.speaking?.minDb ?? -
+      minDb: config.speaking?.minDb ?? -55,
       maxDb: config.speaking?.maxDb ?? -20,
-      speakOnRatio: config.speaking?.speakOnRatio ?? 0.
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
-      hangoverMs: config.speaking?.hangoverMs ??
-      attackMs: config.speaking?.attackMs ??
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
       releaseMs: config.speaking?.releaseMs ?? 120
     },
     output: {
```
package/dist/pipeline/audio-pipeline.mjs
CHANGED

```diff
@@ -1,10 +1,10 @@
 import {
   createAudioPipeline
-} from "../chunk-
-import "../chunk-OZ7KMC4S.mjs";
+} from "../chunk-BAUJY4Q2.mjs";
 import "../chunk-IS37FHDN.mjs";
-import "../chunk-QNQK6QFB.mjs";
 import "../chunk-AQ5RVY33.mjs";
+import "../chunk-OZ7KMC4S.mjs";
+import "../chunk-QNQK6QFB.mjs";
 export {
   createAudioPipeline
 };
```
package/dist/pipeline/remote-audio-monitor.d.mts
ADDED

```diff
@@ -0,0 +1,12 @@
+import { Emitter } from 'mitt';
+import { SpeakingEvents, SpeakingState, RemoteSpeakingOptions } from '../types.mjs';
+
+interface RemoteAudioMonitorHandle {
+  readonly events: Emitter<SpeakingEvents>;
+  readonly state: SpeakingState;
+  setConfig(config: Partial<RemoteSpeakingOptions>): void;
+  dispose(): void;
+}
+declare function createRemoteAudioMonitor(sourceTrack: MediaStreamTrack, config?: RemoteSpeakingOptions): Promise<RemoteAudioMonitorHandle>;
+
+export { type RemoteAudioMonitorHandle, createRemoteAudioMonitor };
```
package/dist/pipeline/remote-audio-monitor.d.ts
ADDED

```diff
@@ -0,0 +1,12 @@
+import { Emitter } from 'mitt';
+import { SpeakingEvents, SpeakingState, RemoteSpeakingOptions } from '../types.js';
+
+interface RemoteAudioMonitorHandle {
+  readonly events: Emitter<SpeakingEvents>;
+  readonly state: SpeakingState;
+  setConfig(config: Partial<RemoteSpeakingOptions>): void;
+  dispose(): void;
+}
+declare function createRemoteAudioMonitor(sourceTrack: MediaStreamTrack, config?: RemoteSpeakingOptions): Promise<RemoteAudioMonitorHandle>;
+
+export { type RemoteAudioMonitorHandle, createRemoteAudioMonitor };
```
package/dist/pipeline/remote-audio-monitor.js
ADDED

```diff
@@ -0,0 +1,276 @@
+"use strict";
+var __create = Object.create;
+var __defProp = Object.defineProperty;
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getProtoOf = Object.getPrototypeOf;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __export = (target, all) => {
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+var __copyProps = (to, from, except, desc) => {
+  if (from && typeof from === "object" || typeof from === "function") {
+    for (let key of __getOwnPropNames(from))
+      if (!__hasOwnProp.call(to, key) && key !== except)
+        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+  }
+  return to;
+};
+var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+  // If the importer is in node compatibility mode or this is not an ESM
+  // file that has been converted to a CommonJS file using a Babel-
+  // compatible transform (i.e. "__esModule" has not been set), then set
+  // "default" to the CommonJS "module.exports" for node compatibility.
+  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+  mod
+));
+var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+// src/pipeline/remote-audio-monitor.ts
+var remote_audio_monitor_exports = {};
+__export(remote_audio_monitor_exports, {
+  createRemoteAudioMonitor: () => createRemoteAudioMonitor
+});
+module.exports = __toCommonJS(remote_audio_monitor_exports);
+var import_mitt = __toESM(require("mitt"));
+
+// src/context/audio-context.ts
+var sharedContext = null;
+var activePipelines = 0;
+function getAudioContext(options) {
+  if (typeof window === "undefined" || typeof AudioContext === "undefined") {
+    throw new Error(
+      "AudioContext is not supported in this environment (browser only)."
+    );
+  }
+  if (!sharedContext || sharedContext.state === "closed") {
+    sharedContext = new AudioContext(options);
+  }
+  return sharedContext;
+}
+function registerPipeline() {
+  activePipelines++;
+}
+function unregisterPipeline() {
+  activePipelines = Math.max(0, activePipelines - 1);
+}
+
+// src/vad/vad-node.ts
+function createLevelDetectorWorkletCode(smoothing) {
+  return `
+class LevelDetectorProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this.smoothed = 0;
+    this.smoothing = ${smoothing};
+  }
+
+  process(inputs) {
+    const input = inputs[0];
+    if (!input || input.length === 0) return true;
+    const channel = input[0];
+    if (!channel || channel.length === 0) return true;
+
+    let sum = 0;
+    for (let i = 0; i < channel.length; i++) {
+      const sample = channel[i];
+      sum += sample * sample;
+    }
+    const rms = Math.sqrt(sum / channel.length);
+    this.smoothed = this.smoothed * this.smoothing + rms * (1 - this.smoothing);
+    const levelDb = 20 * Math.log10(Math.max(1e-8, this.smoothed));
+    this.port.postMessage({ levelDb });
+    return true;
+  }
+}
+
+registerProcessor('level-detector-processor', LevelDetectorProcessor);
+`;
+}
+async function createLevelDetectorNode(context, onLevel, options) {
+  const smoothing = options?.smoothing ?? 0.9;
+  const workletCode = createLevelDetectorWorkletCode(smoothing);
+  const blob = new Blob([workletCode], { type: "application/javascript" });
+  const url = URL.createObjectURL(blob);
+  try {
+    await context.audioWorklet.addModule(url);
+  } finally {
+    URL.revokeObjectURL(url);
+  }
+  const node = new AudioWorkletNode(context, "level-detector-processor", {
+    numberOfInputs: 1,
+    numberOfOutputs: 0
+  });
+  node.port.onmessage = (event) => {
+    const { levelDb } = event.data ?? {};
+    if (typeof levelDb === "number" && !Number.isNaN(levelDb)) {
+      onLevel(levelDb);
+    }
+  };
+  node.port.onmessageerror = (event) => {
+    console.error("Level detector port error", event);
+  };
+  return {
+    node,
+    dispose: () => {
+      try {
+        node.port.onmessage = null;
+        node.port.close();
+      } catch (error) {
+        console.error("Failed to dispose level detector node", error);
+      }
+    }
+  };
+}
+
+// src/vad/vad-state.ts
+var LevelBasedVAD = class {
+  config;
+  speaking = false;
+  pendingSpeechSince = null;
+  pendingSilenceSince = null;
+  constructor(config) {
+    this.config = {
+      minDb: config.minDb,
+      maxDb: config.maxDb,
+      speakOnRatio: config.speakOnRatio ?? 0.6,
+      speakOffRatio: config.speakOffRatio ?? 0.3,
+      hangoverMs: config.hangoverMs ?? 350,
+      attackMs: config.attackMs ?? 50,
+      releaseMs: config.releaseMs ?? 120
+    };
+  }
+  updateConfig(config) {
+    this.config = {
+      ...this.config,
+      ...config,
+      speakOnRatio: config.speakOnRatio ?? this.config.speakOnRatio,
+      speakOffRatio: config.speakOffRatio ?? this.config.speakOffRatio,
+      hangoverMs: config.hangoverMs ?? this.config.hangoverMs,
+      attackMs: config.attackMs ?? this.config.attackMs,
+      releaseMs: config.releaseMs ?? this.config.releaseMs
+    };
+  }
+  process(levelDb, timestampMs) {
+    const {
+      minDb,
+      maxDb,
+      speakOnRatio,
+      speakOffRatio,
+      hangoverMs,
+      attackMs,
+      releaseMs
+    } = this.config;
+    const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
+    const norm = (clamped - minDb) / Math.max(1, maxDb - minDb);
+    if (!this.speaking) {
+      if (norm >= speakOnRatio) {
+        this.pendingSpeechSince = this.pendingSpeechSince ?? timestampMs;
+        if (timestampMs - this.pendingSpeechSince >= attackMs) {
+          this.speaking = true;
+          this.pendingSpeechSince = null;
+          this.pendingSilenceSince = null;
+        }
+      } else {
+        this.pendingSpeechSince = null;
+      }
+    } else {
+      if (norm <= speakOffRatio) {
+        this.pendingSilenceSince = this.pendingSilenceSince ?? timestampMs;
+        const releaseWindow = Math.max(releaseMs, hangoverMs);
+        if (timestampMs - this.pendingSilenceSince >= releaseWindow) {
+          this.speaking = false;
+          this.pendingSilenceSince = null;
+          this.pendingSpeechSince = null;
+        }
+      } else {
+        this.pendingSilenceSince = null;
+      }
+    }
+    return {
+      speaking: this.speaking,
+      levelDb: clamped
+    };
+  }
+};
+
+// src/pipeline/remote-audio-monitor.ts
+async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+  const context = getAudioContext();
+  registerPipeline();
+  const fullConfig = {
+    speaking: {
+      minDb: config.speaking?.minDb ?? -55,
+      maxDb: config.speaking?.maxDb ?? -20,
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
+      speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
+      releaseMs: config.speaking?.releaseMs ?? 120
+    }
+  };
+  if (!sourceTrack || sourceTrack.kind !== "audio") {
+    throw new Error(
+      "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+    );
+  }
+  if (sourceTrack.readyState === "ended") {
+    throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+  }
+  const sourceStream = new MediaStream([sourceTrack]);
+  const sourceNode = context.createMediaStreamSource(sourceStream);
+  const emitter = (0, import_mitt.default)();
+  const vad = new LevelBasedVAD(fullConfig.speaking);
+  let lastState = { speaking: false, levelDb: -Infinity };
+  const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+    try {
+      const timestamp = context.currentTime * 1e3;
+      const nextState = vad.process(levelDb, timestamp);
+      const speakingChanged = nextState.speaking !== lastState.speaking;
+      const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+      if (speakingChanged || levelChanged) {
+        lastState = nextState;
+        emitter.emit("speakingChange", nextState);
+      }
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      emitter.emit("error", err);
+    }
+  });
+  sourceNode.connect(levelHandle.node);
+  function dispose() {
+    try {
+      sourceNode.disconnect();
+      levelHandle.node.disconnect();
+      levelHandle.dispose();
+    } catch (error) {
+      console.error("Error during remote monitor disposal", error);
+    } finally {
+      unregisterPipeline();
+    }
+  }
+  const handle = {
+    events: emitter,
+    get state() {
+      return lastState;
+    },
+    setConfig: (next) => {
+      try {
+        if (next.speaking) {
+          vad.updateConfig(next.speaking);
+          fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+        }
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error));
+        emitter.emit("error", err);
+      }
+    },
+    dispose
+  };
+  return handle;
+}
+// Annotate the CommonJS export names for ESM import in node:
+0 && (module.exports = {
+  createRemoteAudioMonitor
+});
```
package/dist/types.d.mts
CHANGED
```diff
@@ -29,6 +29,9 @@ interface LivekitSpeakingOptions {
   output?: OutputGainConfig;
   muteWhenSilent?: boolean;
 }
+interface RemoteSpeakingOptions {
+  speaking?: SpeakingDetectionConfig;
+}
 interface SpeakingState {
   speaking: boolean;
   levelDb: number;
@@ -52,4 +55,4 @@ interface SpeakingController {
   dispose(): void;
 }
 
-export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
+export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
```
package/dist/types.d.ts
CHANGED
```diff
@@ -29,6 +29,9 @@ interface LivekitSpeakingOptions {
   output?: OutputGainConfig;
   muteWhenSilent?: boolean;
 }
+interface RemoteSpeakingOptions {
+  speaking?: SpeakingDetectionConfig;
+}
 interface SpeakingState {
   speaking: boolean;
   levelDb: number;
@@ -52,4 +55,4 @@ interface SpeakingController {
   dispose(): void;
 }
 
-export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
+export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
```
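Since `RemoteSpeakingOptions` just wraps the existing `SpeakingDetectionConfig`, retuning a live controller reduces to one `setConfig` call. A sketch under one assumption: the exact parameter type of `SpeakingController.setConfig` is not visible in this diff, so it is assumed to accept this shape, matching how the JS above forwards it to `monitor.setConfig()`.

```ts
import type { RemoteSpeakingOptions, SpeakingController } from "@tensamin/audio";

// Assumption: setConfig accepts the RemoteSpeakingOptions shape; the JS
// implementation above forwards it unchanged to monitor.setConfig().
function makeDetectionStricter(controller: SpeakingController): void {
  const stricter: RemoteSpeakingOptions = {
    speaking: {
      speakOnRatio: 0.6, // require a louder normalized level before flagging speech
      hangoverMs: 300,   // drop the speaking flag sooner after silence
    },
  };
  controller.setConfig(stricter);
}
```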