@tensamin/audio 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/{chunk-GFLVGUTU.mjs → chunk-DYY2MXMU.mjs} +14 -6
- package/dist/{chunk-B36JBXOK.mjs → chunk-KEWK2OKV.mjs} +4 -4
- package/dist/{chunk-RLZVZ6D6.mjs → chunk-Q2I22TJG.mjs} +1 -1
- package/dist/{chunk-I5AR7XQD.mjs → chunk-SMZJFNRU.mjs} +2 -2
- package/dist/{chunk-3I4OQD2L.mjs → chunk-XZSFQJW4.mjs} +1 -1
- package/dist/extensibility/plugins.js +4 -4
- package/dist/extensibility/plugins.mjs +2 -2
- package/dist/index.js +18 -10
- package/dist/index.mjs +5 -5
- package/dist/livekit/integration.js +18 -10
- package/dist/livekit/integration.mjs +5 -5
- package/dist/pipeline/audio-pipeline.js +18 -10
- package/dist/pipeline/audio-pipeline.mjs +4 -4
- package/dist/types.d.mts +2 -2
- package/dist/types.d.ts +2 -2
- package/dist/vad/vad-node.js +4 -4
- package/dist/vad/vad-node.mjs +1 -1
- package/dist/vad/vad-state.js +14 -6
- package/dist/vad/vad-state.mjs +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -101,8 +101,8 @@ vad: {
|
|
|
101
101
|
energyVad?: {
|
|
102
102
|
smoothing: number; // Default: 0.95
|
|
103
103
|
initialNoiseFloor: number; // Default: 0.001
|
|
104
|
-
noiseFloorAdaptRateQuiet: number; // Default: 0.
|
|
105
|
-
noiseFloorAdaptRateLoud: number; // Default: 0.
|
|
104
|
+
noiseFloorAdaptRateQuiet: number; // Default: 0.01
|
|
105
|
+
noiseFloorAdaptRateLoud: number; // Default: 0.05
|
|
106
106
|
minSNR: number; // Default: 10.0 (dB)
|
|
107
107
|
snrRange: number; // Default: 10.0 (dB)
|
|
108
108
|
minEnergy: number; // Default: 0.001
|
|
@@ -61,25 +61,33 @@ var VADStateMachine = class {
|
|
|
61
61
|
newState = "silent";
|
|
62
62
|
this.lastSilenceTime = timestamp;
|
|
63
63
|
}
|
|
64
|
-
} else if (this.currentState === "speech_starting"
|
|
64
|
+
} else if (this.currentState === "speech_starting") {
|
|
65
|
+
if (probability >= stopThreshold) {
|
|
66
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
67
|
+
if (speechDuration >= minSpeechDurationMs) {
|
|
68
|
+
newState = "speaking";
|
|
69
|
+
} else {
|
|
70
|
+
newState = "speech_starting";
|
|
71
|
+
}
|
|
72
|
+
this.lastSpeechTime = timestamp;
|
|
73
|
+
} else {
|
|
74
|
+
newState = "silent";
|
|
75
|
+
this.lastSilenceTime = timestamp;
|
|
76
|
+
}
|
|
77
|
+
} else if (this.currentState === "speaking") {
|
|
65
78
|
if (probability >= stopThreshold) {
|
|
66
79
|
newState = "speaking";
|
|
67
80
|
this.lastSpeechTime = timestamp;
|
|
68
81
|
} else {
|
|
69
82
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
70
|
-
const speechDuration = timestamp - this.speechStartTime;
|
|
71
83
|
if (timeSinceSpeech < hangoverMs) {
|
|
72
84
|
newState = "speaking";
|
|
73
|
-
} else if (speechDuration < minSpeechDurationMs) {
|
|
74
|
-
newState = "silent";
|
|
75
|
-
this.lastSilenceTime = timestamp;
|
|
76
85
|
} else {
|
|
77
86
|
newState = "speech_ending";
|
|
78
87
|
this.lastSilenceTime = timestamp;
|
|
79
88
|
}
|
|
80
89
|
}
|
|
81
90
|
}
|
|
82
|
-
if (newState === "speech_starting") newState = "speaking";
|
|
83
91
|
if (newState === "speech_ending") newState = "silent";
|
|
84
92
|
this.currentState = newState;
|
|
85
93
|
return {
|
|
@@ -3,8 +3,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
3
3
|
const energyParams = vadConfig?.energyVad || {};
|
|
4
4
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
5
5
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
6
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
7
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
6
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
7
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
8
8
|
const minSNR = energyParams.minSNR ?? 10;
|
|
9
9
|
const snrRange = energyParams.snrRange ?? 10;
|
|
10
10
|
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
@@ -61,9 +61,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
61
61
|
|
|
62
62
|
let multiplier = 1.0;
|
|
63
63
|
if (this.isSpeaking) {
|
|
64
|
-
multiplier = 0.
|
|
64
|
+
multiplier = 0.05;
|
|
65
65
|
} else if (snrDb > 20) {
|
|
66
|
-
multiplier = 0.
|
|
66
|
+
multiplier = 0.2;
|
|
67
67
|
}
|
|
68
68
|
|
|
69
69
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
VADStateMachine
|
|
3
|
-
} from "./chunk-GFLVGUTU.mjs";
|
|
3
|
+
} from "./chunk-DYY2MXMU.mjs";
|
|
4
4
|
import {
|
|
5
5
|
getAudioContext,
|
|
6
6
|
registerPipeline,
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
import {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin
|
|
12
|
-
} from "./chunk-3I4OQD2L.mjs";
|
|
12
|
+
} from "./chunk-XZSFQJW4.mjs";
|
|
13
13
|
|
|
14
14
|
// src/pipeline/audio-pipeline.ts
|
|
15
15
|
import mitt from "mitt";
|
|
@@ -106,8 +106,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
106
106
|
const energyParams = vadConfig?.energyVad || {};
|
|
107
107
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
108
108
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
109
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
110
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
109
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
110
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
111
111
|
const minSNR = energyParams.minSNR ?? 10;
|
|
112
112
|
const snrRange = energyParams.snrRange ?? 10;
|
|
113
113
|
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
@@ -164,9 +164,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
164
164
|
|
|
165
165
|
let multiplier = 1.0;
|
|
166
166
|
if (this.isSpeaking) {
|
|
167
|
-
multiplier = 0.
|
|
167
|
+
multiplier = 0.05;
|
|
168
168
|
} else if (snrDb > 20) {
|
|
169
|
-
multiplier = 0.
|
|
169
|
+
multiplier = 0.2;
|
|
170
170
|
}
|
|
171
171
|
|
|
172
172
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
@@ -3,9 +3,9 @@ import {
|
|
|
3
3
|
getVADPlugin,
|
|
4
4
|
registerNoiseSuppressionPlugin,
|
|
5
5
|
registerVADPlugin
|
|
6
|
-
} from "../chunk-3I4OQD2L.mjs";
|
|
6
|
+
} from "../chunk-XZSFQJW4.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-B36JBXOK.mjs";
|
|
8
|
+
import "../chunk-KEWK2OKV.mjs";
|
|
9
9
|
export {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin,
|
package/dist/index.js
CHANGED
|
@@ -158,8 +158,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
158
158
|
const energyParams = vadConfig?.energyVad || {};
|
|
159
159
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
160
160
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
161
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
162
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
161
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
162
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
163
163
|
const minSNR = energyParams.minSNR ?? 10;
|
|
164
164
|
const snrRange = energyParams.snrRange ?? 10;
|
|
165
165
|
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
@@ -216,9 +216,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
216
216
|
|
|
217
217
|
let multiplier = 1.0;
|
|
218
218
|
if (this.isSpeaking) {
|
|
219
|
-
multiplier = 0.
|
|
219
|
+
multiplier = 0.05;
|
|
220
220
|
} else if (snrDb > 20) {
|
|
221
|
-
multiplier = 0.
|
|
221
|
+
multiplier = 0.2;
|
|
222
222
|
}
|
|
223
223
|
|
|
224
224
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
@@ -412,25 +412,33 @@ var VADStateMachine = class {
|
|
|
412
412
|
newState = "silent";
|
|
413
413
|
this.lastSilenceTime = timestamp;
|
|
414
414
|
}
|
|
415
|
-
} else if (this.currentState === "speech_starting"
|
|
415
|
+
} else if (this.currentState === "speech_starting") {
|
|
416
|
+
if (probability >= stopThreshold) {
|
|
417
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
418
|
+
if (speechDuration >= minSpeechDurationMs) {
|
|
419
|
+
newState = "speaking";
|
|
420
|
+
} else {
|
|
421
|
+
newState = "speech_starting";
|
|
422
|
+
}
|
|
423
|
+
this.lastSpeechTime = timestamp;
|
|
424
|
+
} else {
|
|
425
|
+
newState = "silent";
|
|
426
|
+
this.lastSilenceTime = timestamp;
|
|
427
|
+
}
|
|
428
|
+
} else if (this.currentState === "speaking") {
|
|
416
429
|
if (probability >= stopThreshold) {
|
|
417
430
|
newState = "speaking";
|
|
418
431
|
this.lastSpeechTime = timestamp;
|
|
419
432
|
} else {
|
|
420
433
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
421
|
-
const speechDuration = timestamp - this.speechStartTime;
|
|
422
434
|
if (timeSinceSpeech < hangoverMs) {
|
|
423
435
|
newState = "speaking";
|
|
424
|
-
} else if (speechDuration < minSpeechDurationMs) {
|
|
425
|
-
newState = "silent";
|
|
426
|
-
this.lastSilenceTime = timestamp;
|
|
427
436
|
} else {
|
|
428
437
|
newState = "speech_ending";
|
|
429
438
|
this.lastSilenceTime = timestamp;
|
|
430
439
|
}
|
|
431
440
|
}
|
|
432
441
|
}
|
|
433
|
-
if (newState === "speech_starting") newState = "speaking";
|
|
434
442
|
if (newState === "speech_ending") newState = "silent";
|
|
435
443
|
this.currentState = newState;
|
|
436
444
|
return {
|
package/dist/index.mjs
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import "./chunk-WBQAMGXK.mjs";
|
|
2
2
|
import {
|
|
3
3
|
attachProcessingToTrack
|
|
4
|
-
} from "./chunk-RLZVZ6D6.mjs";
|
|
4
|
+
} from "./chunk-Q2I22TJG.mjs";
|
|
5
5
|
import {
|
|
6
6
|
createAudioPipeline
|
|
7
|
-
} from "./chunk-I5AR7XQD.mjs";
|
|
7
|
+
} from "./chunk-SMZJFNRU.mjs";
|
|
8
8
|
import {
|
|
9
9
|
VADStateMachine
|
|
10
|
-
} from "./chunk-GFLVGUTU.mjs";
|
|
10
|
+
} from "./chunk-DYY2MXMU.mjs";
|
|
11
11
|
import {
|
|
12
12
|
closeAudioContext,
|
|
13
13
|
getAudioContext,
|
|
@@ -21,13 +21,13 @@ import {
|
|
|
21
21
|
getVADPlugin,
|
|
22
22
|
registerNoiseSuppressionPlugin,
|
|
23
23
|
registerVADPlugin
|
|
24
|
-
} from "./chunk-3I4OQD2L.mjs";
|
|
24
|
+
} from "./chunk-XZSFQJW4.mjs";
|
|
25
25
|
import {
|
|
26
26
|
RNNoisePlugin
|
|
27
27
|
} from "./chunk-XO6B3D4A.mjs";
|
|
28
28
|
import {
|
|
29
29
|
EnergyVADPlugin
|
|
30
|
-
} from "./chunk-B36JBXOK.mjs";
|
|
30
|
+
} from "./chunk-KEWK2OKV.mjs";
|
|
31
31
|
export {
|
|
32
32
|
EnergyVADPlugin,
|
|
33
33
|
RNNoisePlugin,
|
|
@@ -127,8 +127,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
127
127
|
const energyParams = vadConfig?.energyVad || {};
|
|
128
128
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
129
129
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
130
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
131
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
130
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
131
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
132
132
|
const minSNR = energyParams.minSNR ?? 10;
|
|
133
133
|
const snrRange = energyParams.snrRange ?? 10;
|
|
134
134
|
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
@@ -185,9 +185,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
185
185
|
|
|
186
186
|
let multiplier = 1.0;
|
|
187
187
|
if (this.isSpeaking) {
|
|
188
|
-
multiplier = 0.
|
|
188
|
+
multiplier = 0.05;
|
|
189
189
|
} else if (snrDb > 20) {
|
|
190
|
-
multiplier = 0.
|
|
190
|
+
multiplier = 0.2;
|
|
191
191
|
}
|
|
192
192
|
|
|
193
193
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
@@ -375,25 +375,33 @@ var VADStateMachine = class {
|
|
|
375
375
|
newState = "silent";
|
|
376
376
|
this.lastSilenceTime = timestamp;
|
|
377
377
|
}
|
|
378
|
-
} else if (this.currentState === "speech_starting"
|
|
378
|
+
} else if (this.currentState === "speech_starting") {
|
|
379
|
+
if (probability >= stopThreshold) {
|
|
380
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
381
|
+
if (speechDuration >= minSpeechDurationMs) {
|
|
382
|
+
newState = "speaking";
|
|
383
|
+
} else {
|
|
384
|
+
newState = "speech_starting";
|
|
385
|
+
}
|
|
386
|
+
this.lastSpeechTime = timestamp;
|
|
387
|
+
} else {
|
|
388
|
+
newState = "silent";
|
|
389
|
+
this.lastSilenceTime = timestamp;
|
|
390
|
+
}
|
|
391
|
+
} else if (this.currentState === "speaking") {
|
|
379
392
|
if (probability >= stopThreshold) {
|
|
380
393
|
newState = "speaking";
|
|
381
394
|
this.lastSpeechTime = timestamp;
|
|
382
395
|
} else {
|
|
383
396
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
384
|
-
const speechDuration = timestamp - this.speechStartTime;
|
|
385
397
|
if (timeSinceSpeech < hangoverMs) {
|
|
386
398
|
newState = "speaking";
|
|
387
|
-
} else if (speechDuration < minSpeechDurationMs) {
|
|
388
|
-
newState = "silent";
|
|
389
|
-
this.lastSilenceTime = timestamp;
|
|
390
399
|
} else {
|
|
391
400
|
newState = "speech_ending";
|
|
392
401
|
this.lastSilenceTime = timestamp;
|
|
393
402
|
}
|
|
394
403
|
}
|
|
395
404
|
}
|
|
396
|
-
if (newState === "speech_starting") newState = "speaking";
|
|
397
405
|
if (newState === "speech_ending") newState = "silent";
|
|
398
406
|
this.currentState = newState;
|
|
399
407
|
return {
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import {
|
|
2
2
|
attachProcessingToTrack
|
|
3
|
-
} from "../chunk-RLZVZ6D6.mjs";
|
|
4
|
-
import "../chunk-I5AR7XQD.mjs";
|
|
5
|
-
import "../chunk-GFLVGUTU.mjs";
|
|
3
|
+
} from "../chunk-Q2I22TJG.mjs";
|
|
4
|
+
import "../chunk-SMZJFNRU.mjs";
|
|
5
|
+
import "../chunk-DYY2MXMU.mjs";
|
|
6
6
|
import "../chunk-OZ7KMC4S.mjs";
|
|
7
|
-
import "../chunk-3I4OQD2L.mjs";
|
|
7
|
+
import "../chunk-XZSFQJW4.mjs";
|
|
8
8
|
import "../chunk-XO6B3D4A.mjs";
|
|
9
|
-
import "../chunk-B36JBXOK.mjs";
|
|
9
|
+
import "../chunk-KEWK2OKV.mjs";
|
|
10
10
|
export {
|
|
11
11
|
attachProcessingToTrack
|
|
12
12
|
};
|
|
@@ -125,8 +125,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
125
125
|
const energyParams = vadConfig?.energyVad || {};
|
|
126
126
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
127
127
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
128
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
129
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
128
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
129
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
130
130
|
const minSNR = energyParams.minSNR ?? 10;
|
|
131
131
|
const snrRange = energyParams.snrRange ?? 10;
|
|
132
132
|
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
@@ -183,9 +183,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
183
183
|
|
|
184
184
|
let multiplier = 1.0;
|
|
185
185
|
if (this.isSpeaking) {
|
|
186
|
-
multiplier = 0.
|
|
186
|
+
multiplier = 0.05;
|
|
187
187
|
} else if (snrDb > 20) {
|
|
188
|
-
multiplier = 0.
|
|
188
|
+
multiplier = 0.2;
|
|
189
189
|
}
|
|
190
190
|
|
|
191
191
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
@@ -373,25 +373,33 @@ var VADStateMachine = class {
|
|
|
373
373
|
newState = "silent";
|
|
374
374
|
this.lastSilenceTime = timestamp;
|
|
375
375
|
}
|
|
376
|
-
} else if (this.currentState === "speech_starting"
|
|
376
|
+
} else if (this.currentState === "speech_starting") {
|
|
377
|
+
if (probability >= stopThreshold) {
|
|
378
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
379
|
+
if (speechDuration >= minSpeechDurationMs) {
|
|
380
|
+
newState = "speaking";
|
|
381
|
+
} else {
|
|
382
|
+
newState = "speech_starting";
|
|
383
|
+
}
|
|
384
|
+
this.lastSpeechTime = timestamp;
|
|
385
|
+
} else {
|
|
386
|
+
newState = "silent";
|
|
387
|
+
this.lastSilenceTime = timestamp;
|
|
388
|
+
}
|
|
389
|
+
} else if (this.currentState === "speaking") {
|
|
377
390
|
if (probability >= stopThreshold) {
|
|
378
391
|
newState = "speaking";
|
|
379
392
|
this.lastSpeechTime = timestamp;
|
|
380
393
|
} else {
|
|
381
394
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
382
|
-
const speechDuration = timestamp - this.speechStartTime;
|
|
383
395
|
if (timeSinceSpeech < hangoverMs) {
|
|
384
396
|
newState = "speaking";
|
|
385
|
-
} else if (speechDuration < minSpeechDurationMs) {
|
|
386
|
-
newState = "silent";
|
|
387
|
-
this.lastSilenceTime = timestamp;
|
|
388
397
|
} else {
|
|
389
398
|
newState = "speech_ending";
|
|
390
399
|
this.lastSilenceTime = timestamp;
|
|
391
400
|
}
|
|
392
401
|
}
|
|
393
402
|
}
|
|
394
|
-
if (newState === "speech_starting") newState = "speaking";
|
|
395
403
|
if (newState === "speech_ending") newState = "silent";
|
|
396
404
|
this.currentState = newState;
|
|
397
405
|
return {
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
2
|
createAudioPipeline
|
|
3
|
-
} from "../chunk-I5AR7XQD.mjs";
|
|
4
|
-
import "../chunk-GFLVGUTU.mjs";
|
|
3
|
+
} from "../chunk-SMZJFNRU.mjs";
|
|
4
|
+
import "../chunk-DYY2MXMU.mjs";
|
|
5
5
|
import "../chunk-OZ7KMC4S.mjs";
|
|
6
|
-
import "../chunk-3I4OQD2L.mjs";
|
|
6
|
+
import "../chunk-XZSFQJW4.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-B36JBXOK.mjs";
|
|
8
|
+
import "../chunk-KEWK2OKV.mjs";
|
|
9
9
|
export {
|
|
10
10
|
createAudioPipeline
|
|
11
11
|
};
|
package/dist/types.d.mts
CHANGED
|
@@ -97,12 +97,12 @@ interface AudioProcessingConfig {
|
|
|
97
97
|
initialNoiseFloor?: number;
|
|
98
98
|
/**
|
|
99
99
|
* Rate at which noise floor adapts to quiet signals (0-1).
|
|
100
|
-
* Default: 0.
|
|
100
|
+
* Default: 0.01
|
|
101
101
|
*/
|
|
102
102
|
noiseFloorAdaptRateQuiet?: number;
|
|
103
103
|
/**
|
|
104
104
|
* Rate at which noise floor adapts to loud signals (0-1).
|
|
105
|
-
* Default: 0.
|
|
105
|
+
* Default: 0.05 (faster tracking of rising noise)
|
|
106
106
|
*/
|
|
107
107
|
noiseFloorAdaptRateLoud?: number;
|
|
108
108
|
/**
|
package/dist/types.d.ts
CHANGED
|
@@ -97,12 +97,12 @@ interface AudioProcessingConfig {
|
|
|
97
97
|
initialNoiseFloor?: number;
|
|
98
98
|
/**
|
|
99
99
|
* Rate at which noise floor adapts to quiet signals (0-1).
|
|
100
|
-
* Default: 0.
|
|
100
|
+
* Default: 0.01
|
|
101
101
|
*/
|
|
102
102
|
noiseFloorAdaptRateQuiet?: number;
|
|
103
103
|
/**
|
|
104
104
|
* Rate at which noise floor adapts to loud signals (0-1).
|
|
105
|
-
* Default: 0.
|
|
105
|
+
* Default: 0.05 (faster tracking of rising noise)
|
|
106
106
|
*/
|
|
107
107
|
noiseFloorAdaptRateLoud?: number;
|
|
108
108
|
/**
|
package/dist/vad/vad-node.js
CHANGED
|
@@ -27,8 +27,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
27
27
|
const energyParams = vadConfig?.energyVad || {};
|
|
28
28
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
29
29
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
30
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
31
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
30
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
31
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
32
32
|
const minSNR = energyParams.minSNR ?? 10;
|
|
33
33
|
const snrRange = energyParams.snrRange ?? 10;
|
|
34
34
|
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
@@ -85,9 +85,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
85
85
|
|
|
86
86
|
let multiplier = 1.0;
|
|
87
87
|
if (this.isSpeaking) {
|
|
88
|
-
multiplier = 0.
|
|
88
|
+
multiplier = 0.05;
|
|
89
89
|
} else if (snrDb > 20) {
|
|
90
|
-
multiplier = 0.
|
|
90
|
+
multiplier = 0.2;
|
|
91
91
|
}
|
|
92
92
|
|
|
93
93
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
package/dist/vad/vad-node.mjs
CHANGED
package/dist/vad/vad-state.js
CHANGED
|
@@ -85,25 +85,33 @@ var VADStateMachine = class {
|
|
|
85
85
|
newState = "silent";
|
|
86
86
|
this.lastSilenceTime = timestamp;
|
|
87
87
|
}
|
|
88
|
-
} else if (this.currentState === "speech_starting"
|
|
88
|
+
} else if (this.currentState === "speech_starting") {
|
|
89
|
+
if (probability >= stopThreshold) {
|
|
90
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
91
|
+
if (speechDuration >= minSpeechDurationMs) {
|
|
92
|
+
newState = "speaking";
|
|
93
|
+
} else {
|
|
94
|
+
newState = "speech_starting";
|
|
95
|
+
}
|
|
96
|
+
this.lastSpeechTime = timestamp;
|
|
97
|
+
} else {
|
|
98
|
+
newState = "silent";
|
|
99
|
+
this.lastSilenceTime = timestamp;
|
|
100
|
+
}
|
|
101
|
+
} else if (this.currentState === "speaking") {
|
|
89
102
|
if (probability >= stopThreshold) {
|
|
90
103
|
newState = "speaking";
|
|
91
104
|
this.lastSpeechTime = timestamp;
|
|
92
105
|
} else {
|
|
93
106
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
94
|
-
const speechDuration = timestamp - this.speechStartTime;
|
|
95
107
|
if (timeSinceSpeech < hangoverMs) {
|
|
96
108
|
newState = "speaking";
|
|
97
|
-
} else if (speechDuration < minSpeechDurationMs) {
|
|
98
|
-
newState = "silent";
|
|
99
|
-
this.lastSilenceTime = timestamp;
|
|
100
109
|
} else {
|
|
101
110
|
newState = "speech_ending";
|
|
102
111
|
this.lastSilenceTime = timestamp;
|
|
103
112
|
}
|
|
104
113
|
}
|
|
105
114
|
}
|
|
106
|
-
if (newState === "speech_starting") newState = "speaking";
|
|
107
115
|
if (newState === "speech_ending") newState = "silent";
|
|
108
116
|
this.currentState = newState;
|
|
109
117
|
return {
|
package/dist/vad/vad-state.mjs
CHANGED