@tensamin/audio 0.1.14 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -231
- package/dist/chunk-6BJ4XGSA.mjs +80 -0
- package/dist/chunk-AQ5RVY33.mjs +74 -0
- package/dist/chunk-IS37FHDN.mjs +33 -0
- package/dist/chunk-K4J3UUOR.mjs +178 -0
- package/dist/chunk-QNQK6QFB.mjs +71 -0
- package/dist/context/audio-context.d.mts +0 -24
- package/dist/context/audio-context.d.ts +0 -24
- package/dist/index.d.mts +2 -8
- package/dist/index.d.ts +2 -8
- package/dist/index.js +285 -680
- package/dist/index.mjs +8 -43
- package/dist/livekit/integration.d.mts +3 -7
- package/dist/livekit/integration.d.ts +3 -7
- package/dist/livekit/integration.js +280 -626
- package/dist/livekit/integration.mjs +7 -8
- package/dist/noise-suppression/deepfilternet-node.d.mts +12 -0
- package/dist/noise-suppression/deepfilternet-node.d.ts +12 -0
- package/dist/noise-suppression/deepfilternet-node.js +57 -0
- package/dist/noise-suppression/deepfilternet-node.mjs +6 -0
- package/dist/pipeline/audio-pipeline.d.mts +2 -2
- package/dist/pipeline/audio-pipeline.d.ts +2 -2
- package/dist/pipeline/audio-pipeline.js +219 -554
- package/dist/pipeline/audio-pipeline.mjs +4 -5
- package/dist/types.d.mts +42 -257
- package/dist/types.d.ts +42 -257
- package/dist/vad/vad-node.d.mts +7 -9
- package/dist/vad/vad-node.d.ts +7 -9
- package/dist/vad/vad-node.js +47 -156
- package/dist/vad/vad-node.mjs +3 -3
- package/dist/vad/vad-state.d.mts +9 -11
- package/dist/vad/vad-state.d.ts +9 -11
- package/dist/vad/vad-state.js +50 -79
- package/dist/vad/vad-state.mjs +3 -3
- package/package.json +21 -21
- package/dist/chunk-2G2JFHJY.mjs +0 -180
- package/dist/chunk-6F2HZUYO.mjs +0 -91
- package/dist/chunk-K4YLH73B.mjs +0 -103
- package/dist/chunk-R5M2DGAQ.mjs +0 -311
- package/dist/chunk-UFKIAMG3.mjs +0 -47
- package/dist/chunk-XO6B3D4A.mjs +0 -67
- package/dist/extensibility/plugins.d.mts +0 -9
- package/dist/extensibility/plugins.d.ts +0 -9
- package/dist/extensibility/plugins.js +0 -320
- package/dist/extensibility/plugins.mjs +0 -14
- package/dist/noise-suppression/rnnoise-node.d.mts +0 -10
- package/dist/noise-suppression/rnnoise-node.d.ts +0 -10
- package/dist/noise-suppression/rnnoise-node.js +0 -101
- package/dist/noise-suppression/rnnoise-node.mjs +0 -6
package/dist/vad/vad-node.js
CHANGED
|
@@ -20,185 +20,76 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/vad/vad-node.ts
|
|
21
21
|
var vad_node_exports = {};
|
|
22
22
|
__export(vad_node_exports, {
|
|
23
|
-
|
|
23
|
+
createLevelDetectorNode: () => createLevelDetectorNode
|
|
24
24
|
});
|
|
25
25
|
module.exports = __toCommonJS(vad_node_exports);
|
|
26
|
-
|
|
27
|
-
const energyParams = vadConfig?.energyVad || {};
|
|
28
|
-
const smoothing = energyParams.smoothing ?? 0.95;
|
|
29
|
-
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
30
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 2e-3;
|
|
31
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.02;
|
|
32
|
-
const minSNR = energyParams.minSNR ?? 12;
|
|
33
|
-
const snrRange = energyParams.snrRange ?? 10;
|
|
34
|
-
const minEnergy = energyParams.minEnergy ?? 3e-3;
|
|
26
|
+
function createLevelDetectorWorkletCode(smoothing) {
|
|
35
27
|
return `
|
|
36
|
-
class
|
|
28
|
+
class LevelDetectorProcessor extends AudioWorkletProcessor {
|
|
37
29
|
constructor() {
|
|
38
30
|
super();
|
|
31
|
+
this.smoothed = 0;
|
|
39
32
|
this.smoothing = ${smoothing};
|
|
40
|
-
this.energy = 0;
|
|
41
|
-
this.noiseFloor = ${initialNoiseFloor};
|
|
42
|
-
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
43
|
-
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
44
|
-
this.minSNR = ${minSNR};
|
|
45
|
-
this.snrRange = ${snrRange};
|
|
46
|
-
this.minEnergy = ${minEnergy};
|
|
47
|
-
this.isSpeaking = false;
|
|
48
|
-
|
|
49
|
-
this.port.onmessage = (event) => {
|
|
50
|
-
if (event.data && event.data.isSpeaking !== undefined) {
|
|
51
|
-
this.isSpeaking = event.data.isSpeaking;
|
|
52
|
-
}
|
|
53
|
-
};
|
|
54
33
|
}
|
|
55
34
|
|
|
56
|
-
process(inputs
|
|
35
|
+
process(inputs) {
|
|
57
36
|
const input = inputs[0];
|
|
58
|
-
if (!input ||
|
|
37
|
+
if (!input || input.length === 0) return true;
|
|
59
38
|
const channel = input[0];
|
|
60
|
-
|
|
61
|
-
|
|
39
|
+
if (!channel || channel.length === 0) return true;
|
|
40
|
+
|
|
62
41
|
let sum = 0;
|
|
63
|
-
let peak = 0;
|
|
64
42
|
for (let i = 0; i < channel.length; i++) {
|
|
65
|
-
const sample =
|
|
66
|
-
sum +=
|
|
67
|
-
peak = Math.max(peak, sample);
|
|
68
|
-
}
|
|
69
|
-
const instantRms = Math.sqrt(sum / channel.length);
|
|
70
|
-
|
|
71
|
-
// Smooth the RMS energy to reduce jitter
|
|
72
|
-
// this.energy acts as the smoothed RMS value
|
|
73
|
-
this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
|
|
74
|
-
|
|
75
|
-
// Calculate Crest Factor (peak-to-RMS ratio)
|
|
76
|
-
// Voice typically has crest factor of 2-4 (6-12dB)
|
|
77
|
-
// Keyboard clicks have crest factor of 10-30+ (20-30dB)
|
|
78
|
-
const crestFactor = peak / (instantRms + 1e-10);
|
|
79
|
-
const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
|
|
80
|
-
|
|
81
|
-
// Adaptive noise floor estimation using SMOOTHED energy (not instantaneous)
|
|
82
|
-
// This prevents sharp transients from affecting the noise floor
|
|
83
|
-
if (this.energy < this.noiseFloor) {
|
|
84
|
-
// Signal is quieter than noise floor, adapt downwards slowly
|
|
85
|
-
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + this.energy * this.noiseFloorAdaptRateQuiet;
|
|
86
|
-
} else {
|
|
87
|
-
// Calculate SNR based on smoothed energy
|
|
88
|
-
const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
|
|
89
|
-
const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
|
|
90
|
-
|
|
91
|
-
// Only adapt upwards if:
|
|
92
|
-
// 1. SNR is low (< 10dB) - likely just background noise
|
|
93
|
-
// 2. AND crest factor is low (< 15dB) - not a sharp transient
|
|
94
|
-
if (smoothedSnrDb < 10 && crestFactorDb < 15) {
|
|
95
|
-
// This is persistent background noise, adapt upwards
|
|
96
|
-
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
|
|
97
|
-
} else {
|
|
98
|
-
// Either high SNR (speech) or high crest factor (click) - adapt very slowly
|
|
99
|
-
const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
|
|
100
|
-
this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
// Ensure noise floor doesn't drop to absolute zero
|
|
105
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
|
|
106
|
-
|
|
107
|
-
// SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
108
|
-
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
109
|
-
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
110
|
-
|
|
111
|
-
// Map SNR dB to probability (0-1)
|
|
112
|
-
// Probability is 0 when snrDb <= minSNR
|
|
113
|
-
// Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
|
|
114
|
-
let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
|
|
115
|
-
|
|
116
|
-
// Apply absolute energy threshold with soft knee
|
|
117
|
-
if (this.energy < this.minEnergy) {
|
|
118
|
-
const energyRatio = this.energy / (this.minEnergy + 1e-6);
|
|
119
|
-
probability *= Math.pow(energyRatio, 2);
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
// Apply crest factor penalty
|
|
123
|
-
// Reject signals with high crest factor (sharp transients like keyboard clicks)
|
|
124
|
-
// Voice: 6-12dB, Keyboard: 20-30dB
|
|
125
|
-
// We penalize anything above 14dB
|
|
126
|
-
if (crestFactorDb > 14) {
|
|
127
|
-
const excess = crestFactorDb - 14;
|
|
128
|
-
const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
|
|
129
|
-
probability *= penalty;
|
|
43
|
+
const sample = channel[i];
|
|
44
|
+
sum += sample * sample;
|
|
130
45
|
}
|
|
131
|
-
|
|
132
|
-
this.
|
|
133
|
-
|
|
46
|
+
const rms = Math.sqrt(sum / channel.length);
|
|
47
|
+
this.smoothed = this.smoothed * this.smoothing + rms * (1 - this.smoothing);
|
|
48
|
+
const levelDb = 20 * Math.log10(Math.max(1e-8, this.smoothed));
|
|
49
|
+
this.port.postMessage({ levelDb });
|
|
134
50
|
return true;
|
|
135
51
|
}
|
|
136
52
|
}
|
|
137
|
-
|
|
53
|
+
|
|
54
|
+
registerProcessor('level-detector-processor', LevelDetectorProcessor);
|
|
138
55
|
`;
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
}
|
|
149
|
-
const workletCode = createEnergyVadWorkletCode(config);
|
|
150
|
-
const blob = new Blob([workletCode], {
|
|
151
|
-
type: "application/javascript"
|
|
152
|
-
});
|
|
153
|
-
const url = URL.createObjectURL(blob);
|
|
154
|
-
try {
|
|
155
|
-
await context.audioWorklet.addModule(url);
|
|
156
|
-
console.log("Energy VAD worklet loaded successfully");
|
|
157
|
-
} catch (e) {
|
|
158
|
-
const error = new Error(
|
|
159
|
-
`Failed to load Energy VAD worklet: ${e instanceof Error ? e.message : String(e)}`
|
|
160
|
-
);
|
|
161
|
-
console.error(error.message);
|
|
162
|
-
URL.revokeObjectURL(url);
|
|
163
|
-
throw error;
|
|
164
|
-
}
|
|
56
|
+
}
|
|
57
|
+
async function createLevelDetectorNode(context, onLevel, options) {
|
|
58
|
+
const smoothing = options?.smoothing ?? 0.9;
|
|
59
|
+
const workletCode = createLevelDetectorWorkletCode(smoothing);
|
|
60
|
+
const blob = new Blob([workletCode], { type: "application/javascript" });
|
|
61
|
+
const url = URL.createObjectURL(blob);
|
|
62
|
+
try {
|
|
63
|
+
await context.audioWorklet.addModule(url);
|
|
64
|
+
} finally {
|
|
165
65
|
URL.revokeObjectURL(url);
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
);
|
|
175
|
-
console.error(error.message);
|
|
176
|
-
throw error;
|
|
66
|
+
}
|
|
67
|
+
const node = new AudioWorkletNode(context, "level-detector-processor", {
|
|
68
|
+
numberOfInputs: 1,
|
|
69
|
+
numberOfOutputs: 0
|
|
70
|
+
});
|
|
71
|
+
node.port.onmessage = (event) => {
|
|
72
|
+
const { levelDb } = event.data ?? {};
|
|
73
|
+
if (typeof levelDb === "number" && !Number.isNaN(levelDb)) {
|
|
74
|
+
onLevel(levelDb);
|
|
177
75
|
}
|
|
178
|
-
|
|
76
|
+
};
|
|
77
|
+
node.port.onmessageerror = (event) => {
|
|
78
|
+
console.error("Level detector port error", event);
|
|
79
|
+
};
|
|
80
|
+
return {
|
|
81
|
+
node,
|
|
82
|
+
dispose: () => {
|
|
179
83
|
try {
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
onDecision(probability);
|
|
183
|
-
} else {
|
|
184
|
-
console.warn("Invalid VAD probability received:", event.data);
|
|
185
|
-
}
|
|
84
|
+
node.port.onmessage = null;
|
|
85
|
+
node.port.close();
|
|
186
86
|
} catch (error) {
|
|
187
|
-
console.error("
|
|
87
|
+
console.error("Failed to dispose level detector node", error);
|
|
188
88
|
}
|
|
189
|
-
};
|
|
190
|
-
node.port.onmessageerror = (event) => {
|
|
191
|
-
console.error("VAD port message error:", event);
|
|
192
|
-
};
|
|
193
|
-
return node;
|
|
194
|
-
}
|
|
195
|
-
updateSpeakingState(isSpeaking) {
|
|
196
|
-
if (this.workletNode) {
|
|
197
|
-
this.workletNode.port.postMessage({ isSpeaking });
|
|
198
89
|
}
|
|
199
|
-
}
|
|
200
|
-
}
|
|
90
|
+
};
|
|
91
|
+
}
|
|
201
92
|
// Annotate the CommonJS export names for ESM import in node:
|
|
202
93
|
0 && (module.exports = {
|
|
203
|
-
|
|
94
|
+
createLevelDetectorNode
|
|
204
95
|
});
|
package/dist/vad/vad-node.mjs
CHANGED
package/dist/vad/vad-state.d.mts
CHANGED
|
@@ -1,16 +1,14 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { SpeakingDetectionConfig, SpeakingState } from '../types.mjs';
|
|
2
2
|
import 'mitt';
|
|
3
3
|
|
|
4
|
-
declare class
|
|
4
|
+
declare class LevelBasedVAD {
|
|
5
5
|
private config;
|
|
6
|
-
private
|
|
7
|
-
private
|
|
8
|
-
private
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
updateConfig(config: Partial<AudioProcessingConfig["vad"]>): void;
|
|
13
|
-
processFrame(probability: number, timestamp: number): VADState;
|
|
6
|
+
private speaking;
|
|
7
|
+
private pendingSpeechSince;
|
|
8
|
+
private pendingSilenceSince;
|
|
9
|
+
constructor(config: SpeakingDetectionConfig);
|
|
10
|
+
updateConfig(config: Partial<SpeakingDetectionConfig>): void;
|
|
11
|
+
process(levelDb: number, timestampMs: number): SpeakingState;
|
|
14
12
|
}
|
|
15
13
|
|
|
16
|
-
export {
|
|
14
|
+
export { LevelBasedVAD };
|
package/dist/vad/vad-state.d.ts
CHANGED
|
@@ -1,16 +1,14 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { SpeakingDetectionConfig, SpeakingState } from '../types.js';
|
|
2
2
|
import 'mitt';
|
|
3
3
|
|
|
4
|
-
declare class
|
|
4
|
+
declare class LevelBasedVAD {
|
|
5
5
|
private config;
|
|
6
|
-
private
|
|
7
|
-
private
|
|
8
|
-
private
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
updateConfig(config: Partial<AudioProcessingConfig["vad"]>): void;
|
|
13
|
-
processFrame(probability: number, timestamp: number): VADState;
|
|
6
|
+
private speaking;
|
|
7
|
+
private pendingSpeechSince;
|
|
8
|
+
private pendingSilenceSince;
|
|
9
|
+
constructor(config: SpeakingDetectionConfig);
|
|
10
|
+
updateConfig(config: Partial<SpeakingDetectionConfig>): void;
|
|
11
|
+
process(levelDb: number, timestampMs: number): SpeakingState;
|
|
14
12
|
}
|
|
15
13
|
|
|
16
|
-
export {
|
|
14
|
+
export { LevelBasedVAD };
|
package/dist/vad/vad-state.js
CHANGED
|
@@ -20,108 +20,79 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/vad/vad-state.ts
|
|
21
21
|
var vad_state_exports = {};
|
|
22
22
|
__export(vad_state_exports, {
|
|
23
|
-
|
|
23
|
+
LevelBasedVAD: () => LevelBasedVAD
|
|
24
24
|
});
|
|
25
25
|
module.exports = __toCommonJS(vad_state_exports);
|
|
26
|
-
var
|
|
26
|
+
var LevelBasedVAD = class {
|
|
27
27
|
config;
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
lastSilenceTime = 0;
|
|
32
|
-
frameDurationMs = 20;
|
|
33
|
-
// Assumed frame duration, updated by calls
|
|
28
|
+
speaking = false;
|
|
29
|
+
pendingSpeechSince = null;
|
|
30
|
+
pendingSilenceSince = null;
|
|
34
31
|
constructor(config) {
|
|
35
32
|
this.config = {
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
hangoverMs: config?.hangoverMs ?? 300,
|
|
44
|
-
// Smooth for natural speech
|
|
45
|
-
preRollMs: config?.preRollMs ?? 250,
|
|
46
|
-
// Generous pre-roll
|
|
47
|
-
minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
|
|
48
|
-
// Aggressive transient rejection
|
|
49
|
-
minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
|
|
50
|
-
energyVad: {
|
|
51
|
-
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
52
|
-
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
53
|
-
noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 2e-3,
|
|
54
|
-
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.02,
|
|
55
|
-
minSNR: config?.energyVad?.minSNR ?? 12,
|
|
56
|
-
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
57
|
-
minEnergy: config?.energyVad?.minEnergy ?? 3e-3
|
|
58
|
-
}
|
|
33
|
+
minDb: config.minDb,
|
|
34
|
+
maxDb: config.maxDb,
|
|
35
|
+
speakOnRatio: config.speakOnRatio ?? 0.6,
|
|
36
|
+
speakOffRatio: config.speakOffRatio ?? 0.3,
|
|
37
|
+
hangoverMs: config.hangoverMs ?? 350,
|
|
38
|
+
attackMs: config.attackMs ?? 50,
|
|
39
|
+
releaseMs: config.releaseMs ?? 120
|
|
59
40
|
};
|
|
60
|
-
this.lastSilenceTime = Date.now();
|
|
61
41
|
}
|
|
62
42
|
updateConfig(config) {
|
|
63
|
-
this.config = {
|
|
43
|
+
this.config = {
|
|
44
|
+
...this.config,
|
|
45
|
+
...config,
|
|
46
|
+
speakOnRatio: config.speakOnRatio ?? this.config.speakOnRatio,
|
|
47
|
+
speakOffRatio: config.speakOffRatio ?? this.config.speakOffRatio,
|
|
48
|
+
hangoverMs: config.hangoverMs ?? this.config.hangoverMs,
|
|
49
|
+
attackMs: config.attackMs ?? this.config.attackMs,
|
|
50
|
+
releaseMs: config.releaseMs ?? this.config.releaseMs
|
|
51
|
+
};
|
|
64
52
|
}
|
|
65
|
-
|
|
53
|
+
process(levelDb, timestampMs) {
|
|
66
54
|
const {
|
|
67
|
-
|
|
68
|
-
|
|
55
|
+
minDb,
|
|
56
|
+
maxDb,
|
|
57
|
+
speakOnRatio,
|
|
58
|
+
speakOffRatio,
|
|
69
59
|
hangoverMs,
|
|
70
|
-
|
|
71
|
-
|
|
60
|
+
attackMs,
|
|
61
|
+
releaseMs
|
|
72
62
|
} = this.config;
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
this.
|
|
80
|
-
this.
|
|
81
|
-
|
|
82
|
-
newState = "silent";
|
|
63
|
+
const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
|
|
64
|
+
const norm = (clamped - minDb) / Math.max(1, maxDb - minDb);
|
|
65
|
+
if (!this.speaking) {
|
|
66
|
+
if (norm >= speakOnRatio) {
|
|
67
|
+
this.pendingSpeechSince = this.pendingSpeechSince ?? timestampMs;
|
|
68
|
+
if (timestampMs - this.pendingSpeechSince >= attackMs) {
|
|
69
|
+
this.speaking = true;
|
|
70
|
+
this.pendingSpeechSince = null;
|
|
71
|
+
this.pendingSilenceSince = null;
|
|
83
72
|
}
|
|
84
73
|
} else {
|
|
85
|
-
|
|
86
|
-
this.lastSilenceTime = timestamp;
|
|
74
|
+
this.pendingSpeechSince = null;
|
|
87
75
|
}
|
|
88
|
-
} else
|
|
89
|
-
if (
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
76
|
+
} else {
|
|
77
|
+
if (norm <= speakOffRatio) {
|
|
78
|
+
this.pendingSilenceSince = this.pendingSilenceSince ?? timestampMs;
|
|
79
|
+
const releaseWindow = Math.max(releaseMs, hangoverMs);
|
|
80
|
+
if (timestampMs - this.pendingSilenceSince >= releaseWindow) {
|
|
81
|
+
this.speaking = false;
|
|
82
|
+
this.pendingSilenceSince = null;
|
|
83
|
+
this.pendingSpeechSince = null;
|
|
95
84
|
}
|
|
96
|
-
this.lastSpeechTime = timestamp;
|
|
97
85
|
} else {
|
|
98
|
-
|
|
99
|
-
this.lastSilenceTime = timestamp;
|
|
100
|
-
}
|
|
101
|
-
} else if (this.currentState === "speaking") {
|
|
102
|
-
if (probability >= stopThreshold) {
|
|
103
|
-
newState = "speaking";
|
|
104
|
-
this.lastSpeechTime = timestamp;
|
|
105
|
-
} else {
|
|
106
|
-
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
107
|
-
if (timeSinceSpeech < hangoverMs) {
|
|
108
|
-
newState = "speaking";
|
|
109
|
-
} else {
|
|
110
|
-
newState = "speech_ending";
|
|
111
|
-
this.lastSilenceTime = timestamp;
|
|
112
|
-
}
|
|
86
|
+
this.pendingSilenceSince = null;
|
|
113
87
|
}
|
|
114
88
|
}
|
|
115
|
-
if (newState === "speech_ending") newState = "silent";
|
|
116
|
-
this.currentState = newState;
|
|
117
89
|
return {
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
state: newState
|
|
90
|
+
speaking: this.speaking,
|
|
91
|
+
levelDb: clamped
|
|
121
92
|
};
|
|
122
93
|
}
|
|
123
94
|
};
|
|
124
95
|
// Annotate the CommonJS export names for ESM import in node:
|
|
125
96
|
0 && (module.exports = {
|
|
126
|
-
|
|
97
|
+
LevelBasedVAD
|
|
127
98
|
});
|
package/dist/vad/vad-state.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,41 +1,41 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tensamin/audio",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"main": "dist/index.js",
|
|
5
|
-
"module": "dist/index.mjs",
|
|
6
|
-
"types": "dist/index.d.ts",
|
|
3
|
+
"version": "0.2.0",
|
|
7
4
|
"author": {
|
|
8
5
|
"email": "aloisianer@proton.me",
|
|
9
6
|
"name": "Alois"
|
|
10
7
|
},
|
|
11
|
-
"publishConfig": {
|
|
12
|
-
"access": "public"
|
|
13
|
-
},
|
|
14
8
|
"repository": {
|
|
15
9
|
"type": "git",
|
|
16
10
|
"url": "https://github.com/Tensamin/Audio"
|
|
17
11
|
},
|
|
18
|
-
"
|
|
19
|
-
"
|
|
20
|
-
|
|
21
|
-
"
|
|
22
|
-
"
|
|
12
|
+
"main": "dist/index.js",
|
|
13
|
+
"module": "dist/index.mjs",
|
|
14
|
+
"devDependencies": {
|
|
15
|
+
"tsup": "^8.5.1",
|
|
16
|
+
"@types/bun": "latest",
|
|
17
|
+
"@types/web": "^0.0.298",
|
|
18
|
+
"livekit-client": "^2.16.1",
|
|
19
|
+
"typescript": "^5.9.3"
|
|
23
20
|
},
|
|
24
21
|
"dependencies": {
|
|
25
|
-
"
|
|
22
|
+
"deepfilternet3-noise-filter": "^1.1.2",
|
|
26
23
|
"mitt": "^3.0.1"
|
|
27
24
|
},
|
|
28
25
|
"peerDependencies": {
|
|
29
26
|
"livekit-client": "^2.0.0"
|
|
30
27
|
},
|
|
31
|
-
"devDependencies": {
|
|
32
|
-
"tsup": "^8.5.1",
|
|
33
|
-
"@types/bun": "latest",
|
|
34
|
-
"@types/web": "^0.0.298",
|
|
35
|
-
"livekit-client": "^2.16.0",
|
|
36
|
-
"typescript": "^5.9.3"
|
|
37
|
-
},
|
|
38
28
|
"files": [
|
|
39
29
|
"dist"
|
|
40
|
-
]
|
|
30
|
+
],
|
|
31
|
+
"license": "MIT",
|
|
32
|
+
"publishConfig": {
|
|
33
|
+
"access": "public"
|
|
34
|
+
},
|
|
35
|
+
"scripts": {
|
|
36
|
+
"build": "tsup src/ --format cjs,esm --dts --out-dir dist --clean",
|
|
37
|
+
"format": "bunx prettier --write .",
|
|
38
|
+
"lint": "tsc"
|
|
39
|
+
},
|
|
40
|
+
"types": "dist/index.d.ts"
|
|
41
41
|
}
|