@tensamin/audio 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -3
- package/dist/chunk-6P2RDBW5.mjs +47 -0
- package/dist/chunk-EXH2PNUE.mjs +212 -0
- package/{src/vad/vad-state.ts → dist/chunk-JJASCVEW.mjs} +21 -33
- package/dist/chunk-OZ7KMC4S.mjs +46 -0
- package/dist/chunk-R5JVHKWA.mjs +98 -0
- package/dist/chunk-WBQAMGXK.mjs +0 -0
- package/dist/chunk-XMTQPMQ6.mjs +91 -0
- package/dist/chunk-XO6B3D4A.mjs +67 -0
- package/dist/context/audio-context.d.mts +32 -0
- package/dist/context/audio-context.d.ts +32 -0
- package/dist/context/audio-context.js +75 -0
- package/dist/context/audio-context.mjs +16 -0
- package/dist/extensibility/plugins.d.mts +9 -0
- package/dist/extensibility/plugins.d.ts +9 -0
- package/dist/extensibility/plugins.js +238 -0
- package/dist/extensibility/plugins.mjs +14 -0
- package/dist/index.d.mts +10 -216
- package/dist/index.d.ts +10 -216
- package/dist/index.js +298 -80
- package/dist/index.mjs +29 -352
- package/dist/livekit/integration.d.mts +11 -0
- package/dist/livekit/integration.d.ts +11 -0
- package/dist/livekit/integration.js +585 -0
- package/dist/livekit/integration.mjs +12 -0
- package/dist/noise-suppression/rnnoise-node.d.mts +10 -0
- package/dist/noise-suppression/rnnoise-node.d.ts +10 -0
- package/dist/noise-suppression/rnnoise-node.js +101 -0
- package/dist/noise-suppression/rnnoise-node.mjs +6 -0
- package/dist/pipeline/audio-pipeline.d.mts +6 -0
- package/dist/pipeline/audio-pipeline.d.ts +6 -0
- package/dist/pipeline/audio-pipeline.js +499 -0
- package/dist/pipeline/audio-pipeline.mjs +11 -0
- package/dist/types.d.mts +155 -0
- package/dist/types.d.ts +155 -0
- package/dist/types.js +18 -0
- package/dist/types.mjs +1 -0
- package/dist/vad/vad-node.d.mts +9 -0
- package/dist/vad/vad-node.d.ts +9 -0
- package/dist/vad/vad-node.js +122 -0
- package/dist/vad/vad-node.mjs +6 -0
- package/dist/vad/vad-state.d.mts +15 -0
- package/dist/vad/vad-state.d.ts +15 -0
- package/dist/vad/vad-state.js +83 -0
- package/dist/vad/vad-state.mjs +6 -0
- package/package.json +8 -5
- package/.github/workflows/publish.yml +0 -29
- package/bun.lock +0 -258
- package/src/context/audio-context.ts +0 -69
- package/src/extensibility/plugins.ts +0 -45
- package/src/index.ts +0 -8
- package/src/livekit/integration.ts +0 -61
- package/src/noise-suppression/rnnoise-node.ts +0 -62
- package/src/pipeline/audio-pipeline.ts +0 -154
- package/src/types.ts +0 -167
- package/src/vad/vad-node.ts +0 -78
- package/tsconfig.json +0 -46
package/dist/types.d.mts
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import { Emitter } from 'mitt';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Configuration for the audio processing pipeline.
|
|
5
|
+
*/
|
|
6
|
+
interface AudioProcessingConfig {
|
|
7
|
+
/**
|
|
8
|
+
* Noise suppression configuration.
|
|
9
|
+
*/
|
|
10
|
+
noiseSuppression?: {
|
|
11
|
+
enabled: boolean;
|
|
12
|
+
/**
|
|
13
|
+
* Path or URL to the RNNoise WASM binary.
|
|
14
|
+
* REQUIRED if enabled.
|
|
15
|
+
*/
|
|
16
|
+
wasmUrl?: string;
|
|
17
|
+
/**
|
|
18
|
+
* Path or URL to the RNNoise SIMD WASM binary.
|
|
19
|
+
* REQUIRED if enabled.
|
|
20
|
+
*/
|
|
21
|
+
simdUrl?: string;
|
|
22
|
+
/**
|
|
23
|
+
* Path or URL to the RNNoise worklet script.
|
|
24
|
+
* REQUIRED if enabled.
|
|
25
|
+
*/
|
|
26
|
+
workletUrl?: string;
|
|
27
|
+
/**
|
|
28
|
+
* Plugin name to use. Defaults to 'rnnoise-ns'.
|
|
29
|
+
*/
|
|
30
|
+
pluginName?: string;
|
|
31
|
+
};
|
|
32
|
+
/**
|
|
33
|
+
* Voice Activity Detection (VAD) configuration.
|
|
34
|
+
*/
|
|
35
|
+
vad?: {
|
|
36
|
+
enabled: boolean;
|
|
37
|
+
/**
|
|
38
|
+
* Plugin name to use. Defaults to 'rnnoise-vad' or 'energy-vad'.
|
|
39
|
+
*/
|
|
40
|
+
pluginName?: string;
|
|
41
|
+
/**
|
|
42
|
+
* Probability threshold for speech onset (0-1).
|
|
43
|
+
* Default: 0.5
|
|
44
|
+
*/
|
|
45
|
+
startThreshold?: number;
|
|
46
|
+
/**
|
|
47
|
+
* Probability threshold for speech offset (0-1).
|
|
48
|
+
* Default: 0.4
|
|
49
|
+
*/
|
|
50
|
+
stopThreshold?: number;
|
|
51
|
+
/**
|
|
52
|
+
* Time in ms to wait after speech stops before considering it silent.
|
|
53
|
+
* Default: 300ms
|
|
54
|
+
*/
|
|
55
|
+
hangoverMs?: number;
|
|
56
|
+
/**
|
|
57
|
+
* Time in ms of audio to buffer before speech onset to avoid cutting the start.
|
|
58
|
+
* Default: 200ms
|
|
59
|
+
*/
|
|
60
|
+
preRollMs?: number;
|
|
61
|
+
};
|
|
62
|
+
/**
|
|
63
|
+
* Output gain and muting configuration.
|
|
64
|
+
*/
|
|
65
|
+
output?: {
|
|
66
|
+
/**
|
|
67
|
+
* Gain to apply when speaking (0-1+). Default: 1.0
|
|
68
|
+
*/
|
|
69
|
+
speechGain?: number;
|
|
70
|
+
/**
|
|
71
|
+
* Gain to apply when silent (0-1). Default: 0.0 (mute)
|
|
72
|
+
*/
|
|
73
|
+
silenceGain?: number;
|
|
74
|
+
/**
|
|
75
|
+
* Time in seconds to ramp gain changes. Default: 0.02
|
|
76
|
+
*/
|
|
77
|
+
gainRampTime?: number;
|
|
78
|
+
};
|
|
79
|
+
/**
|
|
80
|
+
* LiveKit integration configuration.
|
|
81
|
+
*/
|
|
82
|
+
livekit?: {
|
|
83
|
+
/**
|
|
84
|
+
* Whether to call track.mute()/unmute() on the LocalAudioTrack based on VAD.
|
|
85
|
+
* This saves bandwidth but has more signaling overhead.
|
|
86
|
+
* Default: false (uses gain gating only)
|
|
87
|
+
*/
|
|
88
|
+
manageTrackMute?: boolean;
|
|
89
|
+
};
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Represents the state of Voice Activity Detection.
|
|
93
|
+
*/
|
|
94
|
+
interface VADState {
|
|
95
|
+
/**
|
|
96
|
+
* Whether speech is currently detected (after hysteresis).
|
|
97
|
+
*/
|
|
98
|
+
isSpeaking: boolean;
|
|
99
|
+
/**
|
|
100
|
+
* Raw probability of speech from the VAD model (0-1).
|
|
101
|
+
*/
|
|
102
|
+
probability: number;
|
|
103
|
+
/**
|
|
104
|
+
* Current state enum.
|
|
105
|
+
*/
|
|
106
|
+
state: "silent" | "speech_starting" | "speaking" | "speech_ending";
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Events emitted by the audio pipeline.
|
|
110
|
+
*/
|
|
111
|
+
type AudioPipelineEvents = {
|
|
112
|
+
vadChange: VADState;
|
|
113
|
+
error: Error;
|
|
114
|
+
};
|
|
115
|
+
/**
|
|
116
|
+
* Handle to a running audio processing pipeline.
|
|
117
|
+
*/
|
|
118
|
+
interface AudioPipelineHandle {
|
|
119
|
+
/**
|
|
120
|
+
* The processed MediaStreamTrack.
|
|
121
|
+
*/
|
|
122
|
+
readonly processedTrack: MediaStreamTrack;
|
|
123
|
+
/**
|
|
124
|
+
* Event emitter for VAD state and errors.
|
|
125
|
+
*/
|
|
126
|
+
readonly events: Emitter<AudioPipelineEvents>;
|
|
127
|
+
/**
|
|
128
|
+
* Current VAD state.
|
|
129
|
+
*/
|
|
130
|
+
readonly state: VADState;
|
|
131
|
+
/**
|
|
132
|
+
* Update configuration at runtime.
|
|
133
|
+
*/
|
|
134
|
+
setConfig(config: Partial<AudioProcessingConfig>): void;
|
|
135
|
+
/**
|
|
136
|
+
* Stop processing and release resources.
|
|
137
|
+
*/
|
|
138
|
+
dispose(): void;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Interface for a Noise Suppression Plugin.
|
|
142
|
+
*/
|
|
143
|
+
interface NoiseSuppressionPlugin {
|
|
144
|
+
name: string;
|
|
145
|
+
createNode(context: AudioContext, config: AudioProcessingConfig["noiseSuppression"]): Promise<AudioNode>;
|
|
146
|
+
}
|
|
147
|
+
/**
|
|
148
|
+
* Interface for a VAD Plugin.
|
|
149
|
+
*/
|
|
150
|
+
interface VADPlugin {
|
|
151
|
+
name: string;
|
|
152
|
+
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
export type { AudioPipelineEvents, AudioPipelineHandle, AudioProcessingConfig, NoiseSuppressionPlugin, VADPlugin, VADState };
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import { Emitter } from 'mitt';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Configuration for the audio processing pipeline.
|
|
5
|
+
*/
|
|
6
|
+
interface AudioProcessingConfig {
|
|
7
|
+
/**
|
|
8
|
+
* Noise suppression configuration.
|
|
9
|
+
*/
|
|
10
|
+
noiseSuppression?: {
|
|
11
|
+
enabled: boolean;
|
|
12
|
+
/**
|
|
13
|
+
* Path or URL to the RNNoise WASM binary.
|
|
14
|
+
* REQUIRED if enabled.
|
|
15
|
+
*/
|
|
16
|
+
wasmUrl?: string;
|
|
17
|
+
/**
|
|
18
|
+
* Path or URL to the RNNoise SIMD WASM binary.
|
|
19
|
+
* REQUIRED if enabled.
|
|
20
|
+
*/
|
|
21
|
+
simdUrl?: string;
|
|
22
|
+
/**
|
|
23
|
+
* Path or URL to the RNNoise worklet script.
|
|
24
|
+
* REQUIRED if enabled.
|
|
25
|
+
*/
|
|
26
|
+
workletUrl?: string;
|
|
27
|
+
/**
|
|
28
|
+
* Plugin name to use. Defaults to 'rnnoise-ns'.
|
|
29
|
+
*/
|
|
30
|
+
pluginName?: string;
|
|
31
|
+
};
|
|
32
|
+
/**
|
|
33
|
+
* Voice Activity Detection (VAD) configuration.
|
|
34
|
+
*/
|
|
35
|
+
vad?: {
|
|
36
|
+
enabled: boolean;
|
|
37
|
+
/**
|
|
38
|
+
* Plugin name to use. Defaults to 'rnnoise-vad' or 'energy-vad'.
|
|
39
|
+
*/
|
|
40
|
+
pluginName?: string;
|
|
41
|
+
/**
|
|
42
|
+
* Probability threshold for speech onset (0-1).
|
|
43
|
+
* Default: 0.5
|
|
44
|
+
*/
|
|
45
|
+
startThreshold?: number;
|
|
46
|
+
/**
|
|
47
|
+
* Probability threshold for speech offset (0-1).
|
|
48
|
+
* Default: 0.4
|
|
49
|
+
*/
|
|
50
|
+
stopThreshold?: number;
|
|
51
|
+
/**
|
|
52
|
+
* Time in ms to wait after speech stops before considering it silent.
|
|
53
|
+
* Default: 300ms
|
|
54
|
+
*/
|
|
55
|
+
hangoverMs?: number;
|
|
56
|
+
/**
|
|
57
|
+
* Time in ms of audio to buffer before speech onset to avoid cutting the start.
|
|
58
|
+
* Default: 200ms
|
|
59
|
+
*/
|
|
60
|
+
preRollMs?: number;
|
|
61
|
+
};
|
|
62
|
+
/**
|
|
63
|
+
* Output gain and muting configuration.
|
|
64
|
+
*/
|
|
65
|
+
output?: {
|
|
66
|
+
/**
|
|
67
|
+
* Gain to apply when speaking (0-1+). Default: 1.0
|
|
68
|
+
*/
|
|
69
|
+
speechGain?: number;
|
|
70
|
+
/**
|
|
71
|
+
* Gain to apply when silent (0-1). Default: 0.0 (mute)
|
|
72
|
+
*/
|
|
73
|
+
silenceGain?: number;
|
|
74
|
+
/**
|
|
75
|
+
* Time in seconds to ramp gain changes. Default: 0.02
|
|
76
|
+
*/
|
|
77
|
+
gainRampTime?: number;
|
|
78
|
+
};
|
|
79
|
+
/**
|
|
80
|
+
* LiveKit integration configuration.
|
|
81
|
+
*/
|
|
82
|
+
livekit?: {
|
|
83
|
+
/**
|
|
84
|
+
* Whether to call track.mute()/unmute() on the LocalAudioTrack based on VAD.
|
|
85
|
+
* This saves bandwidth but has more signaling overhead.
|
|
86
|
+
* Default: false (uses gain gating only)
|
|
87
|
+
*/
|
|
88
|
+
manageTrackMute?: boolean;
|
|
89
|
+
};
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Represents the state of Voice Activity Detection.
|
|
93
|
+
*/
|
|
94
|
+
interface VADState {
|
|
95
|
+
/**
|
|
96
|
+
* Whether speech is currently detected (after hysteresis).
|
|
97
|
+
*/
|
|
98
|
+
isSpeaking: boolean;
|
|
99
|
+
/**
|
|
100
|
+
* Raw probability of speech from the VAD model (0-1).
|
|
101
|
+
*/
|
|
102
|
+
probability: number;
|
|
103
|
+
/**
|
|
104
|
+
* Current state enum.
|
|
105
|
+
*/
|
|
106
|
+
state: "silent" | "speech_starting" | "speaking" | "speech_ending";
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Events emitted by the audio pipeline.
|
|
110
|
+
*/
|
|
111
|
+
type AudioPipelineEvents = {
|
|
112
|
+
vadChange: VADState;
|
|
113
|
+
error: Error;
|
|
114
|
+
};
|
|
115
|
+
/**
|
|
116
|
+
* Handle to a running audio processing pipeline.
|
|
117
|
+
*/
|
|
118
|
+
interface AudioPipelineHandle {
|
|
119
|
+
/**
|
|
120
|
+
* The processed MediaStreamTrack.
|
|
121
|
+
*/
|
|
122
|
+
readonly processedTrack: MediaStreamTrack;
|
|
123
|
+
/**
|
|
124
|
+
* Event emitter for VAD state and errors.
|
|
125
|
+
*/
|
|
126
|
+
readonly events: Emitter<AudioPipelineEvents>;
|
|
127
|
+
/**
|
|
128
|
+
* Current VAD state.
|
|
129
|
+
*/
|
|
130
|
+
readonly state: VADState;
|
|
131
|
+
/**
|
|
132
|
+
* Update configuration at runtime.
|
|
133
|
+
*/
|
|
134
|
+
setConfig(config: Partial<AudioProcessingConfig>): void;
|
|
135
|
+
/**
|
|
136
|
+
* Stop processing and release resources.
|
|
137
|
+
*/
|
|
138
|
+
dispose(): void;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Interface for a Noise Suppression Plugin.
|
|
142
|
+
*/
|
|
143
|
+
interface NoiseSuppressionPlugin {
|
|
144
|
+
name: string;
|
|
145
|
+
createNode(context: AudioContext, config: AudioProcessingConfig["noiseSuppression"]): Promise<AudioNode>;
|
|
146
|
+
}
|
|
147
|
+
/**
|
|
148
|
+
* Interface for a VAD Plugin.
|
|
149
|
+
*/
|
|
150
|
+
interface VADPlugin {
|
|
151
|
+
name: string;
|
|
152
|
+
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
export type { AudioPipelineEvents, AudioPipelineHandle, AudioProcessingConfig, NoiseSuppressionPlugin, VADPlugin, VADState };
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __copyProps = (to, from, except, desc) => {
|
|
7
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
8
|
+
for (let key of __getOwnPropNames(from))
|
|
9
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
10
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
11
|
+
}
|
|
12
|
+
return to;
|
|
13
|
+
};
|
|
14
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
15
|
+
|
|
16
|
+
// src/types.ts
|
|
17
|
+
var types_exports = {};
|
|
18
|
+
module.exports = __toCommonJS(types_exports);
|
package/dist/types.mjs
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import "./chunk-WBQAMGXK.mjs";
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { VADPlugin, AudioProcessingConfig } from '../types.mjs';
|
|
2
|
+
import 'mitt';
|
|
3
|
+
|
|
4
|
+
declare class EnergyVADPlugin implements VADPlugin {
|
|
5
|
+
name: string;
|
|
6
|
+
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
export { EnergyVADPlugin };
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { VADPlugin, AudioProcessingConfig } from '../types.js';
|
|
2
|
+
import 'mitt';
|
|
3
|
+
|
|
4
|
+
declare class EnergyVADPlugin implements VADPlugin {
|
|
5
|
+
name: string;
|
|
6
|
+
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
export { EnergyVADPlugin };
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// src/vad/vad-node.ts
|
|
21
|
+
var vad_node_exports = {};
|
|
22
|
+
__export(vad_node_exports, {
|
|
23
|
+
EnergyVADPlugin: () => EnergyVADPlugin
|
|
24
|
+
});
|
|
25
|
+
module.exports = __toCommonJS(vad_node_exports);
|
|
26
|
+
var energyVadWorkletCode = `
|
|
27
|
+
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
28
|
+
constructor() {
|
|
29
|
+
super();
|
|
30
|
+
this.smoothing = 0.95;
|
|
31
|
+
this.energy = 0;
|
|
32
|
+
this.noiseFloor = 0.001;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
process(inputs, outputs, parameters) {
|
|
36
|
+
const input = inputs[0];
|
|
37
|
+
if (!input || !input.length) return true;
|
|
38
|
+
const channel = input[0];
|
|
39
|
+
|
|
40
|
+
// Calculate RMS
|
|
41
|
+
let sum = 0;
|
|
42
|
+
for (let i = 0; i < channel.length; i++) {
|
|
43
|
+
sum += channel[i] * channel[i];
|
|
44
|
+
}
|
|
45
|
+
const rms = Math.sqrt(sum / channel.length);
|
|
46
|
+
|
|
47
|
+
// Simple adaptive noise floor (very basic)
|
|
48
|
+
if (rms < this.noiseFloor) {
|
|
49
|
+
this.noiseFloor = this.noiseFloor * 0.99 + rms * 0.01;
|
|
50
|
+
} else {
|
|
51
|
+
this.noiseFloor = this.noiseFloor * 0.999 + rms * 0.001;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Calculate "probability" based on SNR
|
|
55
|
+
// This is a heuristic mapping from energy to 0-1
|
|
56
|
+
const snr = rms / (this.noiseFloor + 1e-6);
|
|
57
|
+
const probability = Math.min(1, Math.max(0, (snr - 1.5) / 10)); // Arbitrary scaling
|
|
58
|
+
|
|
59
|
+
this.port.postMessage({ probability });
|
|
60
|
+
|
|
61
|
+
return true;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
65
|
+
`;
|
|
66
|
+
var EnergyVADPlugin = class {
|
|
67
|
+
name = "energy-vad";
|
|
68
|
+
async createNode(context, config, onDecision) {
|
|
69
|
+
if (!config?.enabled) {
|
|
70
|
+
console.log("VAD disabled, using passthrough node");
|
|
71
|
+
const pass = context.createGain();
|
|
72
|
+
return pass;
|
|
73
|
+
}
|
|
74
|
+
const blob = new Blob([energyVadWorkletCode], {
|
|
75
|
+
type: "application/javascript"
|
|
76
|
+
});
|
|
77
|
+
const url = URL.createObjectURL(blob);
|
|
78
|
+
try {
|
|
79
|
+
await context.audioWorklet.addModule(url);
|
|
80
|
+
console.log("Energy VAD worklet loaded successfully");
|
|
81
|
+
} catch (e) {
|
|
82
|
+
const error = new Error(
|
|
83
|
+
`Failed to load Energy VAD worklet: ${e instanceof Error ? e.message : String(e)}`
|
|
84
|
+
);
|
|
85
|
+
console.error(error.message);
|
|
86
|
+
URL.revokeObjectURL(url);
|
|
87
|
+
throw error;
|
|
88
|
+
}
|
|
89
|
+
URL.revokeObjectURL(url);
|
|
90
|
+
let node;
|
|
91
|
+
try {
|
|
92
|
+
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
93
|
+
console.log("Energy VAD node created successfully");
|
|
94
|
+
} catch (e) {
|
|
95
|
+
const error = new Error(
|
|
96
|
+
`Failed to create Energy VAD node: ${e instanceof Error ? e.message : String(e)}`
|
|
97
|
+
);
|
|
98
|
+
console.error(error.message);
|
|
99
|
+
throw error;
|
|
100
|
+
}
|
|
101
|
+
node.port.onmessage = (event) => {
|
|
102
|
+
try {
|
|
103
|
+
const { probability } = event.data;
|
|
104
|
+
if (typeof probability === "number" && !isNaN(probability)) {
|
|
105
|
+
onDecision(probability);
|
|
106
|
+
} else {
|
|
107
|
+
console.warn("Invalid VAD probability received:", event.data);
|
|
108
|
+
}
|
|
109
|
+
} catch (error) {
|
|
110
|
+
console.error("Error in VAD message handler:", error);
|
|
111
|
+
}
|
|
112
|
+
};
|
|
113
|
+
node.port.onmessageerror = (event) => {
|
|
114
|
+
console.error("VAD port message error:", event);
|
|
115
|
+
};
|
|
116
|
+
return node;
|
|
117
|
+
}
|
|
118
|
+
};
|
|
119
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
120
|
+
0 && (module.exports = {
|
|
121
|
+
EnergyVADPlugin
|
|
122
|
+
});
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { AudioProcessingConfig, VADState } from '../types.mjs';
|
|
2
|
+
import 'mitt';
|
|
3
|
+
|
|
4
|
+
declare class VADStateMachine {
|
|
5
|
+
private config;
|
|
6
|
+
private currentState;
|
|
7
|
+
private lastSpeechTime;
|
|
8
|
+
private speechStartTime;
|
|
9
|
+
private frameDurationMs;
|
|
10
|
+
constructor(config: AudioProcessingConfig["vad"]);
|
|
11
|
+
updateConfig(config: Partial<AudioProcessingConfig["vad"]>): void;
|
|
12
|
+
processFrame(probability: number, timestamp: number): VADState;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export { VADStateMachine };
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { AudioProcessingConfig, VADState } from '../types.js';
|
|
2
|
+
import 'mitt';
|
|
3
|
+
|
|
4
|
+
declare class VADStateMachine {
|
|
5
|
+
private config;
|
|
6
|
+
private currentState;
|
|
7
|
+
private lastSpeechTime;
|
|
8
|
+
private speechStartTime;
|
|
9
|
+
private frameDurationMs;
|
|
10
|
+
constructor(config: AudioProcessingConfig["vad"]);
|
|
11
|
+
updateConfig(config: Partial<AudioProcessingConfig["vad"]>): void;
|
|
12
|
+
processFrame(probability: number, timestamp: number): VADState;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export { VADStateMachine };
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// src/vad/vad-state.ts
|
|
21
|
+
var vad_state_exports = {};
|
|
22
|
+
__export(vad_state_exports, {
|
|
23
|
+
VADStateMachine: () => VADStateMachine
|
|
24
|
+
});
|
|
25
|
+
module.exports = __toCommonJS(vad_state_exports);
|
|
26
|
+
var VADStateMachine = class {
|
|
27
|
+
config;
|
|
28
|
+
currentState = "silent";
|
|
29
|
+
lastSpeechTime = 0;
|
|
30
|
+
speechStartTime = 0;
|
|
31
|
+
frameDurationMs = 20;
|
|
32
|
+
// Assumed frame duration, updated by calls
|
|
33
|
+
constructor(config) {
|
|
34
|
+
this.config = {
|
|
35
|
+
enabled: config?.enabled ?? true,
|
|
36
|
+
pluginName: config?.pluginName ?? "energy-vad",
|
|
37
|
+
startThreshold: config?.startThreshold ?? 0.5,
|
|
38
|
+
stopThreshold: config?.stopThreshold ?? 0.4,
|
|
39
|
+
hangoverMs: config?.hangoverMs ?? 300,
|
|
40
|
+
preRollMs: config?.preRollMs ?? 200
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
updateConfig(config) {
|
|
44
|
+
this.config = { ...this.config, ...config };
|
|
45
|
+
}
|
|
46
|
+
processFrame(probability, timestamp) {
|
|
47
|
+
const { startThreshold, stopThreshold, hangoverMs } = this.config;
|
|
48
|
+
let newState = this.currentState;
|
|
49
|
+
if (this.currentState === "silent" || this.currentState === "speech_ending") {
|
|
50
|
+
if (probability >= startThreshold) {
|
|
51
|
+
newState = "speech_starting";
|
|
52
|
+
this.speechStartTime = timestamp;
|
|
53
|
+
this.lastSpeechTime = timestamp;
|
|
54
|
+
} else {
|
|
55
|
+
newState = "silent";
|
|
56
|
+
}
|
|
57
|
+
} else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
|
|
58
|
+
if (probability >= stopThreshold) {
|
|
59
|
+
newState = "speaking";
|
|
60
|
+
this.lastSpeechTime = timestamp;
|
|
61
|
+
} else {
|
|
62
|
+
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
63
|
+
if (timeSinceSpeech < hangoverMs) {
|
|
64
|
+
newState = "speaking";
|
|
65
|
+
} else {
|
|
66
|
+
newState = "speech_ending";
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
if (newState === "speech_starting") newState = "speaking";
|
|
71
|
+
if (newState === "speech_ending") newState = "silent";
|
|
72
|
+
this.currentState = newState;
|
|
73
|
+
return {
|
|
74
|
+
isSpeaking: newState === "speaking",
|
|
75
|
+
probability,
|
|
76
|
+
state: newState
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
};
|
|
80
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
81
|
+
0 && (module.exports = {
|
|
82
|
+
VADStateMachine
|
|
83
|
+
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tensamin/audio",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"main": "dist/index.js",
|
|
5
5
|
"module": "dist/index.mjs",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -17,22 +17,25 @@
|
|
|
17
17
|
},
|
|
18
18
|
"license": "MIT",
|
|
19
19
|
"scripts": {
|
|
20
|
-
"build": "tsup src/
|
|
20
|
+
"build": "tsup src/ --format cjs,esm --dts --out-dir dist --clean",
|
|
21
21
|
"format": "bunx prettier --write .",
|
|
22
22
|
"lint": "tsc"
|
|
23
23
|
},
|
|
24
24
|
"dependencies": {
|
|
25
25
|
"@sapphi-red/web-noise-suppressor": "^0.3.5",
|
|
26
|
-
"mitt": "^3.0.1",
|
|
27
|
-
"tsup": "^8.5.1"
|
|
26
|
+
"mitt": "^3.0.1"
|
|
28
27
|
},
|
|
29
28
|
"peerDependencies": {
|
|
30
29
|
"livekit-client": "^2.0.0"
|
|
31
30
|
},
|
|
32
31
|
"devDependencies": {
|
|
32
|
+
"tsup": "^8.5.1",
|
|
33
33
|
"@types/bun": "latest",
|
|
34
34
|
"@types/web": "^0.0.298",
|
|
35
35
|
"livekit-client": "^2.16.0",
|
|
36
36
|
"typescript": "^5.9.3"
|
|
37
|
-
}
|
|
37
|
+
},
|
|
38
|
+
"files": [
|
|
39
|
+
"dist"
|
|
40
|
+
]
|
|
38
41
|
}
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
name: Publish
|
|
2
|
-
|
|
3
|
-
on:
|
|
4
|
-
workflow_dispatch:
|
|
5
|
-
|
|
6
|
-
permissions:
|
|
7
|
-
contents: read
|
|
8
|
-
id-token: write
|
|
9
|
-
|
|
10
|
-
jobs:
|
|
11
|
-
build-and-publish:
|
|
12
|
-
runs-on: ubuntu-latest
|
|
13
|
-
steps:
|
|
14
|
-
- uses: actions/checkout@v4
|
|
15
|
-
- uses: oven-sh/setup-bun@v2
|
|
16
|
-
- uses: actions/setup-node@v4
|
|
17
|
-
with:
|
|
18
|
-
node-version: "24.x"
|
|
19
|
-
registry-url: https://registry.npmjs.org/
|
|
20
|
-
|
|
21
|
-
- name: Build
|
|
22
|
-
run: |
|
|
23
|
-
bun install --frozen-lockfile
|
|
24
|
-
bun run lint
|
|
25
|
-
bun run build
|
|
26
|
-
bun audit signatures
|
|
27
|
-
|
|
28
|
-
- name: Publish
|
|
29
|
-
run: npm publish --access public
|