@tensamin/audio 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -2
- package/dist/chunk-FS635GMR.mjs +47 -0
- package/dist/chunk-HFSKQ33X.mjs +38 -0
- package/{src/vad/vad-state.ts → dist/chunk-JJASCVEW.mjs} +21 -33
- package/dist/chunk-OZ7KMC4S.mjs +46 -0
- package/dist/chunk-QU7E5HBA.mjs +106 -0
- package/dist/chunk-SDTOKWM2.mjs +39 -0
- package/{src/vad/vad-node.ts → dist/chunk-UMU2KIB6.mjs} +10 -20
- package/dist/chunk-WBQAMGXK.mjs +0 -0
- package/dist/context/audio-context.d.mts +32 -0
- package/dist/context/audio-context.d.ts +32 -0
- package/dist/context/audio-context.js +75 -0
- package/dist/context/audio-context.mjs +16 -0
- package/dist/extensibility/plugins.d.mts +9 -0
- package/dist/extensibility/plugins.d.ts +9 -0
- package/dist/extensibility/plugins.js +180 -0
- package/dist/extensibility/plugins.mjs +14 -0
- package/dist/index.d.mts +10 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +419 -0
- package/dist/index.mjs +47 -0
- package/dist/livekit/integration.d.mts +11 -0
- package/dist/livekit/integration.d.ts +11 -0
- package/dist/livekit/integration.js +368 -0
- package/dist/livekit/integration.mjs +12 -0
- package/dist/noise-suppression/rnnoise-node.d.mts +10 -0
- package/dist/noise-suppression/rnnoise-node.d.ts +10 -0
- package/dist/noise-suppression/rnnoise-node.js +73 -0
- package/dist/noise-suppression/rnnoise-node.mjs +6 -0
- package/dist/pipeline/audio-pipeline.d.mts +6 -0
- package/dist/pipeline/audio-pipeline.d.ts +6 -0
- package/dist/pipeline/audio-pipeline.js +335 -0
- package/dist/pipeline/audio-pipeline.mjs +11 -0
- package/dist/types.d.mts +155 -0
- package/dist/types.d.ts +155 -0
- package/dist/types.js +18 -0
- package/dist/types.mjs +1 -0
- package/dist/vad/vad-node.d.mts +9 -0
- package/dist/vad/vad-node.d.ts +9 -0
- package/dist/vad/vad-node.js +92 -0
- package/dist/vad/vad-node.mjs +6 -0
- package/dist/vad/vad-state.d.mts +15 -0
- package/dist/vad/vad-state.d.ts +15 -0
- package/dist/vad/vad-state.js +83 -0
- package/dist/vad/vad-state.mjs +6 -0
- package/package.json +11 -14
- package/.github/workflows/publish.yml +0 -23
- package/src/context/audio-context.ts +0 -69
- package/src/extensibility/plugins.ts +0 -45
- package/src/index.ts +0 -8
- package/src/livekit/integration.ts +0 -61
- package/src/noise-suppression/rnnoise-node.ts +0 -62
- package/src/pipeline/audio-pipeline.ts +0 -154
- package/src/types.ts +0 -167
- package/tsconfig.json +0 -29
|
@@ -1,154 +0,0 @@
|
|
|
1
|
-
import mitt from "mitt";
|
|
2
|
-
import {
|
|
3
|
-
getAudioContext,
|
|
4
|
-
registerPipeline,
|
|
5
|
-
unregisterPipeline,
|
|
6
|
-
} from "../context/audio-context.js";
|
|
7
|
-
import {
|
|
8
|
-
getNoiseSuppressionPlugin,
|
|
9
|
-
getVADPlugin,
|
|
10
|
-
} from "../extensibility/plugins.js";
|
|
11
|
-
import { VADStateMachine } from "../vad/vad-state.js";
|
|
12
|
-
import type {
|
|
13
|
-
AudioPipelineEvents,
|
|
14
|
-
AudioPipelineHandle,
|
|
15
|
-
AudioProcessingConfig,
|
|
16
|
-
VADState,
|
|
17
|
-
} from "../types.js";
|
|
18
|
-
|
|
19
|
-
/**
 * Builds the audio processing graph for `sourceTrack` and returns a handle to it.
 *
 * Graph layout:
 *
 *   source -> noise suppression -> splitter -> VAD node
 *                                  splitter -> delay -> gain -> destination
 *
 * The delay is set to `vad.preRollMs` (default 200 ms) on a delay node created
 * with a 1 second maximum. The gain starts at `output.silenceGain` and is
 * moved towards `output.speechGain` / `output.silenceGain` whenever a VAD
 * change is emitted.
 *
 * A `vadChange` event is emitted when the VAD state name changes, or when the
 * probability differs by more than 0.1 from the last emitted state.
 *
 * @param sourceTrack Input track. It is wrapped in a new MediaStream and is
 *   never stopped by the pipeline; the caller keeps ownership of it.
 * @param config Partial configuration; each section is merged over the defaults.
 * @returns A handle exposing the processed track, the event emitter, the last
 *   emitted VAD state, `setConfig` and `dispose`.
 * @throws Re-throws any error raised while looking up plugins or creating
 *   nodes, after releasing everything created so far and unregistering the
 *   pipeline.
 */
export async function createAudioPipeline(
  sourceTrack: MediaStreamTrack,
  config: AudioProcessingConfig = {},
): Promise<AudioPipelineHandle> {
  const context = getAudioContext();
  registerPipeline();

  // Defaults; caller-supplied values override them section by section.
  const fullConfig: AudioProcessingConfig = {
    noiseSuppression: { enabled: true, ...config.noiseSuppression },
    vad: { enabled: true, ...config.vad },
    output: {
      speechGain: 1.0,
      silenceGain: 0.0,
      gainRampTime: 0.02,
      ...config.output,
    },
    livekit: { manageTrackMute: false, ...config.livekit },
  };

  // Everything the VAD callback and dispose() read is initialised up front, so
  // a decision delivered while a plugin's createNode() is still pending can
  // never touch an uninitialised binding.
  let lastVadState: VADState = {
    isSpeaking: false,
    probability: 0,
    state: "silent",
  };
  const ownedNodes: AudioNode[] = [];
  let destination: MediaStreamAudioDestinationNode | undefined;
  let disposed = false;

  // Records a node so dispose() can disconnect it later.
  const own = <T extends AudioNode>(node: T): T => {
    ownedNodes.push(node);
    return node;
  };

  // Releases the graph. Safe to call more than once: only the first call
  // disconnects nodes and unregisters the pipeline.
  function dispose(): void {
    if (disposed) {
      return;
    }
    disposed = true;

    for (const node of ownedNodes) {
      node.disconnect();
    }

    // The source track is not ours to stop, but the destination track is.
    destination?.stream.getTracks().forEach((t) => t.stop());

    unregisterPipeline();
  }

  try {
    const emitter = mitt<AudioPipelineEvents>();
    const vadStateMachine = new VADStateMachine(fullConfig.vad);

    // Output path nodes are created before the plugins so that updateGain()
    // is usable as soon as the VAD plugin receives its callback.
    const splitter = own(context.createGain()); // Gain node used as a fan-out point
    const delayNode = own(context.createDelay(1.0)); // Max 1 sec
    delayNode.delayTime.value = (fullConfig.vad?.preRollMs ?? 200) / 1000;

    const gainNode = own(context.createGain());
    gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;

    const outputNode = context.createMediaStreamDestination();
    destination = outputNode;

    // Moves the output gain towards the level matching the given VAD state.
    const updateGain = (state: VADState): void => {
      const output = fullConfig.output;
      const targetGain = state.isSpeaking
        ? (output?.speechGain ?? 1.0)
        : (output?.silenceGain ?? 0.0);

      gainNode.gain.setTargetAtTime(
        targetGain,
        context.currentTime,
        output?.gainRampTime ?? 0.02,
      );
    };

    // 1. Source
    const sourceNode = own(
      context.createMediaStreamSource(new MediaStream([sourceTrack])),
    );

    // 2. Noise suppression
    const nsPlugin = getNoiseSuppressionPlugin(
      fullConfig.noiseSuppression?.pluginName,
    );
    const nsNode = own(
      await nsPlugin.createNode(context, fullConfig.noiseSuppression),
    );

    // 3. VAD
    const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
    const vadNode = own(
      await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
        // Ignore decisions that arrive after the pipeline was torn down.
        if (disposed) {
          return;
        }

        const timestamp = context.currentTime * 1000;
        const newState = vadStateMachine.processFrame(prob, timestamp);

        // Emit on state change, or when the probability moved noticeably.
        const stateChanged = newState.state !== lastVadState.state;
        const probabilityMoved =
          Math.abs(newState.probability - lastVadState.probability) > 0.1;

        if (stateChanged || probabilityMoved) {
          emitter.emit("vadChange", newState);
          lastVadState = newState;
          updateGain(newState);
        }
      }),
    );

    // 4. Wiring
    sourceNode.connect(nsNode);
    nsNode.connect(splitter);

    // Path 1: analysis only; the VAD node is not routed to the destination.
    splitter.connect(vadNode);

    // Path 2: audible output.
    splitter.connect(delayNode);
    delayNode.connect(gainNode);
    gainNode.connect(outputNode);

    const processedTrack = outputNode.stream.getAudioTracks()[0];
    if (!processedTrack) {
      throw new Error("Audio pipeline destination produced no audio track");
    }

    return {
      processedTrack,
      events: emitter,
      get state() {
        return lastVadState;
      },
      setConfig: (newConfig) => {
        // TODO: Implement runtime config updates
        // For now, only the VAD state machine config is updated.
        if (newConfig.vad) {
          vadStateMachine.updateConfig(newConfig.vad);
        }
      },
      dispose,
    };
  } catch (error) {
    // Balance registerPipeline() and release any nodes created before the failure.
    dispose();
    throw error;
  }
}
|
package/src/types.ts
DELETED
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
import type { LocalAudioTrack, TrackPublication } from "livekit-client";
|
|
2
|
-
import type { Emitter } from "mitt";
|
|
3
|
-
|
|
4
|
-
/**
 * Options for the audio processing pipeline. Every section is optional.
 */
export interface AudioProcessingConfig {
  /**
   * Settings for the noise suppression stage.
   */
  noiseSuppression?: {
    /** Whether noise suppression is requested. */
    enabled: boolean;
    /**
     * Location (path or URL) of the RNNoise WASM binary.
     * When omitted, the default shipped with @sapphi-red/web-noise-suppressor
     * is used, provided the bundler supports it.
     */
    wasmUrl?: string;
    /**
     * Location (path or URL) of the RNNoise worklet script.
     */
    workletUrl?: string;
    /**
     * Name of the plugin to use. Defaults to 'rnnoise-ns'.
     */
    pluginName?: string;
  };

  /**
   * Settings for Voice Activity Detection (VAD).
   */
  vad?: {
    /** Whether voice activity detection is requested. */
    enabled: boolean;
    /**
     * Name of the plugin to use. Defaults to 'rnnoise-vad' or 'energy-vad'.
     */
    pluginName?: string;
    /**
     * Speech probability (0-1) at which speech onset is detected.
     * Default: 0.5
     */
    startThreshold?: number;
    /**
     * Speech probability (0-1) at which speech offset is detected.
     * Default: 0.4
     */
    stopThreshold?: number;
    /**
     * How long, in ms, to wait after speech stops before treating the
     * input as silent.
     * Default: 300ms
     */
    hangoverMs?: number;
    /**
     * How much audio, in ms, to buffer ahead of speech onset so the start
     * of an utterance is not cut off.
     * Default: 200ms
     */
    preRollMs?: number;
  };

  /**
   * Output gain and muting settings.
   */
  output?: {
    /**
     * Gain applied while speaking (0-1+). Default: 1.0
     */
    speechGain?: number;
    /**
     * Gain applied while silent (0-1). Default: 0.0 (mute)
     */
    silenceGain?: number;
    /**
     * Ramp time for gain changes, in seconds. Default: 0.02
     */
    gainRampTime?: number;
  };

  /**
   * LiveKit integration settings.
   */
  livekit?: {
    /**
     * When true, track.mute()/unmute() is called on the LocalAudioTrack
     * according to VAD. This saves bandwidth at the cost of more signaling.
     * Default: false (gain gating only)
     */
    manageTrackMute?: boolean;
  };
}
|
|
89
|
-
|
|
90
|
-
/**
 * Snapshot of the Voice Activity Detection state.
 */
export interface VADState {
  /**
   * True while speech is detected, after hysteresis has been applied.
   */
  isSpeaking: boolean;
  /**
   * Raw speech probability reported by the VAD model (0-1).
   */
  probability: number;
  /**
   * Name of the current state.
   */
  state:
    | "silent"
    | "speech_starting"
    | "speaking"
    | "speech_ending";
}
|
|
107
|
-
|
|
108
|
-
/**
 * Event map of the audio pipeline: event name to payload type.
 */
export type AudioPipelineEvents = {
  /** Payload is the new VAD state. */
  vadChange: VADState;
  /** Payload is the error being reported. */
  error: Error;
};
|
|
115
|
-
|
|
116
|
-
/**
 * Handle for a running audio processing pipeline.
 */
export interface AudioPipelineHandle {
  /**
   * MediaStreamTrack carrying the processed audio.
   */
  readonly processedTrack: MediaStreamTrack;

  /**
   * Emitter for VAD state changes and errors.
   */
  readonly events: Emitter<AudioPipelineEvents>;

  /**
   * The current VAD state.
   */
  readonly state: VADState;

  /**
   * Applies a configuration update while the pipeline is running.
   */
  setConfig(config: Partial<AudioProcessingConfig>): void;

  /**
   * Stops processing and releases the pipeline's resources.
   */
  dispose(): void;
}
|
|
145
|
-
|
|
146
|
-
/**
 * Contract implemented by a noise suppression plugin.
 */
export interface NoiseSuppressionPlugin {
  /** Name of the plugin. */
  name: string;
  /**
   * Creates the plugin's audio node.
   *
   * @param context Audio context the node is created for.
   * @param config The `noiseSuppression` section of the pipeline config; may be undefined.
   * @returns A promise resolving to the created node.
   */
  createNode(
    context: AudioContext,
    config: AudioProcessingConfig["noiseSuppression"],
  ): Promise<AudioNode>;
}
|
|
156
|
-
|
|
157
|
-
/**
 * Contract implemented by a VAD plugin.
 */
export interface VADPlugin {
  /** Name of the plugin. */
  name: string;
  /**
   * Creates the plugin's audio node.
   *
   * @param context Audio context the node is created for.
   * @param config The `vad` section of the pipeline config; may be undefined.
   * @param onDecision Callback the plugin invokes with a speech probability.
   * @returns A promise resolving to the created node.
   */
  createNode(
    context: AudioContext,
    config: AudioProcessingConfig["vad"],
    onDecision: (probability: number) => void,
  ): Promise<AudioNode>;
}
|
package/tsconfig.json
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"compilerOptions": {
|
|
3
|
-
// Environment setup & latest features
|
|
4
|
-
"lib": ["ESNext", "DOM", "DOM.Iterable"],
|
|
5
|
-
"target": "ESNext",
|
|
6
|
-
"module": "Preserve",
|
|
7
|
-
"moduleDetection": "force",
|
|
8
|
-
"jsx": "react-jsx",
|
|
9
|
-
"allowJs": true,
|
|
10
|
-
|
|
11
|
-
// Bundler mode
|
|
12
|
-
"moduleResolution": "bundler",
|
|
13
|
-
"allowImportingTsExtensions": true,
|
|
14
|
-
"verbatimModuleSyntax": true,
|
|
15
|
-
"noEmit": true,
|
|
16
|
-
|
|
17
|
-
// Best practices
|
|
18
|
-
"strict": true,
|
|
19
|
-
"skipLibCheck": true,
|
|
20
|
-
"noFallthroughCasesInSwitch": true,
|
|
21
|
-
"noUncheckedIndexedAccess": true,
|
|
22
|
-
"noImplicitOverride": true,
|
|
23
|
-
|
|
24
|
-
// Some stricter flags (disabled by default)
|
|
25
|
-
"noUnusedLocals": false,
|
|
26
|
-
"noUnusedParameters": false,
|
|
27
|
-
"noPropertyAccessFromIndexSignature": false
|
|
28
|
-
}
|
|
29
|
-
}
|