@storyteller-platform/ghost-story 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +611 -0
- package/README.md +18 -0
- package/dist/api/APIOptions.cjs +16 -0
- package/dist/api/APIOptions.d.cts +18 -0
- package/dist/api/APIOptions.d.ts +18 -0
- package/dist/api/APIOptions.js +0 -0
- package/dist/api/Recognition.cjs +263 -0
- package/dist/api/Recognition.d.cts +77 -0
- package/dist/api/Recognition.d.ts +77 -0
- package/dist/api/Recognition.js +233 -0
- package/dist/api/VoiceActivityDetection.cjs +77 -0
- package/dist/api/VoiceActivityDetection.d.cts +24 -0
- package/dist/api/VoiceActivityDetection.d.ts +24 -0
- package/dist/api/VoiceActivityDetection.js +43 -0
- package/dist/audio/AudioConverter.cjs +331 -0
- package/dist/audio/AudioConverter.d.cts +53 -0
- package/dist/audio/AudioConverter.d.ts +53 -0
- package/dist/audio/AudioConverter.js +310 -0
- package/dist/audio/AudioFormat.cjs +151 -0
- package/dist/audio/AudioFormat.d.cts +25 -0
- package/dist/audio/AudioFormat.d.ts +25 -0
- package/dist/audio/AudioFormat.js +123 -0
- package/dist/audio/AudioSource.cjs +119 -0
- package/dist/audio/AudioSource.d.cts +33 -0
- package/dist/audio/AudioSource.d.ts +33 -0
- package/dist/audio/AudioSource.js +88 -0
- package/dist/audio/index.cjs +74 -0
- package/dist/audio/index.d.cts +6 -0
- package/dist/audio/index.d.ts +6 -0
- package/dist/audio/index.js +54 -0
- package/dist/cli/bin.cjs +277 -0
- package/dist/cli/bin.d.cts +1 -0
- package/dist/cli/bin.d.ts +1 -0
- package/dist/cli/bin.js +275 -0
- package/dist/cli/config.cjs +347 -0
- package/dist/cli/config.d.cts +33 -0
- package/dist/cli/config.d.ts +33 -0
- package/dist/cli/config.js +285 -0
- package/dist/cli/install.cjs +334 -0
- package/dist/cli/install.d.cts +62 -0
- package/dist/cli/install.d.ts +62 -0
- package/dist/cli/install.js +316 -0
- package/dist/cli/whisper-server.cjs +172 -0
- package/dist/cli/whisper-server.d.cts +24 -0
- package/dist/cli/whisper-server.d.ts +24 -0
- package/dist/cli/whisper-server.js +152 -0
- package/dist/config.cjs +60 -0
- package/dist/config.d.cts +12 -0
- package/dist/config.d.ts +12 -0
- package/dist/config.js +32 -0
- package/dist/convert.cjs +88 -0
- package/dist/convert.d.cts +12 -0
- package/dist/convert.d.ts +12 -0
- package/dist/convert.js +63 -0
- package/dist/encodings/Ascii.cjs +75 -0
- package/dist/encodings/Ascii.d.cts +13 -0
- package/dist/encodings/Ascii.d.ts +13 -0
- package/dist/encodings/Ascii.js +48 -0
- package/dist/encodings/Base64.cjs +155 -0
- package/dist/encodings/Base64.d.cts +5 -0
- package/dist/encodings/Base64.d.ts +5 -0
- package/dist/encodings/Base64.js +129 -0
- package/dist/encodings/TextEncodingsCommon.cjs +16 -0
- package/dist/encodings/TextEncodingsCommon.d.cts +6 -0
- package/dist/encodings/TextEncodingsCommon.d.ts +6 -0
- package/dist/encodings/TextEncodingsCommon.js +0 -0
- package/dist/index.cjs +153 -0
- package/dist/index.d.cts +15 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +140 -0
- package/dist/recognition/AmazonTranscribeSTT.cjs +188 -0
- package/dist/recognition/AmazonTranscribeSTT.d.cts +21 -0
- package/dist/recognition/AmazonTranscribeSTT.d.ts +21 -0
- package/dist/recognition/AmazonTranscribeSTT.js +160 -0
- package/dist/recognition/AzureCognitiveServicesSTT.cjs +124 -0
- package/dist/recognition/AzureCognitiveServicesSTT.d.cts +21 -0
- package/dist/recognition/AzureCognitiveServicesSTT.d.ts +21 -0
- package/dist/recognition/AzureCognitiveServicesSTT.js +95 -0
- package/dist/recognition/DeepgramSTT.cjs +172 -0
- package/dist/recognition/DeepgramSTT.d.cts +23 -0
- package/dist/recognition/DeepgramSTT.d.ts +23 -0
- package/dist/recognition/DeepgramSTT.js +153 -0
- package/dist/recognition/GoogleCloudSTT.cjs +125 -0
- package/dist/recognition/GoogleCloudSTT.d.cts +35 -0
- package/dist/recognition/GoogleCloudSTT.d.ts +35 -0
- package/dist/recognition/GoogleCloudSTT.js +107 -0
- package/dist/recognition/OpenAICloudSTT.cjs +180 -0
- package/dist/recognition/OpenAICloudSTT.d.cts +29 -0
- package/dist/recognition/OpenAICloudSTT.d.ts +29 -0
- package/dist/recognition/OpenAICloudSTT.js +150 -0
- package/dist/recognition/WhisperCppSTT.cjs +296 -0
- package/dist/recognition/WhisperCppSTT.d.cts +40 -0
- package/dist/recognition/WhisperCppSTT.d.ts +40 -0
- package/dist/recognition/WhisperCppSTT.js +275 -0
- package/dist/recognition/WhisperServerSTT.cjs +119 -0
- package/dist/recognition/WhisperServerSTT.d.cts +24 -0
- package/dist/recognition/WhisperServerSTT.d.ts +24 -0
- package/dist/recognition/WhisperServerSTT.js +105 -0
- package/dist/utilities/FileSystem.cjs +54 -0
- package/dist/utilities/FileSystem.d.cts +3 -0
- package/dist/utilities/FileSystem.d.ts +3 -0
- package/dist/utilities/FileSystem.js +20 -0
- package/dist/utilities/Locale.cjs +46 -0
- package/dist/utilities/Locale.d.cts +9 -0
- package/dist/utilities/Locale.d.ts +9 -0
- package/dist/utilities/Locale.js +20 -0
- package/dist/utilities/ObjectUtilities.cjs +41 -0
- package/dist/utilities/ObjectUtilities.d.cts +3 -0
- package/dist/utilities/ObjectUtilities.d.ts +3 -0
- package/dist/utilities/ObjectUtilities.js +7 -0
- package/dist/utilities/Timeline.cjs +120 -0
- package/dist/utilities/Timeline.d.cts +23 -0
- package/dist/utilities/Timeline.d.ts +23 -0
- package/dist/utilities/Timeline.js +94 -0
- package/dist/utilities/Timing.cjs +287 -0
- package/dist/utilities/Timing.d.cts +64 -0
- package/dist/utilities/Timing.d.ts +64 -0
- package/dist/utilities/Timing.js +256 -0
- package/dist/utilities/WhisperTimeline.cjs +344 -0
- package/dist/utilities/WhisperTimeline.d.cts +86 -0
- package/dist/utilities/WhisperTimeline.d.ts +86 -0
- package/dist/utilities/WhisperTimeline.js +313 -0
- package/dist/vad/ActiveGate.cjs +357 -0
- package/dist/vad/ActiveGate.d.cts +53 -0
- package/dist/vad/ActiveGate.d.ts +53 -0
- package/dist/vad/ActiveGate.js +329 -0
- package/dist/vad/ActiveGateOg.cjs +1366 -0
- package/dist/vad/ActiveGateOg.d.cts +33 -0
- package/dist/vad/ActiveGateOg.d.ts +33 -0
- package/dist/vad/ActiveGateOg.js +1341 -0
- package/dist/vad/Silero.cjs +174 -0
- package/dist/vad/Silero.d.cts +25 -0
- package/dist/vad/Silero.d.ts +25 -0
- package/dist/vad/Silero.js +153 -0
- package/package.json +125 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
|
|
3
|
+
/** Tuning parameters accepted by the voice-activity detector; every field is optional. */
interface VadOptions {
    /** Cutoff frequency handed to the per-channel highpass filter. */
    lowCutoff?: number;
    /** Cutoff frequency handed to the per-channel lowpass filter. */
    highCutoff?: number;
    /** Rate at which the smoothed loudness estimate follows rising input. */
    positiveAdaptationRate?: number;
    /** Rate at which the smoothed loudness estimate follows falling input. */
    negativeAdaptationRate?: number;
    /** Decay applied to the tracked minimum/maximum loudness peaks. */
    peakLoudnessDecay?: number;
    /** Duration by which activity is extended backwards ahead of a loud frame. */
    backwardExtensionDuration?: number;
    /** Offset added to the reference loudness to obtain the "loud frame" threshold. */
    relativeThreshold?: number;
}
|
|
12
|
+
/** Values used for every `VadOptions` field the caller leaves unset. */
declare const defaultVadOptions: Required<VadOptions>;
|
|
13
|
+
/** A contiguous time span classified as either voice activity or inactivity. */
interface VadSegment {
    /** Start of the span, as a time offset from the beginning of the audio. */
    startTime: number;
    /** End of the span, as a time offset from the beginning of the audio. */
    endTime: number;
    /** `true` when the span was classified as active. */
    isActive: boolean;
}
|
|
18
|
+
/** Decoded audio held in memory: one sample array per channel plus the sample rate. */
type RawAudio = {
    /** Per-channel sample data; element 0 is the first channel. */
    audioChannels: Float32Array[];
    /** Number of samples per second in each channel. */
    sampleRate: number;
};
|
|
22
|
+
/** Runs voice-activity detection over in-memory audio and returns the resulting segments. */
declare function detectVoiceActivity(rawAudio: RawAudio, options?: VadOptions): VadSegment[];
|
|
23
|
+
/** `VadOptions` plus the layout of the PCM data read from a stream. */
interface StreamOptions extends VadOptions {
    /** Sample rate used to interpret the incoming samples. */
    sampleRate?: number;
    /** Number of interleaved channels in the incoming samples. */
    channelCount?: number;
}
|
|
27
|
+
/** Runs voice-activity detection over an audio file and resolves with all detected segments. */
declare function vadFromFile(path: string, options?: StreamOptions): Promise<VadSegment[]>;
|
|
28
|
+
/** Runs voice-activity detection over a stream of PCM data, yielding segments as they are finalized. */
declare function vadFromStream(stream: Readable, options?: StreamOptions): AsyncGenerator<VadSegment>;
|
|
29
|
+
/** Incremental voice-activity detector that is fed one sample at a time. */
declare class StreamingVad {
    /** Sample rate given to the constructor. */
    readonly sampleRate: number;
    /** Channel count given to the constructor. */
    readonly channelCount: number;
    private readonly opts;
    private readonly channelFilters;
    private readonly loudness;
    private readonly minLoudness;
    private readonly maxLoudness;
    private readonly frameDuration;
    private readonly ticksPerSecond;
    private readonly backwardFrameCount;
    private frameBuffer;
    private currentSampleIndex;
    private emittedUpToTime;
    private pendingSegment;
    /** Creates a detector; unset `options` fields fall back to `defaultVadOptions`. */
    constructor(sampleRate: number, channelCount: number, options?: VadOptions);
    /** Feeds one sample belonging to the given channel index. */
    process(sample: number, channel: number): void;
    /** Returns segments that are ready; pass `final = true` once all input has been processed. */
    flush(final?: boolean): VadSegment[];
    private computeActivity;
    /** Performs a final flush and returns the remaining segments. */
    finalize(): VadSegment[];
    /** Also performs a final flush and returns the remaining segments. */
    getSegments(): VadSegment[];
    /** Clears all state so a new audio stream can be processed. */
    reset(): void;
}
|
|
52
|
+
|
|
53
|
+
export { type RawAudio, type StreamOptions, StreamingVad, type VadOptions, type VadSegment, defaultVadOptions, detectVoiceActivity, vadFromFile, vadFromStream };
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
|
|
3
|
+
/** Tuning parameters accepted by the voice-activity detector; every field is optional. */
interface VadOptions {
    /** Cutoff frequency handed to the per-channel highpass filter. */
    lowCutoff?: number;
    /** Cutoff frequency handed to the per-channel lowpass filter. */
    highCutoff?: number;
    /** Rate at which the smoothed loudness estimate follows rising input. */
    positiveAdaptationRate?: number;
    /** Rate at which the smoothed loudness estimate follows falling input. */
    negativeAdaptationRate?: number;
    /** Decay applied to the tracked minimum/maximum loudness peaks. */
    peakLoudnessDecay?: number;
    /** Duration by which activity is extended backwards ahead of a loud frame. */
    backwardExtensionDuration?: number;
    /** Offset added to the reference loudness to obtain the "loud frame" threshold. */
    relativeThreshold?: number;
}
|
|
12
|
+
/** Values used for every `VadOptions` field the caller leaves unset. */
declare const defaultVadOptions: Required<VadOptions>;
|
|
13
|
+
/** A contiguous time span classified as either voice activity or inactivity. */
interface VadSegment {
    /** Start of the span, as a time offset from the beginning of the audio. */
    startTime: number;
    /** End of the span, as a time offset from the beginning of the audio. */
    endTime: number;
    /** `true` when the span was classified as active. */
    isActive: boolean;
}
|
|
18
|
+
/** Decoded audio held in memory: one sample array per channel plus the sample rate. */
type RawAudio = {
    /** Per-channel sample data; element 0 is the first channel. */
    audioChannels: Float32Array[];
    /** Number of samples per second in each channel. */
    sampleRate: number;
};
|
|
22
|
+
/** Runs voice-activity detection over in-memory audio and returns the resulting segments. */
declare function detectVoiceActivity(rawAudio: RawAudio, options?: VadOptions): VadSegment[];
|
|
23
|
+
/** `VadOptions` plus the layout of the PCM data read from a stream. */
interface StreamOptions extends VadOptions {
    /** Sample rate used to interpret the incoming samples. */
    sampleRate?: number;
    /** Number of interleaved channels in the incoming samples. */
    channelCount?: number;
}
|
|
27
|
+
/** Runs voice-activity detection over an audio file and resolves with all detected segments. */
declare function vadFromFile(path: string, options?: StreamOptions): Promise<VadSegment[]>;
|
|
28
|
+
/** Runs voice-activity detection over a stream of PCM data, yielding segments as they are finalized. */
declare function vadFromStream(stream: Readable, options?: StreamOptions): AsyncGenerator<VadSegment>;
|
|
29
|
+
/** Incremental voice-activity detector that is fed one sample at a time. */
declare class StreamingVad {
    /** Sample rate given to the constructor. */
    readonly sampleRate: number;
    /** Channel count given to the constructor. */
    readonly channelCount: number;
    private readonly opts;
    private readonly channelFilters;
    private readonly loudness;
    private readonly minLoudness;
    private readonly maxLoudness;
    private readonly frameDuration;
    private readonly ticksPerSecond;
    private readonly backwardFrameCount;
    private frameBuffer;
    private currentSampleIndex;
    private emittedUpToTime;
    private pendingSegment;
    /** Creates a detector; unset `options` fields fall back to `defaultVadOptions`. */
    constructor(sampleRate: number, channelCount: number, options?: VadOptions);
    /** Feeds one sample belonging to the given channel index. */
    process(sample: number, channel: number): void;
    /** Returns segments that are ready; pass `final = true` once all input has been processed. */
    flush(final?: boolean): VadSegment[];
    private computeActivity;
    /** Performs a final flush and returns the remaining segments. */
    finalize(): VadSegment[];
    /** Also performs a final flush and returns the remaining segments. */
    getSegments(): VadSegment[];
    /** Clears all state so a new audio stream can be processed. */
    reset(): void;
}
|
|
52
|
+
|
|
53
|
+
export { type RawAudio, type StreamOptions, StreamingVad, type VadOptions, type VadSegment, defaultVadOptions, detectVoiceActivity, vadFromFile, vadFromStream };
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
import { Converter } from "ffmpeg-stream";
|
|
2
|
+
/**
 * Fallback values for every voice-activity-detection option.
 * StreamingVad merges caller options over this object.
 */
const defaultVadOptions = {
  // Cutoff of the per-channel highpass filter.
  lowCutoff: 100,
  // Cutoff of the per-channel lowpass filter.
  highCutoff: 1000,
  // How quickly the smoothed loudness rises / falls (scaled per processed sample).
  positiveAdaptationRate: 400,
  negativeAdaptationRate: 10,
  // Decay of the tracked min/max loudness peaks (scaled per processed sample).
  peakLoudnessDecay: 4,
  // Activity is extended this far back in time ahead of a loud frame.
  backwardExtensionDuration: 0.2,
  // Added to the reference loudness (in dB) to get the "loud" threshold.
  relativeThreshold: -15,
};
|
|
11
|
+
/**
 * Runs voice-activity detection over fully decoded, in-memory audio.
 *
 * Samples are fed to a StreamingVad frame by frame, visiting every channel
 * for each sample index. The first channel's length decides how many sample
 * indices are visited; a channel that has no value at an index contributes 0.
 *
 * @param {{ audioChannels: Float32Array[], sampleRate: number }} rawAudio
 * @param {object} [options] - VAD tuning options forwarded to StreamingVad.
 * @returns {Array<{ startTime: number, endTime: number, isActive: boolean }>}
 *   Detected segments; an empty array when there is no first channel.
 */
function detectVoiceActivity(rawAudio, options = {}) {
  const channels = rawAudio.audioChannels;
  const channelTotal = channels.length;
  const primary = channels[0];
  if (channelTotal === 0 || !primary) {
    return [];
  }

  const detector = new StreamingVad(rawAudio.sampleRate, channelTotal, options);
  const sampleTotal = primary.length;
  for (let sampleIdx = 0; sampleIdx < sampleTotal; sampleIdx += 1) {
    for (let channelIdx = 0; channelIdx < channelTotal; channelIdx += 1) {
      const value = rawAudio.audioChannels[channelIdx]?.[sampleIdx] ?? 0;
      detector.process(value, channelIdx);
    }
  }
  return detector.finalize();
}
|
|
25
|
+
/** PCM layout assumed for streamed input when the caller does not specify one. */
const defaultStreamOptions = {
  sampleRate: 16000,
  channelCount: 1,
};
|
|
29
|
+
/**
 * Runs voice-activity detection over an audio file.
 *
 * The file is decoded through ffmpeg into raw 32-bit float little-endian PCM
 * and the decoded stream is handed to vadFromStream. The ffmpeg output uses
 * the same sample rate and channel count that vadFromStream will use to
 * interpret the bytes, so `options.sampleRate` / `options.channelCount` no
 * longer desynchronize the decoder and the detector.
 *
 * @param {string} path - Path of the audio file to analyze.
 * @param {object} [options] - Stream and VAD options; `sampleRate` and
 *   `channelCount` fall back to defaultStreamOptions.
 * @returns {Promise<Array<{ startTime: number, endTime: number, isActive: boolean }>>}
 *   All detected segments, in order.
 * @throws Rethrows a detection error as-is (after stopping ffmpeg), or the
 *   ffmpeg run failure when detection itself completed.
 */
async function vadFromFile(path, options = {}) {
  const sampleRate = options.sampleRate ?? defaultStreamOptions.sampleRate;
  const channelCount = options.channelCount ?? defaultStreamOptions.channelCount;

  const converter = new Converter();
  converter.createInputFromFile(path);
  const outputStream = converter.createOutputStream({
    f: "f32le",
    ar: String(sampleRate),
    ac: String(channelCount),
    acodec: "pcm_f32le",
  });

  // Observe the run promise immediately so a failure while the stream is still
  // being consumed never surfaces as an unhandled rejection.
  let runFailed = false;
  let runError;
  const runSettled = converter.run().catch((error) => {
    runFailed = true;
    runError = error;
  });

  const segments = [];
  try {
    const streamOptions = { ...options, sampleRate, channelCount };
    for await (const seg of vadFromStream(outputStream, streamOptions)) {
      segments.push(seg);
    }
  } catch (error) {
    converter.kill();
    // Wait for ffmpeg to wind down, but report the original failure rather
    // than whatever the killed process rejects with.
    await runSettled;
    throw error;
  }

  await runSettled;
  if (runFailed) {
    throw runError;
  }
  return segments;
}
|
|
52
|
+
/**
 * Runs voice-activity detection over a stream of raw PCM bytes.
 *
 * The bytes are read as interleaved 32-bit float little-endian samples.
 * Chunks may end in the middle of a frame; the incomplete tail is carried
 * over and prepended to the next chunk. Segments that are ready are yielded
 * after each chunk, and the remaining ones once the stream ends.
 *
 * @param {AsyncIterable<Buffer>} stream - Source of PCM chunks.
 * @param {object} [options] - VAD options plus optional `sampleRate` and
 *   `channelCount` (falling back to defaultStreamOptions).
 * @yields {{ startTime: number, endTime: number, isActive: boolean }}
 */
async function* vadFromStream(stream, options = {}) {
  const sampleRate = options.sampleRate ?? defaultStreamOptions.sampleRate;
  const channelCount = options.channelCount ?? defaultStreamOptions.channelCount;
  const detector = new StreamingVad(sampleRate, channelCount, options);

  const sampleSize = 4;
  const frameSize = sampleSize * channelCount;

  let carry = Buffer.alloc(0);
  for await (const chunk of stream) {
    const data = Buffer.concat([carry, chunk]);
    // Only whole frames (one sample per channel) are consumed.
    const wholeBytes = Math.floor(data.length / frameSize) * frameSize;

    for (let frameStart = 0; frameStart < wholeBytes; frameStart += frameSize) {
      for (let channel = 0; channel < channelCount; channel++) {
        detector.process(data.readFloatLE(frameStart + channel * sampleSize), channel);
      }
    }

    carry = data.subarray(wholeBytes);
    yield* detector.flush();
  }

  yield* detector.flush(true);
}
|
|
78
|
+
class StreamingVad {
|
|
79
|
+
constructor(sampleRate, channelCount, options = {}) {
|
|
80
|
+
this.sampleRate = sampleRate;
|
|
81
|
+
this.channelCount = channelCount;
|
|
82
|
+
this.opts = { ...defaultVadOptions, ...options };
|
|
83
|
+
this.ticksPerSecond = sampleRate * channelCount;
|
|
84
|
+
this.backwardFrameCount = Math.ceil(
|
|
85
|
+
this.opts.backwardExtensionDuration / this.frameDuration
|
|
86
|
+
);
|
|
87
|
+
this.channelFilters = Array.from({ length: channelCount }, () => ({
|
|
88
|
+
highpass: createBiquadFilter("highpass", sampleRate, this.opts.lowCutoff),
|
|
89
|
+
lowpass: createBiquadFilter("lowpass", sampleRate, this.opts.highCutoff)
|
|
90
|
+
}));
|
|
91
|
+
const initialPower = dbToGain(-60) ** 2;
|
|
92
|
+
this.loudness = new SmoothEstimator(
|
|
93
|
+
this.opts.positiveAdaptationRate / this.ticksPerSecond,
|
|
94
|
+
this.opts.negativeAdaptationRate / this.ticksPerSecond,
|
|
95
|
+
initialPower
|
|
96
|
+
);
|
|
97
|
+
this.minLoudness = new DecayingPeak(
|
|
98
|
+
"min",
|
|
99
|
+
-60,
|
|
100
|
+
this.opts.peakLoudnessDecay / this.ticksPerSecond
|
|
101
|
+
);
|
|
102
|
+
this.maxLoudness = new DecayingPeak(
|
|
103
|
+
"max",
|
|
104
|
+
-60,
|
|
105
|
+
this.opts.peakLoudnessDecay / this.ticksPerSecond
|
|
106
|
+
);
|
|
107
|
+
}
|
|
108
|
+
opts;
|
|
109
|
+
channelFilters;
|
|
110
|
+
loudness;
|
|
111
|
+
minLoudness;
|
|
112
|
+
maxLoudness;
|
|
113
|
+
frameDuration = 0.01;
|
|
114
|
+
ticksPerSecond;
|
|
115
|
+
backwardFrameCount;
|
|
116
|
+
frameBuffer = [];
|
|
117
|
+
currentSampleIndex = 0;
|
|
118
|
+
emittedUpToTime = 0;
|
|
119
|
+
pendingSegment = null;
|
|
120
|
+
process(sample, channel) {
|
|
121
|
+
const filter = this.channelFilters[channel];
|
|
122
|
+
if (!filter) return;
|
|
123
|
+
sample = filter.highpass.process(sample);
|
|
124
|
+
sample = filter.lowpass.process(sample);
|
|
125
|
+
this.loudness.update(sample ** 2);
|
|
126
|
+
const currentDb = gainToDb(
|
|
127
|
+
Math.sqrt(Math.max(this.loudness.value, dbToGain(-60) ** 2))
|
|
128
|
+
);
|
|
129
|
+
this.minLoudness.update(currentDb);
|
|
130
|
+
if (currentDb >= -60) {
|
|
131
|
+
this.maxLoudness.update(currentDb);
|
|
132
|
+
}
|
|
133
|
+
const time = this.currentSampleIndex / this.sampleRate;
|
|
134
|
+
const lastFrame = this.frameBuffer[this.frameBuffer.length - 1];
|
|
135
|
+
if (!lastFrame || time > lastFrame.time + this.frameDuration) {
|
|
136
|
+
this.frameBuffer.push({
|
|
137
|
+
time,
|
|
138
|
+
loudness: currentDb,
|
|
139
|
+
maxLoudness: this.maxLoudness.value
|
|
140
|
+
});
|
|
141
|
+
}
|
|
142
|
+
if (channel === this.channelCount - 1) {
|
|
143
|
+
this.currentSampleIndex++;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
// flush finalized segments, keeping buffer for backward extension
|
|
147
|
+
// call with final=true when done processing to get remaining segments
|
|
148
|
+
flush(final = false) {
|
|
149
|
+
var _a;
|
|
150
|
+
const segments = [];
|
|
151
|
+
const frameCount = this.frameBuffer.length;
|
|
152
|
+
if (frameCount === 0) return segments;
|
|
153
|
+
const finalizeCount = final ? frameCount : Math.max(0, frameCount - this.backwardFrameCount);
|
|
154
|
+
if (finalizeCount === 0) return segments;
|
|
155
|
+
const active = this.computeActivity(finalizeCount, final);
|
|
156
|
+
for (let i = 0; i < finalizeCount; i++) {
|
|
157
|
+
const frame = this.frameBuffer[i];
|
|
158
|
+
if (!frame) continue;
|
|
159
|
+
const isActive = active[i] ?? false;
|
|
160
|
+
const startTime = frame.time;
|
|
161
|
+
const endTime = startTime + this.frameDuration;
|
|
162
|
+
if (this.pendingSegment && this.pendingSegment.isActive === isActive) {
|
|
163
|
+
this.pendingSegment.endTime = endTime;
|
|
164
|
+
} else {
|
|
165
|
+
if (this.pendingSegment) {
|
|
166
|
+
segments.push(this.pendingSegment);
|
|
167
|
+
}
|
|
168
|
+
this.pendingSegment = { startTime, endTime, isActive };
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
if (final && this.pendingSegment) {
|
|
172
|
+
segments.push(this.pendingSegment);
|
|
173
|
+
this.pendingSegment = null;
|
|
174
|
+
}
|
|
175
|
+
this.frameBuffer.splice(0, finalizeCount);
|
|
176
|
+
this.emittedUpToTime = ((_a = segments[segments.length - 1]) == null ? void 0 : _a.endTime) ?? this.emittedUpToTime;
|
|
177
|
+
return segments;
|
|
178
|
+
}
|
|
179
|
+
computeActivity(count, includeBuffer) {
|
|
180
|
+
const active = new Array(count).fill(false);
|
|
181
|
+
const lookAheadEnd = includeBuffer ? this.frameBuffer.length : Math.min(count + this.backwardFrameCount, this.frameBuffer.length);
|
|
182
|
+
let extendTo = count;
|
|
183
|
+
for (let i = lookAheadEnd - 1; i >= 0; i--) {
|
|
184
|
+
const frame = this.frameBuffer[i];
|
|
185
|
+
if (!frame) continue;
|
|
186
|
+
const refLoudness = Math.max(frame.maxLoudness, -30);
|
|
187
|
+
const isLoud = frame.loudness >= refLoudness + this.opts.relativeThreshold;
|
|
188
|
+
if (isLoud) {
|
|
189
|
+
extendTo = Math.max(i - this.backwardFrameCount, 0);
|
|
190
|
+
}
|
|
191
|
+
if (i < count) {
|
|
192
|
+
if (i >= extendTo || isLoud) {
|
|
193
|
+
active[i] = true;
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
return active;
|
|
198
|
+
}
|
|
199
|
+
finalize() {
|
|
200
|
+
const segments = this.flush(true);
|
|
201
|
+
return segments;
|
|
202
|
+
}
|
|
203
|
+
getSegments() {
|
|
204
|
+
return this.flush(true);
|
|
205
|
+
}
|
|
206
|
+
// reset all state to process a new audio stream
|
|
207
|
+
reset() {
|
|
208
|
+
this.frameBuffer = [];
|
|
209
|
+
this.currentSampleIndex = 0;
|
|
210
|
+
this.emittedUpToTime = 0;
|
|
211
|
+
this.pendingSegment = null;
|
|
212
|
+
this.loudness.reset(dbToGain(-60) ** 2);
|
|
213
|
+
this.minLoudness.reset(-60);
|
|
214
|
+
this.maxLoudness.reset(-60);
|
|
215
|
+
for (const filter of this.channelFilters) {
|
|
216
|
+
filter.highpass.reset();
|
|
217
|
+
filter.lowpass.reset();
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
/**
 * Running estimate that moves a fraction of the way toward each new target,
 * using one fraction when the target is at or above the current value and
 * another when it is below.
 */
class SmoothEstimator {
  value;

  /**
   * @param {number} upRate - Fraction of the gap closed when the target is not below the value.
   * @param {number} downRate - Fraction of the gap closed when the target is below the value.
   * @param {number} initial - Starting value.
   */
  constructor(upRate, downRate, initial) {
    this.upRate = upRate;
    this.downRate = downRate;
    this.value = initial;
  }

  /** Moves the estimate toward `target`. */
  update(target) {
    const gap = target - this.value;
    if (gap >= 0) {
      this.value += gap * this.upRate;
    } else {
      this.value += gap * this.downRate;
    }
  }

  /** Overwrites the estimate with `value`. */
  reset(value) {
    this.value = value;
  }
}
|
|
237
|
+
/**
 * Peak tracker whose held value drifts back by `decay` on every update.
 * Kind "max" tracks maxima (the value drifts down); any other kind tracks
 * minima (the value drifts up).
 */
class DecayingPeak {
  value;

  /**
   * @param {string} kind - "max" to track maxima; anything else tracks minima.
   * @param {number} initial - Starting value.
   * @param {number} decay - Amount the held value drifts per update.
   */
  constructor(kind, initial, decay) {
    this.kind = kind;
    this.decay = decay;
    this.value = initial;
  }

  /** Applies one step of decay, then takes `v` if it beats the decayed peak. */
  update(v) {
    const tracksMax = this.kind === "max";
    const drifted = tracksMax ? this.value - this.decay : this.value + this.decay;
    this.value = tracksMax ? Math.max(v, drifted) : Math.min(v, drifted);
  }

  /** Overwrites the held value with `value`. */
  reset(value) {
    this.value = value;
  }
}
|
|
257
|
+
/**
 * Second-order recursive filter. Keeps the two previous inputs and the two
 * previous outputs and combines them with the coefficients in `c`
 * ({ b0, b1, b2, a1, a2 }).
 */
class BiquadFilter {
  x1 = 0;
  x2 = 0;
  y1 = 0;
  y2 = 0;

  /** @param {{ b0: number, b1: number, b2: number, a1: number, a2: number }} c */
  constructor(c) {
    this.c = c;
  }

  /**
   * Filters one input sample and updates the history.
   * @param {number} x - Input sample.
   * @returns {number} Output sample.
   */
  process(x) {
    const { b0, b1, b2, a1, a2 } = this.c;
    const out = b0 * x + b1 * this.x1 + b2 * this.x2 - a1 * this.y1 - a2 * this.y2;
    // Shift the history: the previous samples become the older ones.
    this.x2 = this.x1;
    this.y2 = this.y1;
    this.x1 = x;
    this.y1 = out;
    return out;
  }

  /** Clears the input/output history. */
  reset() {
    this.x1 = this.x2 = 0;
    this.y1 = this.y2 = 0;
  }
}
|
|
280
|
+
/**
 * Builds a lowpass or highpass BiquadFilter.
 *
 * The cutoff is normalized against the Nyquist frequency (sampleRate / 2) and
 * capped at 1. At the cap, a lowpass passes everything and any other type
 * blocks everything; a highpass with a normalized cutoff of exactly 0 passes
 * everything. Otherwise the coefficients are derived from the cutoff angle,
 * with `q` entering as 10^(q / 20).
 *
 * @param {string} type - "lowpass"; any other value is built as a highpass.
 * @param {number} sampleRate - Sample rate of the signal to be filtered.
 * @param {number} freq - Cutoff frequency, in the same unit as sampleRate.
 * @param {number} [q=0.7071] - Resonance parameter.
 * @returns {BiquadFilter}
 */
function createBiquadFilter(type, sampleRate, freq, q = 0.7071) {
  const isLowpass = type === "lowpass";
  const normalized = Math.min(freq / (sampleRate / 2), 1);

  if (normalized === 1) {
    const gain = isLowpass ? 1 : 0;
    return new BiquadFilter({ b0: gain, b1: 0, b2: 0, a1: 0, a2: 0 });
  }
  if (normalized === 0 && type === "highpass") {
    return new BiquadFilter({ b0: 1, b1: 0, b2: 0, a1: 0, a2: 0 });
  }

  const angle = Math.PI * normalized;
  const alpha = Math.sin(angle) / (2 * Math.pow(10, q / 20));
  const cosAngle = Math.cos(angle);

  const beta = isLowpass ? (1 - cosAngle) / 2 : (1 + cosAngle) / 2;
  const middle = isLowpass ? 2 * beta : -2 * beta;

  // Normalize every coefficient by a0 so the filter can assume a0 === 1.
  const a0 = 1 + alpha;
  return new BiquadFilter({
    b0: beta / a0,
    b1: middle / a0,
    b2: beta / a0,
    a1: (-2 * cosAngle) / a0,
    a2: (1 - alpha) / a0,
  });
}
|
|
317
|
+
/**
 * Converts a linear gain to decibels (20 * log10(gain)).
 * Gains at or below 1e-5 are reported as -100.
 *
 * @param {number} gain - Linear gain.
 * @returns {number} Level in decibels.
 */
function gainToDb(gain) {
  if (gain <= 1e-5) {
    return -100;
  }
  return 20 * Math.log10(gain);
}
|
|
320
|
+
/**
 * Converts decibels to a linear gain (10^(db / 20)).
 * Levels at or below -100 are treated as a gain of 0.
 *
 * @param {number} db - Level in decibels.
 * @returns {number} Linear gain.
 */
function dbToGain(db) {
  if (db <= -100) {
    return 0;
  }
  return Math.pow(10, db / 20);
}
|
|
323
|
+
export {
|
|
324
|
+
StreamingVad,
|
|
325
|
+
defaultVadOptions,
|
|
326
|
+
detectVoiceActivity,
|
|
327
|
+
vadFromFile,
|
|
328
|
+
vadFromStream
|
|
329
|
+
};
|