@storyteller-platform/ghost-story 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +611 -0
- package/README.md +18 -0
- package/dist/api/APIOptions.cjs +16 -0
- package/dist/api/APIOptions.d.cts +18 -0
- package/dist/api/APIOptions.d.ts +18 -0
- package/dist/api/APIOptions.js +0 -0
- package/dist/api/Recognition.cjs +263 -0
- package/dist/api/Recognition.d.cts +77 -0
- package/dist/api/Recognition.d.ts +77 -0
- package/dist/api/Recognition.js +233 -0
- package/dist/api/VoiceActivityDetection.cjs +77 -0
- package/dist/api/VoiceActivityDetection.d.cts +24 -0
- package/dist/api/VoiceActivityDetection.d.ts +24 -0
- package/dist/api/VoiceActivityDetection.js +43 -0
- package/dist/audio/AudioConverter.cjs +331 -0
- package/dist/audio/AudioConverter.d.cts +53 -0
- package/dist/audio/AudioConverter.d.ts +53 -0
- package/dist/audio/AudioConverter.js +310 -0
- package/dist/audio/AudioFormat.cjs +151 -0
- package/dist/audio/AudioFormat.d.cts +25 -0
- package/dist/audio/AudioFormat.d.ts +25 -0
- package/dist/audio/AudioFormat.js +123 -0
- package/dist/audio/AudioSource.cjs +119 -0
- package/dist/audio/AudioSource.d.cts +33 -0
- package/dist/audio/AudioSource.d.ts +33 -0
- package/dist/audio/AudioSource.js +88 -0
- package/dist/audio/index.cjs +74 -0
- package/dist/audio/index.d.cts +6 -0
- package/dist/audio/index.d.ts +6 -0
- package/dist/audio/index.js +54 -0
- package/dist/cli/bin.cjs +277 -0
- package/dist/cli/bin.d.cts +1 -0
- package/dist/cli/bin.d.ts +1 -0
- package/dist/cli/bin.js +275 -0
- package/dist/cli/config.cjs +347 -0
- package/dist/cli/config.d.cts +33 -0
- package/dist/cli/config.d.ts +33 -0
- package/dist/cli/config.js +285 -0
- package/dist/cli/install.cjs +334 -0
- package/dist/cli/install.d.cts +62 -0
- package/dist/cli/install.d.ts +62 -0
- package/dist/cli/install.js +316 -0
- package/dist/cli/whisper-server.cjs +172 -0
- package/dist/cli/whisper-server.d.cts +24 -0
- package/dist/cli/whisper-server.d.ts +24 -0
- package/dist/cli/whisper-server.js +152 -0
- package/dist/config.cjs +60 -0
- package/dist/config.d.cts +12 -0
- package/dist/config.d.ts +12 -0
- package/dist/config.js +32 -0
- package/dist/convert.cjs +88 -0
- package/dist/convert.d.cts +12 -0
- package/dist/convert.d.ts +12 -0
- package/dist/convert.js +63 -0
- package/dist/encodings/Ascii.cjs +75 -0
- package/dist/encodings/Ascii.d.cts +13 -0
- package/dist/encodings/Ascii.d.ts +13 -0
- package/dist/encodings/Ascii.js +48 -0
- package/dist/encodings/Base64.cjs +155 -0
- package/dist/encodings/Base64.d.cts +5 -0
- package/dist/encodings/Base64.d.ts +5 -0
- package/dist/encodings/Base64.js +129 -0
- package/dist/encodings/TextEncodingsCommon.cjs +16 -0
- package/dist/encodings/TextEncodingsCommon.d.cts +6 -0
- package/dist/encodings/TextEncodingsCommon.d.ts +6 -0
- package/dist/encodings/TextEncodingsCommon.js +0 -0
- package/dist/index.cjs +153 -0
- package/dist/index.d.cts +15 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +140 -0
- package/dist/recognition/AmazonTranscribeSTT.cjs +188 -0
- package/dist/recognition/AmazonTranscribeSTT.d.cts +21 -0
- package/dist/recognition/AmazonTranscribeSTT.d.ts +21 -0
- package/dist/recognition/AmazonTranscribeSTT.js +160 -0
- package/dist/recognition/AzureCognitiveServicesSTT.cjs +124 -0
- package/dist/recognition/AzureCognitiveServicesSTT.d.cts +21 -0
- package/dist/recognition/AzureCognitiveServicesSTT.d.ts +21 -0
- package/dist/recognition/AzureCognitiveServicesSTT.js +95 -0
- package/dist/recognition/DeepgramSTT.cjs +172 -0
- package/dist/recognition/DeepgramSTT.d.cts +23 -0
- package/dist/recognition/DeepgramSTT.d.ts +23 -0
- package/dist/recognition/DeepgramSTT.js +153 -0
- package/dist/recognition/GoogleCloudSTT.cjs +125 -0
- package/dist/recognition/GoogleCloudSTT.d.cts +35 -0
- package/dist/recognition/GoogleCloudSTT.d.ts +35 -0
- package/dist/recognition/GoogleCloudSTT.js +107 -0
- package/dist/recognition/OpenAICloudSTT.cjs +180 -0
- package/dist/recognition/OpenAICloudSTT.d.cts +29 -0
- package/dist/recognition/OpenAICloudSTT.d.ts +29 -0
- package/dist/recognition/OpenAICloudSTT.js +150 -0
- package/dist/recognition/WhisperCppSTT.cjs +296 -0
- package/dist/recognition/WhisperCppSTT.d.cts +40 -0
- package/dist/recognition/WhisperCppSTT.d.ts +40 -0
- package/dist/recognition/WhisperCppSTT.js +275 -0
- package/dist/recognition/WhisperServerSTT.cjs +119 -0
- package/dist/recognition/WhisperServerSTT.d.cts +24 -0
- package/dist/recognition/WhisperServerSTT.d.ts +24 -0
- package/dist/recognition/WhisperServerSTT.js +105 -0
- package/dist/utilities/FileSystem.cjs +54 -0
- package/dist/utilities/FileSystem.d.cts +3 -0
- package/dist/utilities/FileSystem.d.ts +3 -0
- package/dist/utilities/FileSystem.js +20 -0
- package/dist/utilities/Locale.cjs +46 -0
- package/dist/utilities/Locale.d.cts +9 -0
- package/dist/utilities/Locale.d.ts +9 -0
- package/dist/utilities/Locale.js +20 -0
- package/dist/utilities/ObjectUtilities.cjs +41 -0
- package/dist/utilities/ObjectUtilities.d.cts +3 -0
- package/dist/utilities/ObjectUtilities.d.ts +3 -0
- package/dist/utilities/ObjectUtilities.js +7 -0
- package/dist/utilities/Timeline.cjs +120 -0
- package/dist/utilities/Timeline.d.cts +23 -0
- package/dist/utilities/Timeline.d.ts +23 -0
- package/dist/utilities/Timeline.js +94 -0
- package/dist/utilities/Timing.cjs +287 -0
- package/dist/utilities/Timing.d.cts +64 -0
- package/dist/utilities/Timing.d.ts +64 -0
- package/dist/utilities/Timing.js +256 -0
- package/dist/utilities/WhisperTimeline.cjs +344 -0
- package/dist/utilities/WhisperTimeline.d.cts +86 -0
- package/dist/utilities/WhisperTimeline.d.ts +86 -0
- package/dist/utilities/WhisperTimeline.js +313 -0
- package/dist/vad/ActiveGate.cjs +357 -0
- package/dist/vad/ActiveGate.d.cts +53 -0
- package/dist/vad/ActiveGate.d.ts +53 -0
- package/dist/vad/ActiveGate.js +329 -0
- package/dist/vad/ActiveGateOg.cjs +1366 -0
- package/dist/vad/ActiveGateOg.d.cts +33 -0
- package/dist/vad/ActiveGateOg.d.ts +33 -0
- package/dist/vad/ActiveGateOg.js +1341 -0
- package/dist/vad/Silero.cjs +174 -0
- package/dist/vad/Silero.d.cts +25 -0
- package/dist/vad/Silero.d.ts +25 -0
- package/dist/vad/Silero.js +153 -0
- package/package.json +125 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var Silero_exports = {};
|
|
20
|
+
__export(Silero_exports, {
|
|
21
|
+
detectVoiceActivity: () => detectVoiceActivity,
|
|
22
|
+
ensureVadInstalled: () => ensureVadInstalled,
|
|
23
|
+
segmentsToTimeline: () => segmentsToTimeline
|
|
24
|
+
});
|
|
25
|
+
module.exports = __toCommonJS(Silero_exports);
|
|
26
|
+
var import_node_child_process = require("node:child_process");
|
|
27
|
+
var import_node_fs = require("node:fs");
|
|
28
|
+
var import_fs_extra = require("fs-extra");
|
|
29
|
+
var import_config = require("../cli/config.cjs");
|
|
30
|
+
var import_install = require("../cli/install.cjs");
|
|
31
|
+
// Fallback values merged underneath whatever options the caller supplies.
const defaultOptions = {
  // When true, the command line is logged and child-process output is echoed.
  printOutput: false,
  // When true, the binary and VAD-model installers run before the model check.
  autoInstall: true,
  // Forwarded to the VAD executable through the "-vt" flag.
  threshold: 0.5,
  // Currently not forwarded: the "-vspd" flag is commented out in detectVoiceActivity.
  minSpeechDurationMs: 250,
  // Forwarded to the VAD executable through the "-vsd" flag.
  minSilenceDurationMs: 100,
  // Forwarded to the VAD executable through the "-vp" flag.
  speechPadMs: 30,
};
|
|
39
|
+
/**
 * Makes sure the VAD model is available and reports where things live.
 *
 * The model directory is created if missing. When `autoInstall` is enabled the
 * binary installer and the VAD-model installer are run (in that order) before
 * the model file is looked up.
 *
 * @param {object} [options] Overrides merged on top of `defaultOptions`.
 * @returns {Promise<{ installDir: string, vadModelPath: string }>}
 * @throws {Error} When the VAD model file does not exist after the optional install step.
 */
async function ensureVadInstalled(options = {}) {
  const settings = { ...defaultOptions, ...options };
  const { autoInstall, printOutput } = settings;

  // Explicit directories win; otherwise fall back to the configured locations.
  const modelDir = settings.modelDir ?? (0, import_config.getModelDir)();
  const installDir = settings.installDir ?? (0, import_config.getInstallDir)();

  await (0, import_fs_extra.ensureDir)(modelDir);

  if (autoInstall) {
    // NOTE(review): installDir is not forwarded to installBinary, yet callers
    // later look for the executable under installDir — confirm the installer
    // targets the same directory when a custom installDir is supplied.
    await (0, import_install.installBinary)({ printOutput });
    await (0, import_install.installVadModel)({ modelDir, printOutput });
  }

  const vadModelPath = (0, import_config.getVadModelPath)(modelDir);
  const modelPresent = (0, import_node_fs.existsSync)(vadModelPath);
  if (modelPresent) {
    return { installDir, vadModelPath };
  }

  throw new Error(
    `VAD model not found at ${vadModelPath}. Run 'ghost-story install --vad' to install.`
  );
}
|
|
56
|
+
/**
 * Runs the VAD executable on an audio file and returns the detected speech segments.
 *
 * @param {string} inputPath Path of the audio file to analyse.
 * @param {object} [options] Overrides merged on top of `defaultOptions`.
 * @returns {Promise<Array<{ startTime: number, endTime: number, isSpeech: boolean }>>}
 * @throws {Error} When the input file or the VAD executable does not exist, when the
 *   model is missing (see ensureVadInstalled), or when the VAD process fails.
 */
async function detectVoiceActivity(inputPath, options = {}) {
  const opts = { ...defaultOptions, ...options };
  if (!(0, import_node_fs.existsSync)(inputPath)) {
    throw new Error(`Input file does not exist: ${inputPath}`);
  }

  const { installDir, vadModelPath } = await ensureVadInstalled(opts);
  const executablePath = (0, import_config.getVadExecutablePath)(installDir);
  if (!(0, import_node_fs.existsSync)(executablePath)) {
    // NOTE(review): the CLI shipped by this package is `ghost-story`; confirm the
    // exact subcommand that installs the binary.
    throw new Error(
      `VAD executable not found at ${executablePath}. Run 'ghost-story install' to install.`
    );
  }

  // The child runs with cwd = installDir, so any relative path that was valid
  // for this process (and passed the existsSync checks above) must be made
  // absolute before it is handed to the child.
  const { resolve } = require("node:path");
  const args = [
    "-f",
    resolve(inputPath),
    "-vm",
    resolve(vadModelPath),
    "-vt",
    String(opts.threshold),
    // "-vspd" (opts.minSpeechDurationMs) is intentionally not passed: the
    // executable does not accept it.
    "-vsd",
    String(opts.minSilenceDurationMs),
    "-vp",
    String(opts.speechPadMs)
  ];

  // runVadProcess logs the command line itself when printOutput is set, so it
  // is not logged a second time here.
  return runVadProcess(
    resolve(executablePath),
    args,
    installDir,
    opts.printOutput
  );
}
|
|
95
|
+
/**
 * Spawns the VAD executable, collects its output and parses the speech segments.
 *
 * Both stdout and stderr are captured and parsed together. A non-zero exit is
 * only treated as a failure when no segment could be parsed from the output.
 *
 * @param {string} executable Path of the VAD binary.
 * @param {string[]} args Command-line arguments.
 * @param {string} cwd Working directory for the child process.
 * @param {boolean} printOutput Echo the command, the child's output and the parsed result.
 * @returns {Promise<Array<{ startTime: number, endTime: number, isSpeech: boolean }>>}
 */
async function runVadProcess(executable, args, cwd, printOutput) {
  if (printOutput) {
    console.log(`Running VAD: ${executable} ${args.join(" ")}`);
  }
  return new Promise((resolve, reject) => {
    const proc = (0, import_node_child_process.spawn)(executable, args, { cwd });
    const stdoutChunks = [];
    const stderrChunks = [];
    // Guards against handling both "error" and "close" for the same failure.
    let settled = false;

    proc.stdout.on("data", (chunk) => {
      stdoutChunks.push(chunk);
      if (printOutput) {
        process.stdout.write(chunk);
      }
    });
    proc.stderr.on("data", (chunk) => {
      stderrChunks.push(chunk);
      if (printOutput) {
        process.stderr.write(chunk);
      }
    });

    proc.on("error", (err) => {
      if (settled) {
        return;
      }
      settled = true;
      if (printOutput) {
        console.error(err);
      }
      reject(new Error(`Failed to start VAD process: ${err.message}`, { cause: err }));
    });

    // "close" (unlike "exit") fires only after the stdio streams have ended,
    // so every chunk of output has been received before it is parsed.
    proc.on("close", (code, signal) => {
      if (settled) {
        return;
      }
      settled = true;
      if (printOutput) {
        console.log(`VAD process exited with code ${code} and signal ${signal}`);
      }
      // Decode once over the whole stream so multi-byte characters that span
      // two chunks are not corrupted.
      const stdout = Buffer.concat(stdoutChunks).toString("utf-8");
      const stderr = Buffer.concat(stderrChunks).toString("utf-8");

      let segments;
      try {
        segments = parseVadOutput(stdout + stderr);
      } catch (err) {
        // A throw inside an event handler would otherwise be uncaught.
        reject(err);
        return;
      }
      if (printOutput) {
        console.log(segments);
      }

      if (code !== 0 && segments.length === 0) {
        const details = stderr || stdout;
        const reason =
          code === null
            ? `VAD process was terminated by signal ${signal}: ${details}`
            : `VAD process exited with code ${code}: ${details}`;
        reject(new Error(reason));
        return;
      }
      resolve(segments);
    });
  });
}
|
|
143
|
+
/**
 * Extracts speech segments from the text printed by the VAD executable.
 *
 * Recognises lines of the form
 * `Speech segment <n>: start = <float>, end = <float>`; everything else is ignored.
 *
 * @param {string} output Combined process output to scan.
 * @returns {Array<{ startTime: number, endTime: number, isSpeech: boolean }>}
 *   One entry per matched line, in order of appearance; empty when nothing matches.
 */
function parseVadOutput(output) {
  const segmentPattern = /Speech segment (\d+): start = (\d+\.\d+), end = (\d+\.\d+)/g;
  // Both numeric groups are mandatory in the pattern, so every match carries
  // a start and an end; no per-match validation is needed.
  return Array.from(output.matchAll(segmentPattern), ([, , start, end]) => ({
    startTime: Number.parseFloat(start),
    endTime: Number.parseFloat(end),
    isSpeech: true
  }));
}
|
|
161
|
+
/**
 * Converts VAD segments into timeline entries of type "segment".
 *
 * @param {Array<{ startTime: number, endTime: number, isSpeech: boolean }>} segments
 * @returns {Array<{ type: string, text: string, startTime: number, endTime: number }>}
 *   One entry per input segment, labelled "speech" or "silence".
 */
function segmentsToTimeline(segments) {
  const toEntry = (segment) => {
    let label;
    if (segment.isSpeech) {
      label = "speech";
    } else {
      label = "silence";
    }
    const { startTime, endTime } = segment;
    return { type: "segment", text: label, startTime, endTime };
  };
  return segments.map((segment) => toEntry(segment));
}
|
|
169
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
170
|
+
0 && (module.exports = {
|
|
171
|
+
detectVoiceActivity,
|
|
172
|
+
ensureVadInstalled,
|
|
173
|
+
segmentsToTimeline
|
|
174
|
+
});
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { Timeline } from '../utilities/Timeline.cjs';
|
|
2
|
+
|
|
3
|
+
/** Options accepted by the Silero VAD helpers; every field is optional. */
interface SileroOptions {
    /** Directory containing the VAD model; the configured model directory is used when omitted. */
    modelDir?: string;
    /** Directory containing the VAD executable; also used as the child process working directory. */
    installDir?: string;
    /** Log the command line and echo the child process output (default: false). */
    printOutput?: boolean;
    /** Run the binary and VAD-model installers before use (default: true). */
    autoInstall?: boolean;
    /** Value passed to the executable's "-vt" flag (default: 0.5). */
    threshold?: number;
    /** Default 250; currently not forwarded to the executable. */
    minSpeechDurationMs?: number;
    /** Value passed to the executable's "-vsd" flag (default: 100). */
    minSilenceDurationMs?: number;
    /** Value passed to the executable's "-vp" flag (default: 30). */
    speechPadMs?: number;
}
|
|
13
|
+
/** A single segment reported by voice activity detection. */
interface VadSegment {
    /** Segment start as printed by the VAD executable (presumably seconds — confirm). */
    startTime: number;
    /** Segment end as printed by the VAD executable (presumably seconds — confirm). */
    endTime: number;
    /** Whether the segment contains speech; the output parser always sets this to true. */
    isSpeech: boolean;
}
|
|
18
|
+
/**
 * Ensures the model directory exists, optionally runs the installers, and
 * resolves with the install directory and the VAD model path.
 * Rejects when the VAD model file cannot be found.
 */
declare function ensureVadInstalled(options?: SileroOptions): Promise<{
    installDir: string;
    vadModelPath: string;
}>;
/**
 * Runs the VAD executable on `inputPath` and resolves with the parsed speech segments.
 * Rejects when the input file or the executable is missing, or when the process fails.
 */
declare function detectVoiceActivity(inputPath: string, options?: SileroOptions): Promise<VadSegment[]>;
/** Maps each segment to a timeline entry of type "segment" labelled "speech" or "silence". */
declare function segmentsToTimeline(segments: VadSegment[]): Timeline;
|
|
24
|
+
|
|
25
|
+
export { type SileroOptions, type VadSegment, detectVoiceActivity, ensureVadInstalled, segmentsToTimeline };
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { Timeline } from '../utilities/Timeline.js';
|
|
2
|
+
|
|
3
|
+
/** Options accepted by the Silero VAD helpers; every field is optional. */
interface SileroOptions {
    /** Directory containing the VAD model; the configured model directory is used when omitted. */
    modelDir?: string;
    /** Directory containing the VAD executable; also used as the child process working directory. */
    installDir?: string;
    /** Log the command line and echo the child process output (default: false). */
    printOutput?: boolean;
    /** Run the binary and VAD-model installers before use (default: true). */
    autoInstall?: boolean;
    /** Value passed to the executable's "-vt" flag (default: 0.5). */
    threshold?: number;
    /** Default 250; currently not forwarded to the executable. */
    minSpeechDurationMs?: number;
    /** Value passed to the executable's "-vsd" flag (default: 100). */
    minSilenceDurationMs?: number;
    /** Value passed to the executable's "-vp" flag (default: 30). */
    speechPadMs?: number;
}
|
|
13
|
+
/** A single segment reported by voice activity detection. */
interface VadSegment {
    /** Segment start as printed by the VAD executable (presumably seconds — confirm). */
    startTime: number;
    /** Segment end as printed by the VAD executable (presumably seconds — confirm). */
    endTime: number;
    /** Whether the segment contains speech; the output parser always sets this to true. */
    isSpeech: boolean;
}
|
|
18
|
+
/**
 * Ensures the model directory exists, optionally runs the installers, and
 * resolves with the install directory and the VAD model path.
 * Rejects when the VAD model file cannot be found.
 */
declare function ensureVadInstalled(options?: SileroOptions): Promise<{
    installDir: string;
    vadModelPath: string;
}>;
/**
 * Runs the VAD executable on `inputPath` and resolves with the parsed speech segments.
 * Rejects when the input file or the executable is missing, or when the process fails.
 */
declare function detectVoiceActivity(inputPath: string, options?: SileroOptions): Promise<VadSegment[]>;
/** Maps each segment to a timeline entry of type "segment" labelled "speech" or "silence". */
declare function segmentsToTimeline(segments: VadSegment[]): Timeline;
|
|
24
|
+
|
|
25
|
+
export { type SileroOptions, type VadSegment, detectVoiceActivity, ensureVadInstalled, segmentsToTimeline };
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { ensureDir } from "fs-extra";
|
|
4
|
+
import {
|
|
5
|
+
getInstallDir,
|
|
6
|
+
getModelDir,
|
|
7
|
+
getVadExecutablePath,
|
|
8
|
+
getVadModelPath
|
|
9
|
+
} from "../cli/config.js";
|
|
10
|
+
import { installBinary, installVadModel } from "../cli/install.js";
|
|
11
|
+
// Fallback values merged underneath whatever options the caller supplies.
const defaultOptions = {
  // When true, the command line is logged and child-process output is echoed.
  printOutput: false,
  // When true, the binary and VAD-model installers run before the model check.
  autoInstall: true,
  // Forwarded to the VAD executable through the "-vt" flag.
  threshold: 0.5,
  // Currently not forwarded: the "-vspd" flag is commented out in detectVoiceActivity.
  minSpeechDurationMs: 250,
  // Forwarded to the VAD executable through the "-vsd" flag.
  minSilenceDurationMs: 100,
  // Forwarded to the VAD executable through the "-vp" flag.
  speechPadMs: 30,
};
|
|
19
|
+
/**
 * Makes sure the VAD model is available and reports where things live.
 *
 * The model directory is created if missing. When `autoInstall` is enabled the
 * binary installer and the VAD-model installer are run (in that order) before
 * the model file is looked up.
 *
 * @param {object} [options] Overrides merged on top of `defaultOptions`.
 * @returns {Promise<{ installDir: string, vadModelPath: string }>}
 * @throws {Error} When the VAD model file does not exist after the optional install step.
 */
async function ensureVadInstalled(options = {}) {
  const settings = { ...defaultOptions, ...options };
  const { autoInstall, printOutput } = settings;

  // Explicit directories win; otherwise fall back to the configured locations.
  const modelDir = settings.modelDir ?? getModelDir();
  const installDir = settings.installDir ?? getInstallDir();

  await ensureDir(modelDir);

  if (autoInstall) {
    // NOTE(review): installDir is not forwarded to installBinary, yet callers
    // later look for the executable under installDir — confirm the installer
    // targets the same directory when a custom installDir is supplied.
    await installBinary({ printOutput });
    await installVadModel({ modelDir, printOutput });
  }

  const vadModelPath = getVadModelPath(modelDir);
  const modelPresent = existsSync(vadModelPath);
  if (modelPresent) {
    return { installDir, vadModelPath };
  }

  throw new Error(
    `VAD model not found at ${vadModelPath}. Run 'ghost-story install --vad' to install.`
  );
}
|
|
36
|
+
/**
 * Runs the VAD executable on an audio file and returns the detected speech segments.
 *
 * @param {string} inputPath Path of the audio file to analyse.
 * @param {object} [options] Overrides merged on top of `defaultOptions`.
 * @returns {Promise<Array<{ startTime: number, endTime: number, isSpeech: boolean }>>}
 * @throws {Error} When the input file or the VAD executable does not exist, when the
 *   model is missing (see ensureVadInstalled), or when the VAD process fails.
 */
async function detectVoiceActivity(inputPath, options = {}) {
  const opts = { ...defaultOptions, ...options };
  if (!existsSync(inputPath)) {
    throw new Error(`Input file does not exist: ${inputPath}`);
  }

  const { installDir, vadModelPath } = await ensureVadInstalled(opts);
  const executablePath = getVadExecutablePath(installDir);
  if (!existsSync(executablePath)) {
    // NOTE(review): the CLI shipped by this package is `ghost-story`; confirm the
    // exact subcommand that installs the binary.
    throw new Error(
      `VAD executable not found at ${executablePath}. Run 'ghost-story install' to install.`
    );
  }

  // The child runs with cwd = installDir, so any relative path that was valid
  // for this process (and passed the existsSync checks above) must be made
  // absolute before it is handed to the child.
  const { resolve } = await import("node:path");
  const args = [
    "-f",
    resolve(inputPath),
    "-vm",
    resolve(vadModelPath),
    "-vt",
    String(opts.threshold),
    // "-vspd" (opts.minSpeechDurationMs) is intentionally not passed: the
    // executable does not accept it.
    "-vsd",
    String(opts.minSilenceDurationMs),
    "-vp",
    String(opts.speechPadMs)
  ];

  // runVadProcess logs the command line itself when printOutput is set, so it
  // is not logged a second time here.
  return runVadProcess(
    resolve(executablePath),
    args,
    installDir,
    opts.printOutput
  );
}
|
|
75
|
+
/**
 * Spawns the VAD executable, collects its output and parses the speech segments.
 *
 * Both stdout and stderr are captured and parsed together. A non-zero exit is
 * only treated as a failure when no segment could be parsed from the output.
 *
 * @param {string} executable Path of the VAD binary.
 * @param {string[]} args Command-line arguments.
 * @param {string} cwd Working directory for the child process.
 * @param {boolean} printOutput Echo the command, the child's output and the parsed result.
 * @returns {Promise<Array<{ startTime: number, endTime: number, isSpeech: boolean }>>}
 */
async function runVadProcess(executable, args, cwd, printOutput) {
  if (printOutput) {
    console.log(`Running VAD: ${executable} ${args.join(" ")}`);
  }
  return new Promise((resolve, reject) => {
    const proc = spawn(executable, args, { cwd });
    const stdoutChunks = [];
    const stderrChunks = [];
    // Guards against handling both "error" and "close" for the same failure.
    let settled = false;

    proc.stdout.on("data", (chunk) => {
      stdoutChunks.push(chunk);
      if (printOutput) {
        process.stdout.write(chunk);
      }
    });
    proc.stderr.on("data", (chunk) => {
      stderrChunks.push(chunk);
      if (printOutput) {
        process.stderr.write(chunk);
      }
    });

    proc.on("error", (err) => {
      if (settled) {
        return;
      }
      settled = true;
      if (printOutput) {
        console.error(err);
      }
      reject(new Error(`Failed to start VAD process: ${err.message}`, { cause: err }));
    });

    // "close" (unlike "exit") fires only after the stdio streams have ended,
    // so every chunk of output has been received before it is parsed.
    proc.on("close", (code, signal) => {
      if (settled) {
        return;
      }
      settled = true;
      if (printOutput) {
        console.log(`VAD process exited with code ${code} and signal ${signal}`);
      }
      // Decode once over the whole stream so multi-byte characters that span
      // two chunks are not corrupted.
      const stdout = Buffer.concat(stdoutChunks).toString("utf-8");
      const stderr = Buffer.concat(stderrChunks).toString("utf-8");

      let segments;
      try {
        segments = parseVadOutput(stdout + stderr);
      } catch (err) {
        // A throw inside an event handler would otherwise be uncaught.
        reject(err);
        return;
      }
      if (printOutput) {
        console.log(segments);
      }

      if (code !== 0 && segments.length === 0) {
        const details = stderr || stdout;
        const reason =
          code === null
            ? `VAD process was terminated by signal ${signal}: ${details}`
            : `VAD process exited with code ${code}: ${details}`;
        reject(new Error(reason));
        return;
      }
      resolve(segments);
    });
  });
}
|
|
123
|
+
/**
 * Extracts speech segments from the text printed by the VAD executable.
 *
 * Recognises lines of the form
 * `Speech segment <n>: start = <float>, end = <float>`; everything else is ignored.
 *
 * @param {string} output Combined process output to scan.
 * @returns {Array<{ startTime: number, endTime: number, isSpeech: boolean }>}
 *   One entry per matched line, in order of appearance; empty when nothing matches.
 */
function parseVadOutput(output) {
  const segmentPattern = /Speech segment (\d+): start = (\d+\.\d+), end = (\d+\.\d+)/g;
  // Both numeric groups are mandatory in the pattern, so every match carries
  // a start and an end; no per-match validation is needed.
  return Array.from(output.matchAll(segmentPattern), ([, , start, end]) => ({
    startTime: Number.parseFloat(start),
    endTime: Number.parseFloat(end),
    isSpeech: true
  }));
}
|
|
141
|
+
/**
 * Converts VAD segments into timeline entries of type "segment".
 *
 * @param {Array<{ startTime: number, endTime: number, isSpeech: boolean }>} segments
 * @returns {Array<{ type: string, text: string, startTime: number, endTime: number }>}
 *   One entry per input segment, labelled "speech" or "silence".
 */
function segmentsToTimeline(segments) {
  const toEntry = (segment) => {
    let label;
    if (segment.isSpeech) {
      label = "speech";
    } else {
      label = "silence";
    }
    const { startTime, endTime } = segment;
    return { type: "segment", text: label, startTime, endTime };
  };
  return segments.map((segment) => toEntry(segment));
}
|
|
149
|
+
export {
|
|
150
|
+
detectVoiceActivity,
|
|
151
|
+
ensureVadInstalled,
|
|
152
|
+
segmentsToTimeline
|
|
153
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@storyteller-platform/ghost-story",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "An easy-to-use speech toolset. Fork of the original echogarden project.",
|
|
5
|
+
"author": "",
|
|
6
|
+
"license": "GPL-3.0",
|
|
7
|
+
"keywords": [
|
|
8
|
+
"text-to-speech"
|
|
9
|
+
],
|
|
10
|
+
"repository": {
|
|
11
|
+
"type": "git",
|
|
12
|
+
"url": "https://github.com/storyteller-platform/storyteller"
|
|
13
|
+
},
|
|
14
|
+
"bugs": {
|
|
15
|
+
"url": "https://github.com/storyteller-platform/storyteller/issues"
|
|
16
|
+
},
|
|
17
|
+
"engines": {
|
|
18
|
+
"node": ">=20"
|
|
19
|
+
},
|
|
20
|
+
"os": [
|
|
21
|
+
"win32",
|
|
22
|
+
"darwin",
|
|
23
|
+
"linux"
|
|
24
|
+
],
|
|
25
|
+
"type": "module",
|
|
26
|
+
"files": [
|
|
27
|
+
"dist",
|
|
28
|
+
"README.md",
|
|
29
|
+
"LICENSE.md"
|
|
30
|
+
],
|
|
31
|
+
"exports": {
|
|
32
|
+
".": {
|
|
33
|
+
"import": {
|
|
34
|
+
"types": "./dist/index.d.ts",
|
|
35
|
+
"default": "./dist/index.js"
|
|
36
|
+
},
|
|
37
|
+
"require": {
|
|
38
|
+
"types": "./dist/index.d.cts",
|
|
39
|
+
"default": "./dist/index.cjs"
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
"./server": {
|
|
43
|
+
"import": {
|
|
44
|
+
"types": "./dist/cli/whisper-server.d.ts",
|
|
45
|
+
"default": "./dist/cli/whisper-server.js"
|
|
46
|
+
},
|
|
47
|
+
"require": {
|
|
48
|
+
"types": "./dist/cli/whisper-server.d.cts",
|
|
49
|
+
"default": "./dist/cli/whisper-server.cjs"
|
|
50
|
+
}
|
|
51
|
+
},
|
|
52
|
+
"./recognition": {
|
|
53
|
+
"types": "./dist/api/Recognition.d.ts",
|
|
54
|
+
"default": "./dist/api/Recognition.js"
|
|
55
|
+
},
|
|
56
|
+
"./vad": {
|
|
57
|
+
"types": "./dist/api/VoiceActivityDetection.d.ts",
|
|
58
|
+
"default": "./dist/api/VoiceActivityDetection.js"
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
"scripts": {
|
|
62
|
+
"build": "tsup",
|
|
63
|
+
"prepack": "yarn build",
|
|
64
|
+
"test": "tsx -C @storyteller --test test/whisper-server.test.ts && tsx -C @storyteller --test test/whispercpp.test.ts && tsx -C @storyteller --test test/vad.test.ts"
|
|
65
|
+
},
|
|
66
|
+
"bin": "./dist/cli/bin.js",
|
|
67
|
+
"dependencies": {
|
|
68
|
+
"@aws-sdk/client-transcribe-streaming": "~3.817.0",
|
|
69
|
+
"@robingenz/zli": "^0.2.0",
|
|
70
|
+
"chalk": "^5.4.1",
|
|
71
|
+
"ffmpeg-stream": "^1.0.1",
|
|
72
|
+
"fs-extra": "~11.3.0",
|
|
73
|
+
"lodash.merge": "^4.6.2",
|
|
74
|
+
"microsoft-cognitiveservices-speech-sdk": "~1.44.0",
|
|
75
|
+
"openai": "~4.103.0",
|
|
76
|
+
"tar": "^7.4.3",
|
|
77
|
+
"zod": "^3.25.0"
|
|
78
|
+
},
|
|
79
|
+
"devDependencies": {
|
|
80
|
+
"@storyteller-platform/eslint": "0.1.0",
|
|
81
|
+
"@storyteller-platform/tsup": "^0.1.0",
|
|
82
|
+
"@tsconfig/strictest": "^2.0.5",
|
|
83
|
+
"@types/fs-extra": "~11.0.4",
|
|
84
|
+
"@types/lodash.merge": "^4",
|
|
85
|
+
"@types/node": "^24.0.0",
|
|
86
|
+
"@types/tar": "^6.1.13",
|
|
87
|
+
"eslint": "^8.0.0",
|
|
88
|
+
"tsup": "^8.5.0",
|
|
89
|
+
"tsx": "^4.19.2",
|
|
90
|
+
"typescript": "~5.8.3"
|
|
91
|
+
},
|
|
92
|
+
"publishConfig": {
|
|
93
|
+
"access": "public",
|
|
94
|
+
"exports": {
|
|
95
|
+
".": {
|
|
96
|
+
"import": {
|
|
97
|
+
"types": "./dist/index.d.ts",
|
|
98
|
+
"default": "./dist/index.js"
|
|
99
|
+
},
|
|
100
|
+
"require": {
|
|
101
|
+
"types": "./dist/index.d.cts",
|
|
102
|
+
"default": "./dist/index.cjs"
|
|
103
|
+
}
|
|
104
|
+
},
|
|
105
|
+
"./server": {
|
|
106
|
+
"import": {
|
|
107
|
+
"types": "./dist/cli/whisper-server.d.ts",
|
|
108
|
+
"default": "./dist/cli/whisper-server.js"
|
|
109
|
+
},
|
|
110
|
+
"require": {
|
|
111
|
+
"types": "./dist/cli/whisper-server.d.cts",
|
|
112
|
+
"default": "./dist/cli/whisper-server.cjs"
|
|
113
|
+
}
|
|
114
|
+
},
|
|
115
|
+
"./recognition": {
|
|
116
|
+
"types": "./dist/api/Recognition.d.ts",
|
|
117
|
+
"default": "./dist/api/Recognition.js"
|
|
118
|
+
},
|
|
119
|
+
"./vad": {
|
|
120
|
+
"types": "./dist/api/VoiceActivityDetection.d.ts",
|
|
121
|
+
"default": "./dist/api/VoiceActivityDetection.js"
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|