@storyteller-platform/align 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.txt +21 -0
- package/README.md +3 -0
- package/dist/align/align.cjs +525 -0
- package/dist/align/align.d.cts +58 -0
- package/dist/align/align.d.ts +58 -0
- package/dist/align/align.js +458 -0
- package/dist/align/fuzzy.cjs +164 -0
- package/dist/align/fuzzy.d.cts +6 -0
- package/dist/align/fuzzy.d.ts +6 -0
- package/dist/align/fuzzy.js +141 -0
- package/dist/align/getSentenceRanges.cjs +304 -0
- package/dist/align/getSentenceRanges.d.cts +31 -0
- package/dist/align/getSentenceRanges.d.ts +31 -0
- package/dist/align/getSentenceRanges.js +277 -0
- package/dist/align/parse.cjs +63 -0
- package/dist/align/parse.d.cts +30 -0
- package/dist/align/parse.d.ts +30 -0
- package/dist/align/parse.js +51 -0
- package/dist/chunk-BIEQXUOY.js +50 -0
- package/dist/cli/bin.cjs +368 -0
- package/dist/cli/bin.d.cts +1 -0
- package/dist/cli/bin.d.ts +1 -0
- package/dist/cli/bin.js +319 -0
- package/dist/common/ffmpeg.cjs +232 -0
- package/dist/common/ffmpeg.d.cts +33 -0
- package/dist/common/ffmpeg.d.ts +33 -0
- package/dist/common/ffmpeg.js +196 -0
- package/dist/common/logging.cjs +45 -0
- package/dist/common/logging.d.cts +5 -0
- package/dist/common/logging.d.ts +5 -0
- package/dist/common/logging.js +12 -0
- package/dist/common/parse.cjs +73 -0
- package/dist/common/parse.d.cts +28 -0
- package/dist/common/parse.d.ts +28 -0
- package/dist/common/parse.js +56 -0
- package/dist/common/shell.cjs +30 -0
- package/dist/common/shell.d.cts +3 -0
- package/dist/common/shell.d.ts +3 -0
- package/dist/common/shell.js +7 -0
- package/dist/index.cjs +37 -0
- package/dist/index.d.cts +12 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +11 -0
- package/dist/markup/__tests__/markup.test.cjs +464 -0
- package/dist/markup/__tests__/markup.test.d.cts +2 -0
- package/dist/markup/__tests__/markup.test.d.ts +2 -0
- package/dist/markup/__tests__/markup.test.js +441 -0
- package/dist/markup/markup.cjs +316 -0
- package/dist/markup/markup.d.cts +24 -0
- package/dist/markup/markup.d.ts +24 -0
- package/dist/markup/markup.js +254 -0
- package/dist/markup/parse.cjs +55 -0
- package/dist/markup/parse.d.cts +17 -0
- package/dist/markup/parse.d.ts +17 -0
- package/dist/markup/parse.js +43 -0
- package/dist/markup/segmentation.cjs +87 -0
- package/dist/markup/segmentation.d.cts +8 -0
- package/dist/markup/segmentation.d.ts +8 -0
- package/dist/markup/segmentation.js +67 -0
- package/dist/markup/semantics.cjs +79 -0
- package/dist/markup/semantics.d.cts +6 -0
- package/dist/markup/semantics.d.ts +6 -0
- package/dist/markup/semantics.js +53 -0
- package/dist/process/AudioEncoding.cjs +16 -0
- package/dist/process/AudioEncoding.d.cts +8 -0
- package/dist/process/AudioEncoding.d.ts +8 -0
- package/dist/process/AudioEncoding.js +0 -0
- package/dist/process/__tests__/processAudiobook.test.cjs +232 -0
- package/dist/process/__tests__/processAudiobook.test.d.cts +2 -0
- package/dist/process/__tests__/processAudiobook.test.d.ts +2 -0
- package/dist/process/__tests__/processAudiobook.test.js +209 -0
- package/dist/process/mime.cjs +43 -0
- package/dist/process/mime.d.cts +3 -0
- package/dist/process/mime.d.ts +3 -0
- package/dist/process/mime.js +24 -0
- package/dist/process/parse.cjs +84 -0
- package/dist/process/parse.d.cts +28 -0
- package/dist/process/parse.d.ts +28 -0
- package/dist/process/parse.js +73 -0
- package/dist/process/processAudiobook.cjs +220 -0
- package/dist/process/processAudiobook.d.cts +24 -0
- package/dist/process/processAudiobook.d.ts +24 -0
- package/dist/process/processAudiobook.js +166 -0
- package/dist/process/ranges.cjs +203 -0
- package/dist/process/ranges.d.cts +15 -0
- package/dist/process/ranges.d.ts +15 -0
- package/dist/process/ranges.js +137 -0
- package/dist/transcribe/parse.cjs +149 -0
- package/dist/transcribe/parse.d.cts +114 -0
- package/dist/transcribe/parse.d.ts +114 -0
- package/dist/transcribe/parse.js +143 -0
- package/dist/transcribe/transcribe.cjs +400 -0
- package/dist/transcribe/transcribe.d.cts +41 -0
- package/dist/transcribe/transcribe.d.ts +41 -0
- package/dist/transcribe/transcribe.js +330 -0
- package/package.json +96 -0
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Module-interop helpers (these appear to be bundler-generated; verify before hand-editing).
// Short aliases for the Object reflection primitives used by the helpers below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Returns `Symbol[name]` when the runtime defines it (truthy), otherwise the
// registry symbol `Symbol.for("Symbol." + name)`.
var __knownSymbol = (name, symbol) => (symbol = Symbol[name]) ? symbol : Symbol.for("Symbol." + name);
// Throws a TypeError carrying `msg`; wrapped in a function so it can be used as an expression.
var __typeError = (msg) => {
  throw TypeError(msg);
};
// Defines every key of `all` on `target` as an enumerable getter property,
// using the function stored under that key as the getter.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Re-exposes the own properties of `from` on `to` as getters that read through to `from`.
// Keys already present on `to` and the key equal to `except` are skipped. The new
// property is enumerable when `from` has no descriptor for the key or the descriptor is enumerable.
// Returns `to`.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Wraps a `require`d module `mod` in a new object that shares `mod`'s prototype
// (or is a plain `{}` when `mod` is null/undefined) and mirrors its properties.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));
// Builds the CommonJS export object: a fresh object flagged with a
// (non-enumerable) `__esModule: true` that mirrors every property of `mod`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
33
|
+
// Registers `value` for later disposal by pushing `[async, dispose, value]` onto `stack`,
// then returns `value` unchanged.
//  - `value` null/undefined: nothing to dispose; when `async` is truthy a bare `[async]`
//    marker is still pushed.
//  - Throws TypeError("Object expected") when `value` is neither an object nor a function.
//  - Throws TypeError("Object not disposable") when no dispose function is found.
var __using = (stack, value, async) => {
  if (value != null) {
    if (typeof value !== "object" && typeof value !== "function") __typeError("Object expected");
    var dispose, inner;
    // In async mode the asyncDispose method is looked up first...
    if (async) dispose = value[__knownSymbol("asyncDispose")];
    if (dispose === void 0) {
      // ...falling back to the dispose method. `inner` remembers that an async
      // registration is backed by the fallback method so it can be wrapped below.
      dispose = value[__knownSymbol("dispose")];
      if (async) inner = dispose;
    }
    if (typeof dispose !== "function") __typeError("Object not disposable");
    // Wrapper for the fallback case: the wrapped method's return value is discarded,
    // and an exception thrown by it is converted into a rejected promise.
    if (inner) dispose = function() {
      try {
        inner.call(this);
      } catch (e) {
        return Promise.reject(e);
      }
    };
    stack.push([async, dispose, value]);
  } else if (async) {
    stack.push([async]);
  }
  return value;
};
|
|
56
|
+
// Runs the disposers recorded on `stack` (entries shaped `[async, dispose, value]`),
// popping from the end so the most recently pushed entry runs first.
//  - `error` / `hasError` describe an error already thrown by the guarded body, if any.
//  - Returns the result of `next()`: a promise when an entry with a truthy `async` flag
//    is encountered, otherwise undefined.
//  - After the stack is drained, throws the accumulated error when `hasError` is set.
var __callDispose = (stack, error, hasError) => {
  // Use the runtime's SuppressedError when it exists; otherwise build a plain Error
  // carrying the same `name`, `error` and `suppressed` fields.
  var E = typeof SuppressedError === "function" ? SuppressedError : function(e, s, m, _) {
    return _ = Error(m), _.name = "SuppressedError", _.error = e, _.suppressed = s, _;
  };
  // Records a disposal failure: the first failure becomes `error`; later failures
  // wrap the previous `error` as the suppressed one.
  var fail = (e) => error = hasError ? new E(e, error, "An error was suppressed during disposal") : (hasError = true, e);
  var next = (it) => {
    while (it = stack.pop()) {
      try {
        // it[1] is the dispose function (absent for bare `[async]` markers), it[2] its `this`.
        var result = it[1] && it[1].call(it[2]);
        // Async entry: continue draining the stack once the result settles,
        // recording a rejection as a failure first.
        if (it[0]) return Promise.resolve(result).then(next, (e) => (fail(e), next()));
      } catch (e) {
        fail(e);
      }
    }
    if (hasError) throw error;
  };
  return next();
};
|
|
74
|
+
// Public export table for this module. Each export is registered as a getter,
// so the function bindings are read when the property is accessed.
var transcribe_exports = {};
__export(transcribe_exports, {
  transcribe: () => transcribe,
  transcribeFile: () => transcribeFile
});
// Publish the table as the CommonJS exports (flagged with `__esModule`).
module.exports = __toCommonJS(transcribe_exports);
|
|
80
|
+
var import_promises = require("node:fs/promises");
|
|
81
|
+
var os = __toESM(require("node:os"), 1);
|
|
82
|
+
var import_node_path = require("node:path");
|
|
83
|
+
var import_async_semaphore = require("@esfx/async-semaphore");
|
|
84
|
+
var import_audiobook = require("@storyteller-platform/audiobook");
|
|
85
|
+
var import_ghost_story = require("@storyteller-platform/ghost-story");
|
|
86
|
+
/**
 * Transcribes every audio file found (recursively) under `input` and writes one
 * `<audio basename>.json` file containing `{ transcript, timeline }` into `output`.
 *
 * Files whose JSON output can already be read from `output` are reused and not
 * transcribed again. At most `options.parallelism` (default 1) files are
 * transcribed at the same time. The first failure aborts all remaining work.
 *
 * NOTE(review): output names are derived from the basename only, so two audio
 * files with the same name in different sub-directories map to the same JSON file.
 *
 * @param input Directory searched recursively for audio files.
 * @param output Directory (created when missing) that receives the JSON files.
 * @param locale Locale forwarded to `transcribeFile`.
 * @param options Engine selection, credentials, logger, abort signal and progress callback.
 * @returns The timing aggregator filled with the timing of every file transcribed by this call.
 * @throws Error when no audio file is found, when the run is aborted, or when any file fails.
 */
async function transcribe(input, output, locale, options) {
  if (process.env["DEBUG_TRANSCRIBE"] === "true") {
    // NOTE(review): this opens the inspector on all network interfaces; only
    // enable DEBUG_TRANSCRIBE in a trusted environment.
    const inspector = await import("node:inspector");
    inspector.open(9231, "0.0.0.0", true);
  }
  const logger = options.logger;
  const parallelism = options.parallelism ?? 1;
  const processors = options.processors ?? 1;
  const threads = options.threads ?? 4;
  const semaphore = new import_async_semaphore.AsyncSemaphore(parallelism);
  const controller = new AbortController();
  // `new AbortSignal()` is an illegal constructor call and throws, so the caller's
  // signal is only combined with ours when one was actually supplied.
  const signal = options.signal ? AbortSignal.any([options.signal, controller.signal]) : controller.signal;
  await (0, import_promises.mkdir)(output, { recursive: true });
  const allFiles = await (0, import_promises.readdir)(input, { recursive: true });
  const filenames = allFiles.filter((f) => (0, import_audiobook.isAudioFile)(f));
  if (!filenames.length) {
    throw new Error(
      `Failed to transcribe audio: found no audio files in ${input}`
    );
  }
  const engine = options.engine ?? "whisper.cpp";
  const model = options.model ?? "tiny.en";
  if (engine === "whisper.cpp") {
    await (0, import_ghost_story.ensureWhisperInstalled)({
      model,
      printOutput: ["debug", "info"].includes(logger?.level ?? "silent"),
      signal
    });
  }
  const timing = (0, import_ghost_story.createAggregator)();
  timing.setMetadata("engine", engine);
  timing.setMetadata("parallelization", parallelism);
  timing.setMetadata("processors", processors);
  timing.setMetadata("threads", threads);
  // Progress (0..1) of each file that has started; files not yet started count as 0.
  const perFileProgress = /* @__PURE__ */ new Map();
  function overallProgress() {
    let total = 0;
    for (const fileProgress of perFileProgress.values()) total += fileProgress;
    return total / filenames.length;
  }
  function throwIfAborted() {
    if (signal.aborted) throw new Error("Aborted");
  }
  // Handles a single audio file: reuse an existing transcription or create a new one.
  async function transcribeOne(filename) {
    throwIfAborted();
    const filepath = (0, import_node_path.join)(input, filename);
    const transcriptionFilepath = (0, import_node_path.join)(
      output,
      `${(0, import_node_path.basename)(filename, (0, import_node_path.extname)(filename))}.json`
    );
    let alreadyTranscribed = false;
    try {
      await (0, import_promises.readFile)(transcriptionFilepath, {
        encoding: "utf-8",
        signal
      });
      alreadyTranscribed = true;
    } catch {
      // No readable transcription yet (or the read was aborted, which is
      // reported by the abort check right below).
    }
    throwIfAborted();
    if (alreadyTranscribed) {
      logger?.info(`Found existing transcription for ${filepath}`);
      perFileProgress.set(filename, 1);
      options.onProgress?.(overallProgress());
      return;
    }
    // Acquire the permit first; it is released in `finally` only once it is held.
    await semaphore.wait();
    try {
      // The run may have been aborted while this file was queued.
      throwIfAborted();
      const transcription = await transcribeFile(filepath, locale, {
        ...options,
        signal,
        engine,
        model,
        processors,
        threads,
        onProgress: (progress) => {
          perFileProgress.set(filename, progress);
          const updatedProgress = overallProgress();
          logger?.info(`Progress: ${Math.floor(updatedProgress * 100)}%`);
          options.onProgress?.(updatedProgress);
        }
      });
      logger?.info(
        (0, import_ghost_story.formatSingleReport)(
          transcription.timing,
          `Transcription Timing Report for ${filepath}`
        )
      );
      timing.add(transcription.timing);
      await (0, import_promises.writeFile)(
        transcriptionFilepath,
        JSON.stringify({
          transcript: transcription.transcript,
          timeline: transcription.timeline
        }),
        { signal }
      );
      perFileProgress.set(filename, 1);
      options.onProgress?.(overallProgress());
    } finally {
      semaphore.release();
    }
  }
  await Promise.all(
    filenames.map(
      (filename) => transcribeOne(filename).catch((e) => {
        // Stop the remaining files as soon as one of them fails.
        controller.abort(e);
        throw e;
      })
    )
  );
  return timing;
}
|
|
202
|
+
async function transcribeFile(input, locale, options) {
|
|
203
|
+
var _a, _b;
|
|
204
|
+
const audioFilepath = (0, import_node_path.resolve)(process.cwd(), input);
|
|
205
|
+
const sharedOptions = {
|
|
206
|
+
signal: options.signal,
|
|
207
|
+
language: locale.language
|
|
208
|
+
};
|
|
209
|
+
switch (options.engine) {
|
|
210
|
+
case "whisper.cpp": {
|
|
211
|
+
const fallbackVariant = getCpuOverrideVariant(
|
|
212
|
+
options.whisperCpuOverride ?? null
|
|
213
|
+
);
|
|
214
|
+
const whisperOptions = await (0, import_ghost_story.ensureWhisperInstalled)({
|
|
215
|
+
model: options.model,
|
|
216
|
+
variant: fallbackVariant,
|
|
217
|
+
printOutput: ["debug", "info"].includes(
|
|
218
|
+
((_a = options.logger) == null ? void 0 : _a.level) ?? "silent"
|
|
219
|
+
),
|
|
220
|
+
signal: options.signal
|
|
221
|
+
});
|
|
222
|
+
(_b = options.logger) == null ? void 0 : _b.info(`Transcribing audio file ${audioFilepath}`);
|
|
223
|
+
return (0, import_ghost_story.recognize)(audioFilepath, {
|
|
224
|
+
engine: options.engine,
|
|
225
|
+
options: {
|
|
226
|
+
flashAttention: true,
|
|
227
|
+
model: getWhisperCppModelId(sharedOptions.language, options.model),
|
|
228
|
+
processors: options.processors,
|
|
229
|
+
threads: options.threads,
|
|
230
|
+
onProgress: (progress) => {
|
|
231
|
+
var _a2;
|
|
232
|
+
if (options.onProgress) {
|
|
233
|
+
options.onProgress(progress);
|
|
234
|
+
return;
|
|
235
|
+
}
|
|
236
|
+
(_a2 = options.logger) == null ? void 0 : _a2.info(
|
|
237
|
+
`Transcribing ${audioFilepath} progress: ${Math.floor(progress * 100)}%`
|
|
238
|
+
);
|
|
239
|
+
},
|
|
240
|
+
...whisperOptions
|
|
241
|
+
},
|
|
242
|
+
...sharedOptions
|
|
243
|
+
});
|
|
244
|
+
}
|
|
245
|
+
case "google-cloud": {
|
|
246
|
+
if (!options.googleCloudApiKey) {
|
|
247
|
+
throw new Error(
|
|
248
|
+
"Failed to start transcription with engine google-cloud: missing API key"
|
|
249
|
+
);
|
|
250
|
+
}
|
|
251
|
+
return (0, import_ghost_story.recognize)(audioFilepath, {
|
|
252
|
+
engine: "google-cloud",
|
|
253
|
+
options: {
|
|
254
|
+
apiKey: options.googleCloudApiKey
|
|
255
|
+
},
|
|
256
|
+
...sharedOptions
|
|
257
|
+
});
|
|
258
|
+
}
|
|
259
|
+
case "microsoft-azure": {
|
|
260
|
+
if (!options.azureServiceRegion) {
|
|
261
|
+
throw new Error(
|
|
262
|
+
"Failed to start transcription with engine microsoft-azure: missing service region"
|
|
263
|
+
);
|
|
264
|
+
}
|
|
265
|
+
if (!options.azureSubscriptionKey) {
|
|
266
|
+
throw new Error(
|
|
267
|
+
"Failed to start transcription with engine microsoft-azure: missing subscription key"
|
|
268
|
+
);
|
|
269
|
+
}
|
|
270
|
+
return (0, import_ghost_story.recognize)(audioFilepath, {
|
|
271
|
+
engine: "microsoft-azure",
|
|
272
|
+
options: {
|
|
273
|
+
serviceRegion: options.azureServiceRegion,
|
|
274
|
+
subscriptionKey: options.azureSubscriptionKey
|
|
275
|
+
},
|
|
276
|
+
...sharedOptions
|
|
277
|
+
});
|
|
278
|
+
}
|
|
279
|
+
case "amazon-transcribe": {
|
|
280
|
+
if (!options.amazonTranscribeRegion) {
|
|
281
|
+
throw new Error(
|
|
282
|
+
"Failed to start transcription with engine amazon-transcribe: missing region"
|
|
283
|
+
);
|
|
284
|
+
}
|
|
285
|
+
if (!options.amazonTranscribeAccessKeyId) {
|
|
286
|
+
throw new Error(
|
|
287
|
+
"Failed to start transcription with engine amazon-transcribe: missing access key id"
|
|
288
|
+
);
|
|
289
|
+
}
|
|
290
|
+
if (!options.amazonTranscribeSecretAccessKey) {
|
|
291
|
+
throw new Error(
|
|
292
|
+
"Failed to start transcription with engine amazon-transcribe: missing access secret access key"
|
|
293
|
+
);
|
|
294
|
+
}
|
|
295
|
+
if (!options.amazonTranscribeBucketName) {
|
|
296
|
+
throw new Error(
|
|
297
|
+
"Failed to start transcription with engine amazon-transcribe: missing bucket name"
|
|
298
|
+
);
|
|
299
|
+
}
|
|
300
|
+
return (0, import_ghost_story.recognize)(audioFilepath, {
|
|
301
|
+
engine: "amazon-transcribe",
|
|
302
|
+
options: {
|
|
303
|
+
region: options.amazonTranscribeRegion,
|
|
304
|
+
accessKeyId: options.amazonTranscribeAccessKeyId,
|
|
305
|
+
secretAccessKey: options.amazonTranscribeSecretAccessKey,
|
|
306
|
+
bucketName: options.amazonTranscribeBucketName
|
|
307
|
+
},
|
|
308
|
+
...sharedOptions
|
|
309
|
+
});
|
|
310
|
+
}
|
|
311
|
+
case "openai-cloud": {
|
|
312
|
+
return (0, import_ghost_story.recognize)(audioFilepath, {
|
|
313
|
+
engine: "openai-cloud",
|
|
314
|
+
options: {
|
|
315
|
+
...options.openAiApiKey && { apiKey: options.openAiApiKey },
|
|
316
|
+
...options.openAiOrganization && {
|
|
317
|
+
organization: options.openAiOrganization
|
|
318
|
+
},
|
|
319
|
+
...options.openAiBaseUrl && { baseURL: options.openAiBaseUrl },
|
|
320
|
+
model: options.openAiModelName ?? "whisper-1"
|
|
321
|
+
},
|
|
322
|
+
...sharedOptions
|
|
323
|
+
});
|
|
324
|
+
}
|
|
325
|
+
case "whisper-server": {
|
|
326
|
+
if (!options.whisperServerUrl) {
|
|
327
|
+
throw new Error(
|
|
328
|
+
"Failed to start transcription with engine whisper-server: missing server url"
|
|
329
|
+
);
|
|
330
|
+
}
|
|
331
|
+
return (0, import_ghost_story.recognize)(audioFilepath, {
|
|
332
|
+
engine: "whisper-server",
|
|
333
|
+
options: {
|
|
334
|
+
baseURL: options.whisperServerUrl,
|
|
335
|
+
...options.whisperServerApiKey && {
|
|
336
|
+
apiKey: options.whisperServerApiKey
|
|
337
|
+
}
|
|
338
|
+
},
|
|
339
|
+
...sharedOptions
|
|
340
|
+
});
|
|
341
|
+
}
|
|
342
|
+
case "deepgram": {
|
|
343
|
+
if (!options.deepgramApiKey) {
|
|
344
|
+
throw new Error(
|
|
345
|
+
"Failed to start transcription with engine deepgram: missing api key"
|
|
346
|
+
);
|
|
347
|
+
}
|
|
348
|
+
return (0, import_ghost_story.recognize)(audioFilepath, {
|
|
349
|
+
engine: "deepgram",
|
|
350
|
+
options: {
|
|
351
|
+
apiKey: options.deepgramApiKey,
|
|
352
|
+
// nova-3 is just as cheap as nova-2 and has better performance
|
|
353
|
+
model: options.deepgramModel ?? "nova-3",
|
|
354
|
+
punctuate: true
|
|
355
|
+
},
|
|
356
|
+
...sharedOptions
|
|
357
|
+
});
|
|
358
|
+
}
|
|
359
|
+
default: {
|
|
360
|
+
throw new Error(
|
|
361
|
+
`Unknown transcription engine: ${options.engine}`
|
|
362
|
+
);
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
/**
 * Maps a requested whisper model name to the model id handed to whisper.cpp.
 *
 * - "large" is an alias for "large-v3-turbo".
 * - For English, non-"large*" models without an ".en" marker get ".en" inserted
 *   before the "-q…" quantization suffix (or appended when there is none).
 * - Every other combination is returned unchanged.
 *
 * @param language Language code of the audio (e.g. "en").
 * @param modelType Requested model name, optionally with a "-q…" suffix.
 * @returns The model id to use.
 */
function getWhisperCppModelId(language, modelType) {
  if (modelType === "large") {
    return "large-v3-turbo";
  }
  const keepAsIs = language !== "en" || modelType.startsWith("large") || modelType.includes(".en");
  if (keepAsIs) {
    return modelType;
  }
  const quantStart = modelType.indexOf("-q");
  const baseName = quantStart === -1 ? modelType : modelType.slice(0, quantStart);
  const quantSuffix = quantStart === -1 ? "" : modelType.slice(quantStart);
  return `${baseName}.en${quantSuffix}`;
}
|
|
374
|
+
function getCpuOverrideVariant(override) {
|
|
375
|
+
if (!override) return void 0;
|
|
376
|
+
const platform = os.platform();
|
|
377
|
+
const arch = os.arch();
|
|
378
|
+
if (platform === "linux" && arch === "x64") {
|
|
379
|
+
const variant = override === "blas" ? "linux-x64-blas" : "linux-x64-cpu";
|
|
380
|
+
return (0, import_ghost_story.applyLegacyCpuFallback)(variant);
|
|
381
|
+
}
|
|
382
|
+
if (platform === "linux" && arch === "arm64") {
|
|
383
|
+
return "linux-arm64-cpu";
|
|
384
|
+
}
|
|
385
|
+
if (platform === "darwin" && arch === "arm64") {
|
|
386
|
+
return "darwin-arm64-cpu";
|
|
387
|
+
}
|
|
388
|
+
if (platform === "darwin" && arch === "x64") {
|
|
389
|
+
return "darwin-x64-cpu";
|
|
390
|
+
}
|
|
391
|
+
if (platform === "win32") {
|
|
392
|
+
return "windows-x64-cpu";
|
|
393
|
+
}
|
|
394
|
+
return void 0;
|
|
395
|
+
}
|
|
396
|
+
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so this assignment never runs; the statement
// only spells out the export names in the source text.
0 && (module.exports = {
  transcribe,
  transcribeFile
});
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import * as _storyteller_platform_ghost_story from '@storyteller-platform/ghost-story';
|
|
2
|
+
import { RecognitionEngine, WhisperModel, TimingAggregator } from '@storyteller-platform/ghost-story';
|
|
3
|
+
import { Logger } from 'pino';
|
|
4
|
+
|
|
5
|
+
/** CPU build override for the whisper.cpp engine; `null` requests no override. */
type WhisperCpuOverride = "blas" | "cpu" | null;
|
|
6
|
+
/**
 * Options accepted by `transcribe`. Every field is optional; the defaults noted
 * below are the ones applied by the compiled implementation shipped with this declaration.
 */
interface TranscribeOptions {
    /** Called with the progress of the run as a fraction. */
    onProgress?: ((progress: number) => void) | null | undefined;
    /** Maximum number of files transcribed at the same time. Defaults to 1. */
    parallelism?: number | null | undefined;
    /** Signal used to abort the run. */
    signal?: AbortSignal | null | undefined;
    /** Recognition engine. Defaults to "whisper.cpp". */
    engine?: RecognitionEngine | null | undefined;
    /** Whisper model. Defaults to "tiny.en". */
    model?: WhisperModel | null | undefined;
    /** Forwarded to the whisper.cpp engine. Defaults to 1. */
    processors?: number | null | undefined;
    /** Forwarded to the whisper.cpp engine. Defaults to 4. */
    threads?: number | null | undefined;
    /** Forces a CPU build variant of whisper.cpp for the current platform. */
    whisperCpuOverride?: WhisperCpuOverride | null | undefined;
    /** Logger for informational messages; its level also decides whether installer output is printed. */
    logger?: Logger | null | undefined;
    /** Required when `engine` is "google-cloud". */
    googleCloudApiKey?: string | null | undefined;
    /** Required when `engine` is "microsoft-azure". */
    azureServiceRegion?: string | null | undefined;
    /** Required when `engine` is "microsoft-azure". */
    azureSubscriptionKey?: string | null | undefined;
    /** Required when `engine` is "amazon-transcribe". */
    amazonTranscribeRegion?: string | null | undefined;
    /** Required when `engine` is "amazon-transcribe". */
    amazonTranscribeAccessKeyId?: string | null | undefined;
    /** Required when `engine` is "amazon-transcribe". */
    amazonTranscribeSecretAccessKey?: string | null | undefined;
    /** Required when `engine` is "amazon-transcribe". */
    amazonTranscribeBucketName?: string | null | undefined;
    /** Optional; forwarded as `apiKey` when `engine` is "openai-cloud". */
    openAiApiKey?: string | null | undefined;
    /** Optional; forwarded as `organization` when `engine` is "openai-cloud". */
    openAiOrganization?: string | null | undefined;
    /** Optional; forwarded as `baseURL` when `engine` is "openai-cloud". */
    openAiBaseUrl?: string | null | undefined;
    /** Model name for "openai-cloud". Defaults to "whisper-1". */
    openAiModelName?: string | null | undefined;
    /** Required when `engine` is "whisper-server". */
    whisperServerUrl?: string | null | undefined;
    /** Optional; forwarded as `apiKey` when `engine` is "whisper-server". */
    whisperServerApiKey?: string | null | undefined;
    /** Required when `engine` is "deepgram". */
    deepgramApiKey?: string | null | undefined;
    /** Model name for "deepgram". Defaults to "nova-3". */
    deepgramModel?: string | null | undefined;
}
|
|
32
|
+
/**
 * Transcribes every audio file found under `input` and writes one JSON file per
 * audio file into `output`.
 *
 * @param input Directory searched recursively for audio files.
 * @param output Directory that receives the `<audio basename>.json` files.
 * @param locale Locale whose language is forwarded to the recognition engine.
 * @param options Engine selection, credentials, logging, abort and progress settings.
 * @returns The aggregated timing information of the run.
 * @throws Error when `input` contains no audio files or a file fails to transcribe.
 */
declare function transcribe(input: string, output: string, locale: Intl.Locale, options: TranscribeOptions): Promise<TimingAggregator>;
|
|
33
|
+
/**
 * Options accepted by `transcribeFile`: the same as `TranscribeOptions`, except
 * that the four fields below are required (no defaults are applied for them).
 */
interface TranscribeFileOptions extends Omit<TranscribeOptions, "engine" | "model" | "processors" | "threads"> {
    /** Recognition engine to use; an unknown name is rejected with an error. */
    engine: RecognitionEngine;
    /** Whisper model; used by the "whisper.cpp" engine. */
    model: WhisperModel;
    /** Forwarded to the "whisper.cpp" engine. */
    processors: number;
    /** Forwarded to the "whisper.cpp" engine. */
    threads: number;
}
|
|
39
|
+
/**
 * Transcribes a single audio file with the engine named in `options.engine`.
 *
 * @param input Path of the audio file, resolved against the current working directory.
 * @param locale Locale whose language is forwarded to the recognition engine.
 * @param options Engine, model and credentials; required credentials depend on the engine.
 * @returns The recognition result produced by the engine.
 * @throws Error when a credential required by the engine is missing or the engine is unknown.
 */
declare function transcribeFile(input: string, locale: Intl.Locale, options: TranscribeFileOptions): Promise<_storyteller_platform_ghost_story.RecognitionResult>;
|
|
40
|
+
|
|
41
|
+
export { type TranscribeFileOptions, type TranscribeOptions, transcribe, transcribeFile };
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import * as _storyteller_platform_ghost_story from '@storyteller-platform/ghost-story';
|
|
2
|
+
import { RecognitionEngine, WhisperModel, TimingAggregator } from '@storyteller-platform/ghost-story';
|
|
3
|
+
import { Logger } from 'pino';
|
|
4
|
+
|
|
5
|
+
/** CPU build override for the whisper.cpp engine; `null` requests no override. */
type WhisperCpuOverride = "blas" | "cpu" | null;
|
|
6
|
+
/**
 * Options accepted by `transcribe`. Every field is optional; the defaults noted
 * below are the ones applied by the compiled implementation shipped with this declaration.
 */
interface TranscribeOptions {
    /** Called with the progress of the run as a fraction. */
    onProgress?: ((progress: number) => void) | null | undefined;
    /** Maximum number of files transcribed at the same time. Defaults to 1. */
    parallelism?: number | null | undefined;
    /** Signal used to abort the run. */
    signal?: AbortSignal | null | undefined;
    /** Recognition engine. Defaults to "whisper.cpp". */
    engine?: RecognitionEngine | null | undefined;
    /** Whisper model. Defaults to "tiny.en". */
    model?: WhisperModel | null | undefined;
    /** Forwarded to the whisper.cpp engine. Defaults to 1. */
    processors?: number | null | undefined;
    /** Forwarded to the whisper.cpp engine. Defaults to 4. */
    threads?: number | null | undefined;
    /** Forces a CPU build variant of whisper.cpp for the current platform. */
    whisperCpuOverride?: WhisperCpuOverride | null | undefined;
    /** Logger for informational messages; its level also decides whether installer output is printed. */
    logger?: Logger | null | undefined;
    /** Required when `engine` is "google-cloud". */
    googleCloudApiKey?: string | null | undefined;
    /** Required when `engine` is "microsoft-azure". */
    azureServiceRegion?: string | null | undefined;
    /** Required when `engine` is "microsoft-azure". */
    azureSubscriptionKey?: string | null | undefined;
    /** Required when `engine` is "amazon-transcribe". */
    amazonTranscribeRegion?: string | null | undefined;
    /** Required when `engine` is "amazon-transcribe". */
    amazonTranscribeAccessKeyId?: string | null | undefined;
    /** Required when `engine` is "amazon-transcribe". */
    amazonTranscribeSecretAccessKey?: string | null | undefined;
    /** Required when `engine` is "amazon-transcribe". */
    amazonTranscribeBucketName?: string | null | undefined;
    /** Optional; forwarded as `apiKey` when `engine` is "openai-cloud". */
    openAiApiKey?: string | null | undefined;
    /** Optional; forwarded as `organization` when `engine` is "openai-cloud". */
    openAiOrganization?: string | null | undefined;
    /** Optional; forwarded as `baseURL` when `engine` is "openai-cloud". */
    openAiBaseUrl?: string | null | undefined;
    /** Model name for "openai-cloud". Defaults to "whisper-1". */
    openAiModelName?: string | null | undefined;
    /** Required when `engine` is "whisper-server". */
    whisperServerUrl?: string | null | undefined;
    /** Optional; forwarded as `apiKey` when `engine` is "whisper-server". */
    whisperServerApiKey?: string | null | undefined;
    /** Required when `engine` is "deepgram". */
    deepgramApiKey?: string | null | undefined;
    /** Model name for "deepgram". Defaults to "nova-3". */
    deepgramModel?: string | null | undefined;
}
|
|
32
|
+
/**
 * Transcribes every audio file found under `input` and writes one JSON file per
 * audio file into `output`.
 *
 * @param input Directory searched recursively for audio files.
 * @param output Directory that receives the `<audio basename>.json` files.
 * @param locale Locale whose language is forwarded to the recognition engine.
 * @param options Engine selection, credentials, logging, abort and progress settings.
 * @returns The aggregated timing information of the run.
 * @throws Error when `input` contains no audio files or a file fails to transcribe.
 */
declare function transcribe(input: string, output: string, locale: Intl.Locale, options: TranscribeOptions): Promise<TimingAggregator>;
|
|
33
|
+
/**
 * Options accepted by `transcribeFile`: the same as `TranscribeOptions`, except
 * that the four fields below are required (no defaults are applied for them).
 */
interface TranscribeFileOptions extends Omit<TranscribeOptions, "engine" | "model" | "processors" | "threads"> {
    /** Recognition engine to use; an unknown name is rejected with an error. */
    engine: RecognitionEngine;
    /** Whisper model; used by the "whisper.cpp" engine. */
    model: WhisperModel;
    /** Forwarded to the "whisper.cpp" engine. */
    processors: number;
    /** Forwarded to the "whisper.cpp" engine. */
    threads: number;
}
|
|
39
|
+
/**
 * Transcribes a single audio file with the engine named in `options.engine`.
 *
 * @param input Path of the audio file, resolved against the current working directory.
 * @param locale Locale whose language is forwarded to the recognition engine.
 * @param options Engine, model and credentials; required credentials depend on the engine.
 * @returns The recognition result produced by the engine.
 * @throws Error when a credential required by the engine is missing or the engine is unknown.
 */
declare function transcribeFile(input: string, locale: Intl.Locale, options: TranscribeFileOptions): Promise<_storyteller_platform_ghost_story.RecognitionResult>;
|
|
40
|
+
|
|
41
|
+
export { type TranscribeFileOptions, type TranscribeOptions, transcribe, transcribeFile };
|