@storyteller-platform/align 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/LICENSE.txt +21 -0
  2. package/README.md +3 -0
  3. package/dist/align/align.cjs +525 -0
  4. package/dist/align/align.d.cts +58 -0
  5. package/dist/align/align.d.ts +58 -0
  6. package/dist/align/align.js +458 -0
  7. package/dist/align/fuzzy.cjs +164 -0
  8. package/dist/align/fuzzy.d.cts +6 -0
  9. package/dist/align/fuzzy.d.ts +6 -0
  10. package/dist/align/fuzzy.js +141 -0
  11. package/dist/align/getSentenceRanges.cjs +304 -0
  12. package/dist/align/getSentenceRanges.d.cts +31 -0
  13. package/dist/align/getSentenceRanges.d.ts +31 -0
  14. package/dist/align/getSentenceRanges.js +277 -0
  15. package/dist/align/parse.cjs +63 -0
  16. package/dist/align/parse.d.cts +30 -0
  17. package/dist/align/parse.d.ts +30 -0
  18. package/dist/align/parse.js +51 -0
  19. package/dist/chunk-BIEQXUOY.js +50 -0
  20. package/dist/cli/bin.cjs +368 -0
  21. package/dist/cli/bin.d.cts +1 -0
  22. package/dist/cli/bin.d.ts +1 -0
  23. package/dist/cli/bin.js +319 -0
  24. package/dist/common/ffmpeg.cjs +232 -0
  25. package/dist/common/ffmpeg.d.cts +33 -0
  26. package/dist/common/ffmpeg.d.ts +33 -0
  27. package/dist/common/ffmpeg.js +196 -0
  28. package/dist/common/logging.cjs +45 -0
  29. package/dist/common/logging.d.cts +5 -0
  30. package/dist/common/logging.d.ts +5 -0
  31. package/dist/common/logging.js +12 -0
  32. package/dist/common/parse.cjs +73 -0
  33. package/dist/common/parse.d.cts +28 -0
  34. package/dist/common/parse.d.ts +28 -0
  35. package/dist/common/parse.js +56 -0
  36. package/dist/common/shell.cjs +30 -0
  37. package/dist/common/shell.d.cts +3 -0
  38. package/dist/common/shell.d.ts +3 -0
  39. package/dist/common/shell.js +7 -0
  40. package/dist/index.cjs +37 -0
  41. package/dist/index.d.cts +12 -0
  42. package/dist/index.d.ts +12 -0
  43. package/dist/index.js +11 -0
  44. package/dist/markup/__tests__/markup.test.cjs +464 -0
  45. package/dist/markup/__tests__/markup.test.d.cts +2 -0
  46. package/dist/markup/__tests__/markup.test.d.ts +2 -0
  47. package/dist/markup/__tests__/markup.test.js +441 -0
  48. package/dist/markup/markup.cjs +316 -0
  49. package/dist/markup/markup.d.cts +24 -0
  50. package/dist/markup/markup.d.ts +24 -0
  51. package/dist/markup/markup.js +254 -0
  52. package/dist/markup/parse.cjs +55 -0
  53. package/dist/markup/parse.d.cts +17 -0
  54. package/dist/markup/parse.d.ts +17 -0
  55. package/dist/markup/parse.js +43 -0
  56. package/dist/markup/segmentation.cjs +87 -0
  57. package/dist/markup/segmentation.d.cts +8 -0
  58. package/dist/markup/segmentation.d.ts +8 -0
  59. package/dist/markup/segmentation.js +67 -0
  60. package/dist/markup/semantics.cjs +79 -0
  61. package/dist/markup/semantics.d.cts +6 -0
  62. package/dist/markup/semantics.d.ts +6 -0
  63. package/dist/markup/semantics.js +53 -0
  64. package/dist/process/AudioEncoding.cjs +16 -0
  65. package/dist/process/AudioEncoding.d.cts +8 -0
  66. package/dist/process/AudioEncoding.d.ts +8 -0
  67. package/dist/process/AudioEncoding.js +0 -0
  68. package/dist/process/__tests__/processAudiobook.test.cjs +232 -0
  69. package/dist/process/__tests__/processAudiobook.test.d.cts +2 -0
  70. package/dist/process/__tests__/processAudiobook.test.d.ts +2 -0
  71. package/dist/process/__tests__/processAudiobook.test.js +209 -0
  72. package/dist/process/mime.cjs +43 -0
  73. package/dist/process/mime.d.cts +3 -0
  74. package/dist/process/mime.d.ts +3 -0
  75. package/dist/process/mime.js +24 -0
  76. package/dist/process/parse.cjs +84 -0
  77. package/dist/process/parse.d.cts +28 -0
  78. package/dist/process/parse.d.ts +28 -0
  79. package/dist/process/parse.js +73 -0
  80. package/dist/process/processAudiobook.cjs +220 -0
  81. package/dist/process/processAudiobook.d.cts +24 -0
  82. package/dist/process/processAudiobook.d.ts +24 -0
  83. package/dist/process/processAudiobook.js +166 -0
  84. package/dist/process/ranges.cjs +203 -0
  85. package/dist/process/ranges.d.cts +15 -0
  86. package/dist/process/ranges.d.ts +15 -0
  87. package/dist/process/ranges.js +137 -0
  88. package/dist/transcribe/parse.cjs +149 -0
  89. package/dist/transcribe/parse.d.cts +114 -0
  90. package/dist/transcribe/parse.d.ts +114 -0
  91. package/dist/transcribe/parse.js +143 -0
  92. package/dist/transcribe/transcribe.cjs +400 -0
  93. package/dist/transcribe/transcribe.d.cts +41 -0
  94. package/dist/transcribe/transcribe.d.ts +41 -0
  95. package/dist/transcribe/transcribe.js +330 -0
  96. package/package.json +96 -0
@@ -0,0 +1,330 @@
1
+ import {
2
+ __callDispose,
3
+ __using
4
+ } from "../chunk-BIEQXUOY.js";
5
+ import { mkdir, readFile, readdir, writeFile } from "node:fs/promises";
6
+ import * as os from "node:os";
7
+ import { basename, extname, join, resolve } from "node:path";
8
+ import { AsyncSemaphore } from "@esfx/async-semaphore";
9
+ import { isAudioFile } from "@storyteller-platform/audiobook";
10
+ import {
11
+ applyLegacyCpuFallback,
12
+ createAggregator,
13
+ ensureWhisperInstalled,
14
+ formatSingleReport,
15
+ recognize
16
+ } from "@storyteller-platform/ghost-story";
17
/**
 * Transcribes every audio file found (recursively) under `input` and writes
 * one `<basename>.json` file per audio file into `output`.
 *
 * Files whose JSON output can already be read from `output` are skipped.
 * Work is limited to `options.parallelism` concurrent files; the first
 * failure aborts the shared signal so the remaining files stop as well.
 *
 * @param {string} input - Directory that is scanned recursively for audio files.
 * @param {string} output - Directory that receives the JSON transcriptions
 *   (created if missing).
 * @param {object} locale - Forwarded to `transcribeFile`, which reads
 *   `locale.language`.
 * @param {object} options - Read here: `parallelism` (default 1), `signal`,
 *   `engine` (default "whisper.cpp"), `model` (default "tiny.en"), `logger`,
 *   `processors` (default 1), `threads` (default 4) and `onProgress`. All
 *   options are also forwarded to `transcribeFile`.
 * @returns {Promise<object>} The timing aggregator created by
 *   `createAggregator()`, holding the timings of every file transcribed by
 *   this call.
 * @throws {Error} When `input` contains no audio files, when the run is
 *   aborted, or when transcribing/writing any file fails.
 */
async function transcribe(input, output, locale, options) {
  if (process.env["DEBUG_TRANSCRIBE"] === "true") {
    // NOTE(review): this listens on every network interface and the third
    // argument makes the call block until a debugger client connects. Only
    // enable DEBUG_TRANSCRIBE on a trusted network.
    const inspector = await import("node:inspector");
    inspector.open(9231, "0.0.0.0", true);
  }

  const parallelism = options.parallelism ?? 1;
  const processors = options.processors ?? 1;
  const threads = options.threads ?? 4;

  const semaphore = new AsyncSemaphore(parallelism);
  const controller = new AbortController();
  // `AbortSignal` cannot be constructed directly, so only combine signals
  // when the caller actually supplied one.
  const signal = options.signal
    ? AbortSignal.any([options.signal, controller.signal])
    : controller.signal;

  await mkdir(output, { recursive: true });
  const allFiles = await readdir(input, { recursive: true });
  const filenames = allFiles.filter((f) => isAudioFile(f));
  if (filenames.length === 0) {
    throw new Error(
      `Failed to transcribe audio: found no audio files in ${input}`
    );
  }

  const engine = options.engine ?? "whisper.cpp";
  const model = options.model ?? "tiny.en";
  if (engine === "whisper.cpp") {
    await ensureWhisperInstalled({
      model,
      printOutput: ["debug", "info"].includes(
        options.logger?.level ?? "silent"
      ),
      signal
    });
  }

  const perFileProgress = new Map();
  const timing = createAggregator();
  timing.setMetadata("engine", engine);
  timing.setMetadata("parallelization", parallelism);
  timing.setMetadata("processors", processors);
  timing.setMetadata("threads", threads);

  function throwIfAborted() {
    if (signal.aborted) throw new Error("Aborted");
  }

  // Records the progress (0..1) of one file and reports the average over all
  // files, so the overall value never exceeds 1.
  function reportProgress(filename, progress) {
    perFileProgress.set(filename, progress);
    let total = 0;
    for (const value of perFileProgress.values()) {
      total += value;
    }
    const overall = total / filenames.length;
    options.logger?.info(`Progress: ${Math.floor(overall * 100)}%`);
    options.onProgress?.(overall);
  }

  async function transcribeOne(filename) {
    throwIfAborted();
    const filepath = join(input, filename);
    const transcriptionFilepath = join(
      output,
      `${basename(filename, extname(filename))}.json`
    );

    let hasExistingTranscription = false;
    try {
      await readFile(transcriptionFilepath, { encoding: "utf-8", signal });
      hasExistingTranscription = true;
    } catch {
      // Missing or unreadable output: fall through and transcribe the file.
    }
    throwIfAborted();
    if (hasExistingTranscription) {
      options.logger?.info(`Found existing transcription for ${filepath}`);
      reportProgress(filename, 1);
      return;
    }

    // Acquire first, release in `finally`, so a permit is only ever returned
    // after it has actually been taken.
    await semaphore.wait();
    try {
      // Another file may have failed while this one was queued.
      throwIfAborted();
      const transcription = await transcribeFile(filepath, locale, {
        ...options,
        signal,
        engine,
        model,
        processors,
        threads,
        onProgress: (progress) => reportProgress(filename, progress)
      });
      options.logger?.info(
        formatSingleReport(
          transcription.timing,
          `Transcription Timing Report for ${filepath}`
        )
      );
      timing.add(transcription.timing);
      await writeFile(
        transcriptionFilepath,
        JSON.stringify({
          transcript: transcription.transcript,
          timeline: transcription.timeline
        }),
        { signal }
      );
      reportProgress(filename, 1);
    } finally {
      semaphore.release();
    }
  }

  await Promise.all(
    filenames.map(async (filename) => {
      try {
        await transcribeOne(filename);
      } catch (error) {
        // Stop the remaining files as soon as one of them fails.
        controller.abort(error);
        throw error;
      }
    })
  );
  return timing;
}
133
+ async function transcribeFile(input, locale, options) {
134
+ var _a, _b;
135
+ const audioFilepath = resolve(process.cwd(), input);
136
+ const sharedOptions = {
137
+ signal: options.signal,
138
+ language: locale.language
139
+ };
140
+ switch (options.engine) {
141
+ case "whisper.cpp": {
142
+ const fallbackVariant = getCpuOverrideVariant(
143
+ options.whisperCpuOverride ?? null
144
+ );
145
+ const whisperOptions = await ensureWhisperInstalled({
146
+ model: options.model,
147
+ variant: fallbackVariant,
148
+ printOutput: ["debug", "info"].includes(
149
+ ((_a = options.logger) == null ? void 0 : _a.level) ?? "silent"
150
+ ),
151
+ signal: options.signal
152
+ });
153
+ (_b = options.logger) == null ? void 0 : _b.info(`Transcribing audio file ${audioFilepath}`);
154
+ return recognize(audioFilepath, {
155
+ engine: options.engine,
156
+ options: {
157
+ flashAttention: true,
158
+ model: getWhisperCppModelId(sharedOptions.language, options.model),
159
+ processors: options.processors,
160
+ threads: options.threads,
161
+ onProgress: (progress) => {
162
+ var _a2;
163
+ if (options.onProgress) {
164
+ options.onProgress(progress);
165
+ return;
166
+ }
167
+ (_a2 = options.logger) == null ? void 0 : _a2.info(
168
+ `Transcribing ${audioFilepath} progress: ${Math.floor(progress * 100)}%`
169
+ );
170
+ },
171
+ ...whisperOptions
172
+ },
173
+ ...sharedOptions
174
+ });
175
+ }
176
+ case "google-cloud": {
177
+ if (!options.googleCloudApiKey) {
178
+ throw new Error(
179
+ "Failed to start transcription with engine google-cloud: missing API key"
180
+ );
181
+ }
182
+ return recognize(audioFilepath, {
183
+ engine: "google-cloud",
184
+ options: {
185
+ apiKey: options.googleCloudApiKey
186
+ },
187
+ ...sharedOptions
188
+ });
189
+ }
190
+ case "microsoft-azure": {
191
+ if (!options.azureServiceRegion) {
192
+ throw new Error(
193
+ "Failed to start transcription with engine microsoft-azure: missing service region"
194
+ );
195
+ }
196
+ if (!options.azureSubscriptionKey) {
197
+ throw new Error(
198
+ "Failed to start transcription with engine microsoft-azure: missing subscription key"
199
+ );
200
+ }
201
+ return recognize(audioFilepath, {
202
+ engine: "microsoft-azure",
203
+ options: {
204
+ serviceRegion: options.azureServiceRegion,
205
+ subscriptionKey: options.azureSubscriptionKey
206
+ },
207
+ ...sharedOptions
208
+ });
209
+ }
210
+ case "amazon-transcribe": {
211
+ if (!options.amazonTranscribeRegion) {
212
+ throw new Error(
213
+ "Failed to start transcription with engine amazon-transcribe: missing region"
214
+ );
215
+ }
216
+ if (!options.amazonTranscribeAccessKeyId) {
217
+ throw new Error(
218
+ "Failed to start transcription with engine amazon-transcribe: missing access key id"
219
+ );
220
+ }
221
+ if (!options.amazonTranscribeSecretAccessKey) {
222
+ throw new Error(
223
+ "Failed to start transcription with engine amazon-transcribe: missing access secret access key"
224
+ );
225
+ }
226
+ if (!options.amazonTranscribeBucketName) {
227
+ throw new Error(
228
+ "Failed to start transcription with engine amazon-transcribe: missing bucket name"
229
+ );
230
+ }
231
+ return recognize(audioFilepath, {
232
+ engine: "amazon-transcribe",
233
+ options: {
234
+ region: options.amazonTranscribeRegion,
235
+ accessKeyId: options.amazonTranscribeAccessKeyId,
236
+ secretAccessKey: options.amazonTranscribeSecretAccessKey,
237
+ bucketName: options.amazonTranscribeBucketName
238
+ },
239
+ ...sharedOptions
240
+ });
241
+ }
242
+ case "openai-cloud": {
243
+ return recognize(audioFilepath, {
244
+ engine: "openai-cloud",
245
+ options: {
246
+ ...options.openAiApiKey && { apiKey: options.openAiApiKey },
247
+ ...options.openAiOrganization && {
248
+ organization: options.openAiOrganization
249
+ },
250
+ ...options.openAiBaseUrl && { baseURL: options.openAiBaseUrl },
251
+ model: options.openAiModelName ?? "whisper-1"
252
+ },
253
+ ...sharedOptions
254
+ });
255
+ }
256
+ case "whisper-server": {
257
+ if (!options.whisperServerUrl) {
258
+ throw new Error(
259
+ "Failed to start transcription with engine whisper-server: missing server url"
260
+ );
261
+ }
262
+ return recognize(audioFilepath, {
263
+ engine: "whisper-server",
264
+ options: {
265
+ baseURL: options.whisperServerUrl,
266
+ ...options.whisperServerApiKey && {
267
+ apiKey: options.whisperServerApiKey
268
+ }
269
+ },
270
+ ...sharedOptions
271
+ });
272
+ }
273
+ case "deepgram": {
274
+ if (!options.deepgramApiKey) {
275
+ throw new Error(
276
+ "Failed to start transcription with engine deepgram: missing api key"
277
+ );
278
+ }
279
+ return recognize(audioFilepath, {
280
+ engine: "deepgram",
281
+ options: {
282
+ apiKey: options.deepgramApiKey,
283
+ // nova-3 is just as cheap as nova-2 and has better performance
284
+ model: options.deepgramModel ?? "nova-3",
285
+ punctuate: true
286
+ },
287
+ ...sharedOptions
288
+ });
289
+ }
290
+ default: {
291
+ throw new Error(
292
+ `Unknown transcription engine: ${options.engine}`
293
+ );
294
+ }
295
+ }
296
+ }
297
/**
 * Maps a requested model type to the whisper.cpp model id to use.
 *
 * - "large" always becomes "large-v3-turbo".
 * - For language "en", a model that neither starts with "large" nor already
 *   contains ".en" gets ".en" inserted before its "-q..." suffix, or
 *   appended when there is no such suffix.
 * - Everything else is returned unchanged.
 *
 * @param {string} language - Language code; only the exact value "en" is
 *   treated specially.
 * @param {string} modelType - Requested model type.
 * @returns {string} The model id.
 */
function getWhisperCppModelId(language, modelType) {
  if (modelType === "large") {
    return "large-v3-turbo";
  }

  const needsEnglishSuffix =
    language === "en" &&
    !modelType.startsWith("large") &&
    !modelType.includes(".en");
  if (!needsEnglishSuffix) {
    return modelType;
  }

  const quantStart = modelType.indexOf("-q");
  const hasQuantSuffix = quantStart !== -1;
  const baseName = hasQuantSuffix ? modelType.slice(0, quantStart) : modelType;
  const quantSuffix = hasQuantSuffix ? modelType.slice(quantStart) : "";
  return `${baseName}.en${quantSuffix}`;
}
305
/**
 * Picks the whisper build variant name for the current platform when a CPU
 * override is requested.
 *
 * @param {string|null|undefined} override - Requested override. Any falsy
 *   value means "no override". On linux/x64 the value "blas" selects the
 *   BLAS build and any other truthy value selects the plain CPU build.
 * @returns {string|undefined} The variant name, or `undefined` when no
 *   override is requested or the platform/architecture pair is not listed.
 */
function getCpuOverrideVariant(override) {
  if (!override) {
    return undefined;
  }

  const platform = os.platform();
  const arch = os.arch();

  // Windows maps to the x64 build regardless of the reported architecture.
  if (platform === "win32") {
    return "windows-x64-cpu";
  }

  switch (`${platform}/${arch}`) {
    case "linux/x64":
      // Only this target has a BLAS build and goes through the legacy fallback.
      return applyLegacyCpuFallback(
        override === "blas" ? "linux-x64-blas" : "linux-x64-cpu"
      );
    case "linux/arm64":
      return "linux-arm64-cpu";
    case "darwin/arm64":
      return "darwin-arm64-cpu";
    case "darwin/x64":
      return "darwin-x64-cpu";
    default:
      return undefined;
  }
}
327
+ export {
328
+ transcribe,
329
+ transcribeFile
330
+ };
package/package.json ADDED
@@ -0,0 +1,96 @@
1
+ {
2
+ "name": "@storyteller-platform/align",
3
+ "version": "0.0.1",
4
+ "description": "A library and CLI for automatically aligning audiobooks and EPUBs to produce Media Overlays",
5
+ "author": "Shane Friedman",
6
+ "license": "MIT",
7
+ "keywords": [
8
+ "epub",
9
+ "ebook",
10
+ "speech-to-text",
11
+ "audiobook"
12
+ ],
13
+ "repository": {
14
+ "type": "git",
15
+ "url": "https://gitlab.com/storyteller-platform/storyteller"
16
+ },
17
+ "bugs": {
18
+ "url": "https://gitlab.com/storyteller-platform/storyteller"
19
+ },
20
+ "engines": {
21
+ "node": ">=24"
22
+ },
23
+ "os": [
24
+ "win32",
25
+ "darwin",
26
+ "linux"
27
+ ],
28
+ "type": "module",
29
+ "files": [
30
+ "dist",
31
+ "README.md",
32
+ "LICENSE.txt"
33
+ ],
34
+ "exports": {
35
+ ".": {
36
+ "import": {
37
+ "types": "./dist/index.d.ts",
38
+ "default": "./dist/index.js"
39
+ },
40
+ "require": {
41
+ "types": "./dist/index.d.cts",
42
+ "default": "./dist/index.cjs"
43
+ }
44
+ }
45
+ },
46
+ "scripts": {
47
+ "compile": "NODE_OPTIONS=--experimental-import-meta-resolve ./scripts/bundle.js && node --build-sea sea-config.json",
48
+ "build": "tsup",
49
+ "prepack": "yarn build",
50
+ "test": "yarn tsx --test"
51
+ },
52
+ "bin": "./dist/cli/bin.js",
53
+ "dependencies": {
54
+ "@echogarden/icu-segmentation-wasm": "^0.2.2",
55
+ "@echogarden/text-segmentation": "^0.4.1",
56
+ "@esfx/async-semaphore": "^1.0.0",
57
+ "@optique/core": "^0.10.7",
58
+ "@optique/run": "^0.10.7",
59
+ "@storyteller-platform/audiobook": "^0.3.7",
60
+ "@storyteller-platform/epub": "^0.4.6",
61
+ "@storyteller-platform/ghost-story": "^0.1.2",
62
+ "chalk": "^5.4.1",
63
+ "cli-progress": "^3.12.0",
64
+ "esbuild": "^0.27.3",
65
+ "memoize": "^10.2.0",
66
+ "pino": "^10.3.1",
67
+ "pino-pretty": "^13.1.3",
68
+ "zod": "^3.24.0"
69
+ },
70
+ "devDependencies": {
71
+ "@storyteller-platform/eslint": "0.1.0",
72
+ "@storyteller-platform/tsup": "0.1.0",
73
+ "@tsconfig/strictest": "^2.0.5",
74
+ "@types/cli-progress": "^3",
75
+ "@types/node": "^24.0.0",
76
+ "eslint": "^8.0.0",
77
+ "tsup": "^8.5.0",
78
+ "tsx": "^4.19.2",
79
+ "typescript": "~5.8.3"
80
+ },
81
+ "publishConfig": {
82
+ "access": "public",
83
+ "exports": {
84
+ ".": {
85
+ "import": {
86
+ "types": "./dist/index.d.ts",
87
+ "default": "./dist/index.js"
88
+ },
89
+ "require": {
90
+ "types": "./dist/index.d.cts",
91
+ "default": "./dist/index.cjs"
92
+ }
93
+ }
94
+ }
95
+ }
96
+ }