@storyteller-platform/align 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +26 -0
- package/dist/align/align.cjs +6 -7
- package/dist/align/align.js +6 -7
- package/dist/cli/bin.cjs +1 -1
- package/dist/cli/bin.js +2 -2
- package/dist/common/ffmpeg.cjs +12 -12
- package/dist/common/ffmpeg.js +12 -12
- package/dist/errorAlign/backtraceGraph.cjs +5 -8
- package/dist/errorAlign/backtraceGraph.js +5 -8
- package/dist/errorAlign/beamSearch.cjs +1 -2
- package/dist/errorAlign/beamSearch.js +1 -2
- package/dist/markup/markup.cjs +3 -4
- package/dist/markup/markup.js +3 -4
- package/dist/markup/serializeDom.cjs +1 -1
- package/dist/markup/serializeDom.js +1 -1
- package/dist/process/processAudiobook.cjs +8 -12
- package/dist/process/processAudiobook.js +8 -12
- package/dist/process/ranges.cjs +3 -3
- package/dist/process/ranges.js +3 -3
- package/dist/transcribe/transcribe.cjs +9 -14
- package/dist/transcribe/transcribe.js +9 -14
- package/package.json +3 -1
- package/prebuilds/darwin-arm64/@storyteller-platform+align.node +0 -0
- package/prebuilds/linux-arm64/@storyteller-platform+align.node +0 -0
- package/prebuilds/linux-x64/@storyteller-platform+align.node +0 -0
- package/dist/align/__tests__/align.test.cjs +0 -283
- package/dist/align/__tests__/align.test.d.cts +0 -2
- package/dist/align/__tests__/align.test.d.ts +0 -2
- package/dist/align/__tests__/align.test.js +0 -219
- package/dist/align/__tests__/slugify.test.cjs +0 -64
- package/dist/align/__tests__/slugify.test.d.cts +0 -2
- package/dist/align/__tests__/slugify.test.d.ts +0 -2
- package/dist/align/__tests__/slugify.test.js +0 -41
- package/dist/errorAlign/__tests__/errorAlign.test.cjs +0 -100
- package/dist/errorAlign/__tests__/errorAlign.test.d.cts +0 -2
- package/dist/errorAlign/__tests__/errorAlign.test.d.ts +0 -2
- package/dist/errorAlign/__tests__/errorAlign.test.js +0 -77
- package/dist/errorAlign/__tests__/native.test.cjs +0 -118
- package/dist/errorAlign/__tests__/native.test.d.cts +0 -2
- package/dist/errorAlign/__tests__/native.test.d.ts +0 -2
- package/dist/errorAlign/__tests__/native.test.js +0 -107
- package/dist/markup/__tests__/markup.test.cjs +0 -491
- package/dist/markup/__tests__/markup.test.d.cts +0 -2
- package/dist/markup/__tests__/markup.test.d.ts +0 -2
- package/dist/markup/__tests__/markup.test.js +0 -468
- package/dist/markup/__tests__/parseDom.test.cjs +0 -112
- package/dist/markup/__tests__/parseDom.test.d.cts +0 -2
- package/dist/markup/__tests__/parseDom.test.d.ts +0 -2
- package/dist/markup/__tests__/parseDom.test.js +0 -89
- package/dist/markup/__tests__/serializeDom.test.cjs +0 -120
- package/dist/markup/__tests__/serializeDom.test.d.cts +0 -2
- package/dist/markup/__tests__/serializeDom.test.d.ts +0 -2
- package/dist/markup/__tests__/serializeDom.test.js +0 -97
- package/dist/markup/__tests__/transform.test.cjs +0 -122
- package/dist/markup/__tests__/transform.test.d.cts +0 -2
- package/dist/markup/__tests__/transform.test.d.ts +0 -2
- package/dist/markup/__tests__/transform.test.js +0 -99
- package/dist/process/__tests__/processAudiobook.test.cjs +0 -232
- package/dist/process/__tests__/processAudiobook.test.d.cts +0 -2
- package/dist/process/__tests__/processAudiobook.test.d.ts +0 -2
- package/dist/process/__tests__/processAudiobook.test.js +0 -209
package/dist/process/ranges.cjs
CHANGED
|
@@ -75,7 +75,7 @@ var import_vad = require("@storyteller-platform/ghost-story/vad");
|
|
|
75
75
|
var import_ffmpeg = require("../common/ffmpeg.cjs");
|
|
76
76
|
async function getSafeChapterRanges(input, duration, chapters, maxSeconds, signal, logger) {
|
|
77
77
|
if (!chapters.length) {
|
|
78
|
-
logger == null ? void 0 : logger.info(
|
|
78
|
+
logger?.info(
|
|
79
79
|
`Track is longer than ${maxSeconds / 60} minutes (${duration / 60}m); using VAD to determine safe split points.`
|
|
80
80
|
);
|
|
81
81
|
const ranges2 = await getSafeRanges(input, duration, maxSeconds, 0, signal);
|
|
@@ -103,7 +103,7 @@ async function getSafeChapterRanges(input, duration, chapters, maxSeconds, signa
|
|
|
103
103
|
ranges.push(range);
|
|
104
104
|
continue;
|
|
105
105
|
}
|
|
106
|
-
logger == null ? void 0 : logger.info(
|
|
106
|
+
logger?.info(
|
|
107
107
|
`Chapter is longer than ${maxSeconds / 60} minutes (${duration / 60}m); using VAD to determine safe split points.`
|
|
108
108
|
);
|
|
109
109
|
const chapterRanges = await getSafeRanges(
|
|
@@ -132,7 +132,7 @@ async function getSafeRanges(input, duration, maxSeconds, start = 0, signal, log
|
|
|
132
132
|
});
|
|
133
133
|
const ranges = [{ start, end: duration + start }];
|
|
134
134
|
for (let i = 0; i + 1 < duration / maxSeconds; i++) {
|
|
135
|
-
if (signal == null ? void 0 : signal.aborted) throw new Error("Aborted");
|
|
135
|
+
if (signal?.aborted) throw new Error("Aborted");
|
|
136
136
|
const tmpFilepath = (0, import_node_path.join)(tmpDir, i.toString(), `${rawFilename}.wav`);
|
|
137
137
|
await (0, import_promises.mkdir)((0, import_node_path.dirname)(tmpFilepath), { recursive: true });
|
|
138
138
|
const approxCutPoint = start + maxSeconds * (i + 1);
|
package/dist/process/ranges.js
CHANGED
|
@@ -10,7 +10,7 @@ import { detectVoiceActivity } from "@storyteller-platform/ghost-story/vad";
|
|
|
10
10
|
import { splitFile } from "../common/ffmpeg.js";
|
|
11
11
|
async function getSafeChapterRanges(input, duration, chapters, maxSeconds, signal, logger) {
|
|
12
12
|
if (!chapters.length) {
|
|
13
|
-
logger == null ? void 0 : logger.info(
|
|
13
|
+
logger?.info(
|
|
14
14
|
`Track is longer than ${maxSeconds / 60} minutes (${duration / 60}m); using VAD to determine safe split points.`
|
|
15
15
|
);
|
|
16
16
|
const ranges2 = await getSafeRanges(input, duration, maxSeconds, 0, signal);
|
|
@@ -38,7 +38,7 @@ async function getSafeChapterRanges(input, duration, chapters, maxSeconds, signa
|
|
|
38
38
|
ranges.push(range);
|
|
39
39
|
continue;
|
|
40
40
|
}
|
|
41
|
-
logger == null ? void 0 : logger.info(
|
|
41
|
+
logger?.info(
|
|
42
42
|
`Chapter is longer than ${maxSeconds / 60} minutes (${duration / 60}m); using VAD to determine safe split points.`
|
|
43
43
|
);
|
|
44
44
|
const chapterRanges = await getSafeRanges(
|
|
@@ -67,7 +67,7 @@ async function getSafeRanges(input, duration, maxSeconds, start = 0, signal, log
|
|
|
67
67
|
});
|
|
68
68
|
const ranges = [{ start, end: duration + start }];
|
|
69
69
|
for (let i = 0; i + 1 < duration / maxSeconds; i++) {
|
|
70
|
-
if (signal == null ? void 0 : signal.aborted) throw new Error("Aborted");
|
|
70
|
+
if (signal?.aborted) throw new Error("Aborted");
|
|
71
71
|
const tmpFilepath = join(tmpDir, i.toString(), `${rawFilename}.wav`);
|
|
72
72
|
await mkdir(dirname(tmpFilepath), { recursive: true });
|
|
73
73
|
const approxCutPoint = start + maxSeconds * (i + 1);
|
|
@@ -84,7 +84,6 @@ var import_async_semaphore = require("@esfx/async-semaphore");
|
|
|
84
84
|
var import_audiobook = require("@storyteller-platform/audiobook");
|
|
85
85
|
var import_ghost_story = require("@storyteller-platform/ghost-story");
|
|
86
86
|
async function transcribe(input, output, locale, options) {
|
|
87
|
-
var _a;
|
|
88
87
|
if (process.env["DEBUG_TRANSCRIBE"] === "true") {
|
|
89
88
|
const inspector = await import("node:inspector");
|
|
90
89
|
inspector.open(9231, "0.0.0.0", true);
|
|
@@ -109,7 +108,7 @@ async function transcribe(input, output, locale, options) {
|
|
|
109
108
|
await (0, import_ghost_story.ensureWhisperInstalled)({
|
|
110
109
|
model,
|
|
111
110
|
printOutput: ["debug", "info"].includes(
|
|
112
|
-
|
|
111
|
+
options.logger?.level ?? "silent"
|
|
113
112
|
),
|
|
114
113
|
signal
|
|
115
114
|
});
|
|
@@ -126,7 +125,6 @@ async function transcribe(input, output, locale, options) {
|
|
|
126
125
|
timing.setMetadata("threads", options.threads ?? 4);
|
|
127
126
|
await Promise.all(
|
|
128
127
|
filenames.map(async (filename) => {
|
|
129
|
-
var _a2, _b, _c;
|
|
130
128
|
var _stack = [];
|
|
131
129
|
try {
|
|
132
130
|
if (aborted()) throw new Error("Aborted");
|
|
@@ -140,7 +138,7 @@ async function transcribe(input, output, locale, options) {
|
|
|
140
138
|
encoding: "utf-8",
|
|
141
139
|
signal
|
|
142
140
|
});
|
|
143
|
-
|
|
141
|
+
options.logger?.info(`Found existing transcription for ${filepath}`);
|
|
144
142
|
transcriptions.push(transcriptionFilepath);
|
|
145
143
|
} catch {
|
|
146
144
|
}
|
|
@@ -151,13 +149,12 @@ async function transcribe(input, output, locale, options) {
|
|
|
151
149
|
});
|
|
152
150
|
await semaphore.wait();
|
|
153
151
|
function onFileProgress(progress) {
|
|
154
|
-
var _a3, _b2;
|
|
155
152
|
perFileProgress.set(filename, progress);
|
|
156
153
|
const updatedProgress = Array.from(perFileProgress.values()).reduce((acc, p) => acc + p) / filenames.length;
|
|
157
|
-
|
|
154
|
+
options.logger?.info(
|
|
158
155
|
`Progress: ${Math.floor(updatedProgress * 100)}%`
|
|
159
156
|
);
|
|
160
|
-
|
|
157
|
+
options.onProgress?.(updatedProgress);
|
|
161
158
|
}
|
|
162
159
|
const transcription = await transcribeFile(filepath, locale, {
|
|
163
160
|
...options,
|
|
@@ -168,7 +165,7 @@ async function transcribe(input, output, locale, options) {
|
|
|
168
165
|
threads: options.threads ?? 4,
|
|
169
166
|
onProgress: onFileProgress
|
|
170
167
|
});
|
|
171
|
-
|
|
168
|
+
options.logger?.info(
|
|
172
169
|
(0, import_ghost_story.formatSingleReport)(
|
|
173
170
|
transcription.timing,
|
|
174
171
|
`Transcription Timing Report for ${filepath}`
|
|
@@ -184,7 +181,7 @@ async function transcribe(input, output, locale, options) {
|
|
|
184
181
|
{ signal }
|
|
185
182
|
);
|
|
186
183
|
transcriptions.push(transcriptionFilepath);
|
|
187
|
-
|
|
184
|
+
options.onProgress?.((transcriptions.length + 1) / filenames.length);
|
|
188
185
|
} catch (_) {
|
|
189
186
|
var _error = _, _hasError = true;
|
|
190
187
|
} finally {
|
|
@@ -200,7 +197,6 @@ async function transcribe(input, output, locale, options) {
|
|
|
200
197
|
return timing;
|
|
201
198
|
}
|
|
202
199
|
async function transcribeFile(input, locale, options) {
|
|
203
|
-
var _a, _b;
|
|
204
200
|
const audioFilepath = (0, import_node_path.resolve)(process.cwd(), input);
|
|
205
201
|
const sharedOptions = {
|
|
206
202
|
signal: options.signal,
|
|
@@ -215,11 +211,11 @@ async function transcribeFile(input, locale, options) {
|
|
|
215
211
|
model: options.model,
|
|
216
212
|
variant: fallbackVariant,
|
|
217
213
|
printOutput: ["debug", "info"].includes(
|
|
218
|
-
|
|
214
|
+
options.logger?.level ?? "silent"
|
|
219
215
|
),
|
|
220
216
|
signal: options.signal
|
|
221
217
|
});
|
|
222
|
-
|
|
218
|
+
options.logger?.info(`Transcribing audio file ${audioFilepath}`);
|
|
223
219
|
return (0, import_ghost_story.recognize)(audioFilepath, {
|
|
224
220
|
engine: options.engine,
|
|
225
221
|
options: {
|
|
@@ -228,12 +224,11 @@ async function transcribeFile(input, locale, options) {
|
|
|
228
224
|
processors: options.processors,
|
|
229
225
|
threads: options.threads,
|
|
230
226
|
onProgress: (progress) => {
|
|
231
|
-
var _a2;
|
|
232
227
|
if (options.onProgress) {
|
|
233
228
|
options.onProgress(progress);
|
|
234
229
|
return;
|
|
235
230
|
}
|
|
236
|
-
|
|
231
|
+
options.logger?.info(
|
|
237
232
|
`Transcribing ${audioFilepath} progress: ${Math.floor(progress * 100)}%`
|
|
238
233
|
);
|
|
239
234
|
},
|
|
@@ -15,7 +15,6 @@ import {
|
|
|
15
15
|
recognize
|
|
16
16
|
} from "@storyteller-platform/ghost-story";
|
|
17
17
|
async function transcribe(input, output, locale, options) {
|
|
18
|
-
var _a;
|
|
19
18
|
if (process.env["DEBUG_TRANSCRIBE"] === "true") {
|
|
20
19
|
const inspector = await import("node:inspector");
|
|
21
20
|
inspector.open(9231, "0.0.0.0", true);
|
|
@@ -40,7 +39,7 @@ async function transcribe(input, output, locale, options) {
|
|
|
40
39
|
await ensureWhisperInstalled({
|
|
41
40
|
model,
|
|
42
41
|
printOutput: ["debug", "info"].includes(
|
|
43
|
-
|
|
42
|
+
options.logger?.level ?? "silent"
|
|
44
43
|
),
|
|
45
44
|
signal
|
|
46
45
|
});
|
|
@@ -57,7 +56,6 @@ async function transcribe(input, output, locale, options) {
|
|
|
57
56
|
timing.setMetadata("threads", options.threads ?? 4);
|
|
58
57
|
await Promise.all(
|
|
59
58
|
filenames.map(async (filename) => {
|
|
60
|
-
var _a2, _b, _c;
|
|
61
59
|
var _stack = [];
|
|
62
60
|
try {
|
|
63
61
|
if (aborted()) throw new Error("Aborted");
|
|
@@ -71,7 +69,7 @@ async function transcribe(input, output, locale, options) {
|
|
|
71
69
|
encoding: "utf-8",
|
|
72
70
|
signal
|
|
73
71
|
});
|
|
74
|
-
|
|
72
|
+
options.logger?.info(`Found existing transcription for ${filepath}`);
|
|
75
73
|
transcriptions.push(transcriptionFilepath);
|
|
76
74
|
} catch {
|
|
77
75
|
}
|
|
@@ -82,13 +80,12 @@ async function transcribe(input, output, locale, options) {
|
|
|
82
80
|
});
|
|
83
81
|
await semaphore.wait();
|
|
84
82
|
function onFileProgress(progress) {
|
|
85
|
-
var _a3, _b2;
|
|
86
83
|
perFileProgress.set(filename, progress);
|
|
87
84
|
const updatedProgress = Array.from(perFileProgress.values()).reduce((acc, p) => acc + p) / filenames.length;
|
|
88
|
-
|
|
85
|
+
options.logger?.info(
|
|
89
86
|
`Progress: ${Math.floor(updatedProgress * 100)}%`
|
|
90
87
|
);
|
|
91
|
-
|
|
88
|
+
options.onProgress?.(updatedProgress);
|
|
92
89
|
}
|
|
93
90
|
const transcription = await transcribeFile(filepath, locale, {
|
|
94
91
|
...options,
|
|
@@ -99,7 +96,7 @@ async function transcribe(input, output, locale, options) {
|
|
|
99
96
|
threads: options.threads ?? 4,
|
|
100
97
|
onProgress: onFileProgress
|
|
101
98
|
});
|
|
102
|
-
|
|
99
|
+
options.logger?.info(
|
|
103
100
|
formatSingleReport(
|
|
104
101
|
transcription.timing,
|
|
105
102
|
`Transcription Timing Report for ${filepath}`
|
|
@@ -115,7 +112,7 @@ async function transcribe(input, output, locale, options) {
|
|
|
115
112
|
{ signal }
|
|
116
113
|
);
|
|
117
114
|
transcriptions.push(transcriptionFilepath);
|
|
118
|
-
|
|
115
|
+
options.onProgress?.((transcriptions.length + 1) / filenames.length);
|
|
119
116
|
} catch (_) {
|
|
120
117
|
var _error = _, _hasError = true;
|
|
121
118
|
} finally {
|
|
@@ -131,7 +128,6 @@ async function transcribe(input, output, locale, options) {
|
|
|
131
128
|
return timing;
|
|
132
129
|
}
|
|
133
130
|
async function transcribeFile(input, locale, options) {
|
|
134
|
-
var _a, _b;
|
|
135
131
|
const audioFilepath = resolve(process.cwd(), input);
|
|
136
132
|
const sharedOptions = {
|
|
137
133
|
signal: options.signal,
|
|
@@ -146,11 +142,11 @@ async function transcribeFile(input, locale, options) {
|
|
|
146
142
|
model: options.model,
|
|
147
143
|
variant: fallbackVariant,
|
|
148
144
|
printOutput: ["debug", "info"].includes(
|
|
149
|
-
|
|
145
|
+
options.logger?.level ?? "silent"
|
|
150
146
|
),
|
|
151
147
|
signal: options.signal
|
|
152
148
|
});
|
|
153
|
-
|
|
149
|
+
options.logger?.info(`Transcribing audio file ${audioFilepath}`);
|
|
154
150
|
return recognize(audioFilepath, {
|
|
155
151
|
engine: options.engine,
|
|
156
152
|
options: {
|
|
@@ -159,12 +155,11 @@ async function transcribeFile(input, locale, options) {
|
|
|
159
155
|
processors: options.processors,
|
|
160
156
|
threads: options.threads,
|
|
161
157
|
onProgress: (progress) => {
|
|
162
|
-
var _a2;
|
|
163
158
|
if (options.onProgress) {
|
|
164
159
|
options.onProgress(progress);
|
|
165
160
|
return;
|
|
166
161
|
}
|
|
167
|
-
|
|
162
|
+
options.logger?.info(
|
|
168
163
|
`Transcribing ${audioFilepath} progress: ${Math.floor(progress * 100)}%`
|
|
169
164
|
);
|
|
170
165
|
},
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@storyteller-platform/align",
|
|
3
|
-
"version": "0.1.16",
|
|
3
|
+
"version": "0.1.18",
|
|
4
4
|
"description": "A library and CLI for automatically aligning audiobooks and EPUBs to produce Media Overlays",
|
|
5
5
|
"author": "Shane Friedman",
|
|
6
6
|
"license": "MIT",
|
|
@@ -28,6 +28,8 @@
|
|
|
28
28
|
"type": "module",
|
|
29
29
|
"files": [
|
|
30
30
|
"dist",
|
|
31
|
+
"prebuilds",
|
|
32
|
+
"binding.gyp",
|
|
31
33
|
"README.md",
|
|
32
34
|
"LICENSE.txt"
|
|
33
35
|
],
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,283 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __create = Object.create;
|
|
3
|
-
var __defProp = Object.defineProperty;
|
|
4
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
-
var __knownSymbol = (name, symbol) => (symbol = Symbol[name]) ? symbol : Symbol.for("Symbol." + name);
|
|
9
|
-
var __typeError = (msg) => {
|
|
10
|
-
throw TypeError(msg);
|
|
11
|
-
};
|
|
12
|
-
var __copyProps = (to, from, except, desc) => {
|
|
13
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
14
|
-
for (let key of __getOwnPropNames(from))
|
|
15
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
16
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
17
|
-
}
|
|
18
|
-
return to;
|
|
19
|
-
};
|
|
20
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
-
mod
|
|
27
|
-
));
|
|
28
|
-
var __using = (stack, value, async) => {
|
|
29
|
-
if (value != null) {
|
|
30
|
-
if (typeof value !== "object" && typeof value !== "function") __typeError("Object expected");
|
|
31
|
-
var dispose, inner;
|
|
32
|
-
if (async) dispose = value[__knownSymbol("asyncDispose")];
|
|
33
|
-
if (dispose === void 0) {
|
|
34
|
-
dispose = value[__knownSymbol("dispose")];
|
|
35
|
-
if (async) inner = dispose;
|
|
36
|
-
}
|
|
37
|
-
if (typeof dispose !== "function") __typeError("Object not disposable");
|
|
38
|
-
if (inner) dispose = function() {
|
|
39
|
-
try {
|
|
40
|
-
inner.call(this);
|
|
41
|
-
} catch (e) {
|
|
42
|
-
return Promise.reject(e);
|
|
43
|
-
}
|
|
44
|
-
};
|
|
45
|
-
stack.push([async, dispose, value]);
|
|
46
|
-
} else if (async) {
|
|
47
|
-
stack.push([async]);
|
|
48
|
-
}
|
|
49
|
-
return value;
|
|
50
|
-
};
|
|
51
|
-
var __callDispose = (stack, error, hasError) => {
|
|
52
|
-
var E = typeof SuppressedError === "function" ? SuppressedError : function(e, s, m, _) {
|
|
53
|
-
return _ = Error(m), _.name = "SuppressedError", _.error = e, _.suppressed = s, _;
|
|
54
|
-
};
|
|
55
|
-
var fail = (e) => error = hasError ? new E(e, error, "An error was suppressed during disposal") : (hasError = true, e);
|
|
56
|
-
var next = (it2) => {
|
|
57
|
-
while (it2 = stack.pop()) {
|
|
58
|
-
try {
|
|
59
|
-
var result = it2[1] && it2[1].call(it2[2]);
|
|
60
|
-
if (it2[0]) return Promise.resolve(result).then(next, (e) => (fail(e), next()));
|
|
61
|
-
} catch (e) {
|
|
62
|
-
fail(e);
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
if (hasError) throw error;
|
|
66
|
-
};
|
|
67
|
-
return next();
|
|
68
|
-
};
|
|
69
|
-
var import_node_assert = __toESM(require("node:assert"), 1);
|
|
70
|
-
var import_promises = require("node:fs/promises");
|
|
71
|
-
var import_node_path = require("node:path");
|
|
72
|
-
var import_posix = require("node:path/posix");
|
|
73
|
-
var import_node_test = require("node:test");
|
|
74
|
-
var import_audiobook = require("@storyteller-platform/audiobook");
|
|
75
|
-
var import_epub = require("@storyteller-platform/epub");
|
|
76
|
-
var import_logging = require("../../common/logging.cjs");
|
|
77
|
-
var import_segmentation = require("../../markup/segmentation.cjs");
|
|
78
|
-
var import_align = require("../align.cjs");
|
|
79
|
-
function createTestLogger() {
|
|
80
|
-
return (0, import_logging.createLogger)(process.env["CI"] ? "silent" : "info");
|
|
81
|
-
}
|
|
82
|
-
function sanitizeFilename(title) {
|
|
83
|
-
return title.replace(/[/\\:*?"<>|]/g, "-").replace(/\s+/g, " ").trim().replace(/[.]+$/, "");
|
|
84
|
-
}
|
|
85
|
-
function truncate(input, byteLimit, suffix = "") {
|
|
86
|
-
const normalized = input.normalize("NFC");
|
|
87
|
-
const encoder = new TextEncoder();
|
|
88
|
-
let result = "";
|
|
89
|
-
for (const char of normalized) {
|
|
90
|
-
const withSuffix = result + char + suffix;
|
|
91
|
-
const byteLength = encoder.encode(withSuffix).length;
|
|
92
|
-
if (byteLength > byteLimit) break;
|
|
93
|
-
result += char;
|
|
94
|
-
}
|
|
95
|
-
return result + suffix;
|
|
96
|
-
}
|
|
97
|
-
function getSafeFilepathSegment(name, suffix = "") {
|
|
98
|
-
return truncate(sanitizeFilename(name), 150, suffix);
|
|
99
|
-
}
|
|
100
|
-
async function assertAlignSnapshot(context, epub, transcriptionFilepaths) {
|
|
101
|
-
var _a, _b, _c, _d, _e, _f;
|
|
102
|
-
const snapshotFilename = getSafeFilepathSegment(context.fullName, ".snapshot");
|
|
103
|
-
const snapshotFilepath = (0, import_node_path.join)(
|
|
104
|
-
"src",
|
|
105
|
-
"align",
|
|
106
|
-
"__snapshots__",
|
|
107
|
-
snapshotFilename
|
|
108
|
-
);
|
|
109
|
-
let newSnapshot = "";
|
|
110
|
-
const manifest = await epub.getManifest();
|
|
111
|
-
const mediaOverlayItems = Object.values(manifest).map((item) => item.mediaOverlay).filter((mediaOverlayId) => !!mediaOverlayId).map((id) => manifest[id]);
|
|
112
|
-
const mediaOverlays = [];
|
|
113
|
-
for (const item of mediaOverlayItems) {
|
|
114
|
-
const contents = await epub.readItemContents(item.id, "utf-8");
|
|
115
|
-
const parsed = import_epub.Epub.xmlParser.parse(contents);
|
|
116
|
-
mediaOverlays.push(parsed);
|
|
117
|
-
const smil = import_epub.Epub.findXmlChildByName("smil", parsed);
|
|
118
|
-
if (!smil) continue;
|
|
119
|
-
const body = import_epub.Epub.findXmlChildByName("body", import_epub.Epub.getXmlChildren(smil));
|
|
120
|
-
if (!body) continue;
|
|
121
|
-
const seq = import_epub.Epub.findXmlChildByName("seq", import_epub.Epub.getXmlChildren(body));
|
|
122
|
-
if (!seq) continue;
|
|
123
|
-
const textref = (_a = seq[":@"]) == null ? void 0 : _a["@_epub:textref"];
|
|
124
|
-
if (!textref) continue;
|
|
125
|
-
newSnapshot += `// ${(0, import_posix.basename)(textref)}
|
|
126
|
-
|
|
127
|
-
`;
|
|
128
|
-
const chapterContents = await epub.readFileContents(
|
|
129
|
-
textref,
|
|
130
|
-
item.href,
|
|
131
|
-
"utf-8"
|
|
132
|
-
);
|
|
133
|
-
const chapterXml = import_epub.Epub.xhtmlParser.parse(chapterContents);
|
|
134
|
-
const { result: segmentation } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
135
|
-
import_epub.Epub.getXhtmlBody(chapterXml),
|
|
136
|
-
{
|
|
137
|
-
primaryLocale: new Intl.Locale("en-US")
|
|
138
|
-
}
|
|
139
|
-
);
|
|
140
|
-
const chapterSentences = segmentation.map((s) => s.text).filter((s) => s.match(/\S/));
|
|
141
|
-
for (const par of import_epub.Epub.getXmlChildren(seq)) {
|
|
142
|
-
newSnapshot += `
|
|
143
|
-
`;
|
|
144
|
-
const text = import_epub.Epub.findXmlChildByName("text", import_epub.Epub.getXmlChildren(par));
|
|
145
|
-
if (!text) continue;
|
|
146
|
-
const audio = import_epub.Epub.findXmlChildByName("audio", import_epub.Epub.getXmlChildren(par));
|
|
147
|
-
if (!audio) continue;
|
|
148
|
-
const textSrc = (_b = text[":@"]) == null ? void 0 : _b["@_src"];
|
|
149
|
-
if (!textSrc) continue;
|
|
150
|
-
const sentenceId = (_c = textSrc.match(/[0-9]+$/)) == null ? void 0 : _c[0];
|
|
151
|
-
if (sentenceId === void 0) continue;
|
|
152
|
-
const textSentence = chapterSentences[parseInt(sentenceId)];
|
|
153
|
-
if (!textSentence) continue;
|
|
154
|
-
newSnapshot += `Text: ${textSentence.replace(/\n/, "")}
|
|
155
|
-
`;
|
|
156
|
-
const audioSrc = (_d = audio[":@"]) == null ? void 0 : _d["@_src"];
|
|
157
|
-
if (!audioSrc) continue;
|
|
158
|
-
const audioStart = (_e = audio[":@"]) == null ? void 0 : _e["@_clipBegin"];
|
|
159
|
-
const audioEnd = (_f = audio[":@"]) == null ? void 0 : _f["@_clipEnd"];
|
|
160
|
-
if (!audioStart || !audioEnd) continue;
|
|
161
|
-
const audioStartTime = parseFloat(audioStart.slice(0, -1)) - 2e-3;
|
|
162
|
-
const audioEndTime = parseFloat(audioEnd.slice(0, -1));
|
|
163
|
-
const audioFilename = (0, import_posix.basename)(audioSrc, (0, import_node_path.extname)(audioSrc));
|
|
164
|
-
const transcriptionFilepath = transcriptionFilepaths.find(
|
|
165
|
-
(f) => (0, import_node_path.basename)(f, (0, import_node_path.extname)(f)) === audioFilename
|
|
166
|
-
);
|
|
167
|
-
if (!transcriptionFilepath) continue;
|
|
168
|
-
const transcription = JSON.parse(
|
|
169
|
-
await (0, import_promises.readFile)(transcriptionFilepath, { encoding: "utf-8" })
|
|
170
|
-
);
|
|
171
|
-
const transcriptionWords = [];
|
|
172
|
-
let started = false;
|
|
173
|
-
let i = 0;
|
|
174
|
-
let word = transcription.timeline[i];
|
|
175
|
-
while (word && word.endTime <= audioEndTime) {
|
|
176
|
-
if (word.startTime >= audioStartTime) {
|
|
177
|
-
started = true;
|
|
178
|
-
}
|
|
179
|
-
if (started) {
|
|
180
|
-
transcriptionWords.push(word.text);
|
|
181
|
-
}
|
|
182
|
-
word = transcription.timeline[++i];
|
|
183
|
-
}
|
|
184
|
-
const transcriptionSentence = transcriptionWords.join(" ");
|
|
185
|
-
newSnapshot += `Audio: ${transcriptionSentence}
|
|
186
|
-
`;
|
|
187
|
-
}
|
|
188
|
-
newSnapshot += `
|
|
189
|
-
`;
|
|
190
|
-
}
|
|
191
|
-
if (process.env["UPDATE_SNAPSHOTS"]) {
|
|
192
|
-
await (0, import_promises.mkdir)((0, import_node_path.dirname)(snapshotFilepath), { recursive: true });
|
|
193
|
-
await (0, import_promises.writeFile)(snapshotFilepath, newSnapshot, { encoding: "utf-8" });
|
|
194
|
-
return;
|
|
195
|
-
}
|
|
196
|
-
try {
|
|
197
|
-
const existingSnapshot = await (0, import_promises.readFile)(snapshotFilepath, {
|
|
198
|
-
encoding: "utf-8"
|
|
199
|
-
});
|
|
200
|
-
const existingLines = existingSnapshot.split("\n");
|
|
201
|
-
const newLines = newSnapshot.split("\n");
|
|
202
|
-
for (let i = 0; i < existingLines.length; i++) {
|
|
203
|
-
const existingLine = existingLines[i];
|
|
204
|
-
const newLine = newLines[i];
|
|
205
|
-
if (existingLine !== newLine) {
|
|
206
|
-
import_node_assert.default.strictEqual(
|
|
207
|
-
newLines.slice(Math.max(0, i - 5), i + 5),
|
|
208
|
-
existingLines.slice(Math.max(0, i - 5), i + 5)
|
|
209
|
-
);
|
|
210
|
-
}
|
|
211
|
-
}
|
|
212
|
-
} catch (e) {
|
|
213
|
-
if (e instanceof import_node_assert.default.AssertionError) {
|
|
214
|
-
throw e;
|
|
215
|
-
}
|
|
216
|
-
throw new import_node_assert.default.AssertionError({
|
|
217
|
-
actual: newSnapshot,
|
|
218
|
-
expected: "",
|
|
219
|
-
diff: "simple"
|
|
220
|
-
});
|
|
221
|
-
}
|
|
222
|
-
}
|
|
223
|
-
void (0, import_node_test.describe)("align", () => {
|
|
224
|
-
void (0, import_node_test.it)("should align Peter and Wendy", async (context) => {
|
|
225
|
-
var _stack = [];
|
|
226
|
-
try {
|
|
227
|
-
const epub = __using(_stack, await import_epub.Epub.from(
|
|
228
|
-
(0, import_node_path.join)(
|
|
229
|
-
"src",
|
|
230
|
-
"align",
|
|
231
|
-
"__fixtures__",
|
|
232
|
-
"peter-and-wendy",
|
|
233
|
-
"text",
|
|
234
|
-
"Peter and Wendy.epub"
|
|
235
|
-
)
|
|
236
|
-
));
|
|
237
|
-
const audiobookDir = (0, import_node_path.join)(
|
|
238
|
-
"src",
|
|
239
|
-
"align",
|
|
240
|
-
"__fixtures__",
|
|
241
|
-
"peter-and-wendy",
|
|
242
|
-
"audio"
|
|
243
|
-
);
|
|
244
|
-
const audiobookFiles = await (0, import_promises.readdir)(audiobookDir).then(
|
|
245
|
-
(filenames) => filenames.filter((f) => (0, import_audiobook.isAudioFile)(f)).map((f) => (0, import_node_path.join)(audiobookDir, f))
|
|
246
|
-
);
|
|
247
|
-
const transcriptionsDir = (0, import_node_path.join)(
|
|
248
|
-
"src",
|
|
249
|
-
"align",
|
|
250
|
-
"__fixtures__",
|
|
251
|
-
"peter-and-wendy",
|
|
252
|
-
"transcriptions"
|
|
253
|
-
);
|
|
254
|
-
const transcriptionFilepaths = await (0, import_promises.readdir)(transcriptionsDir).then(
|
|
255
|
-
(filenames) => filenames.filter((f) => f.endsWith(".json")).map((f) => (0, import_node_path.join)(transcriptionsDir, f))
|
|
256
|
-
);
|
|
257
|
-
const transcriptions = await Promise.all(
|
|
258
|
-
transcriptionFilepaths.map(
|
|
259
|
-
async (p) => (0, import_promises.readFile)(p, { encoding: "utf-8" })
|
|
260
|
-
)
|
|
261
|
-
).then(
|
|
262
|
-
(contents) => contents.map(
|
|
263
|
-
(c) => JSON.parse(c)
|
|
264
|
-
)
|
|
265
|
-
);
|
|
266
|
-
const aligner = new import_align.Aligner(
|
|
267
|
-
epub,
|
|
268
|
-
audiobookFiles,
|
|
269
|
-
transcriptions,
|
|
270
|
-
"sentence",
|
|
271
|
-
void 0,
|
|
272
|
-
createTestLogger()
|
|
273
|
-
);
|
|
274
|
-
const timing = await aligner.alignBook();
|
|
275
|
-
if (!process.env["CI"]) timing.print();
|
|
276
|
-
await assertAlignSnapshot(context, epub, transcriptionFilepaths);
|
|
277
|
-
} catch (_) {
|
|
278
|
-
var _error = _, _hasError = true;
|
|
279
|
-
} finally {
|
|
280
|
-
__callDispose(_stack, _error, _hasError);
|
|
281
|
-
}
|
|
282
|
-
});
|
|
283
|
-
});
|