@storyteller-platform/align 0.1.4 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/__tests__/align.test.cjs +282 -0
- package/dist/align/__tests__/align.test.d.cts +2 -0
- package/dist/align/__tests__/align.test.d.ts +2 -0
- package/dist/align/__tests__/align.test.js +218 -0
- package/dist/align/__tests__/slugify.test.cjs +64 -0
- package/dist/align/__tests__/slugify.test.d.cts +2 -0
- package/dist/align/__tests__/slugify.test.d.ts +2 -0
- package/dist/align/__tests__/slugify.test.js +41 -0
- package/dist/align/align.cjs +41 -21
- package/dist/align/align.js +41 -21
- package/dist/align/fuzzy.cjs +1 -1
- package/dist/align/fuzzy.js +1 -1
- package/dist/align/getSentenceRanges.cjs +24 -12
- package/dist/align/getSentenceRanges.d.cts +1 -1
- package/dist/align/getSentenceRanges.d.ts +1 -1
- package/dist/align/getSentenceRanges.js +24 -12
- package/dist/align/slugify.cjs +125 -0
- package/dist/align/slugify.d.cts +8 -0
- package/dist/align/slugify.d.ts +8 -0
- package/dist/align/slugify.js +102 -0
- package/package.json +6 -3
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Bundler-emitted runtime helpers: short aliases for reflection built-ins,
// CommonJS -> ESM interop, and the push/dispose pair that the compiled test
// body below drives from its try/catch/finally.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Resolves a well-known symbol, falling back to the global registry entry
// "Symbol.<name>" when the runtime does not define Symbol[name].
var __knownSymbol = (name, symbol) => {
  symbol = Symbol[name];
  return symbol ? symbol : Symbol.for("Symbol." + name);
};

// Expression-position helper that throws a TypeError.
var __typeError = (msg) => {
  throw TypeError(msg);
};

// Defines a live getter on `to` for every own property of `from` that `to`
// does not already own (skipping `except`); enumerability follows `from`.
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

// Wraps a required module in an ESM-shaped namespace object.
// If the importer is in node compatibility mode or this is not an ESM
// file that has been converted to a CommonJS file using a Babel-
// compatible transform (i.e. "__esModule" has not been set), then set
// "default" to the CommonJS "module.exports" for node compatibility.
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  const namespace = needsDefault
    ? __defProp(target, "default", { value: mod, enumerable: true })
    : target;
  return __copyProps(namespace, mod);
};

// Registers `value` on the disposal `stack` and returns it unchanged.
// null/undefined values are only recorded (as a bare [async] marker) in async
// mode. Throws a TypeError for primitives and for objects without a dispose
// method.
var __using = (stack, value, async) => {
  if (value == null) {
    if (async) {
      stack.push([async]);
    }
    return value;
  }
  const kind = typeof value;
  if (kind !== "object" && kind !== "function") {
    __typeError("Object expected");
  }
  let dispose;
  let inner;
  if (async) {
    dispose = value[__knownSymbol("asyncDispose")];
  }
  if (dispose === void 0) {
    dispose = value[__knownSymbol("dispose")];
    // In async mode a sync disposer is wrapped below so that a synchronous
    // throw surfaces as a rejected promise.
    if (async) {
      inner = dispose;
    }
  }
  if (typeof dispose !== "function") {
    __typeError("Object not disposable");
  }
  if (inner) {
    dispose = function() {
      try {
        inner.call(this);
      } catch (e) {
        return Promise.reject(e);
      }
    };
  }
  stack.push([async, dispose, value]);
  return value;
};

// Pops and runs every registered disposer (last registered first). Errors are
// chained through SuppressedError (or a look-alike Error when the global is
// missing) and the final error, if any, is thrown once the stack is drained.
// Returns a promise as soon as an async entry is encountered.
var __callDispose = (stack, error, hasError) => {
  const Suppressed = typeof SuppressedError === "function" ? SuppressedError : function(e, s, m, _) {
    _ = Error(m);
    _.name = "SuppressedError";
    _.error = e;
    _.suppressed = s;
    return _;
  };
  const fail = (e) => {
    if (hasError) {
      error = new Suppressed(e, error, "An error was suppressed during disposal");
    } else {
      hasError = true;
      error = e;
    }
    return error;
  };
  const next = (entry) => {
    while (entry = stack.pop()) {
      try {
        const result = entry[1] && entry[1].call(entry[2]);
        if (entry[0]) {
          return Promise.resolve(result).then(next, (e) => (fail(e), next()));
        }
      } catch (e) {
        fail(e);
      }
    }
    if (hasError) {
      throw error;
    }
  };
  return next();
};
|
|
69
|
+
var import_node_assert = __toESM(require("node:assert"), 1);
|
|
70
|
+
var import_promises = require("node:fs/promises");
|
|
71
|
+
var import_node_path = require("node:path");
|
|
72
|
+
var import_posix = require("node:path/posix");
|
|
73
|
+
var import_node_test = require("node:test");
|
|
74
|
+
var import_audiobook = require("@storyteller-platform/audiobook");
|
|
75
|
+
var import_epub = require("@storyteller-platform/epub");
|
|
76
|
+
var import_logging = require("../../common/logging.cjs");
|
|
77
|
+
var import_segmentation = require("../../markup/segmentation.cjs");
|
|
78
|
+
var import_align = require("../align.cjs");
|
|
79
|
+
/**
 * Builds the logger used by these tests.
 *
 * The level is "silent" whenever the CI environment variable is set to a
 * non-empty value, and "info" otherwise.
 */
function createTestLogger() {
  const level = process.env["CI"] ? "silent" : "info";
  return (0, import_logging.createLogger)(level);
}
|
|
82
|
+
/**
 * Turns an arbitrary title into a string that is safe to use as a file name.
 *
 * - Replaces the characters / \ : * ? " < > | with "-".
 * - Collapses every whitespace run into a single space and trims the ends.
 * - Strips trailing dots *and* the spaces between them, so the result never
 *   ends in "." or " " (previously "name ." became "name " with a dangling
 *   trailing space).
 *
 * @param {string} title - Raw title.
 * @returns {string} The sanitized file name (may be empty).
 */
function sanitizeFilename(title) {
  return title
    .replace(/[/\\:*?"<>|]/g, "-")
    .replace(/\s+/g, " ")
    .trim()
    // Whitespace is already collapsed to single spaces, so "[. ]" covers any
    // trailing mix of dots and spaces.
    .replace(/[. ]+$/, "");
}
|
|
85
|
+
/**
 * Truncates `input` so that the result, including `suffix`, fits within
 * `byteLimit` bytes of UTF-8.
 *
 * The input is NFC-normalized first and is only ever cut between code points.
 * The suffix is always appended, even if it alone exceeds the limit.
 *
 * Byte usage is tracked incrementally instead of re-encoding the whole
 * accumulated string on every iteration, so the work is linear in the input
 * length.
 *
 * @param {string} input - Text to truncate.
 * @param {number} byteLimit - Maximum UTF-8 byte length of the result.
 * @param {string} [suffix=""] - Text appended after the truncated input.
 * @returns {string} The truncated input followed by `suffix`.
 */
function truncate(input, byteLimit, suffix = "") {
  const encoder = new TextEncoder();
  // Bytes left for the input once the suffix has been accounted for.
  const budget = byteLimit - encoder.encode(suffix).length;
  let result = "";
  let usedBytes = 0;
  for (const char of input.normalize("NFC")) {
    const charBytes = encoder.encode(char).length;
    if (usedBytes + charBytes > budget) break;
    usedBytes += charBytes;
    result += char;
  }
  return result + suffix;
}
|
|
97
|
+
/**
 * Produces a single path segment from `name`: the name is sanitized, then
 * truncated so that it fits in 150 UTF-8 bytes together with `suffix`.
 */
function getSafeFilepathSegment(name, suffix = "") {
  const maxBytes = 150;
  const cleaned = sanitizeFilename(name);
  return truncate(cleaned, maxBytes, suffix);
}
|
|
100
|
+
/**
 * Reads an attribute from a parsed XML node. In this file attributes are
 * looked up under the node's ":@" key; returns undefined when the node has no
 * attribute bag.
 */
function getXmlAttribute(node, name) {
  const attributes = node[":@"];
  return attributes == null ? void 0 : attributes[name];
}

/**
 * Collects the text of the timeline words belonging to one audio clip.
 *
 * Scanning starts at the beginning of the timeline and stops at the first
 * word that ends after `clipEnd`. Collection begins with the first word that
 * starts at or after `clipBegin`.
 */
function collectClipWords(timeline, clipBegin, clipEnd) {
  const words = [];
  let started = false;
  for (const word of timeline) {
    if (!word || !(word.endTime <= clipEnd)) break;
    if (word.startTime >= clipBegin) started = true;
    if (started) words.push(word.text);
  }
  return words;
}

/**
 * Renders the media overlays of an aligned EPUB as a text snapshot and
 * compares it with the snapshot stored under src/align/__snapshots__.
 *
 * For every media overlay item the snapshot contains a `// <chapter file>`
 * header, followed by one `Text:` / `Audio:` pair per overlay entry: the
 * chapter sentence referenced by the entry and the transcription words that
 * fall inside the entry's audio clip.
 *
 * When the UPDATE_SNAPSHOTS environment variable is set, the snapshot file is
 * (re)written and nothing is asserted.
 *
 * @param context - Test context; `context.fullName` names the snapshot file.
 * @param epub - Aligned EPUB (provides getManifest, readItemContents and
 *   readFileContents).
 * @param {string[]} transcriptionFilepaths - Transcription JSON files, matched
 *   to audio clips by base name (extension ignored).
 * @throws {AssertionError} When the snapshot differs, or when the stored
 *   snapshot cannot be read.
 */
async function assertAlignSnapshot(context, epub, transcriptionFilepaths) {
  const snapshotFilename = getSafeFilepathSegment(context.fullName, ".snapshot");
  const snapshotFilepath = (0, import_node_path.join)(
    "src",
    "align",
    "__snapshots__",
    snapshotFilename
  );

  // Each transcription file is read and parsed once, not once per sentence.
  const transcriptionCache = new Map();
  const loadTranscription = async (filepath) => {
    if (!transcriptionCache.has(filepath)) {
      const raw = await (0, import_promises.readFile)(filepath, { encoding: "utf-8" });
      transcriptionCache.set(filepath, JSON.parse(raw));
    }
    return transcriptionCache.get(filepath);
  };

  let newSnapshot = "";
  const manifest = await epub.getManifest();
  const mediaOverlayItems = Object.values(manifest)
    .map((item) => item.mediaOverlay)
    .filter((mediaOverlayId) => !!mediaOverlayId)
    .map((id) => manifest[id]);

  for (const item of mediaOverlayItems) {
    const contents = await epub.readItemContents(item.id, "utf-8");
    const parsed = import_epub.Epub.xmlParser.parse(contents);
    const smil = import_epub.Epub.findXmlChildByName("smil", parsed);
    if (!smil) continue;
    const body = import_epub.Epub.findXmlChildByName("body", import_epub.Epub.getXmlChildren(smil));
    if (!body) continue;
    const seq = import_epub.Epub.findXmlChildByName("seq", import_epub.Epub.getXmlChildren(body));
    if (!seq) continue;
    const textref = getXmlAttribute(seq, "@_epub:textref");
    if (!textref) continue;

    newSnapshot += `// ${(0, import_posix.basename)(textref)}\n\n`;

    const chapterContents = await epub.readFileContents(textref, item.href, "utf-8");
    const chapterXml = import_epub.Epub.xhtmlParser.parse(chapterContents);
    const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
      import_epub.Epub.getXhtmlBody(chapterXml),
      { primaryLocale: new Intl.Locale("en-US") }
    );
    const chapterSentences = segmentation.sentences.map((s) => s.text);

    for (const par of import_epub.Epub.getXmlChildren(seq)) {
      // Every entry contributes a separator line, even when it is skipped.
      newSnapshot += "\n";
      const text = import_epub.Epub.findXmlChildByName("text", import_epub.Epub.getXmlChildren(par));
      if (!text) continue;
      const audio = import_epub.Epub.findXmlChildByName("audio", import_epub.Epub.getXmlChildren(par));
      if (!audio) continue;
      const textSrc = getXmlAttribute(text, "@_src");
      if (!textSrc) continue;
      // The trailing digits of the text reference index into the chapter's
      // sentence list.
      const sentenceMatch = textSrc.match(/[0-9]+$/);
      const sentenceId = sentenceMatch == null ? void 0 : sentenceMatch[0];
      if (sentenceId === void 0) continue;
      const textSentence = chapterSentences[Number.parseInt(sentenceId, 10)];
      if (!textSentence) continue;
      newSnapshot += `Text: ${textSentence}\n`;

      const audioSrc = getXmlAttribute(audio, "@_src");
      if (!audioSrc) continue;
      const audioStart = getXmlAttribute(audio, "@_clipBegin");
      const audioEnd = getXmlAttribute(audio, "@_clipEnd");
      if (!audioStart || !audioEnd) continue;
      // Clip values carry a one-character trailing unit, which is dropped
      // before parsing.
      const audioStartTime = parseFloat(audioStart.slice(0, -1));
      const audioEndTime = parseFloat(audioEnd.slice(0, -1));

      const audioFilename = (0, import_posix.basename)(audioSrc, (0, import_node_path.extname)(audioSrc));
      const transcriptionFilepath = transcriptionFilepaths.find(
        (f) => (0, import_node_path.basename)(f, (0, import_node_path.extname)(f)) === audioFilename
      );
      if (!transcriptionFilepath) continue;
      const transcription = await loadTranscription(transcriptionFilepath);
      const transcriptionSentence = collectClipWords(
        transcription.timeline,
        audioStartTime,
        audioEndTime
      ).join(" ");
      newSnapshot += `Audio: ${transcriptionSentence}\n`;
    }
    newSnapshot += "\n";
  }

  if (process.env["UPDATE_SNAPSHOTS"]) {
    await (0, import_promises.mkdir)((0, import_node_path.dirname)(snapshotFilepath), { recursive: true });
    await (0, import_promises.writeFile)(snapshotFilepath, newSnapshot, { encoding: "utf-8" });
    return;
  }

  try {
    const existingSnapshot = await (0, import_promises.readFile)(snapshotFilepath, {
      encoding: "utf-8"
    });
    const existingLines = existingSnapshot.split("\n");
    const newLines = newSnapshot.split("\n");
    // Walk the longer of the two so that extra lines on either side are
    // reported instead of being silently ignored.
    const lineCount = Math.max(existingLines.length, newLines.length);
    for (let i = 0; i < lineCount; i++) {
      if (existingLines[i] !== newLines[i]) {
        // Compare a small window around the first difference to keep the
        // failure output readable.
        const windowStart = Math.max(0, i - 5);
        import_node_assert.default.deepStrictEqual(
          newLines.slice(windowStart, i + 5),
          existingLines.slice(windowStart, i + 5)
        );
      }
    }
  } catch (e) {
    if (e instanceof import_node_assert.default.AssertionError) {
      throw e;
    }
    // Any other failure (e.g. the snapshot file cannot be read) is reported
    // as a mismatch against an empty snapshot.
    throw new import_node_assert.default.AssertionError({
      actual: newSnapshot,
      expected: "",
      diff: "simple"
    });
  }
}
|
|
223
|
+
// End-to-end alignment test: aligns the "Peter and Wendy" fixture book with
// its audio files and stored transcriptions, then checks the resulting media
// overlays against the stored snapshot.
void (0, import_node_test.describe)("align", () => {
  void (0, import_node_test.it)("should align Peter and Wendy", async (context) => {
    // Disposal bookkeeping for the epub registered through __using below;
    // __callDispose runs in `finally` and rethrows any captured error.
    const _stack = [];
    let _error;
    let _hasError;
    try {
      const fixtureDir = (0, import_node_path.join)(
        "src",
        "align",
        "__fixtures__",
        "peter-and-wendy"
      );
      const epubPath = (0, import_node_path.join)(fixtureDir, "text", "Peter and Wendy.epub");
      const epub = __using(_stack, await import_epub.Epub.from(epubPath));

      const audiobookDir = (0, import_node_path.join)(fixtureDir, "audio");
      const audioEntries = await (0, import_promises.readdir)(audiobookDir);
      const audiobookFiles = audioEntries
        .filter((f) => (0, import_audiobook.isAudioFile)(f))
        .map((f) => (0, import_node_path.join)(audiobookDir, f));

      const transcriptionsDir = (0, import_node_path.join)(fixtureDir, "transcriptions");
      const transcriptionEntries = await (0, import_promises.readdir)(transcriptionsDir);
      const transcriptionFilepaths = transcriptionEntries
        .filter((f) => f.endsWith(".json"))
        .map((f) => (0, import_node_path.join)(transcriptionsDir, f));

      // All files are read before any of them is parsed.
      const rawTranscriptions = await Promise.all(
        transcriptionFilepaths.map(
          async (p) => (0, import_promises.readFile)(p, { encoding: "utf-8" })
        )
      );
      const transcriptions = rawTranscriptions.map((c) => JSON.parse(c));

      const aligner = new import_align.Aligner(
        epub,
        audiobookFiles,
        transcriptions,
        "sentence",
        void 0,
        createTestLogger()
      );
      await aligner.alignBook();
      await assertAlignSnapshot(context, epub, transcriptionFilepaths);
    } catch (caught) {
      _error = caught;
      _hasError = true;
    } finally {
      __callDispose(_stack, _error, _hasError);
    }
  });
});
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import {
|
|
2
|
+
__callDispose,
|
|
3
|
+
__using
|
|
4
|
+
} from "../../chunk-BIEQXUOY.js";
|
|
5
|
+
import assert from "node:assert";
|
|
6
|
+
import { mkdir, readFile, readdir, writeFile } from "node:fs/promises";
|
|
7
|
+
import { basename, dirname, extname, join } from "node:path";
|
|
8
|
+
import { basename as posixBasename } from "node:path/posix";
|
|
9
|
+
import { describe, it } from "node:test";
|
|
10
|
+
import { isAudioFile } from "@storyteller-platform/audiobook";
|
|
11
|
+
import { Epub } from "@storyteller-platform/epub";
|
|
12
|
+
import { createLogger } from "../../common/logging.js";
|
|
13
|
+
import { getXhtmlSegmentation } from "../../markup/segmentation.js";
|
|
14
|
+
import { Aligner } from "../align.js";
|
|
15
|
+
/**
 * Builds the logger used by these tests.
 *
 * The level is "silent" whenever the CI environment variable is set to a
 * non-empty value, and "info" otherwise.
 */
function createTestLogger() {
  const level = process.env["CI"] ? "silent" : "info";
  return createLogger(level);
}
|
|
18
|
+
/**
 * Turns an arbitrary title into a string that is safe to use as a file name.
 *
 * - Replaces the characters / \ : * ? " < > | with "-".
 * - Collapses every whitespace run into a single space and trims the ends.
 * - Strips trailing dots *and* the spaces between them, so the result never
 *   ends in "." or " " (previously "name ." became "name " with a dangling
 *   trailing space).
 *
 * @param {string} title - Raw title.
 * @returns {string} The sanitized file name (may be empty).
 */
function sanitizeFilename(title) {
  return title
    .replace(/[/\\:*?"<>|]/g, "-")
    .replace(/\s+/g, " ")
    .trim()
    // Whitespace is already collapsed to single spaces, so "[. ]" covers any
    // trailing mix of dots and spaces.
    .replace(/[. ]+$/, "");
}
|
|
21
|
+
/**
 * Truncates `input` so that the result, including `suffix`, fits within
 * `byteLimit` bytes of UTF-8.
 *
 * The input is NFC-normalized first and is only ever cut between code points.
 * The suffix is always appended, even if it alone exceeds the limit.
 *
 * Byte usage is tracked incrementally instead of re-encoding the whole
 * accumulated string on every iteration, so the work is linear in the input
 * length.
 *
 * @param {string} input - Text to truncate.
 * @param {number} byteLimit - Maximum UTF-8 byte length of the result.
 * @param {string} [suffix=""] - Text appended after the truncated input.
 * @returns {string} The truncated input followed by `suffix`.
 */
function truncate(input, byteLimit, suffix = "") {
  const encoder = new TextEncoder();
  // Bytes left for the input once the suffix has been accounted for.
  const budget = byteLimit - encoder.encode(suffix).length;
  let result = "";
  let usedBytes = 0;
  for (const char of input.normalize("NFC")) {
    const charBytes = encoder.encode(char).length;
    if (usedBytes + charBytes > budget) break;
    usedBytes += charBytes;
    result += char;
  }
  return result + suffix;
}
|
|
33
|
+
/**
 * Produces a single path segment from `name`: the name is sanitized, then
 * truncated so that it fits in 150 UTF-8 bytes together with `suffix`.
 */
function getSafeFilepathSegment(name, suffix = "") {
  const maxBytes = 150;
  const cleaned = sanitizeFilename(name);
  return truncate(cleaned, maxBytes, suffix);
}
|
|
36
|
+
/**
 * Reads an attribute from a parsed XML node. In this file attributes are
 * looked up under the node's ":@" key; returns undefined when the node has no
 * attribute bag.
 */
function getXmlAttribute(node, name) {
  const attributes = node[":@"];
  return attributes == null ? void 0 : attributes[name];
}

/**
 * Collects the text of the timeline words belonging to one audio clip.
 *
 * Scanning starts at the beginning of the timeline and stops at the first
 * word that ends after `clipEnd`. Collection begins with the first word that
 * starts at or after `clipBegin`.
 */
function collectClipWords(timeline, clipBegin, clipEnd) {
  const words = [];
  let started = false;
  for (const word of timeline) {
    if (!word || !(word.endTime <= clipEnd)) break;
    if (word.startTime >= clipBegin) started = true;
    if (started) words.push(word.text);
  }
  return words;
}

/**
 * Renders the media overlays of an aligned EPUB as a text snapshot and
 * compares it with the snapshot stored under src/align/__snapshots__.
 *
 * For every media overlay item the snapshot contains a `// <chapter file>`
 * header, followed by one `Text:` / `Audio:` pair per overlay entry: the
 * chapter sentence referenced by the entry and the transcription words that
 * fall inside the entry's audio clip.
 *
 * When the UPDATE_SNAPSHOTS environment variable is set, the snapshot file is
 * (re)written and nothing is asserted.
 *
 * @param context - Test context; `context.fullName` names the snapshot file.
 * @param epub - Aligned EPUB (provides getManifest, readItemContents and
 *   readFileContents).
 * @param {string[]} transcriptionFilepaths - Transcription JSON files, matched
 *   to audio clips by base name (extension ignored).
 * @throws {AssertionError} When the snapshot differs, or when the stored
 *   snapshot cannot be read.
 */
async function assertAlignSnapshot(context, epub, transcriptionFilepaths) {
  const snapshotFilename = getSafeFilepathSegment(context.fullName, ".snapshot");
  const snapshotFilepath = join(
    "src",
    "align",
    "__snapshots__",
    snapshotFilename
  );

  // Each transcription file is read and parsed once, not once per sentence.
  const transcriptionCache = new Map();
  const loadTranscription = async (filepath) => {
    if (!transcriptionCache.has(filepath)) {
      const raw = await readFile(filepath, { encoding: "utf-8" });
      transcriptionCache.set(filepath, JSON.parse(raw));
    }
    return transcriptionCache.get(filepath);
  };

  let newSnapshot = "";
  const manifest = await epub.getManifest();
  const mediaOverlayItems = Object.values(manifest)
    .map((item) => item.mediaOverlay)
    .filter((mediaOverlayId) => !!mediaOverlayId)
    .map((id) => manifest[id]);

  for (const item of mediaOverlayItems) {
    const contents = await epub.readItemContents(item.id, "utf-8");
    const parsed = Epub.xmlParser.parse(contents);
    const smil = Epub.findXmlChildByName("smil", parsed);
    if (!smil) continue;
    const body = Epub.findXmlChildByName("body", Epub.getXmlChildren(smil));
    if (!body) continue;
    const seq = Epub.findXmlChildByName("seq", Epub.getXmlChildren(body));
    if (!seq) continue;
    const textref = getXmlAttribute(seq, "@_epub:textref");
    if (!textref) continue;

    newSnapshot += `// ${posixBasename(textref)}\n\n`;

    const chapterContents = await epub.readFileContents(textref, item.href, "utf-8");
    const chapterXml = Epub.xhtmlParser.parse(chapterContents);
    const segmentation = await getXhtmlSegmentation(
      Epub.getXhtmlBody(chapterXml),
      { primaryLocale: new Intl.Locale("en-US") }
    );
    const chapterSentences = segmentation.sentences.map((s) => s.text);

    for (const par of Epub.getXmlChildren(seq)) {
      // Every entry contributes a separator line, even when it is skipped.
      newSnapshot += "\n";
      const text = Epub.findXmlChildByName("text", Epub.getXmlChildren(par));
      if (!text) continue;
      const audio = Epub.findXmlChildByName("audio", Epub.getXmlChildren(par));
      if (!audio) continue;
      const textSrc = getXmlAttribute(text, "@_src");
      if (!textSrc) continue;
      // The trailing digits of the text reference index into the chapter's
      // sentence list.
      const sentenceMatch = textSrc.match(/[0-9]+$/);
      const sentenceId = sentenceMatch == null ? void 0 : sentenceMatch[0];
      if (sentenceId === void 0) continue;
      const textSentence = chapterSentences[Number.parseInt(sentenceId, 10)];
      if (!textSentence) continue;
      newSnapshot += `Text: ${textSentence}\n`;

      const audioSrc = getXmlAttribute(audio, "@_src");
      if (!audioSrc) continue;
      const audioStart = getXmlAttribute(audio, "@_clipBegin");
      const audioEnd = getXmlAttribute(audio, "@_clipEnd");
      if (!audioStart || !audioEnd) continue;
      // Clip values carry a one-character trailing unit, which is dropped
      // before parsing.
      const audioStartTime = parseFloat(audioStart.slice(0, -1));
      const audioEndTime = parseFloat(audioEnd.slice(0, -1));

      const audioFilename = posixBasename(audioSrc, extname(audioSrc));
      const transcriptionFilepath = transcriptionFilepaths.find(
        (f) => basename(f, extname(f)) === audioFilename
      );
      if (!transcriptionFilepath) continue;
      const transcription = await loadTranscription(transcriptionFilepath);
      const transcriptionSentence = collectClipWords(
        transcription.timeline,
        audioStartTime,
        audioEndTime
      ).join(" ");
      newSnapshot += `Audio: ${transcriptionSentence}\n`;
    }
    newSnapshot += "\n";
  }

  if (process.env["UPDATE_SNAPSHOTS"]) {
    await mkdir(dirname(snapshotFilepath), { recursive: true });
    await writeFile(snapshotFilepath, newSnapshot, { encoding: "utf-8" });
    return;
  }

  try {
    const existingSnapshot = await readFile(snapshotFilepath, {
      encoding: "utf-8"
    });
    const existingLines = existingSnapshot.split("\n");
    const newLines = newSnapshot.split("\n");
    // Walk the longer of the two so that extra lines on either side are
    // reported instead of being silently ignored.
    const lineCount = Math.max(existingLines.length, newLines.length);
    for (let i = 0; i < lineCount; i++) {
      if (existingLines[i] !== newLines[i]) {
        // Compare a small window around the first difference to keep the
        // failure output readable.
        const windowStart = Math.max(0, i - 5);
        assert.deepStrictEqual(
          newLines.slice(windowStart, i + 5),
          existingLines.slice(windowStart, i + 5)
        );
      }
    }
  } catch (e) {
    if (e instanceof assert.AssertionError) {
      throw e;
    }
    // Any other failure (e.g. the snapshot file cannot be read) is reported
    // as a mismatch against an empty snapshot.
    throw new assert.AssertionError({
      actual: newSnapshot,
      expected: "",
      diff: "simple"
    });
  }
}
|
|
159
|
+
// End-to-end alignment test: aligns the "Peter and Wendy" fixture book with
// its audio files and stored transcriptions, then checks the resulting media
// overlays against the stored snapshot.
void describe("align", () => {
  void it("should align Peter and Wendy", async (context) => {
    // Disposal bookkeeping for the epub registered through __using below;
    // __callDispose is invoked in `finally` with any captured error
    // (presumably the compiled form of a `using` declaration — the helpers
    // are imported from a shared chunk not shown here).
    const _stack = [];
    let _error;
    let _hasError;
    try {
      const fixtureDir = join("src", "align", "__fixtures__", "peter-and-wendy");
      const epubPath = join(fixtureDir, "text", "Peter and Wendy.epub");
      const epub = __using(_stack, await Epub.from(epubPath));

      const audiobookDir = join(fixtureDir, "audio");
      const audioEntries = await readdir(audiobookDir);
      const audiobookFiles = audioEntries
        .filter((f) => isAudioFile(f))
        .map((f) => join(audiobookDir, f));

      const transcriptionsDir = join(fixtureDir, "transcriptions");
      const transcriptionEntries = await readdir(transcriptionsDir);
      const transcriptionFilepaths = transcriptionEntries
        .filter((f) => f.endsWith(".json"))
        .map((f) => join(transcriptionsDir, f));

      // All files are read before any of them is parsed.
      const rawTranscriptions = await Promise.all(
        transcriptionFilepaths.map(
          async (p) => readFile(p, { encoding: "utf-8" })
        )
      );
      const transcriptions = rawTranscriptions.map((c) => JSON.parse(c));

      const aligner = new Aligner(
        epub,
        audiobookFiles,
        transcriptions,
        "sentence",
        void 0,
        createTestLogger()
      );
      await aligner.alignBook();
      await assertAlignSnapshot(context, epub, transcriptionFilepaths);
    } catch (caught) {
      _error = caught;
      _hasError = true;
    } finally {
      __callDispose(_stack, _error, _hasError);
    }
  });
});
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Bundler-emitted interop helpers: short aliases for reflection built-ins and
// the CommonJS -> ESM namespace wrapper used for the `node:assert` import.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines a live getter on `to` for every own property of `from` that `to`
// does not already own (skipping `except`); enumerability follows `from`.
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

// Wraps a required module in an ESM-shaped namespace object.
// If the importer is in node compatibility mode or this is not an ESM
// file that has been converted to a CommonJS file using a Babel-
// compatible transform (i.e. "__esModule" has not been set), then set
// "default" to the CommonJS "module.exports" for node compatibility.
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  const namespace = needsDefault
    ? __defProp(target, "default", { value: mod, enumerable: true })
    : target;
  return __copyProps(namespace, mod);
};
|
|
24
|
+
var import_node_assert = __toESM(require("node:assert"), 1);
|
|
25
|
+
var import_node_test = require("node:test");
|
|
26
|
+
var import_slugify = require("../slugify.cjs");
|
|
27
|
+
// Table-driven slugify tests: each case registers one test that slugifies
// `input` for `locale` and expects exactly `output`.
void (0, import_node_test.describe)("slugify", () => {
  const cases = [
    {
      name: "should slugify numbers",
      locale: "en",
      input: "There were 10 cars.",
      output: "there-were-ten-cars"
    },
    {
      name: "should slugify numbers with decimals",
      locale: "en",
      input: "It was 74.6 degrees out.",
      output: "it-was-seventy-four-point-six-degrees-out"
    },
    {
      name: "should slugify numbers with groupings",
      locale: "en",
      input: "It weighed over 1,000 pounds",
      output: "it-weighed-over-one-thousand-pounds"
    },
    {
      name: "should slugify currency",
      locale: "en",
      input: "It costs $5,500.50.",
      output: "it-costs-five-thousand-five-hundred-dollars-and-fifty-cents"
    },
    {
      name: "should slugify German currency",
      locale: "de",
      input: "Es kostet 5.500,50 \u20AC",
      output: "es-kostet-funf-tausend-funf-hundert-euro-und-funfzig-cent"
    },
    {
      name: "should slugify Chinese currency",
      locale: "zh",
      input: "\u4EF7\u683C\u4E3A \xA55,500.50",
      output: "jie-ge-wei-wu-qian-wu-bai-yuan-wu-shi-fen"
    }
  ];
  for (const { name, locale, input, output } of cases) {
    void (0, import_node_test.it)(name, async () => {
      const { result } = await (0, import_slugify.slugify)(input, new Intl.Locale(locale));
      import_node_assert.default.strictEqual(result, output);
    });
  }
});
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import assert from "node:assert";
|
|
2
|
+
import { describe, it } from "node:test";
|
|
3
|
+
import { slugify } from "../slugify.js";
|
|
4
|
+
// Table-driven slugify tests: each case registers one test that slugifies
// `input` for `locale` and expects exactly `output`.
void describe("slugify", () => {
  const cases = [
    {
      name: "should slugify numbers",
      locale: "en",
      input: "There were 10 cars.",
      output: "there-were-ten-cars"
    },
    {
      name: "should slugify numbers with decimals",
      locale: "en",
      input: "It was 74.6 degrees out.",
      output: "it-was-seventy-four-point-six-degrees-out"
    },
    {
      name: "should slugify numbers with groupings",
      locale: "en",
      input: "It weighed over 1,000 pounds",
      output: "it-weighed-over-one-thousand-pounds"
    },
    {
      name: "should slugify currency",
      locale: "en",
      input: "It costs $5,500.50.",
      output: "it-costs-five-thousand-five-hundred-dollars-and-fifty-cents"
    },
    {
      name: "should slugify German currency",
      locale: "de",
      input: "Es kostet 5.500,50 \u20AC",
      output: "es-kostet-funf-tausend-funf-hundert-euro-und-funfzig-cent"
    },
    {
      name: "should slugify Chinese currency",
      locale: "zh",
      input: "\u4EF7\u683C\u4E3A \xA55,500.50",
      output: "jie-ge-wei-wu-qian-wu-bai-yuan-wu-shi-fen"
    }
  ];
  for (const { name, locale, input, output } of cases) {
    void it(name, async () => {
      const { result } = await slugify(input, new Intl.Locale(locale));
      assert.strictEqual(result, output);
    });
  }
});
|