@storyteller-platform/align 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +26 -0
- package/dist/align/align.cjs +6 -7
- package/dist/align/align.js +6 -7
- package/dist/cli/bin.cjs +1 -1
- package/dist/cli/bin.js +2 -2
- package/dist/common/ffmpeg.cjs +12 -12
- package/dist/common/ffmpeg.js +12 -12
- package/dist/errorAlign/backtraceGraph.cjs +5 -8
- package/dist/errorAlign/backtraceGraph.js +5 -8
- package/dist/errorAlign/beamSearch.cjs +1 -2
- package/dist/errorAlign/beamSearch.js +1 -2
- package/dist/markup/markup.cjs +3 -4
- package/dist/markup/markup.js +3 -4
- package/dist/markup/serializeDom.cjs +1 -1
- package/dist/markup/serializeDom.js +1 -1
- package/dist/process/processAudiobook.cjs +8 -12
- package/dist/process/processAudiobook.js +8 -12
- package/dist/process/ranges.cjs +3 -3
- package/dist/process/ranges.js +3 -3
- package/dist/transcribe/transcribe.cjs +9 -14
- package/dist/transcribe/transcribe.js +9 -14
- package/package.json +3 -1
- package/prebuilds/darwin-arm64/@storyteller-platform+align.node +0 -0
- package/prebuilds/linux-arm64/@storyteller-platform+align.node +0 -0
- package/prebuilds/linux-x64/@storyteller-platform+align.node +0 -0
- package/dist/align/__tests__/align.test.cjs +0 -283
- package/dist/align/__tests__/align.test.d.cts +0 -2
- package/dist/align/__tests__/align.test.d.ts +0 -2
- package/dist/align/__tests__/align.test.js +0 -219
- package/dist/align/__tests__/slugify.test.cjs +0 -64
- package/dist/align/__tests__/slugify.test.d.cts +0 -2
- package/dist/align/__tests__/slugify.test.d.ts +0 -2
- package/dist/align/__tests__/slugify.test.js +0 -41
- package/dist/errorAlign/__tests__/errorAlign.test.cjs +0 -100
- package/dist/errorAlign/__tests__/errorAlign.test.d.cts +0 -2
- package/dist/errorAlign/__tests__/errorAlign.test.d.ts +0 -2
- package/dist/errorAlign/__tests__/errorAlign.test.js +0 -77
- package/dist/errorAlign/__tests__/native.test.cjs +0 -118
- package/dist/errorAlign/__tests__/native.test.d.cts +0 -2
- package/dist/errorAlign/__tests__/native.test.d.ts +0 -2
- package/dist/errorAlign/__tests__/native.test.js +0 -107
- package/dist/markup/__tests__/markup.test.cjs +0 -491
- package/dist/markup/__tests__/markup.test.d.cts +0 -2
- package/dist/markup/__tests__/markup.test.d.ts +0 -2
- package/dist/markup/__tests__/markup.test.js +0 -468
- package/dist/markup/__tests__/parseDom.test.cjs +0 -112
- package/dist/markup/__tests__/parseDom.test.d.cts +0 -2
- package/dist/markup/__tests__/parseDom.test.d.ts +0 -2
- package/dist/markup/__tests__/parseDom.test.js +0 -89
- package/dist/markup/__tests__/serializeDom.test.cjs +0 -120
- package/dist/markup/__tests__/serializeDom.test.d.cts +0 -2
- package/dist/markup/__tests__/serializeDom.test.d.ts +0 -2
- package/dist/markup/__tests__/serializeDom.test.js +0 -97
- package/dist/markup/__tests__/transform.test.cjs +0 -122
- package/dist/markup/__tests__/transform.test.d.cts +0 -2
- package/dist/markup/__tests__/transform.test.d.ts +0 -2
- package/dist/markup/__tests__/transform.test.js +0 -99
- package/dist/process/__tests__/processAudiobook.test.cjs +0 -232
- package/dist/process/__tests__/processAudiobook.test.d.cts +0 -2
- package/dist/process/__tests__/processAudiobook.test.d.ts +0 -2
- package/dist/process/__tests__/processAudiobook.test.js +0 -209
package/binding.gyp
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"targets": [
|
|
3
|
+
{
|
|
4
|
+
"target_name": "error_align_native",
|
|
5
|
+
"sources": [
|
|
6
|
+
"src/errorAlign/cpp/edit_distance.cpp",
|
|
7
|
+
"src/errorAlign/cpp/beam_search.cpp",
|
|
8
|
+
"src/errorAlign/cpp/module.cpp"
|
|
9
|
+
],
|
|
10
|
+
"include_dirs": [
|
|
11
|
+
"<!@(node -p \"require('node-addon-api').include\")"
|
|
12
|
+
],
|
|
13
|
+
"cflags_cc": ["-std=c++17", "-fexceptions"],
|
|
14
|
+
"xcode_settings": {
|
|
15
|
+
"GCC_ENABLE_CPP_EXCEPTIONS": "YES",
|
|
16
|
+
"CLANG_CXX_LANGUAGE_STANDARD": "c++17"
|
|
17
|
+
},
|
|
18
|
+
"msvs_settings": {
|
|
19
|
+
"VCCLCompilerTool": {
|
|
20
|
+
"ExceptionHandling": 1,
|
|
21
|
+
"AdditionalOptions": ["/std:c++17"]
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
]
|
|
26
|
+
}
|
package/dist/align/align.cjs
CHANGED
|
@@ -347,7 +347,6 @@ class Aligner {
|
|
|
347
347
|
return narrowed;
|
|
348
348
|
}
|
|
349
349
|
async alignBook(onProgress) {
|
|
350
|
-
var _a, _b, _c, _d, _e, _f;
|
|
351
350
|
const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
|
|
352
351
|
this.timing.setMetadata("language", locale.toString());
|
|
353
352
|
this.timing.setMetadata("granularity", this.granularity);
|
|
@@ -358,13 +357,13 @@ class Aligner {
|
|
|
358
357
|
locale
|
|
359
358
|
);
|
|
360
359
|
for (let index = 0; index < spine.length; index++) {
|
|
361
|
-
onProgress
|
|
360
|
+
onProgress?.(index / spine.length);
|
|
362
361
|
const spineItem = spine[index];
|
|
363
|
-
|
|
362
|
+
this.logger?.info(
|
|
364
363
|
`Aligning epub item #${index} : ${(0, import_posix.basename)(spineItem.href)}`
|
|
365
364
|
);
|
|
366
365
|
const chapterId = spineItem.id;
|
|
367
|
-
if (
|
|
366
|
+
if (manifest[chapterId]?.properties?.includes("nav")) {
|
|
368
367
|
continue;
|
|
369
368
|
}
|
|
370
369
|
const chapterSentences = await this.getChapterSentences(chapterId);
|
|
@@ -375,12 +374,12 @@ class Aligner {
|
|
|
375
374
|
);
|
|
376
375
|
}
|
|
377
376
|
if (chapterSentences.length === 0) {
|
|
378
|
-
|
|
377
|
+
this.logger?.info(`Chapter #${index} has no text; skipping`);
|
|
379
378
|
continue;
|
|
380
379
|
}
|
|
381
380
|
if (chapterSentences.length < 2 && // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
382
381
|
chapterSentences[0].split(" ").length < 4) {
|
|
383
|
-
|
|
382
|
+
this.logger?.info(
|
|
384
383
|
`Chapter #${index} is fewer than four words; skipping`
|
|
385
384
|
);
|
|
386
385
|
continue;
|
|
@@ -390,7 +389,7 @@ class Aligner {
|
|
|
390
389
|
transcriptionText
|
|
391
390
|
);
|
|
392
391
|
if (!boundaries) {
|
|
393
|
-
|
|
392
|
+
this.logger?.info(
|
|
394
393
|
`Could not find chapter #${index} in the transcripton`
|
|
395
394
|
);
|
|
396
395
|
continue;
|
package/dist/align/align.js
CHANGED
|
@@ -281,7 +281,6 @@ class Aligner {
|
|
|
281
281
|
return narrowed;
|
|
282
282
|
}
|
|
283
283
|
async alignBook(onProgress) {
|
|
284
|
-
var _a, _b, _c, _d, _e, _f;
|
|
285
284
|
const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
|
|
286
285
|
this.timing.setMetadata("language", locale.toString());
|
|
287
286
|
this.timing.setMetadata("granularity", this.granularity);
|
|
@@ -292,13 +291,13 @@ class Aligner {
|
|
|
292
291
|
locale
|
|
293
292
|
);
|
|
294
293
|
for (let index = 0; index < spine.length; index++) {
|
|
295
|
-
onProgress
|
|
294
|
+
onProgress?.(index / spine.length);
|
|
296
295
|
const spineItem = spine[index];
|
|
297
|
-
|
|
296
|
+
this.logger?.info(
|
|
298
297
|
`Aligning epub item #${index} : ${basename(spineItem.href)}`
|
|
299
298
|
);
|
|
300
299
|
const chapterId = spineItem.id;
|
|
301
|
-
if (
|
|
300
|
+
if (manifest[chapterId]?.properties?.includes("nav")) {
|
|
302
301
|
continue;
|
|
303
302
|
}
|
|
304
303
|
const chapterSentences = await this.getChapterSentences(chapterId);
|
|
@@ -309,12 +308,12 @@ class Aligner {
|
|
|
309
308
|
);
|
|
310
309
|
}
|
|
311
310
|
if (chapterSentences.length === 0) {
|
|
312
|
-
|
|
311
|
+
this.logger?.info(`Chapter #${index} has no text; skipping`);
|
|
313
312
|
continue;
|
|
314
313
|
}
|
|
315
314
|
if (chapterSentences.length < 2 && // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
316
315
|
chapterSentences[0].split(" ").length < 4) {
|
|
317
|
-
|
|
316
|
+
this.logger?.info(
|
|
318
317
|
`Chapter #${index} is fewer than four words; skipping`
|
|
319
318
|
);
|
|
320
319
|
continue;
|
|
@@ -324,7 +323,7 @@ class Aligner {
|
|
|
324
323
|
transcriptionText
|
|
325
324
|
);
|
|
326
325
|
if (!boundaries) {
|
|
327
|
-
|
|
326
|
+
this.logger?.info(
|
|
328
327
|
`Could not find chapter #${index} in the transcripton`
|
|
329
328
|
);
|
|
330
329
|
continue;
|
package/dist/cli/bin.cjs
CHANGED
|
@@ -137,7 +137,7 @@ async function main() {
|
|
|
137
137
|
const controller = new AbortController();
|
|
138
138
|
let progressBar;
|
|
139
139
|
function resetProgressBar() {
|
|
140
|
-
progressBar
|
|
140
|
+
progressBar?.stop();
|
|
141
141
|
progressBar = new import_cli_progress.SingleBar(
|
|
142
142
|
{ etaBuffer: 4, hideCursor: null, noTTYOutput: !process.stderr.isTTY },
|
|
143
143
|
import_cli_progress.Presets.shades_classic
|
package/dist/cli/bin.js
CHANGED
|
@@ -23,7 +23,7 @@ import { run } from "@optique/run";
|
|
|
23
23
|
import { path } from "@optique/run/valueparser";
|
|
24
24
|
import { Presets, SingleBar } from "cli-progress";
|
|
25
25
|
import { Epub } from "@storyteller-platform/epub";
|
|
26
|
-
import packageJson from "../../package.json";
|
|
26
|
+
import packageJson from "../../package.json" with { type: "json" };
|
|
27
27
|
import { align } from "../align/align.js";
|
|
28
28
|
import { alignCommand, alignParser } from "../align/parse.js";
|
|
29
29
|
import { createLogger } from "../common/logging.js";
|
|
@@ -88,7 +88,7 @@ async function main() {
|
|
|
88
88
|
const controller = new AbortController();
|
|
89
89
|
let progressBar;
|
|
90
90
|
function resetProgressBar() {
|
|
91
|
-
progressBar
|
|
91
|
+
progressBar?.stop();
|
|
92
92
|
progressBar = new SingleBar(
|
|
93
93
|
{ etaBuffer: 4, hideCursor: null, noTTYOutput: !process.stderr.isTTY },
|
|
94
94
|
Presets.shades_classic
|
package/dist/common/ffmpeg.cjs
CHANGED
|
@@ -58,8 +58,8 @@ async function execCmd(command, logger, signal) {
|
|
|
58
58
|
"stdout maxBuffer length exceeded. This likely means that youre trying to process a very large file, and the ffmpeg process is running out of memory. Maybe check the image size of your cover art."
|
|
59
59
|
);
|
|
60
60
|
}
|
|
61
|
-
logger
|
|
62
|
-
logger
|
|
61
|
+
logger?.error(error);
|
|
62
|
+
logger?.info(stdout);
|
|
63
63
|
throw new Error(stderr);
|
|
64
64
|
}
|
|
65
65
|
}
|
|
@@ -157,8 +157,8 @@ function commonFfmpegArguments(sourceExtension, destExtension, codec, bitrate) {
|
|
|
157
157
|
}
|
|
158
158
|
async function splitFile(input, output, start, end, encoding, signal, logger) {
|
|
159
159
|
if (start === end) return false;
|
|
160
|
-
logger
|
|
161
|
-
`Splitting ${input} start: ${start} end: ${end}${
|
|
160
|
+
logger?.info(
|
|
161
|
+
`Splitting ${input} start: ${start} end: ${end}${encoding?.codec ? ` codec: ${encoding.codec}` : ""}`
|
|
162
162
|
);
|
|
163
163
|
const command = "ffmpeg";
|
|
164
164
|
const args = [
|
|
@@ -172,8 +172,8 @@ async function splitFile(input, output, start, end, encoding, signal, logger) {
|
|
|
172
172
|
...commonFfmpegArguments(
|
|
173
173
|
(0, import_node_path.extname)(input),
|
|
174
174
|
(0, import_node_path.extname)(output),
|
|
175
|
-
|
|
176
|
-
|
|
175
|
+
encoding?.codec ?? null,
|
|
176
|
+
encoding?.bitrate ?? null
|
|
177
177
|
),
|
|
178
178
|
(0, import_shell.quotePath)(output)
|
|
179
179
|
];
|
|
@@ -189,15 +189,15 @@ async function splitFile(input, output, start, end, encoding, signal, logger) {
|
|
|
189
189
|
return true;
|
|
190
190
|
}
|
|
191
191
|
async function transcodeFile(input, output, encoding, signal, logger) {
|
|
192
|
-
if (!
|
|
193
|
-
logger
|
|
192
|
+
if (!encoding?.codec && (0, import_mime.areSameType)(input, output)) {
|
|
193
|
+
logger?.info(
|
|
194
194
|
`Input and output container and codec are the same, copying ${input} to output directory`
|
|
195
195
|
);
|
|
196
196
|
await (0, import_promises.copyFile)(input, output);
|
|
197
197
|
return;
|
|
198
198
|
}
|
|
199
|
-
logger
|
|
200
|
-
`Transcoding ${input}${
|
|
199
|
+
logger?.info(
|
|
200
|
+
`Transcoding ${input}${encoding?.codec ? ` codec: ${encoding.codec}` : ""}`
|
|
201
201
|
);
|
|
202
202
|
const command = "ffmpeg";
|
|
203
203
|
const args = [
|
|
@@ -207,8 +207,8 @@ async function transcodeFile(input, output, encoding, signal, logger) {
|
|
|
207
207
|
...commonFfmpegArguments(
|
|
208
208
|
(0, import_node_path.extname)(input),
|
|
209
209
|
(0, import_node_path.extname)(output),
|
|
210
|
-
|
|
211
|
-
|
|
210
|
+
encoding?.codec ?? null,
|
|
211
|
+
encoding?.bitrate ?? null
|
|
212
212
|
),
|
|
213
213
|
(0, import_shell.quotePath)(output)
|
|
214
214
|
];
|
package/dist/common/ffmpeg.js
CHANGED
|
@@ -23,8 +23,8 @@ async function execCmd(command, logger, signal) {
|
|
|
23
23
|
"stdout maxBuffer length exceeded. This likely means that youre trying to process a very large file, and the ffmpeg process is running out of memory. Maybe check the image size of your cover art."
|
|
24
24
|
);
|
|
25
25
|
}
|
|
26
|
-
logger
|
|
27
|
-
logger
|
|
26
|
+
logger?.error(error);
|
|
27
|
+
logger?.info(stdout);
|
|
28
28
|
throw new Error(stderr);
|
|
29
29
|
}
|
|
30
30
|
}
|
|
@@ -122,8 +122,8 @@ function commonFfmpegArguments(sourceExtension, destExtension, codec, bitrate) {
|
|
|
122
122
|
}
|
|
123
123
|
async function splitFile(input, output, start, end, encoding, signal, logger) {
|
|
124
124
|
if (start === end) return false;
|
|
125
|
-
logger
|
|
126
|
-
`Splitting ${input} start: ${start} end: ${end}${
|
|
125
|
+
logger?.info(
|
|
126
|
+
`Splitting ${input} start: ${start} end: ${end}${encoding?.codec ? ` codec: ${encoding.codec}` : ""}`
|
|
127
127
|
);
|
|
128
128
|
const command = "ffmpeg";
|
|
129
129
|
const args = [
|
|
@@ -137,8 +137,8 @@ async function splitFile(input, output, start, end, encoding, signal, logger) {
|
|
|
137
137
|
...commonFfmpegArguments(
|
|
138
138
|
extname(input),
|
|
139
139
|
extname(output),
|
|
140
|
-
|
|
141
|
-
|
|
140
|
+
encoding?.codec ?? null,
|
|
141
|
+
encoding?.bitrate ?? null
|
|
142
142
|
),
|
|
143
143
|
quotePath(output)
|
|
144
144
|
];
|
|
@@ -154,15 +154,15 @@ async function splitFile(input, output, start, end, encoding, signal, logger) {
|
|
|
154
154
|
return true;
|
|
155
155
|
}
|
|
156
156
|
async function transcodeFile(input, output, encoding, signal, logger) {
|
|
157
|
-
if (!
|
|
158
|
-
logger
|
|
157
|
+
if (!encoding?.codec && areSameType(input, output)) {
|
|
158
|
+
logger?.info(
|
|
159
159
|
`Input and output container and codec are the same, copying ${input} to output directory`
|
|
160
160
|
);
|
|
161
161
|
await copyFile(input, output);
|
|
162
162
|
return;
|
|
163
163
|
}
|
|
164
|
-
logger
|
|
165
|
-
`Transcoding ${input}${
|
|
164
|
+
logger?.info(
|
|
165
|
+
`Transcoding ${input}${encoding?.codec ? ` codec: ${encoding.codec}` : ""}`
|
|
166
166
|
);
|
|
167
167
|
const command = "ffmpeg";
|
|
168
168
|
const args = [
|
|
@@ -172,8 +172,8 @@ async function transcodeFile(input, output, encoding, signal, logger) {
|
|
|
172
172
|
...commonFfmpegArguments(
|
|
173
173
|
extname(input),
|
|
174
174
|
extname(output),
|
|
175
|
-
|
|
176
|
-
|
|
175
|
+
encoding?.codec ?? null,
|
|
176
|
+
encoding?.bitrate ?? null
|
|
177
177
|
),
|
|
178
178
|
quotePath(output)
|
|
179
179
|
];
|
|
package/dist/errorAlign/backtraceGraph.cjs
CHANGED
|
@@ -69,15 +69,14 @@ class Node {
|
|
|
69
69
|
class NodeMap {
|
|
70
70
|
map;
|
|
71
71
|
constructor(entries) {
|
|
72
|
-
const keyedEntries = entries
|
|
72
|
+
const keyedEntries = entries?.map(
|
|
73
73
|
([index, node]) => [`${index[0]}-${index[1]}`, { index, node }]
|
|
74
74
|
);
|
|
75
75
|
this.map = new Map(keyedEntries);
|
|
76
76
|
}
|
|
77
77
|
get([hypIndex, refIndex]) {
|
|
78
|
-
var _a;
|
|
79
78
|
const key = `${hypIndex}-${refIndex}`;
|
|
80
|
-
return
|
|
79
|
+
return this.map.get(key)?.node;
|
|
81
80
|
}
|
|
82
81
|
set([hypIndex, refIndex], node) {
|
|
83
82
|
const key = `${hypIndex}-${refIndex}`;
|
|
@@ -159,7 +158,7 @@ class BacktraceGraph {
|
|
|
159
158
|
*/
|
|
160
159
|
getPath({ sample } = { sample: false }) {
|
|
161
160
|
let node = this.getNode(0, 0);
|
|
162
|
-
(0, import_node_assert.default)(node
|
|
161
|
+
(0, import_node_assert.default)(node?.isRoot, "The node at (-1, -1) was expected to be a root node.");
|
|
163
162
|
const path = [];
|
|
164
163
|
while (!node.isTerminal) {
|
|
165
164
|
const opType = sample ? choose(Array.from(node.children.keys())) : (
|
|
@@ -211,7 +210,6 @@ class BacktraceGraph {
|
|
|
211
210
|
* @returns A list of index tuples representing the end node of unambiguous span matches.
|
|
212
211
|
*/
|
|
213
212
|
getUnambiguousTokenSpanMatches(ref) {
|
|
214
|
-
var _a;
|
|
215
213
|
ref = "_" + ref;
|
|
216
214
|
const monoMatchEndNodes = /* @__PURE__ */ new Set();
|
|
217
215
|
const refIndexes = new import_utils.Counter();
|
|
@@ -226,7 +224,7 @@ class BacktraceGraph {
|
|
|
226
224
|
if (!this.nodes.has(_index)) {
|
|
227
225
|
break;
|
|
228
226
|
}
|
|
229
|
-
if (!
|
|
227
|
+
if (!this.nodes.get(_index)?.parents) {
|
|
230
228
|
break;
|
|
231
229
|
}
|
|
232
230
|
if (ref[_refIndex] === import_utils.END_DELIMITER) {
|
|
@@ -272,8 +270,7 @@ class BacktraceGraph {
|
|
|
272
270
|
* Add parents to the node at the given index based on the backtrace matrix.
|
|
273
271
|
*/
|
|
274
272
|
addParentsFromBacktrace(index) {
|
|
275
|
-
|
|
276
|
-
const node = (_a = this._nodes) == null ? void 0 : _a.get(index);
|
|
273
|
+
const node = this._nodes?.get(index);
|
|
277
274
|
(0, import_node_assert.default)(
|
|
278
275
|
!!node,
|
|
279
276
|
`Node at index ${index.toString()} does not exist in the graph.`
|
|
package/dist/errorAlign/backtraceGraph.js
CHANGED
|
@@ -42,15 +42,14 @@ class Node {
|
|
|
42
42
|
class NodeMap {
|
|
43
43
|
map;
|
|
44
44
|
constructor(entries) {
|
|
45
|
-
const keyedEntries = entries
|
|
45
|
+
const keyedEntries = entries?.map(
|
|
46
46
|
([index, node]) => [`${index[0]}-${index[1]}`, { index, node }]
|
|
47
47
|
);
|
|
48
48
|
this.map = new Map(keyedEntries);
|
|
49
49
|
}
|
|
50
50
|
get([hypIndex, refIndex]) {
|
|
51
|
-
var _a;
|
|
52
51
|
const key = `${hypIndex}-${refIndex}`;
|
|
53
|
-
return
|
|
52
|
+
return this.map.get(key)?.node;
|
|
54
53
|
}
|
|
55
54
|
set([hypIndex, refIndex], node) {
|
|
56
55
|
const key = `${hypIndex}-${refIndex}`;
|
|
@@ -132,7 +131,7 @@ class BacktraceGraph {
|
|
|
132
131
|
*/
|
|
133
132
|
getPath({ sample } = { sample: false }) {
|
|
134
133
|
let node = this.getNode(0, 0);
|
|
135
|
-
assert(node
|
|
134
|
+
assert(node?.isRoot, "The node at (-1, -1) was expected to be a root node.");
|
|
136
135
|
const path = [];
|
|
137
136
|
while (!node.isTerminal) {
|
|
138
137
|
const opType = sample ? choose(Array.from(node.children.keys())) : (
|
|
@@ -184,7 +183,6 @@ class BacktraceGraph {
|
|
|
184
183
|
* @returns A list of index tuples representing the end node of unambiguous span matches.
|
|
185
184
|
*/
|
|
186
185
|
getUnambiguousTokenSpanMatches(ref) {
|
|
187
|
-
var _a;
|
|
188
186
|
ref = "_" + ref;
|
|
189
187
|
const monoMatchEndNodes = /* @__PURE__ */ new Set();
|
|
190
188
|
const refIndexes = new Counter();
|
|
@@ -199,7 +197,7 @@ class BacktraceGraph {
|
|
|
199
197
|
if (!this.nodes.has(_index)) {
|
|
200
198
|
break;
|
|
201
199
|
}
|
|
202
|
-
if (!
|
|
200
|
+
if (!this.nodes.get(_index)?.parents) {
|
|
203
201
|
break;
|
|
204
202
|
}
|
|
205
203
|
if (ref[_refIndex] === END_DELIMITER) {
|
|
@@ -245,8 +243,7 @@ class BacktraceGraph {
|
|
|
245
243
|
* Add parents to the node at the given index based on the backtrace matrix.
|
|
246
244
|
*/
|
|
247
245
|
addParentsFromBacktrace(index) {
|
|
248
|
-
|
|
249
|
-
const node = (_a = this._nodes) == null ? void 0 : _a.get(index);
|
|
246
|
+
const node = this._nodes?.get(index);
|
|
250
247
|
assert(
|
|
251
248
|
!!node,
|
|
252
249
|
`Node at index ${index.toString()} does not exist in the graph.`
|
|
package/dist/errorAlign/beamSearch.cjs
CHANGED
|
@@ -242,7 +242,6 @@ function isSubstitution(hypIndex, refIndex, lastHypIndex, lastRefIndex) {
|
|
|
242
242
|
return !(refIndex === lastRefIndex || hypIndex === lastHypIndex);
|
|
243
243
|
}
|
|
244
244
|
function errorAlignBeamSearch(src, beamSize = 100) {
|
|
245
|
-
var _a;
|
|
246
245
|
const startPath = new Path(src);
|
|
247
246
|
let beam = [startPath];
|
|
248
247
|
let pruneMap = {};
|
|
@@ -278,7 +277,7 @@ function errorAlignBeamSearch(src, beamSize = 100) {
|
|
|
278
277
|
return a.normCost - b.normCost;
|
|
279
278
|
});
|
|
280
279
|
beam = newBeamPaths.slice(0, beamSize);
|
|
281
|
-
if (
|
|
280
|
+
if (beam[0]?.atUnambiguousMatchNode) {
|
|
282
281
|
beam = beam.slice(0, 1);
|
|
283
282
|
pruneMap = {};
|
|
284
283
|
}
|
|
package/dist/errorAlign/beamSearch.js
CHANGED
|
@@ -209,7 +209,6 @@ function isSubstitution(hypIndex, refIndex, lastHypIndex, lastRefIndex) {
|
|
|
209
209
|
return !(refIndex === lastRefIndex || hypIndex === lastHypIndex);
|
|
210
210
|
}
|
|
211
211
|
function errorAlignBeamSearch(src, beamSize = 100) {
|
|
212
|
-
var _a;
|
|
213
212
|
const startPath = new Path(src);
|
|
214
213
|
let beam = [startPath];
|
|
215
214
|
let pruneMap = {};
|
|
@@ -245,7 +244,7 @@ function errorAlignBeamSearch(src, beamSize = 100) {
|
|
|
245
244
|
return a.normCost - b.normCost;
|
|
246
245
|
});
|
|
247
246
|
beam = newBeamPaths.slice(0, beamSize);
|
|
248
|
-
if (
|
|
247
|
+
if (beam[0]?.atUnambiguousMatchNode) {
|
|
249
248
|
beam = beam.slice(0, 1);
|
|
250
249
|
pruneMap = {};
|
|
251
250
|
}
|
package/dist/markup/markup.cjs
CHANGED
|
@@ -77,7 +77,6 @@ var import_segmentation = require("./segmentation.cjs");
|
|
|
77
77
|
var import_serializeDom = require("./serializeDom.cjs");
|
|
78
78
|
var import_transform = require("./transform.cjs");
|
|
79
79
|
async function markup(input, output, options) {
|
|
80
|
-
var _a, _b, _c, _d;
|
|
81
80
|
var _stack = [];
|
|
82
81
|
try {
|
|
83
82
|
const timing = (0, import_ghost_story.createAggregator)();
|
|
@@ -88,13 +87,13 @@ async function markup(input, output, options) {
|
|
|
88
87
|
const spine = await epub.getSpineItems();
|
|
89
88
|
const manifest = await epub.getManifest();
|
|
90
89
|
for (let index = 0; index < spine.length; index++) {
|
|
91
|
-
|
|
90
|
+
options.onProgress?.(index / spine.length);
|
|
92
91
|
const spineItem = spine[index];
|
|
93
|
-
|
|
92
|
+
options.logger?.info(
|
|
94
93
|
`Marking up epub item #${index}: ${(0, import_posix.basename)(spineItem.href)}`
|
|
95
94
|
);
|
|
96
95
|
const chapterId = spineItem.id;
|
|
97
|
-
if (
|
|
96
|
+
if (manifest[chapterId]?.properties?.includes("nav")) {
|
|
98
97
|
continue;
|
|
99
98
|
}
|
|
100
99
|
const chapterXml = await epub.readXhtmlItemContents(chapterId);
|
package/dist/markup/markup.js
CHANGED
|
@@ -15,7 +15,6 @@ import { getXhtmlSegmentation } from "./segmentation.js";
|
|
|
15
15
|
import { serializeDom } from "./serializeDom.js";
|
|
16
16
|
import { addMark } from "./transform.js";
|
|
17
17
|
async function markup(input, output, options) {
|
|
18
|
-
var _a, _b, _c, _d;
|
|
19
18
|
var _stack = [];
|
|
20
19
|
try {
|
|
21
20
|
const timing = createAggregator();
|
|
@@ -26,13 +25,13 @@ async function markup(input, output, options) {
|
|
|
26
25
|
const spine = await epub.getSpineItems();
|
|
27
26
|
const manifest = await epub.getManifest();
|
|
28
27
|
for (let index = 0; index < spine.length; index++) {
|
|
29
|
-
|
|
28
|
+
options.onProgress?.(index / spine.length);
|
|
30
29
|
const spineItem = spine[index];
|
|
31
|
-
|
|
30
|
+
options.logger?.info(
|
|
32
31
|
`Marking up epub item #${index}: ${basename(spineItem.href)}`
|
|
33
32
|
);
|
|
34
33
|
const chapterId = spineItem.id;
|
|
35
|
-
if (
|
|
34
|
+
if (manifest[chapterId]?.properties?.includes("nav")) {
|
|
36
35
|
continue;
|
|
37
36
|
}
|
|
38
37
|
const chapterXml = await epub.readXhtmlItemContents(chapterId);
|
|
package/dist/markup/serializeDom.cjs
CHANGED
|
@@ -49,7 +49,7 @@ function serializeDomNodes(nodes) {
|
|
|
49
49
|
}
|
|
50
50
|
const childFirstMark = child.marks[0];
|
|
51
51
|
const lastChildFirstMark = lastChild.marks[0];
|
|
52
|
-
if (childFirstMark === lastChildFirstMark ||
|
|
52
|
+
if (childFirstMark === lastChildFirstMark || childFirstMark?.eq(lastChildFirstMark)) {
|
|
53
53
|
return [
|
|
54
54
|
...acc.slice(0, acc.length - 1),
|
|
55
55
|
[...lastPartition.slice(0, lastPartition.length), child]
|
|
package/dist/markup/serializeDom.js
CHANGED
|
@@ -26,7 +26,7 @@ function serializeDomNodes(nodes) {
|
|
|
26
26
|
}
|
|
27
27
|
const childFirstMark = child.marks[0];
|
|
28
28
|
const lastChildFirstMark = lastChild.marks[0];
|
|
29
|
-
if (childFirstMark === lastChildFirstMark ||
|
|
29
|
+
if (childFirstMark === lastChildFirstMark || childFirstMark?.eq(lastChildFirstMark)) {
|
|
30
30
|
return [
|
|
31
31
|
...acc.slice(0, acc.length - 1),
|
|
32
32
|
[...lastPartition.slice(0, lastPartition.length), child]
|
|
package/dist/process/processAudiobook.cjs
CHANGED
|
@@ -75,15 +75,14 @@ var import_ghost_story = require("@storyteller-platform/ghost-story");
|
|
|
75
75
|
var import_ffmpeg = require("../common/ffmpeg.cjs");
|
|
76
76
|
var import_ranges = require("./ranges.cjs");
|
|
77
77
|
async function processAudiobook(input, output, options) {
|
|
78
|
-
var _a, _b, _c;
|
|
79
78
|
const timing = (0, import_ghost_story.createAggregator)();
|
|
80
|
-
timing.setMetadata("codec",
|
|
81
|
-
timing.setMetadata("bitrate",
|
|
79
|
+
timing.setMetadata("codec", options.encoding?.codec ?? "unspecified");
|
|
80
|
+
timing.setMetadata("bitrate", options.encoding?.bitrate ?? "unspecified");
|
|
82
81
|
timing.setMetadata("max-length", `${(options.maxLength ?? 2) * 60} minutes`);
|
|
83
82
|
await (0, import_promises.mkdir)(output, { recursive: true });
|
|
84
83
|
const allFiles = await (0, import_promises.readdir)(input, { recursive: true });
|
|
85
84
|
const filenames = allFiles.filter((f) => (0, import_audiobook.isAudioFile)(f) || (0, import_audiobook.isZipArchive)(f));
|
|
86
|
-
|
|
85
|
+
options.logger?.debug(`Found ${filenames.length} files to process`);
|
|
87
86
|
const outputFiles = [];
|
|
88
87
|
const controller = new AbortController();
|
|
89
88
|
const signal = AbortSignal.any([
|
|
@@ -97,15 +96,14 @@ async function processAudiobook(input, output, options) {
|
|
|
97
96
|
if (signal.aborted) throw new Error("Aborted");
|
|
98
97
|
const filepath = (0, import_node_path.join)(input, filename);
|
|
99
98
|
function onFileProgress(progress) {
|
|
100
|
-
var _a2, _b2;
|
|
101
99
|
perFileProgress.set(index, progress);
|
|
102
100
|
const updatedProgress = Array.from(perFileProgress.values()).reduce(
|
|
103
101
|
(acc, p) => acc + p
|
|
104
102
|
);
|
|
105
|
-
|
|
103
|
+
options.logger?.info(
|
|
106
104
|
`Progress: ${Math.floor(updatedProgress * 100)}%`
|
|
107
105
|
);
|
|
108
|
-
|
|
106
|
+
options.onProgress?.(updatedProgress);
|
|
109
107
|
}
|
|
110
108
|
const fileOptions = {
|
|
111
109
|
...options,
|
|
@@ -150,12 +148,11 @@ async function processFile(input, output, prefix, options) {
|
|
|
150
148
|
);
|
|
151
149
|
await Promise.all(
|
|
152
150
|
ranges.map(async (range, index) => {
|
|
153
|
-
var _a, _b;
|
|
154
151
|
var _stack2 = [];
|
|
155
152
|
try {
|
|
156
153
|
const outputExtension = determineExtension(
|
|
157
154
|
range.filepath,
|
|
158
|
-
|
|
155
|
+
options.encoding?.codec
|
|
159
156
|
);
|
|
160
157
|
const outputFilename = `${prefix}${(index + 1).toString().padStart(5, "0")}${outputExtension}`;
|
|
161
158
|
const outputFilepath = (0, import_node_path.join)(output, outputFilename);
|
|
@@ -164,11 +161,10 @@ async function processFile(input, output, prefix, options) {
|
|
|
164
161
|
options.lock.release();
|
|
165
162
|
});
|
|
166
163
|
await options.lock.wait();
|
|
167
|
-
if (
|
|
164
|
+
if (options.signal?.aborted) throw new Error("Aborted");
|
|
168
165
|
await timing.timeAsync(
|
|
169
166
|
`${range.filepath},${range.start}:${range.end}`,
|
|
170
167
|
async () => {
|
|
171
|
-
var _a2;
|
|
172
168
|
const wasSplit = await (0, import_ffmpeg.splitFile)(
|
|
173
169
|
range.filepath,
|
|
174
170
|
outputFilepath,
|
|
@@ -180,7 +176,7 @@ async function processFile(input, output, prefix, options) {
|
|
|
180
176
|
);
|
|
181
177
|
if (wasSplit) {
|
|
182
178
|
outputFiles.push(outputFilename);
|
|
183
|
-
|
|
179
|
+
options.onProgress?.((outputFiles.length + 1) / ranges.length);
|
|
184
180
|
}
|
|
185
181
|
}
|
|
186
182
|
);
|
|
package/dist/process/processAudiobook.js
CHANGED
|
@@ -22,15 +22,14 @@ import {
|
|
|
22
22
|
import { splitFile } from "../common/ffmpeg.js";
|
|
23
23
|
import { getSafeChapterRanges } from "./ranges.js";
|
|
24
24
|
async function processAudiobook(input, output, options) {
|
|
25
|
-
var _a, _b, _c;
|
|
26
25
|
const timing = createAggregator();
|
|
27
|
-
timing.setMetadata("codec",
|
|
28
|
-
timing.setMetadata("bitrate",
|
|
26
|
+
timing.setMetadata("codec", options.encoding?.codec ?? "unspecified");
|
|
27
|
+
timing.setMetadata("bitrate", options.encoding?.bitrate ?? "unspecified");
|
|
29
28
|
timing.setMetadata("max-length", `${(options.maxLength ?? 2) * 60} minutes`);
|
|
30
29
|
await mkdir(output, { recursive: true });
|
|
31
30
|
const allFiles = await readdir(input, { recursive: true });
|
|
32
31
|
const filenames = allFiles.filter((f) => isAudioFile(f) || isZipArchive(f));
|
|
33
|
-
|
|
32
|
+
options.logger?.debug(`Found ${filenames.length} files to process`);
|
|
34
33
|
const outputFiles = [];
|
|
35
34
|
const controller = new AbortController();
|
|
36
35
|
const signal = AbortSignal.any([
|
|
@@ -44,15 +43,14 @@ async function processAudiobook(input, output, options) {
|
|
|
44
43
|
if (signal.aborted) throw new Error("Aborted");
|
|
45
44
|
const filepath = join(input, filename);
|
|
46
45
|
function onFileProgress(progress) {
|
|
47
|
-
var _a2, _b2;
|
|
48
46
|
perFileProgress.set(index, progress);
|
|
49
47
|
const updatedProgress = Array.from(perFileProgress.values()).reduce(
|
|
50
48
|
(acc, p) => acc + p
|
|
51
49
|
);
|
|
52
|
-
|
|
50
|
+
options.logger?.info(
|
|
53
51
|
`Progress: ${Math.floor(updatedProgress * 100)}%`
|
|
54
52
|
);
|
|
55
|
-
|
|
53
|
+
options.onProgress?.(updatedProgress);
|
|
56
54
|
}
|
|
57
55
|
const fileOptions = {
|
|
58
56
|
...options,
|
|
@@ -97,12 +95,11 @@ async function processFile(input, output, prefix, options) {
|
|
|
97
95
|
);
|
|
98
96
|
await Promise.all(
|
|
99
97
|
ranges.map(async (range, index) => {
|
|
100
|
-
var _a, _b;
|
|
101
98
|
var _stack2 = [];
|
|
102
99
|
try {
|
|
103
100
|
const outputExtension = determineExtension(
|
|
104
101
|
range.filepath,
|
|
105
|
-
|
|
102
|
+
options.encoding?.codec
|
|
106
103
|
);
|
|
107
104
|
const outputFilename = `${prefix}${(index + 1).toString().padStart(5, "0")}${outputExtension}`;
|
|
108
105
|
const outputFilepath = join(output, outputFilename);
|
|
@@ -111,11 +108,10 @@ async function processFile(input, output, prefix, options) {
|
|
|
111
108
|
options.lock.release();
|
|
112
109
|
});
|
|
113
110
|
await options.lock.wait();
|
|
114
|
-
if (
|
|
111
|
+
if (options.signal?.aborted) throw new Error("Aborted");
|
|
115
112
|
await timing.timeAsync(
|
|
116
113
|
`${range.filepath},${range.start}:${range.end}`,
|
|
117
114
|
async () => {
|
|
118
|
-
var _a2;
|
|
119
115
|
const wasSplit = await splitFile(
|
|
120
116
|
range.filepath,
|
|
121
117
|
outputFilepath,
|
|
@@ -127,7 +123,7 @@ async function processFile(input, output, prefix, options) {
|
|
|
127
123
|
);
|
|
128
124
|
if (wasSplit) {
|
|
129
125
|
outputFiles.push(outputFilename);
|
|
130
|
-
|
|
126
|
+
options.onProgress?.((outputFiles.length + 1) / ranges.length);
|
|
131
127
|
}
|
|
132
128
|
}
|
|
133
129
|
);
|