@storyteller-platform/align 0.1.27 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/align.cjs +217 -78
- package/dist/align/align.d.cts +1 -0
- package/dist/align/align.d.ts +1 -0
- package/dist/align/align.js +231 -81
- package/dist/align/getSentenceRanges.cjs +4 -1
- package/dist/align/getSentenceRanges.js +4 -1
- package/dist/align/parse.cjs +14 -3
- package/dist/align/parse.d.cts +3 -0
- package/dist/align/parse.d.ts +3 -0
- package/dist/align/parse.js +14 -3
- package/dist/align/textFragments.cjs +60 -99
- package/dist/align/textFragments.d.cts +5 -18
- package/dist/align/textFragments.d.ts +5 -18
- package/dist/align/textFragments.js +60 -99
- package/dist/cli/bin.cjs +2 -0
- package/dist/cli/bin.js +2 -0
- package/dist/errorAlign/graphMetadata.cjs +2 -2
- package/dist/errorAlign/graphMetadata.js +1 -1
- package/dist/markup/parseDom.cjs +1 -1
- package/dist/markup/parseDom.js +1 -1
- package/dist/readium/guidedNavigation.cjs +234 -0
- package/dist/readium/guidedNavigation.d.cts +7 -0
- package/dist/readium/guidedNavigation.d.ts +7 -0
- package/dist/readium/guidedNavigation.js +210 -0
- package/dist/readium/manifest.cjs +260 -0
- package/dist/readium/manifest.d.cts +6 -0
- package/dist/readium/manifest.d.ts +6 -0
- package/dist/readium/manifest.js +242 -0
- package/dist/snapshot/snapshot.cjs +13 -1
- package/dist/snapshot/snapshot.js +16 -2
- package/dist/types/smil-clockvalue.d.cjs +1 -0
- package/dist/types/smil-clockvalue.d.d.cts +3 -0
- package/dist/types/smil-clockvalue.d.d.ts +3 -0
- package/dist/types/smil-clockvalue.d.js +0 -0
- package/package.json +7 -2
package/dist/align/align.js
CHANGED
|
@@ -2,11 +2,24 @@ import {
|
|
|
2
2
|
__callDispose,
|
|
3
3
|
__using
|
|
4
4
|
} from "../chunk-BIEQXUOY.js";
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
5
|
+
import { randomUUID } from "node:crypto";
|
|
6
|
+
import { createWriteStream } from "node:fs";
|
|
7
|
+
import {
|
|
8
|
+
copyFile,
|
|
9
|
+
cp,
|
|
10
|
+
mkdir,
|
|
11
|
+
readFile,
|
|
12
|
+
readdir,
|
|
13
|
+
rm,
|
|
14
|
+
writeFile
|
|
15
|
+
} from "node:fs/promises";
|
|
16
|
+
import { tmpdir } from "node:os";
|
|
17
|
+
import { dirname as autoDirname, extname, join as autoJoin } from "node:path";
|
|
7
18
|
import { basename, dirname, parse, relative } from "node:path/posix";
|
|
19
|
+
import { LocalizedString } from "@readium/shared";
|
|
8
20
|
import { enumerate, max } from "itertools";
|
|
9
21
|
import memoize from "memoize";
|
|
22
|
+
import { ZipFile } from "yazl";
|
|
10
23
|
import { isAudioFile, lookupAudioMime } from "@storyteller-platform/audiobook";
|
|
11
24
|
import {
|
|
12
25
|
Epub
|
|
@@ -19,6 +32,10 @@ import { getTrackDuration } from "../common/ffmpeg.js";
|
|
|
19
32
|
import { parseDom } from "../markup/parseDom.js";
|
|
20
33
|
import { segmentChapter } from "../markup/segmentation.js";
|
|
21
34
|
import { inlineFootnotes, liftText } from "../markup/transform.js";
|
|
35
|
+
import {
|
|
36
|
+
generateGuidedNavigationDocuments,
|
|
37
|
+
generateGuidedNavigationManifest
|
|
38
|
+
} from "../readium/guidedNavigation.js";
|
|
22
39
|
import {
|
|
23
40
|
collapseSentenceRangeGaps,
|
|
24
41
|
expandEmptySentenceRanges,
|
|
@@ -29,16 +46,19 @@ import {
|
|
|
29
46
|
import { interpolateSentenceRanges } from "./interpolateSentenceRanges.js";
|
|
30
47
|
import { findBoundaries } from "./search.js";
|
|
31
48
|
import { slugify } from "./slugify.js";
|
|
32
|
-
import {
|
|
49
|
+
import { TextFragmentFactory } from "./textFragments.js";
|
|
33
50
|
async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
34
|
-
var
|
|
51
|
+
var _stack2 = [];
|
|
35
52
|
try {
|
|
36
|
-
|
|
37
|
-
|
|
53
|
+
const outFormat = options.outFormat ?? "epub";
|
|
54
|
+
if (outFormat === "epub") {
|
|
55
|
+
await mkdir(dirname(output), { recursive: true });
|
|
56
|
+
await copyFile(input, output);
|
|
57
|
+
}
|
|
38
58
|
const audiobookFiles = await readdir(audiobookDir).then(
|
|
39
59
|
(filenames) => filenames.filter((f) => isAudioFile(f)).map((f) => autoJoin(audiobookDir, f))
|
|
40
60
|
);
|
|
41
|
-
const epub = __using(
|
|
61
|
+
const epub = __using(_stack2, await Epub.from(outFormat === "epub" ? output : input));
|
|
42
62
|
const transcriptions = await readdir(transcriptionsDir).then(
|
|
43
63
|
(filenames) => filenames.filter((f) => f.endsWith(".json")).map((f) => autoJoin(transcriptionsDir, f))
|
|
44
64
|
).then(
|
|
@@ -70,7 +90,57 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
|
70
90
|
options.logger
|
|
71
91
|
);
|
|
72
92
|
const timing = await aligner.alignBook(options.onProgress);
|
|
73
|
-
|
|
93
|
+
if (outFormat === "epub") {
|
|
94
|
+
await epub.saveAndClose();
|
|
95
|
+
} else {
|
|
96
|
+
var _stack = [];
|
|
97
|
+
try {
|
|
98
|
+
const guidedNavigationDocuments = await generateGuidedNavigationDocuments(epub);
|
|
99
|
+
const manifest = generateGuidedNavigationManifest(
|
|
100
|
+
new LocalizedString(
|
|
101
|
+
await epub.getTitle() ?? basename(input, extname(input))
|
|
102
|
+
),
|
|
103
|
+
guidedNavigationDocuments
|
|
104
|
+
);
|
|
105
|
+
const tmpArchivePath = autoJoin(
|
|
106
|
+
tmpdir(),
|
|
107
|
+
`storyteller-platform-epub-${randomUUID()}`
|
|
108
|
+
);
|
|
109
|
+
const { promise, resolve } = Promise.withResolvers();
|
|
110
|
+
const zipfile = new ZipFile();
|
|
111
|
+
const writeStream = createWriteStream(tmpArchivePath);
|
|
112
|
+
writeStream.on("close", () => {
|
|
113
|
+
resolve();
|
|
114
|
+
});
|
|
115
|
+
const stack = __using(_stack, new AsyncDisposableStack(), true);
|
|
116
|
+
stack.defer(async () => {
|
|
117
|
+
writeStream.close();
|
|
118
|
+
await rm(tmpArchivePath, { force: true });
|
|
119
|
+
});
|
|
120
|
+
zipfile.outputStream.pipe(writeStream);
|
|
121
|
+
zipfile.addBuffer(
|
|
122
|
+
Buffer.from(JSON.stringify(manifest.serialize())),
|
|
123
|
+
"manifest.json"
|
|
124
|
+
);
|
|
125
|
+
for (const doc of guidedNavigationDocuments) {
|
|
126
|
+
const selfLink = doc.links?.findWithRel("self");
|
|
127
|
+
if (!selfLink) continue;
|
|
128
|
+
zipfile.addBuffer(
|
|
129
|
+
Buffer.from(JSON.stringify(doc.serialize())),
|
|
130
|
+
selfLink.href
|
|
131
|
+
);
|
|
132
|
+
}
|
|
133
|
+
zipfile.end();
|
|
134
|
+
await promise;
|
|
135
|
+
await cp(tmpArchivePath, output);
|
|
136
|
+
epub.discardAndClose();
|
|
137
|
+
} catch (_) {
|
|
138
|
+
var _error = _, _hasError = true;
|
|
139
|
+
} finally {
|
|
140
|
+
var _promise = __callDispose(_stack, _error, _hasError);
|
|
141
|
+
_promise && await _promise;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
74
144
|
if (options.reportsPath) {
|
|
75
145
|
await mkdir(autoDirname(options.reportsPath), { recursive: true });
|
|
76
146
|
await writeFile(
|
|
@@ -82,10 +152,10 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
|
82
152
|
);
|
|
83
153
|
}
|
|
84
154
|
return timing;
|
|
85
|
-
} catch (
|
|
86
|
-
var
|
|
155
|
+
} catch (_2) {
|
|
156
|
+
var _error2 = _2, _hasError2 = true;
|
|
87
157
|
} finally {
|
|
88
|
-
__callDispose(
|
|
158
|
+
__callDispose(_stack2, _error2, _hasError2);
|
|
89
159
|
}
|
|
90
160
|
}
|
|
91
161
|
class Aligner {
|
|
@@ -122,6 +192,7 @@ class Aligner {
|
|
|
122
192
|
const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
|
|
123
193
|
const { chapter, sentenceRanges, wordRanges, xml } = alignedChapter;
|
|
124
194
|
const sentences = await this.getChapterSentences(chapter.id);
|
|
195
|
+
let sentenceIdToBlockFragment = null;
|
|
125
196
|
const sentenceIdToFragment = new Map(
|
|
126
197
|
sentenceRanges.map((range) => [
|
|
127
198
|
range.id,
|
|
@@ -142,34 +213,67 @@ class Aligner {
|
|
|
142
213
|
);
|
|
143
214
|
const wordRangeMap = new Map(wordRanges.map((w) => [w[0].sentenceId, w]));
|
|
144
215
|
if (this.textRef === "text-fragment") {
|
|
145
|
-
|
|
146
|
-
|
|
216
|
+
sentenceIdToBlockFragment = /* @__PURE__ */ new Map();
|
|
217
|
+
const blocks = [[]];
|
|
218
|
+
for (const [i, sentence] of enumerate(sentences)) {
|
|
219
|
+
const text = sentence.text;
|
|
220
|
+
blocks.at(-1)?.push(text);
|
|
221
|
+
if (text.includes("\n") && i < sentences.length - 1) {
|
|
222
|
+
blocks.push([]);
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
const blockFactory = new TextFragmentFactory(
|
|
226
|
+
blocks.map((block) => block.join("")),
|
|
147
227
|
locale
|
|
148
228
|
);
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
155
|
-
range.id
|
|
156
|
-
)
|
|
229
|
+
let sentenceRangeIndex = 0;
|
|
230
|
+
for (const [i, block] of enumerate(blocks)) {
|
|
231
|
+
sentenceIdToBlockFragment.set(
|
|
232
|
+
sentenceRangeIndex,
|
|
233
|
+
blockFactory.findMinimalFragment(i)
|
|
157
234
|
);
|
|
235
|
+
const sentenceFactory = new TextFragmentFactory(
|
|
236
|
+
block.map((s) => s.replace("\n", "")),
|
|
237
|
+
locale
|
|
238
|
+
);
|
|
239
|
+
const blockRanges = sentenceRanges.slice(
|
|
240
|
+
sentenceRangeIndex,
|
|
241
|
+
sentenceRangeIndex + block.length
|
|
242
|
+
);
|
|
243
|
+
for (const [j, range] of enumerate(blockRanges)) {
|
|
244
|
+
sentenceIdToFragment.set(
|
|
245
|
+
range.id,
|
|
246
|
+
sentenceFactory.findMinimalFragment(j)
|
|
247
|
+
);
|
|
248
|
+
}
|
|
158
249
|
if (this.granularity === "word") {
|
|
159
|
-
const
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
250
|
+
const wordFactory = new TextFragmentFactory(
|
|
251
|
+
blockRanges.flatMap((range) => {
|
|
252
|
+
const sentence = sentences[range.id];
|
|
253
|
+
const wordRanges2 = wordRangeMap.get(range.id);
|
|
254
|
+
const toFragment = wordIdToFragment.get(range.id);
|
|
255
|
+
if (!wordRanges2 || !toFragment) return [];
|
|
256
|
+
const words = sentence.words.entries.filter(
|
|
257
|
+
(w) => w.text.match(/\S/)
|
|
258
|
+
);
|
|
259
|
+
return words.map((w) => w.text.replace("\n", ""));
|
|
260
|
+
})
|
|
165
261
|
);
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
);
|
|
262
|
+
let wordRangeIndex = 0;
|
|
263
|
+
for (const range of blockRanges) {
|
|
264
|
+
const wordRanges2 = wordRangeMap.get(range.id);
|
|
265
|
+
const toFragment = wordIdToFragment.get(range.id);
|
|
266
|
+
if (!wordRanges2 || !toFragment) continue;
|
|
267
|
+
for (const [k, wordRange] of enumerate(wordRanges2)) {
|
|
268
|
+
toFragment.set(
|
|
269
|
+
wordRange.id,
|
|
270
|
+
wordFactory.findMinimalFragment(k + wordRangeIndex)
|
|
271
|
+
);
|
|
272
|
+
}
|
|
273
|
+
wordRangeIndex += wordRanges2.length;
|
|
171
274
|
}
|
|
172
275
|
}
|
|
276
|
+
sentenceRangeIndex += block.length;
|
|
173
277
|
}
|
|
174
278
|
}
|
|
175
279
|
const audiofiles = Array.from(
|
|
@@ -209,6 +313,7 @@ class Aligner {
|
|
|
209
313
|
this.granularity,
|
|
210
314
|
sentenceRanges,
|
|
211
315
|
wordRangeMap,
|
|
316
|
+
sentenceIdToBlockFragment,
|
|
212
317
|
sentenceIdToFragment,
|
|
213
318
|
wordIdToFragment
|
|
214
319
|
),
|
|
@@ -507,7 +612,23 @@ class Aligner {
|
|
|
507
612
|
return this.timing;
|
|
508
613
|
}
|
|
509
614
|
}
|
|
510
|
-
function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
|
|
615
|
+
function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToBlockFragment, sentenceIdToFragment, wordIdToFragment) {
|
|
616
|
+
const subSequences = sentenceIdToBlockFragment ? createTextRangeLargeSequences(
|
|
617
|
+
chapter,
|
|
618
|
+
granularity,
|
|
619
|
+
sentenceRanges,
|
|
620
|
+
wordRanges,
|
|
621
|
+
sentenceIdToBlockFragment,
|
|
622
|
+
sentenceIdToFragment,
|
|
623
|
+
wordIdToFragment
|
|
624
|
+
) : createTextRangeSmallSequences(
|
|
625
|
+
chapter,
|
|
626
|
+
granularity,
|
|
627
|
+
sentenceRanges,
|
|
628
|
+
wordRanges,
|
|
629
|
+
sentenceIdToFragment,
|
|
630
|
+
wordIdToFragment
|
|
631
|
+
);
|
|
511
632
|
return [
|
|
512
633
|
Epub.createXmlElement(
|
|
513
634
|
"smil",
|
|
@@ -525,60 +646,89 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, se
|
|
|
525
646
|
"epub:textref": `../${chapter.href}`,
|
|
526
647
|
"epub:type": "chapter"
|
|
527
648
|
},
|
|
528
|
-
|
|
529
|
-
if (granularity === "sentence" || !wordRanges.has(sentenceRange.id)) {
|
|
530
|
-
return Epub.createXmlElement(
|
|
531
|
-
"par",
|
|
532
|
-
{
|
|
533
|
-
id: `${chapter.id}-s${sentenceRange.id}`
|
|
534
|
-
},
|
|
535
|
-
[
|
|
536
|
-
Epub.createXmlElement("text", {
|
|
537
|
-
src: `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
|
|
538
|
-
}),
|
|
539
|
-
Epub.createXmlElement("audio", {
|
|
540
|
-
src: `../Audio/${basename(sentenceRange.audiofile)}`,
|
|
541
|
-
clipBegin: `${sentenceRange.start.toFixed(3)}s`,
|
|
542
|
-
clipEnd: `${sentenceRange.end.toFixed(3)}s`
|
|
543
|
-
})
|
|
544
|
-
]
|
|
545
|
-
);
|
|
546
|
-
}
|
|
547
|
-
const words = wordRanges.get(sentenceRange.id);
|
|
548
|
-
const wordToFragment = wordIdToFragment.get(sentenceRange.id);
|
|
549
|
-
return Epub.createXmlElement(
|
|
550
|
-
"seq",
|
|
551
|
-
{
|
|
552
|
-
id: `${chapter.id}-s${sentenceRange.id}`,
|
|
553
|
-
"epub:type": "text-range-small",
|
|
554
|
-
"epub:textref": `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
|
|
555
|
-
},
|
|
556
|
-
words.map(
|
|
557
|
-
(word) => Epub.createXmlElement(
|
|
558
|
-
"par",
|
|
559
|
-
{
|
|
560
|
-
id: `${chapter.id}-s${sentenceRange.id}-w${word.id}`
|
|
561
|
-
},
|
|
562
|
-
[
|
|
563
|
-
Epub.createXmlElement("text", {
|
|
564
|
-
src: `../${chapter.href}#${wordToFragment.get(word.id)}`
|
|
565
|
-
}),
|
|
566
|
-
Epub.createXmlElement("audio", {
|
|
567
|
-
src: `../Audio/${basename(word.audiofile)}`,
|
|
568
|
-
clipBegin: `${word.start.toFixed(3)}s`,
|
|
569
|
-
clipEnd: `${word.end.toFixed(3)}s`
|
|
570
|
-
})
|
|
571
|
-
]
|
|
572
|
-
)
|
|
573
|
-
)
|
|
574
|
-
);
|
|
575
|
-
})
|
|
649
|
+
subSequences
|
|
576
650
|
)
|
|
577
651
|
])
|
|
578
652
|
]
|
|
579
653
|
)
|
|
580
654
|
];
|
|
581
655
|
}
|
|
656
|
+
/**
 * Wraps a chapter's sentence ranges in block-level
 * `<seq epub:type="text-range-large">` elements.
 *
 * Each key of `sentenceIdToBlockFragment` is used as the index into
 * `sentenceRanges` at which a block starts; the mapped value is the fragment
 * appended to the chapter href to form that block's `epub:textref`. A block
 * runs from its own start index up to (but not including) the start index of
 * the next block; the final block runs to the end of `sentenceRanges`.
 *
 * @param chapter Chapter descriptor; `id` and `href` are read here.
 * @param granularity Forwarded unchanged to `createTextRangeSmallSequences`.
 * @param sentenceRanges All sentence ranges for the chapter, in order.
 * @param wordRanges Forwarded unchanged to `createTextRangeSmallSequences`.
 * @param sentenceIdToBlockFragment Map of block start index -> block fragment.
 * @param sentenceIdToFragment Forwarded unchanged to `createTextRangeSmallSequences`.
 * @param wordIdToFragment Forwarded unchanged to `createTextRangeSmallSequences`.
 * @returns One `seq` element per block, ordered by ascending start index.
 */
function createTextRangeLargeSequences(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToBlockFragment, sentenceIdToFragment, wordIdToFragment) {
  // Map iteration order is insertion order; sort numerically so that
  // "the next entry" really is the next block in the chapter.
  const blockStarts = Array.from(sentenceIdToBlockFragment.entries()).sort(
    ([a], [b]) => a - b
  );
  return blockStarts.map(([blockStart, fragment], index) => {
    const nextBlock = blockStarts[index + 1];
    // Array.prototype.slice excludes its end index, so the last block has to
    // end at `sentenceRanges.length` (not `length - 1`) or the chapter's final
    // sentence would be silently dropped from the overlay.
    const blockEnd = nextBlock === undefined ? sentenceRanges.length : nextBlock[0];
    const sentences = sentenceRanges.slice(blockStart, blockEnd);
    return Epub.createXmlElement(
      "seq",
      {
        id: `${chapter.id}-b${index}`,
        "epub:type": "text-range-large",
        "epub:textref": `../${chapter.href}#${fragment}`
      },
      createTextRangeSmallSequences(
        chapter,
        granularity,
        sentences,
        wordRanges,
        sentenceIdToFragment,
        wordIdToFragment
      )
    );
  });
}
|
|
682
|
+
/**
 * Builds the sentence-level media overlay elements for a run of sentence
 * ranges.
 *
 * A sentence becomes a single `<par>` (text reference + audio clip) when
 * `granularity` is "sentence" or when `wordRanges` has no entry for it.
 * Otherwise it becomes a `<seq epub:type="text-range-small">` whose children
 * are one `<par>` per word range.
 *
 * @param chapter Chapter descriptor; `id` and `href` are read here.
 * @param granularity "sentence" forces sentence-level `<par>` output.
 * @param sentenceRanges Sentence ranges to emit, in order.
 * @param wordRanges Map of sentence id -> list of word ranges.
 * @param sentenceIdToFragment Map of sentence id -> fragment for the text ref.
 * @param wordIdToFragment Map of sentence id -> (map of word id -> fragment).
 * @returns One XML element per entry of `sentenceRanges`.
 */
function createTextRangeSmallSequences(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
  // Shared builder: a <par> pairing a text reference with its audio clip.
  // `clip` is either a sentence range or a word range.
  const buildPar = (parId, fragment, clip) => {
    const textElement = Epub.createXmlElement("text", {
      src: `../${chapter.href}#${fragment}`
    });
    const audioElement = Epub.createXmlElement("audio", {
      src: `../Audio/${basename(clip.audiofile)}`,
      clipBegin: `${clip.start.toFixed(3)}s`,
      clipEnd: `${clip.end.toFixed(3)}s`
    });
    return Epub.createXmlElement("par", { id: parId }, [textElement, audioElement]);
  };

  const elements = [];
  for (const sentenceRange of sentenceRanges) {
    const sentenceElementId = `${chapter.id}-s${sentenceRange.id}`;
    const sentenceFragment = sentenceIdToFragment.get(sentenceRange.id);
    const emitWords = granularity !== "sentence" && wordRanges.has(sentenceRange.id);

    if (!emitWords) {
      elements.push(buildPar(sentenceElementId, sentenceFragment, sentenceRange));
      continue;
    }

    const sentenceWords = wordRanges.get(sentenceRange.id);
    const fragmentByWordId = wordIdToFragment.get(sentenceRange.id);
    const sequenceAttributes = {
      id: sentenceElementId,
      "epub:type": "text-range-small",
      "epub:textref": `../${chapter.href}#${sentenceFragment}`
    };
    const wordElements = [];
    for (const word of sentenceWords) {
      wordElements.push(
        buildPar(
          `${chapter.id}-s${sentenceRange.id}-w${word.id}`,
          fragmentByWordId.get(word.id),
          word
        )
      );
    }
    elements.push(Epub.createXmlElement("seq", sequenceAttributes, wordElements));
  }
  return elements;
}
|
|
582
732
|
function concatTranscriptions(transcriptions, audiofiles) {
|
|
583
733
|
return transcriptions.reduce(
|
|
584
734
|
(acc, transcription, index) => ({
|
|
@@ -312,7 +312,10 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
|
|
|
312
312
|
wordRanges,
|
|
313
313
|
transcriptionOffset: chapterTranscriptEndIndex,
|
|
314
314
|
firstFoundSentence,
|
|
315
|
-
lastFoundSentence:
|
|
315
|
+
lastFoundSentence: Math.max(
|
|
316
|
+
firstFoundSentence,
|
|
317
|
+
chapterSentenceIndex - 1
|
|
318
|
+
)
|
|
316
319
|
};
|
|
317
320
|
}
|
|
318
321
|
chapterSentenceIndex += lastGoodSentenceIndex + 1;
|
|
@@ -285,7 +285,10 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
|
|
|
285
285
|
wordRanges,
|
|
286
286
|
transcriptionOffset: chapterTranscriptEndIndex,
|
|
287
287
|
firstFoundSentence,
|
|
288
|
-
lastFoundSentence:
|
|
288
|
+
lastFoundSentence: Math.max(
|
|
289
|
+
firstFoundSentence,
|
|
290
|
+
chapterSentenceIndex - 1
|
|
291
|
+
)
|
|
289
292
|
};
|
|
290
293
|
}
|
|
291
294
|
chapterSentenceIndex += lastGoodSentenceIndex + 1;
|
package/dist/align/parse.cjs
CHANGED
|
@@ -36,11 +36,17 @@ const alignParser = (0, import_core.object)("Alignment", {
|
|
|
36
36
|
),
|
|
37
37
|
textRef: (0, import_core.withDefault)(
|
|
38
38
|
(0, import_core.option)("--text-ref", (0, import_core.choice)(["id-fragment", "text-fragment"]), {
|
|
39
|
-
description: import_core.message`Whether to use text fragments
|
|
39
|
+
description: import_core.message`Whether to use text fragments or element id fragments to identify text ranges in generated media overlays.`
|
|
40
40
|
}),
|
|
41
41
|
"id-fragment"
|
|
42
42
|
),
|
|
43
|
-
reports: (0, import_core.optional)((0, import_core.option)("--reports", (0, import_valueparser.path)({ type: "directory" })))
|
|
43
|
+
reports: (0, import_core.optional)((0, import_core.option)("--reports", (0, import_valueparser.path)({ type: "directory" }))),
|
|
44
|
+
outFormat: (0, import_core.withDefault)(
|
|
45
|
+
(0, import_core.option)("--out-format", (0, import_core.choice)(["epub", "gnp"]), {
|
|
46
|
+
description: import_core.message`Whether to output a full EPUB 3 package with embedded media overlays and audio, or a Readium Guided Navigation Package with just a manifest and guided navigation documents.`
|
|
47
|
+
}),
|
|
48
|
+
"epub"
|
|
49
|
+
)
|
|
44
50
|
});
|
|
45
51
|
const alignCommand = (0, import_core.command)(
|
|
46
52
|
"align",
|
|
@@ -51,7 +57,12 @@ const alignCommand = (0, import_core.command)(
|
|
|
51
57
|
"--transcriptions",
|
|
52
58
|
(0, import_valueparser.path)({ mustExist: true, type: "directory" })
|
|
53
59
|
),
|
|
54
|
-
output: (0, import_core.option)(
|
|
60
|
+
output: (0, import_core.option)(
|
|
61
|
+
"--output",
|
|
62
|
+
// TODO: I think it should be possible to pick the correct
|
|
63
|
+
// extension based on the output format
|
|
64
|
+
(0, import_valueparser.path)({ type: "file", extensions: [".epub", ".gnp"] })
|
|
65
|
+
)
|
|
55
66
|
}),
|
|
56
67
|
alignParser,
|
|
57
68
|
import_parse.loggingParser,
|
package/dist/align/parse.d.cts
CHANGED
|
@@ -5,11 +5,13 @@ declare const alignParser: _optique_core.Parser<"sync", {
|
|
|
5
5
|
readonly epub: string;
|
|
6
6
|
readonly textRef: "id-fragment" | "text-fragment";
|
|
7
7
|
readonly reports: string | undefined;
|
|
8
|
+
readonly outFormat: "epub" | "gnp";
|
|
8
9
|
}, {
|
|
9
10
|
readonly audiobook: _optique_core.ValueParserResult<string> | undefined;
|
|
10
11
|
readonly epub: _optique_core.ValueParserResult<string> | undefined;
|
|
11
12
|
readonly textRef: [_optique_core.ValueParserResult<"id-fragment" | "text-fragment"> | undefined] | undefined;
|
|
12
13
|
readonly reports: [_optique_core.ValueParserResult<string> | undefined] | undefined;
|
|
14
|
+
readonly outFormat: [_optique_core.ValueParserResult<"epub" | "gnp"> | undefined] | undefined;
|
|
13
15
|
}>;
|
|
14
16
|
declare const alignCommand: _optique_core.Parser<"sync", {
|
|
15
17
|
readonly action: "align";
|
|
@@ -20,6 +22,7 @@ declare const alignCommand: _optique_core.Parser<"sync", {
|
|
|
20
22
|
readonly epub: string;
|
|
21
23
|
readonly textRef: "id-fragment" | "text-fragment";
|
|
22
24
|
readonly reports: string | undefined;
|
|
25
|
+
readonly outFormat: "epub" | "gnp";
|
|
23
26
|
} & {
|
|
24
27
|
readonly noProgress: boolean;
|
|
25
28
|
readonly logLevel: "silent" | "debug" | "info" | "warn" | "error";
|
package/dist/align/parse.d.ts
CHANGED
|
@@ -5,11 +5,13 @@ declare const alignParser: _optique_core.Parser<"sync", {
|
|
|
5
5
|
readonly epub: string;
|
|
6
6
|
readonly textRef: "id-fragment" | "text-fragment";
|
|
7
7
|
readonly reports: string | undefined;
|
|
8
|
+
readonly outFormat: "epub" | "gnp";
|
|
8
9
|
}, {
|
|
9
10
|
readonly audiobook: _optique_core.ValueParserResult<string> | undefined;
|
|
10
11
|
readonly epub: _optique_core.ValueParserResult<string> | undefined;
|
|
11
12
|
readonly textRef: [_optique_core.ValueParserResult<"id-fragment" | "text-fragment"> | undefined] | undefined;
|
|
12
13
|
readonly reports: [_optique_core.ValueParserResult<string> | undefined] | undefined;
|
|
14
|
+
readonly outFormat: [_optique_core.ValueParserResult<"epub" | "gnp"> | undefined] | undefined;
|
|
13
15
|
}>;
|
|
14
16
|
declare const alignCommand: _optique_core.Parser<"sync", {
|
|
15
17
|
readonly action: "align";
|
|
@@ -20,6 +22,7 @@ declare const alignCommand: _optique_core.Parser<"sync", {
|
|
|
20
22
|
readonly epub: string;
|
|
21
23
|
readonly textRef: "id-fragment" | "text-fragment";
|
|
22
24
|
readonly reports: string | undefined;
|
|
25
|
+
readonly outFormat: "epub" | "gnp";
|
|
23
26
|
} & {
|
|
24
27
|
readonly noProgress: boolean;
|
|
25
28
|
readonly logLevel: "silent" | "debug" | "info" | "warn" | "error";
|
package/dist/align/parse.js
CHANGED
|
@@ -27,11 +27,17 @@ const alignParser = object("Alignment", {
|
|
|
27
27
|
),
|
|
28
28
|
textRef: withDefault(
|
|
29
29
|
option("--text-ref", choice(["id-fragment", "text-fragment"]), {
|
|
30
|
-
description: message`Whether to use text fragments
|
|
30
|
+
description: message`Whether to use text fragments or element id fragments to identify text ranges in generated media overlays.`
|
|
31
31
|
}),
|
|
32
32
|
"id-fragment"
|
|
33
33
|
),
|
|
34
|
-
reports: optional(option("--reports", path({ type: "directory" })))
|
|
34
|
+
reports: optional(option("--reports", path({ type: "directory" }))),
|
|
35
|
+
outFormat: withDefault(
|
|
36
|
+
option("--out-format", choice(["epub", "gnp"]), {
|
|
37
|
+
description: message`Whether to output a full EPUB 3 package with embedded media overlays and audio, or a Readium Guided Navigation Package with just a manifest and guided navigation documents.`
|
|
38
|
+
}),
|
|
39
|
+
"epub"
|
|
40
|
+
)
|
|
35
41
|
});
|
|
36
42
|
const alignCommand = command(
|
|
37
43
|
"align",
|
|
@@ -42,7 +48,12 @@ const alignCommand = command(
|
|
|
42
48
|
"--transcriptions",
|
|
43
49
|
path({ mustExist: true, type: "directory" })
|
|
44
50
|
),
|
|
45
|
-
output: option(
|
|
51
|
+
output: option(
|
|
52
|
+
"--output",
|
|
53
|
+
// TODO: I think it should be possible to pick the correct
|
|
54
|
+
// extension based on the output format
|
|
55
|
+
path({ type: "file", extensions: [".epub", ".gnp"] })
|
|
56
|
+
)
|
|
46
57
|
}),
|
|
47
58
|
alignParser,
|
|
48
59
|
loggingParser,
|