@storyteller-platform/align 0.1.20 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/align.cjs +81 -15
- package/dist/align/align.d.cts +4 -2
- package/dist/align/align.d.ts +4 -2
- package/dist/align/align.js +82 -16
- package/dist/align/getSentenceRanges.cjs +1 -0
- package/dist/align/getSentenceRanges.d.cts +1 -0
- package/dist/align/getSentenceRanges.d.ts +1 -0
- package/dist/align/getSentenceRanges.js +1 -0
- package/dist/align/parse.cjs +6 -0
- package/dist/align/parse.d.cts +3 -0
- package/dist/align/parse.d.ts +3 -0
- package/dist/align/parse.js +9 -1
- package/dist/align/textFragments.cjs +147 -0
- package/dist/align/textFragments.d.cts +23 -0
- package/dist/align/textFragments.d.ts +23 -0
- package/dist/align/textFragments.js +124 -0
- package/dist/cli/bin.cjs +38 -24
- package/dist/cli/bin.js +35 -21
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/snapshot/parse.cjs +61 -0
- package/dist/snapshot/parse.d.cts +24 -0
- package/dist/snapshot/parse.d.ts +24 -0
- package/dist/snapshot/parse.js +45 -0
- package/dist/snapshot/snapshot.cjs +224 -0
- package/dist/snapshot/snapshot.d.cts +6 -0
- package/dist/snapshot/snapshot.d.ts +6 -0
- package/dist/snapshot/snapshot.js +161 -0
- package/dist/transcribe/parse.cjs +2 -2
- package/dist/transcribe/parse.js +1 -1
- package/dist/transcribe/transcribe.cjs +2 -0
- package/dist/transcribe/transcribe.d.cts +2 -1
- package/dist/transcribe/transcribe.d.ts +2 -1
- package/dist/transcribe/transcribe.js +2 -0
- package/package.json +3 -3
package/dist/align/align.cjs
CHANGED
|
@@ -91,6 +91,7 @@ var import_segmentation = require("../markup/segmentation.cjs");
|
|
|
91
91
|
var import_getSentenceRanges = require("./getSentenceRanges.cjs");
|
|
92
92
|
var import_search = require("./search.cjs");
|
|
93
93
|
var import_slugify = require("./slugify.cjs");
|
|
94
|
+
var import_textFragments = require("./textFragments.cjs");
|
|
94
95
|
async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
95
96
|
var _stack = [];
|
|
96
97
|
try {
|
|
@@ -126,6 +127,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
|
126
127
|
audiobookFiles,
|
|
127
128
|
transcriptions,
|
|
128
129
|
options.granularity,
|
|
130
|
+
options.textRef,
|
|
129
131
|
options.primaryLocale,
|
|
130
132
|
options.logger
|
|
131
133
|
);
|
|
@@ -149,7 +151,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
|
149
151
|
}
|
|
150
152
|
}
|
|
151
153
|
class Aligner {
|
|
152
|
-
constructor(epub, audiofiles, transcriptions, granularity, languageOverride, logger) {
|
|
154
|
+
constructor(epub, audiofiles, transcriptions, granularity, textRef, languageOverride, logger) {
|
|
153
155
|
this.epub = epub;
|
|
154
156
|
this.audiofiles = audiofiles;
|
|
155
157
|
this.languageOverride = languageOverride;
|
|
@@ -157,12 +159,14 @@ class Aligner {
|
|
|
157
159
|
this.transcription = concatTranscriptions(transcriptions, audiofiles);
|
|
158
160
|
this.getChapterSentences = (0, import_memoize.default)(this.getChapterSentences.bind(this));
|
|
159
161
|
this.granularity = granularity ?? "sentence";
|
|
162
|
+
this.textRef = textRef ?? "id-fragment";
|
|
160
163
|
}
|
|
161
164
|
transcription;
|
|
162
165
|
totalDuration = 0;
|
|
163
166
|
alignedChapters = [];
|
|
164
167
|
timing = (0, import_ghost_story.createAggregator)();
|
|
165
168
|
granularity;
|
|
169
|
+
textRef;
|
|
166
170
|
report = {
|
|
167
171
|
chapters: []
|
|
168
172
|
};
|
|
@@ -177,8 +181,59 @@ class Aligner {
|
|
|
177
181
|
return segmentation.filter((s) => s.text.match(/\S/));
|
|
178
182
|
}
|
|
179
183
|
async writeAlignedChapter(alignedChapter) {
|
|
184
|
+
const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
|
|
180
185
|
const { chapter, sentenceRanges, wordRanges, xml } = alignedChapter;
|
|
186
|
+
const sentences = await this.getChapterSentences(chapter.id);
|
|
187
|
+
const sentenceIdToFragment = new Map(
|
|
188
|
+
sentenceRanges.map((range) => [
|
|
189
|
+
range.id,
|
|
190
|
+
`${range.chapterId}-s${range.id}`
|
|
191
|
+
])
|
|
192
|
+
);
|
|
193
|
+
const wordIdToFragment = new Map(
|
|
194
|
+
wordRanges.map((ranges) => [
|
|
195
|
+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
196
|
+
ranges[0].sentenceId,
|
|
197
|
+
new Map(
|
|
198
|
+
ranges.map((range) => [
|
|
199
|
+
range.id,
|
|
200
|
+
`${range.chapterId}-s${range.sentenceId}-w${range.id}`
|
|
201
|
+
])
|
|
202
|
+
)
|
|
203
|
+
])
|
|
204
|
+
);
|
|
181
205
|
const wordRangeMap = new Map(wordRanges.map((w) => [w[0].sentenceId, w]));
|
|
206
|
+
if (this.textRef === "text-fragment") {
|
|
207
|
+
const trie = new import_textFragments.TextFragmentTrie(
|
|
208
|
+
sentences.map((s) => s.text.replace("\n", " ")),
|
|
209
|
+
locale
|
|
210
|
+
);
|
|
211
|
+
for (const range of sentenceRanges) {
|
|
212
|
+
const sentence = sentences[range.id];
|
|
213
|
+
sentenceIdToFragment.set(
|
|
214
|
+
range.id,
|
|
215
|
+
trie.findMinimalFragment(
|
|
216
|
+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
217
|
+
range.id
|
|
218
|
+
)
|
|
219
|
+
);
|
|
220
|
+
if (this.granularity === "word") {
|
|
221
|
+
const wordRanges2 = wordRangeMap.get(range.id);
|
|
222
|
+
const toFragment = wordIdToFragment.get(range.id);
|
|
223
|
+
const words = sentence.words.entries.filter((w) => w.text.match(/\S/));
|
|
224
|
+
const wordTrie = new import_textFragments.TextFragmentTrie(
|
|
225
|
+
words.map((w) => w.text.replace("\n", " ")),
|
|
226
|
+
locale
|
|
227
|
+
);
|
|
228
|
+
for (const wordRange of wordRanges2) {
|
|
229
|
+
toFragment.set(
|
|
230
|
+
wordRange.id,
|
|
231
|
+
wordTrie.findMinimalFragment(wordRange.id)
|
|
232
|
+
);
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
}
|
|
182
237
|
const audiofiles = Array.from(
|
|
183
238
|
new Set(sentenceRanges.map(({ audiofile }) => audiofile))
|
|
184
239
|
);
|
|
@@ -215,7 +270,9 @@ class Aligner {
|
|
|
215
270
|
chapter,
|
|
216
271
|
this.granularity,
|
|
217
272
|
sentenceRanges,
|
|
218
|
-
wordRangeMap
|
|
273
|
+
wordRangeMap,
|
|
274
|
+
sentenceIdToFragment,
|
|
275
|
+
wordIdToFragment
|
|
219
276
|
),
|
|
220
277
|
"xml"
|
|
221
278
|
);
|
|
@@ -348,16 +405,24 @@ class Aligner {
|
|
|
348
405
|
};
|
|
349
406
|
}
|
|
350
407
|
narrowToAvailableBoundary(boundary) {
|
|
351
|
-
const
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
408
|
+
const available = [
|
|
409
|
+
-1,
|
|
410
|
+
...this.alignedChapters.toSorted((a, b) => a.startOffset - b.startOffset).flatMap(({ startOffset, endOffset }) => [startOffset, endOffset]),
|
|
411
|
+
Infinity
|
|
412
|
+
];
|
|
413
|
+
const withinBoundary = [];
|
|
414
|
+
for (let i = 0; i < available.length - 1; i += 2) {
|
|
415
|
+
const [start, end] = [available[i], available[i + 1]];
|
|
416
|
+
if (boundary.start <= start && boundary.end >= start || boundary.start <= end && boundary.end >= end) {
|
|
417
|
+
withinBoundary.push([
|
|
418
|
+
Math.max(boundary.start, start + 1),
|
|
419
|
+
Math.min(boundary.end, end - 1)
|
|
420
|
+
]);
|
|
358
421
|
}
|
|
359
422
|
}
|
|
360
|
-
|
|
423
|
+
const largestBoundary = (0, import_itertools.max)(withinBoundary, ([start, end]) => end - start);
|
|
424
|
+
if (!largestBoundary) return { start: boundary.start, end: boundary.end };
|
|
425
|
+
return { start: largestBoundary[0], end: largestBoundary[1] };
|
|
361
426
|
}
|
|
362
427
|
async alignBook(onProgress) {
|
|
363
428
|
const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
|
|
@@ -467,7 +532,7 @@ class Aligner {
|
|
|
467
532
|
alignedChapter.wordRanges[i] = (0, import_getSentenceRanges.expandEmptySentenceRanges)(wordRanges);
|
|
468
533
|
}
|
|
469
534
|
await this.writeAlignedChapter(alignedChapter);
|
|
470
|
-
collapsedStart += sentences.length
|
|
535
|
+
collapsedStart += sentences.length;
|
|
471
536
|
}
|
|
472
537
|
await this.epub.addMetadata({
|
|
473
538
|
type: "meta",
|
|
@@ -495,7 +560,7 @@ class Aligner {
|
|
|
495
560
|
return this.timing;
|
|
496
561
|
}
|
|
497
562
|
}
|
|
498
|
-
function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
563
|
+
function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
|
|
499
564
|
return [
|
|
500
565
|
import_epub.Epub.createXmlElement(
|
|
501
566
|
"smil",
|
|
@@ -522,7 +587,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
522
587
|
},
|
|
523
588
|
[
|
|
524
589
|
import_epub.Epub.createXmlElement("text", {
|
|
525
|
-
src: `../${chapter.href}#${
|
|
590
|
+
src: `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
|
|
526
591
|
}),
|
|
527
592
|
import_epub.Epub.createXmlElement("audio", {
|
|
528
593
|
src: `../Audio/${(0, import_posix.basename)(sentenceRange.audiofile)}`,
|
|
@@ -533,12 +598,13 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
533
598
|
);
|
|
534
599
|
}
|
|
535
600
|
const words = wordRanges.get(sentenceRange.id);
|
|
601
|
+
const wordToFragment = wordIdToFragment.get(sentenceRange.id);
|
|
536
602
|
return import_epub.Epub.createXmlElement(
|
|
537
603
|
"seq",
|
|
538
604
|
{
|
|
539
605
|
id: `${chapter.id}-s${sentenceRange.id}`,
|
|
540
606
|
"epub:type": "text-range-small",
|
|
541
|
-
"epub:textref": `../${chapter.href}#${
|
|
607
|
+
"epub:textref": `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
|
|
542
608
|
},
|
|
543
609
|
words.map(
|
|
544
610
|
(word) => import_epub.Epub.createXmlElement(
|
|
@@ -548,7 +614,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
548
614
|
},
|
|
549
615
|
[
|
|
550
616
|
import_epub.Epub.createXmlElement("text", {
|
|
551
|
-
src: `../${chapter.href}#${
|
|
617
|
+
src: `../${chapter.href}#${wordToFragment.get(word.id)}`
|
|
552
618
|
}),
|
|
553
619
|
import_epub.Epub.createXmlElement("audio", {
|
|
554
620
|
src: `../Audio/${(0, import_posix.basename)(word.audiofile)}`,
|
package/dist/align/align.d.cts
CHANGED
|
@@ -39,7 +39,8 @@ interface Report {
|
|
|
39
39
|
}
|
|
40
40
|
interface AlignOptions {
|
|
41
41
|
reportsPath?: string | null | undefined;
|
|
42
|
-
granularity
|
|
42
|
+
granularity?: "sentence" | "word" | null | undefined;
|
|
43
|
+
textRef?: "id-fragment" | "text-fragment" | null | undefined;
|
|
43
44
|
primaryLocale?: Intl.Locale | null | undefined;
|
|
44
45
|
logger?: Logger | null | undefined;
|
|
45
46
|
onProgress?: ((progress: number) => void) | null | undefined;
|
|
@@ -55,8 +56,9 @@ declare class Aligner {
|
|
|
55
56
|
private alignedChapters;
|
|
56
57
|
private timing;
|
|
57
58
|
private granularity;
|
|
59
|
+
private textRef;
|
|
58
60
|
report: Report;
|
|
59
|
-
constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
|
|
61
|
+
constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, textRef: "id-fragment" | "text-fragment" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
|
|
60
62
|
private getChapterSentences;
|
|
61
63
|
private writeAlignedChapter;
|
|
62
64
|
private addChapterReport;
|
package/dist/align/align.d.ts
CHANGED
|
@@ -39,7 +39,8 @@ interface Report {
|
|
|
39
39
|
}
|
|
40
40
|
interface AlignOptions {
|
|
41
41
|
reportsPath?: string | null | undefined;
|
|
42
|
-
granularity
|
|
42
|
+
granularity?: "sentence" | "word" | null | undefined;
|
|
43
|
+
textRef?: "id-fragment" | "text-fragment" | null | undefined;
|
|
43
44
|
primaryLocale?: Intl.Locale | null | undefined;
|
|
44
45
|
logger?: Logger | null | undefined;
|
|
45
46
|
onProgress?: ((progress: number) => void) | null | undefined;
|
|
@@ -55,8 +56,9 @@ declare class Aligner {
|
|
|
55
56
|
private alignedChapters;
|
|
56
57
|
private timing;
|
|
57
58
|
private granularity;
|
|
59
|
+
private textRef;
|
|
58
60
|
report: Report;
|
|
59
|
-
constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
|
|
61
|
+
constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, textRef: "id-fragment" | "text-fragment" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
|
|
60
62
|
private getChapterSentences;
|
|
61
63
|
private writeAlignedChapter;
|
|
62
64
|
private addChapterReport;
|
package/dist/align/align.js
CHANGED
|
@@ -5,7 +5,7 @@ import {
|
|
|
5
5
|
import { copyFile, mkdir, readFile, readdir, writeFile } from "node:fs/promises";
|
|
6
6
|
import { dirname as autoDirname, join as autoJoin } from "node:path";
|
|
7
7
|
import { basename, dirname, parse, relative } from "node:path/posix";
|
|
8
|
-
import { enumerate } from "itertools";
|
|
8
|
+
import { enumerate, max } from "itertools";
|
|
9
9
|
import memoize from "memoize";
|
|
10
10
|
import { isAudioFile, lookupAudioMime } from "@storyteller-platform/audiobook";
|
|
11
11
|
import {
|
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
} from "./getSentenceRanges.js";
|
|
28
28
|
import { findBoundaries } from "./search.js";
|
|
29
29
|
import { slugify } from "./slugify.js";
|
|
30
|
+
import { TextFragmentTrie } from "./textFragments.js";
|
|
30
31
|
async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
31
32
|
var _stack = [];
|
|
32
33
|
try {
|
|
@@ -62,6 +63,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
|
62
63
|
audiobookFiles,
|
|
63
64
|
transcriptions,
|
|
64
65
|
options.granularity,
|
|
66
|
+
options.textRef,
|
|
65
67
|
options.primaryLocale,
|
|
66
68
|
options.logger
|
|
67
69
|
);
|
|
@@ -85,7 +87,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
|
85
87
|
}
|
|
86
88
|
}
|
|
87
89
|
class Aligner {
|
|
88
|
-
constructor(epub, audiofiles, transcriptions, granularity, languageOverride, logger) {
|
|
90
|
+
constructor(epub, audiofiles, transcriptions, granularity, textRef, languageOverride, logger) {
|
|
89
91
|
this.epub = epub;
|
|
90
92
|
this.audiofiles = audiofiles;
|
|
91
93
|
this.languageOverride = languageOverride;
|
|
@@ -93,12 +95,14 @@ class Aligner {
|
|
|
93
95
|
this.transcription = concatTranscriptions(transcriptions, audiofiles);
|
|
94
96
|
this.getChapterSentences = memoize(this.getChapterSentences.bind(this));
|
|
95
97
|
this.granularity = granularity ?? "sentence";
|
|
98
|
+
this.textRef = textRef ?? "id-fragment";
|
|
96
99
|
}
|
|
97
100
|
transcription;
|
|
98
101
|
totalDuration = 0;
|
|
99
102
|
alignedChapters = [];
|
|
100
103
|
timing = createAggregator();
|
|
101
104
|
granularity;
|
|
105
|
+
textRef;
|
|
102
106
|
report = {
|
|
103
107
|
chapters: []
|
|
104
108
|
};
|
|
@@ -113,8 +117,59 @@ class Aligner {
|
|
|
113
117
|
return segmentation.filter((s) => s.text.match(/\S/));
|
|
114
118
|
}
|
|
115
119
|
async writeAlignedChapter(alignedChapter) {
|
|
120
|
+
const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
|
|
116
121
|
const { chapter, sentenceRanges, wordRanges, xml } = alignedChapter;
|
|
122
|
+
const sentences = await this.getChapterSentences(chapter.id);
|
|
123
|
+
const sentenceIdToFragment = new Map(
|
|
124
|
+
sentenceRanges.map((range) => [
|
|
125
|
+
range.id,
|
|
126
|
+
`${range.chapterId}-s${range.id}`
|
|
127
|
+
])
|
|
128
|
+
);
|
|
129
|
+
const wordIdToFragment = new Map(
|
|
130
|
+
wordRanges.map((ranges) => [
|
|
131
|
+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
132
|
+
ranges[0].sentenceId,
|
|
133
|
+
new Map(
|
|
134
|
+
ranges.map((range) => [
|
|
135
|
+
range.id,
|
|
136
|
+
`${range.chapterId}-s${range.sentenceId}-w${range.id}`
|
|
137
|
+
])
|
|
138
|
+
)
|
|
139
|
+
])
|
|
140
|
+
);
|
|
117
141
|
const wordRangeMap = new Map(wordRanges.map((w) => [w[0].sentenceId, w]));
|
|
142
|
+
if (this.textRef === "text-fragment") {
|
|
143
|
+
const trie = new TextFragmentTrie(
|
|
144
|
+
sentences.map((s) => s.text.replace("\n", " ")),
|
|
145
|
+
locale
|
|
146
|
+
);
|
|
147
|
+
for (const range of sentenceRanges) {
|
|
148
|
+
const sentence = sentences[range.id];
|
|
149
|
+
sentenceIdToFragment.set(
|
|
150
|
+
range.id,
|
|
151
|
+
trie.findMinimalFragment(
|
|
152
|
+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
153
|
+
range.id
|
|
154
|
+
)
|
|
155
|
+
);
|
|
156
|
+
if (this.granularity === "word") {
|
|
157
|
+
const wordRanges2 = wordRangeMap.get(range.id);
|
|
158
|
+
const toFragment = wordIdToFragment.get(range.id);
|
|
159
|
+
const words = sentence.words.entries.filter((w) => w.text.match(/\S/));
|
|
160
|
+
const wordTrie = new TextFragmentTrie(
|
|
161
|
+
words.map((w) => w.text.replace("\n", " ")),
|
|
162
|
+
locale
|
|
163
|
+
);
|
|
164
|
+
for (const wordRange of wordRanges2) {
|
|
165
|
+
toFragment.set(
|
|
166
|
+
wordRange.id,
|
|
167
|
+
wordTrie.findMinimalFragment(wordRange.id)
|
|
168
|
+
);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
118
173
|
const audiofiles = Array.from(
|
|
119
174
|
new Set(sentenceRanges.map(({ audiofile }) => audiofile))
|
|
120
175
|
);
|
|
@@ -151,7 +206,9 @@ class Aligner {
|
|
|
151
206
|
chapter,
|
|
152
207
|
this.granularity,
|
|
153
208
|
sentenceRanges,
|
|
154
|
-
wordRangeMap
|
|
209
|
+
wordRangeMap,
|
|
210
|
+
sentenceIdToFragment,
|
|
211
|
+
wordIdToFragment
|
|
155
212
|
),
|
|
156
213
|
"xml"
|
|
157
214
|
);
|
|
@@ -284,16 +341,24 @@ class Aligner {
|
|
|
284
341
|
};
|
|
285
342
|
}
|
|
286
343
|
narrowToAvailableBoundary(boundary) {
|
|
287
|
-
const
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
344
|
+
const available = [
|
|
345
|
+
-1,
|
|
346
|
+
...this.alignedChapters.toSorted((a, b) => a.startOffset - b.startOffset).flatMap(({ startOffset, endOffset }) => [startOffset, endOffset]),
|
|
347
|
+
Infinity
|
|
348
|
+
];
|
|
349
|
+
const withinBoundary = [];
|
|
350
|
+
for (let i = 0; i < available.length - 1; i += 2) {
|
|
351
|
+
const [start, end] = [available[i], available[i + 1]];
|
|
352
|
+
if (boundary.start <= start && boundary.end >= start || boundary.start <= end && boundary.end >= end) {
|
|
353
|
+
withinBoundary.push([
|
|
354
|
+
Math.max(boundary.start, start + 1),
|
|
355
|
+
Math.min(boundary.end, end - 1)
|
|
356
|
+
]);
|
|
294
357
|
}
|
|
295
358
|
}
|
|
296
|
-
|
|
359
|
+
const largestBoundary = max(withinBoundary, ([start, end]) => end - start);
|
|
360
|
+
if (!largestBoundary) return { start: boundary.start, end: boundary.end };
|
|
361
|
+
return { start: largestBoundary[0], end: largestBoundary[1] };
|
|
297
362
|
}
|
|
298
363
|
async alignBook(onProgress) {
|
|
299
364
|
const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
|
|
@@ -403,7 +468,7 @@ class Aligner {
|
|
|
403
468
|
alignedChapter.wordRanges[i] = expandEmptySentenceRanges(wordRanges);
|
|
404
469
|
}
|
|
405
470
|
await this.writeAlignedChapter(alignedChapter);
|
|
406
|
-
collapsedStart += sentences.length
|
|
471
|
+
collapsedStart += sentences.length;
|
|
407
472
|
}
|
|
408
473
|
await this.epub.addMetadata({
|
|
409
474
|
type: "meta",
|
|
@@ -431,7 +496,7 @@ class Aligner {
|
|
|
431
496
|
return this.timing;
|
|
432
497
|
}
|
|
433
498
|
}
|
|
434
|
-
function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
499
|
+
function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
|
|
435
500
|
return [
|
|
436
501
|
Epub.createXmlElement(
|
|
437
502
|
"smil",
|
|
@@ -458,7 +523,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
458
523
|
},
|
|
459
524
|
[
|
|
460
525
|
Epub.createXmlElement("text", {
|
|
461
|
-
src: `../${chapter.href}#${
|
|
526
|
+
src: `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
|
|
462
527
|
}),
|
|
463
528
|
Epub.createXmlElement("audio", {
|
|
464
529
|
src: `../Audio/${basename(sentenceRange.audiofile)}`,
|
|
@@ -469,12 +534,13 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
469
534
|
);
|
|
470
535
|
}
|
|
471
536
|
const words = wordRanges.get(sentenceRange.id);
|
|
537
|
+
const wordToFragment = wordIdToFragment.get(sentenceRange.id);
|
|
472
538
|
return Epub.createXmlElement(
|
|
473
539
|
"seq",
|
|
474
540
|
{
|
|
475
541
|
id: `${chapter.id}-s${sentenceRange.id}`,
|
|
476
542
|
"epub:type": "text-range-small",
|
|
477
|
-
"epub:textref": `../${chapter.href}#${
|
|
543
|
+
"epub:textref": `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
|
|
478
544
|
},
|
|
479
545
|
words.map(
|
|
480
546
|
(word) => Epub.createXmlElement(
|
|
@@ -484,7 +550,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
484
550
|
},
|
|
485
551
|
[
|
|
486
552
|
Epub.createXmlElement("text", {
|
|
487
|
-
src: `../${chapter.href}#${
|
|
553
|
+
src: `../${chapter.href}#${wordToFragment.get(word.id)}`
|
|
488
554
|
}),
|
|
489
555
|
Epub.createXmlElement("audio", {
|
|
490
556
|
src: `../Audio/${basename(word.audiofile)}`,
|
|
package/dist/align/getSentenceRanges.cjs
CHANGED
|
@@ -275,6 +275,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
|
|
|
275
275
|
if (start2 && end2) {
|
|
276
276
|
perSentenceWordRanges.push({
|
|
277
277
|
id: k,
|
|
278
|
+
chapterId,
|
|
278
279
|
sentenceId: j + chapterSentenceIndex + slice[0],
|
|
279
280
|
start: end2.audiofile === start2.audiofile ? start2.start : 0,
|
|
280
281
|
audiofile: end2.audiofile,
|
|
package/dist/align/getSentenceRanges.js
CHANGED
|
@@ -247,6 +247,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
|
|
|
247
247
|
if (start2 && end2) {
|
|
248
248
|
perSentenceWordRanges.push({
|
|
249
249
|
id: k,
|
|
250
|
+
chapterId,
|
|
250
251
|
sentenceId: j + chapterSentenceIndex + slice[0],
|
|
251
252
|
start: end2.audiofile === start2.audiofile ? start2.start : 0,
|
|
252
253
|
audiofile: end2.audiofile,
|
package/dist/align/parse.cjs
CHANGED
|
@@ -34,6 +34,12 @@ const alignParser = (0, import_core.object)("Alignment", {
|
|
|
34
34
|
"--epub",
|
|
35
35
|
(0, import_valueparser.path)({ mustExist: true, type: "file", extensions: [".epub"] })
|
|
36
36
|
),
|
|
37
|
+
textRef: (0, import_core.withDefault)(
|
|
38
|
+
(0, import_core.option)("--text-ref", (0, import_core.choice)(["id-fragment", "text-fragment"]), {
|
|
39
|
+
description: import_core.message`Whether to use text fragments rather than element id fragments to identify text ranges in generated media overlays.`
|
|
40
|
+
}),
|
|
41
|
+
"id-fragment"
|
|
42
|
+
),
|
|
37
43
|
reports: (0, import_core.optional)((0, import_core.option)("--reports", (0, import_valueparser.path)({ type: "directory" })))
|
|
38
44
|
});
|
|
39
45
|
const alignCommand = (0, import_core.command)(
|
package/dist/align/parse.d.cts
CHANGED
|
@@ -3,10 +3,12 @@ import * as _optique_core from '@optique/core';
|
|
|
3
3
|
declare const alignParser: _optique_core.Parser<"sync", {
|
|
4
4
|
readonly audiobook: string;
|
|
5
5
|
readonly epub: string;
|
|
6
|
+
readonly textRef: "id-fragment" | "text-fragment";
|
|
6
7
|
readonly reports: string | undefined;
|
|
7
8
|
}, {
|
|
8
9
|
readonly audiobook: _optique_core.ValueParserResult<string> | undefined;
|
|
9
10
|
readonly epub: _optique_core.ValueParserResult<string> | undefined;
|
|
11
|
+
readonly textRef: [_optique_core.ValueParserResult<"id-fragment" | "text-fragment"> | undefined] | undefined;
|
|
10
12
|
readonly reports: [_optique_core.ValueParserResult<string> | undefined] | undefined;
|
|
11
13
|
}>;
|
|
12
14
|
declare const alignCommand: _optique_core.Parser<"sync", {
|
|
@@ -16,6 +18,7 @@ declare const alignCommand: _optique_core.Parser<"sync", {
|
|
|
16
18
|
} & {
|
|
17
19
|
readonly audiobook: string;
|
|
18
20
|
readonly epub: string;
|
|
21
|
+
readonly textRef: "id-fragment" | "text-fragment";
|
|
19
22
|
readonly reports: string | undefined;
|
|
20
23
|
} & {
|
|
21
24
|
readonly noProgress: boolean;
|
package/dist/align/parse.d.ts
CHANGED
|
@@ -3,10 +3,12 @@ import * as _optique_core from '@optique/core';
|
|
|
3
3
|
declare const alignParser: _optique_core.Parser<"sync", {
|
|
4
4
|
readonly audiobook: string;
|
|
5
5
|
readonly epub: string;
|
|
6
|
+
readonly textRef: "id-fragment" | "text-fragment";
|
|
6
7
|
readonly reports: string | undefined;
|
|
7
8
|
}, {
|
|
8
9
|
readonly audiobook: _optique_core.ValueParserResult<string> | undefined;
|
|
9
10
|
readonly epub: _optique_core.ValueParserResult<string> | undefined;
|
|
11
|
+
readonly textRef: [_optique_core.ValueParserResult<"id-fragment" | "text-fragment"> | undefined] | undefined;
|
|
10
12
|
readonly reports: [_optique_core.ValueParserResult<string> | undefined] | undefined;
|
|
11
13
|
}>;
|
|
12
14
|
declare const alignCommand: _optique_core.Parser<"sync", {
|
|
@@ -16,6 +18,7 @@ declare const alignCommand: _optique_core.Parser<"sync", {
|
|
|
16
18
|
} & {
|
|
17
19
|
readonly audiobook: string;
|
|
18
20
|
readonly epub: string;
|
|
21
|
+
readonly textRef: "id-fragment" | "text-fragment";
|
|
19
22
|
readonly reports: string | undefined;
|
|
20
23
|
} & {
|
|
21
24
|
readonly noProgress: boolean;
|
package/dist/align/parse.js
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
import "../chunk-BIEQXUOY.js";
|
|
2
2
|
import {
|
|
3
|
+
choice,
|
|
3
4
|
command,
|
|
4
5
|
constant,
|
|
5
6
|
merge,
|
|
6
7
|
message,
|
|
7
8
|
object,
|
|
8
9
|
option,
|
|
9
|
-
optional
|
|
10
|
+
optional,
|
|
11
|
+
withDefault
|
|
10
12
|
} from "@optique/core";
|
|
11
13
|
import { path } from "@optique/run/valueparser";
|
|
12
14
|
import {
|
|
@@ -23,6 +25,12 @@ const alignParser = object("Alignment", {
|
|
|
23
25
|
"--epub",
|
|
24
26
|
path({ mustExist: true, type: "file", extensions: [".epub"] })
|
|
25
27
|
),
|
|
28
|
+
textRef: withDefault(
|
|
29
|
+
option("--text-ref", choice(["id-fragment", "text-fragment"]), {
|
|
30
|
+
description: message`Whether to use text fragments rather than element id fragments to identify text ranges in generated media overlays.`
|
|
31
|
+
}),
|
|
32
|
+
"id-fragment"
|
|
33
|
+
),
|
|
26
34
|
reports: optional(option("--reports", path({ type: "directory" })))
|
|
27
35
|
});
|
|
28
36
|
const alignCommand = command(
|