@storyteller-platform/align 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/align.cjs +65 -7
- package/dist/align/align.d.cts +4 -2
- package/dist/align/align.d.ts +4 -2
- package/dist/align/align.js +65 -7
- package/dist/align/getSentenceRanges.cjs +1 -0
- package/dist/align/getSentenceRanges.d.cts +1 -0
- package/dist/align/getSentenceRanges.d.ts +1 -0
- package/dist/align/getSentenceRanges.js +1 -0
- package/dist/align/parse.cjs +6 -0
- package/dist/align/parse.d.cts +3 -0
- package/dist/align/parse.d.ts +3 -0
- package/dist/align/parse.js +9 -1
- package/dist/align/textFragments.cjs +147 -0
- package/dist/align/textFragments.d.cts +23 -0
- package/dist/align/textFragments.d.ts +23 -0
- package/dist/align/textFragments.js +124 -0
- package/dist/cli/bin.cjs +38 -24
- package/dist/cli/bin.js +35 -21
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/snapshot/parse.cjs +61 -0
- package/dist/snapshot/parse.d.cts +24 -0
- package/dist/snapshot/parse.d.ts +24 -0
- package/dist/snapshot/parse.js +45 -0
- package/dist/snapshot/snapshot.cjs +224 -0
- package/dist/snapshot/snapshot.d.cts +6 -0
- package/dist/snapshot/snapshot.d.ts +6 -0
- package/dist/snapshot/snapshot.js +161 -0
- package/dist/transcribe/parse.cjs +2 -2
- package/dist/transcribe/parse.js +1 -1
- package/dist/transcribe/transcribe.cjs +2 -0
- package/dist/transcribe/transcribe.d.cts +2 -1
- package/dist/transcribe/transcribe.d.ts +2 -1
- package/dist/transcribe/transcribe.js +2 -0
- package/package.json +3 -3
package/dist/align/align.cjs
CHANGED
|
@@ -91,6 +91,7 @@ var import_segmentation = require("../markup/segmentation.cjs");
|
|
|
91
91
|
var import_getSentenceRanges = require("./getSentenceRanges.cjs");
|
|
92
92
|
var import_search = require("./search.cjs");
|
|
93
93
|
var import_slugify = require("./slugify.cjs");
|
|
94
|
+
var import_textFragments = require("./textFragments.cjs");
|
|
94
95
|
async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
95
96
|
var _stack = [];
|
|
96
97
|
try {
|
|
@@ -126,6 +127,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
|
126
127
|
audiobookFiles,
|
|
127
128
|
transcriptions,
|
|
128
129
|
options.granularity,
|
|
130
|
+
options.textRef,
|
|
129
131
|
options.primaryLocale,
|
|
130
132
|
options.logger
|
|
131
133
|
);
|
|
@@ -149,7 +151,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
|
149
151
|
}
|
|
150
152
|
}
|
|
151
153
|
class Aligner {
|
|
152
|
-
constructor(epub, audiofiles, transcriptions, granularity, languageOverride, logger) {
|
|
154
|
+
constructor(epub, audiofiles, transcriptions, granularity, textRef, languageOverride, logger) {
|
|
153
155
|
this.epub = epub;
|
|
154
156
|
this.audiofiles = audiofiles;
|
|
155
157
|
this.languageOverride = languageOverride;
|
|
@@ -157,12 +159,14 @@ class Aligner {
|
|
|
157
159
|
this.transcription = concatTranscriptions(transcriptions, audiofiles);
|
|
158
160
|
this.getChapterSentences = (0, import_memoize.default)(this.getChapterSentences.bind(this));
|
|
159
161
|
this.granularity = granularity ?? "sentence";
|
|
162
|
+
this.textRef = textRef ?? "id-fragment";
|
|
160
163
|
}
|
|
161
164
|
transcription;
|
|
162
165
|
totalDuration = 0;
|
|
163
166
|
alignedChapters = [];
|
|
164
167
|
timing = (0, import_ghost_story.createAggregator)();
|
|
165
168
|
granularity;
|
|
169
|
+
textRef;
|
|
166
170
|
report = {
|
|
167
171
|
chapters: []
|
|
168
172
|
};
|
|
@@ -177,8 +181,59 @@ class Aligner {
|
|
|
177
181
|
return segmentation.filter((s) => s.text.match(/\S/));
|
|
178
182
|
}
|
|
179
183
|
async writeAlignedChapter(alignedChapter) {
|
|
184
|
+
const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
|
|
180
185
|
const { chapter, sentenceRanges, wordRanges, xml } = alignedChapter;
|
|
186
|
+
const sentences = await this.getChapterSentences(chapter.id);
|
|
187
|
+
const sentenceIdToFragment = new Map(
|
|
188
|
+
sentenceRanges.map((range) => [
|
|
189
|
+
range.id,
|
|
190
|
+
`${range.chapterId}-s${range.id}`
|
|
191
|
+
])
|
|
192
|
+
);
|
|
193
|
+
const wordIdToFragment = new Map(
|
|
194
|
+
wordRanges.map((ranges) => [
|
|
195
|
+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
196
|
+
ranges[0].sentenceId,
|
|
197
|
+
new Map(
|
|
198
|
+
ranges.map((range) => [
|
|
199
|
+
range.id,
|
|
200
|
+
`${range.chapterId}-s${range.sentenceId}-w${range.id}`
|
|
201
|
+
])
|
|
202
|
+
)
|
|
203
|
+
])
|
|
204
|
+
);
|
|
181
205
|
const wordRangeMap = new Map(wordRanges.map((w) => [w[0].sentenceId, w]));
|
|
206
|
+
if (this.textRef === "text-fragment") {
|
|
207
|
+
const trie = new import_textFragments.TextFragmentTrie(
|
|
208
|
+
sentences.map((s) => s.text.replace("\n", " ")),
|
|
209
|
+
locale
|
|
210
|
+
);
|
|
211
|
+
for (const range of sentenceRanges) {
|
|
212
|
+
const sentence = sentences[range.id];
|
|
213
|
+
sentenceIdToFragment.set(
|
|
214
|
+
range.id,
|
|
215
|
+
trie.findMinimalFragment(
|
|
216
|
+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
217
|
+
range.id
|
|
218
|
+
)
|
|
219
|
+
);
|
|
220
|
+
if (this.granularity === "word") {
|
|
221
|
+
const wordRanges2 = wordRangeMap.get(range.id);
|
|
222
|
+
const toFragment = wordIdToFragment.get(range.id);
|
|
223
|
+
const words = sentence.words.entries.filter((w) => w.text.match(/\S/));
|
|
224
|
+
const wordTrie = new import_textFragments.TextFragmentTrie(
|
|
225
|
+
words.map((w) => w.text.replace("\n", " ")),
|
|
226
|
+
locale
|
|
227
|
+
);
|
|
228
|
+
for (const wordRange of wordRanges2) {
|
|
229
|
+
toFragment.set(
|
|
230
|
+
wordRange.id,
|
|
231
|
+
wordTrie.findMinimalFragment(wordRange.id)
|
|
232
|
+
);
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
}
|
|
182
237
|
const audiofiles = Array.from(
|
|
183
238
|
new Set(sentenceRanges.map(({ audiofile }) => audiofile))
|
|
184
239
|
);
|
|
@@ -215,7 +270,9 @@ class Aligner {
|
|
|
215
270
|
chapter,
|
|
216
271
|
this.granularity,
|
|
217
272
|
sentenceRanges,
|
|
218
|
-
wordRangeMap
|
|
273
|
+
wordRangeMap,
|
|
274
|
+
sentenceIdToFragment,
|
|
275
|
+
wordIdToFragment
|
|
219
276
|
),
|
|
220
277
|
"xml"
|
|
221
278
|
);
|
|
@@ -475,7 +532,7 @@ class Aligner {
|
|
|
475
532
|
alignedChapter.wordRanges[i] = (0, import_getSentenceRanges.expandEmptySentenceRanges)(wordRanges);
|
|
476
533
|
}
|
|
477
534
|
await this.writeAlignedChapter(alignedChapter);
|
|
478
|
-
collapsedStart += sentences.length
|
|
535
|
+
collapsedStart += sentences.length;
|
|
479
536
|
}
|
|
480
537
|
await this.epub.addMetadata({
|
|
481
538
|
type: "meta",
|
|
@@ -503,7 +560,7 @@ class Aligner {
|
|
|
503
560
|
return this.timing;
|
|
504
561
|
}
|
|
505
562
|
}
|
|
506
|
-
function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
563
|
+
function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
|
|
507
564
|
return [
|
|
508
565
|
import_epub.Epub.createXmlElement(
|
|
509
566
|
"smil",
|
|
@@ -530,7 +587,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
530
587
|
},
|
|
531
588
|
[
|
|
532
589
|
import_epub.Epub.createXmlElement("text", {
|
|
533
|
-
src: `../${chapter.href}#${
|
|
590
|
+
src: `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
|
|
534
591
|
}),
|
|
535
592
|
import_epub.Epub.createXmlElement("audio", {
|
|
536
593
|
src: `../Audio/${(0, import_posix.basename)(sentenceRange.audiofile)}`,
|
|
@@ -541,12 +598,13 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
541
598
|
);
|
|
542
599
|
}
|
|
543
600
|
const words = wordRanges.get(sentenceRange.id);
|
|
601
|
+
const wordToFragment = wordIdToFragment.get(sentenceRange.id);
|
|
544
602
|
return import_epub.Epub.createXmlElement(
|
|
545
603
|
"seq",
|
|
546
604
|
{
|
|
547
605
|
id: `${chapter.id}-s${sentenceRange.id}`,
|
|
548
606
|
"epub:type": "text-range-small",
|
|
549
|
-
"epub:textref": `../${chapter.href}#${
|
|
607
|
+
"epub:textref": `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
|
|
550
608
|
},
|
|
551
609
|
words.map(
|
|
552
610
|
(word) => import_epub.Epub.createXmlElement(
|
|
@@ -556,7 +614,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
556
614
|
},
|
|
557
615
|
[
|
|
558
616
|
import_epub.Epub.createXmlElement("text", {
|
|
559
|
-
src: `../${chapter.href}#${
|
|
617
|
+
src: `../${chapter.href}#${wordToFragment.get(word.id)}`
|
|
560
618
|
}),
|
|
561
619
|
import_epub.Epub.createXmlElement("audio", {
|
|
562
620
|
src: `../Audio/${(0, import_posix.basename)(word.audiofile)}`,
|
package/dist/align/align.d.cts
CHANGED
|
@@ -39,7 +39,8 @@ interface Report {
|
|
|
39
39
|
}
|
|
40
40
|
interface AlignOptions {
|
|
41
41
|
reportsPath?: string | null | undefined;
|
|
42
|
-
granularity
|
|
42
|
+
granularity?: "sentence" | "word" | null | undefined;
|
|
43
|
+
textRef?: "id-fragment" | "text-fragment" | null | undefined;
|
|
43
44
|
primaryLocale?: Intl.Locale | null | undefined;
|
|
44
45
|
logger?: Logger | null | undefined;
|
|
45
46
|
onProgress?: ((progress: number) => void) | null | undefined;
|
|
@@ -55,8 +56,9 @@ declare class Aligner {
|
|
|
55
56
|
private alignedChapters;
|
|
56
57
|
private timing;
|
|
57
58
|
private granularity;
|
|
59
|
+
private textRef;
|
|
58
60
|
report: Report;
|
|
59
|
-
constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
|
|
61
|
+
constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, textRef: "id-fragment" | "text-fragment" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
|
|
60
62
|
private getChapterSentences;
|
|
61
63
|
private writeAlignedChapter;
|
|
62
64
|
private addChapterReport;
|
package/dist/align/align.d.ts
CHANGED
|
@@ -39,7 +39,8 @@ interface Report {
|
|
|
39
39
|
}
|
|
40
40
|
interface AlignOptions {
|
|
41
41
|
reportsPath?: string | null | undefined;
|
|
42
|
-
granularity
|
|
42
|
+
granularity?: "sentence" | "word" | null | undefined;
|
|
43
|
+
textRef?: "id-fragment" | "text-fragment" | null | undefined;
|
|
43
44
|
primaryLocale?: Intl.Locale | null | undefined;
|
|
44
45
|
logger?: Logger | null | undefined;
|
|
45
46
|
onProgress?: ((progress: number) => void) | null | undefined;
|
|
@@ -55,8 +56,9 @@ declare class Aligner {
|
|
|
55
56
|
private alignedChapters;
|
|
56
57
|
private timing;
|
|
57
58
|
private granularity;
|
|
59
|
+
private textRef;
|
|
58
60
|
report: Report;
|
|
59
|
-
constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
|
|
61
|
+
constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, textRef: "id-fragment" | "text-fragment" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
|
|
60
62
|
private getChapterSentences;
|
|
61
63
|
private writeAlignedChapter;
|
|
62
64
|
private addChapterReport;
|
package/dist/align/align.js
CHANGED
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
} from "./getSentenceRanges.js";
|
|
28
28
|
import { findBoundaries } from "./search.js";
|
|
29
29
|
import { slugify } from "./slugify.js";
|
|
30
|
+
import { TextFragmentTrie } from "./textFragments.js";
|
|
30
31
|
async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
31
32
|
var _stack = [];
|
|
32
33
|
try {
|
|
@@ -62,6 +63,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
|
62
63
|
audiobookFiles,
|
|
63
64
|
transcriptions,
|
|
64
65
|
options.granularity,
|
|
66
|
+
options.textRef,
|
|
65
67
|
options.primaryLocale,
|
|
66
68
|
options.logger
|
|
67
69
|
);
|
|
@@ -85,7 +87,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
|
|
|
85
87
|
}
|
|
86
88
|
}
|
|
87
89
|
class Aligner {
|
|
88
|
-
constructor(epub, audiofiles, transcriptions, granularity, languageOverride, logger) {
|
|
90
|
+
constructor(epub, audiofiles, transcriptions, granularity, textRef, languageOverride, logger) {
|
|
89
91
|
this.epub = epub;
|
|
90
92
|
this.audiofiles = audiofiles;
|
|
91
93
|
this.languageOverride = languageOverride;
|
|
@@ -93,12 +95,14 @@ class Aligner {
|
|
|
93
95
|
this.transcription = concatTranscriptions(transcriptions, audiofiles);
|
|
94
96
|
this.getChapterSentences = memoize(this.getChapterSentences.bind(this));
|
|
95
97
|
this.granularity = granularity ?? "sentence";
|
|
98
|
+
this.textRef = textRef ?? "id-fragment";
|
|
96
99
|
}
|
|
97
100
|
transcription;
|
|
98
101
|
totalDuration = 0;
|
|
99
102
|
alignedChapters = [];
|
|
100
103
|
timing = createAggregator();
|
|
101
104
|
granularity;
|
|
105
|
+
textRef;
|
|
102
106
|
report = {
|
|
103
107
|
chapters: []
|
|
104
108
|
};
|
|
@@ -113,8 +117,59 @@ class Aligner {
|
|
|
113
117
|
return segmentation.filter((s) => s.text.match(/\S/));
|
|
114
118
|
}
|
|
115
119
|
async writeAlignedChapter(alignedChapter) {
|
|
120
|
+
const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
|
|
116
121
|
const { chapter, sentenceRanges, wordRanges, xml } = alignedChapter;
|
|
122
|
+
const sentences = await this.getChapterSentences(chapter.id);
|
|
123
|
+
const sentenceIdToFragment = new Map(
|
|
124
|
+
sentenceRanges.map((range) => [
|
|
125
|
+
range.id,
|
|
126
|
+
`${range.chapterId}-s${range.id}`
|
|
127
|
+
])
|
|
128
|
+
);
|
|
129
|
+
const wordIdToFragment = new Map(
|
|
130
|
+
wordRanges.map((ranges) => [
|
|
131
|
+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
132
|
+
ranges[0].sentenceId,
|
|
133
|
+
new Map(
|
|
134
|
+
ranges.map((range) => [
|
|
135
|
+
range.id,
|
|
136
|
+
`${range.chapterId}-s${range.sentenceId}-w${range.id}`
|
|
137
|
+
])
|
|
138
|
+
)
|
|
139
|
+
])
|
|
140
|
+
);
|
|
117
141
|
const wordRangeMap = new Map(wordRanges.map((w) => [w[0].sentenceId, w]));
|
|
142
|
+
if (this.textRef === "text-fragment") {
|
|
143
|
+
const trie = new TextFragmentTrie(
|
|
144
|
+
sentences.map((s) => s.text.replace("\n", " ")),
|
|
145
|
+
locale
|
|
146
|
+
);
|
|
147
|
+
for (const range of sentenceRanges) {
|
|
148
|
+
const sentence = sentences[range.id];
|
|
149
|
+
sentenceIdToFragment.set(
|
|
150
|
+
range.id,
|
|
151
|
+
trie.findMinimalFragment(
|
|
152
|
+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
153
|
+
range.id
|
|
154
|
+
)
|
|
155
|
+
);
|
|
156
|
+
if (this.granularity === "word") {
|
|
157
|
+
const wordRanges2 = wordRangeMap.get(range.id);
|
|
158
|
+
const toFragment = wordIdToFragment.get(range.id);
|
|
159
|
+
const words = sentence.words.entries.filter((w) => w.text.match(/\S/));
|
|
160
|
+
const wordTrie = new TextFragmentTrie(
|
|
161
|
+
words.map((w) => w.text.replace("\n", " ")),
|
|
162
|
+
locale
|
|
163
|
+
);
|
|
164
|
+
for (const wordRange of wordRanges2) {
|
|
165
|
+
toFragment.set(
|
|
166
|
+
wordRange.id,
|
|
167
|
+
wordTrie.findMinimalFragment(wordRange.id)
|
|
168
|
+
);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
118
173
|
const audiofiles = Array.from(
|
|
119
174
|
new Set(sentenceRanges.map(({ audiofile }) => audiofile))
|
|
120
175
|
);
|
|
@@ -151,7 +206,9 @@ class Aligner {
|
|
|
151
206
|
chapter,
|
|
152
207
|
this.granularity,
|
|
153
208
|
sentenceRanges,
|
|
154
|
-
wordRangeMap
|
|
209
|
+
wordRangeMap,
|
|
210
|
+
sentenceIdToFragment,
|
|
211
|
+
wordIdToFragment
|
|
155
212
|
),
|
|
156
213
|
"xml"
|
|
157
214
|
);
|
|
@@ -411,7 +468,7 @@ class Aligner {
|
|
|
411
468
|
alignedChapter.wordRanges[i] = expandEmptySentenceRanges(wordRanges);
|
|
412
469
|
}
|
|
413
470
|
await this.writeAlignedChapter(alignedChapter);
|
|
414
|
-
collapsedStart += sentences.length
|
|
471
|
+
collapsedStart += sentences.length;
|
|
415
472
|
}
|
|
416
473
|
await this.epub.addMetadata({
|
|
417
474
|
type: "meta",
|
|
@@ -439,7 +496,7 @@ class Aligner {
|
|
|
439
496
|
return this.timing;
|
|
440
497
|
}
|
|
441
498
|
}
|
|
442
|
-
function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
499
|
+
function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
|
|
443
500
|
return [
|
|
444
501
|
Epub.createXmlElement(
|
|
445
502
|
"smil",
|
|
@@ -466,7 +523,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
466
523
|
},
|
|
467
524
|
[
|
|
468
525
|
Epub.createXmlElement("text", {
|
|
469
|
-
src: `../${chapter.href}#${
|
|
526
|
+
src: `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
|
|
470
527
|
}),
|
|
471
528
|
Epub.createXmlElement("audio", {
|
|
472
529
|
src: `../Audio/${basename(sentenceRange.audiofile)}`,
|
|
@@ -477,12 +534,13 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
477
534
|
);
|
|
478
535
|
}
|
|
479
536
|
const words = wordRanges.get(sentenceRange.id);
|
|
537
|
+
const wordToFragment = wordIdToFragment.get(sentenceRange.id);
|
|
480
538
|
return Epub.createXmlElement(
|
|
481
539
|
"seq",
|
|
482
540
|
{
|
|
483
541
|
id: `${chapter.id}-s${sentenceRange.id}`,
|
|
484
542
|
"epub:type": "text-range-small",
|
|
485
|
-
"epub:textref": `../${chapter.href}#${
|
|
543
|
+
"epub:textref": `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
|
|
486
544
|
},
|
|
487
545
|
words.map(
|
|
488
546
|
(word) => Epub.createXmlElement(
|
|
@@ -492,7 +550,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
|
|
|
492
550
|
},
|
|
493
551
|
[
|
|
494
552
|
Epub.createXmlElement("text", {
|
|
495
|
-
src: `../${chapter.href}#${
|
|
553
|
+
src: `../${chapter.href}#${wordToFragment.get(word.id)}`
|
|
496
554
|
}),
|
|
497
555
|
Epub.createXmlElement("audio", {
|
|
498
556
|
src: `../Audio/${basename(word.audiofile)}`,
|
|
@@ -275,6 +275,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
|
|
|
275
275
|
if (start2 && end2) {
|
|
276
276
|
perSentenceWordRanges.push({
|
|
277
277
|
id: k,
|
|
278
|
+
chapterId,
|
|
278
279
|
sentenceId: j + chapterSentenceIndex + slice[0],
|
|
279
280
|
start: end2.audiofile === start2.audiofile ? start2.start : 0,
|
|
280
281
|
audiofile: end2.audiofile,
|
|
@@ -247,6 +247,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
|
|
|
247
247
|
if (start2 && end2) {
|
|
248
248
|
perSentenceWordRanges.push({
|
|
249
249
|
id: k,
|
|
250
|
+
chapterId,
|
|
250
251
|
sentenceId: j + chapterSentenceIndex + slice[0],
|
|
251
252
|
start: end2.audiofile === start2.audiofile ? start2.start : 0,
|
|
252
253
|
audiofile: end2.audiofile,
|
package/dist/align/parse.cjs
CHANGED
|
@@ -34,6 +34,12 @@ const alignParser = (0, import_core.object)("Alignment", {
|
|
|
34
34
|
"--epub",
|
|
35
35
|
(0, import_valueparser.path)({ mustExist: true, type: "file", extensions: [".epub"] })
|
|
36
36
|
),
|
|
37
|
+
textRef: (0, import_core.withDefault)(
|
|
38
|
+
(0, import_core.option)("--text-ref", (0, import_core.choice)(["id-fragment", "text-fragment"]), {
|
|
39
|
+
description: import_core.message`Whether to use text fragments rather than element id fragments to identify text ranges in generated media overlays.`
|
|
40
|
+
}),
|
|
41
|
+
"id-fragment"
|
|
42
|
+
),
|
|
37
43
|
reports: (0, import_core.optional)((0, import_core.option)("--reports", (0, import_valueparser.path)({ type: "directory" })))
|
|
38
44
|
});
|
|
39
45
|
const alignCommand = (0, import_core.command)(
|
package/dist/align/parse.d.cts
CHANGED
|
@@ -3,10 +3,12 @@ import * as _optique_core from '@optique/core';
|
|
|
3
3
|
declare const alignParser: _optique_core.Parser<"sync", {
|
|
4
4
|
readonly audiobook: string;
|
|
5
5
|
readonly epub: string;
|
|
6
|
+
readonly textRef: "id-fragment" | "text-fragment";
|
|
6
7
|
readonly reports: string | undefined;
|
|
7
8
|
}, {
|
|
8
9
|
readonly audiobook: _optique_core.ValueParserResult<string> | undefined;
|
|
9
10
|
readonly epub: _optique_core.ValueParserResult<string> | undefined;
|
|
11
|
+
readonly textRef: [_optique_core.ValueParserResult<"id-fragment" | "text-fragment"> | undefined] | undefined;
|
|
10
12
|
readonly reports: [_optique_core.ValueParserResult<string> | undefined] | undefined;
|
|
11
13
|
}>;
|
|
12
14
|
declare const alignCommand: _optique_core.Parser<"sync", {
|
|
@@ -16,6 +18,7 @@ declare const alignCommand: _optique_core.Parser<"sync", {
|
|
|
16
18
|
} & {
|
|
17
19
|
readonly audiobook: string;
|
|
18
20
|
readonly epub: string;
|
|
21
|
+
readonly textRef: "id-fragment" | "text-fragment";
|
|
19
22
|
readonly reports: string | undefined;
|
|
20
23
|
} & {
|
|
21
24
|
readonly noProgress: boolean;
|
package/dist/align/parse.d.ts
CHANGED
|
@@ -3,10 +3,12 @@ import * as _optique_core from '@optique/core';
|
|
|
3
3
|
declare const alignParser: _optique_core.Parser<"sync", {
|
|
4
4
|
readonly audiobook: string;
|
|
5
5
|
readonly epub: string;
|
|
6
|
+
readonly textRef: "id-fragment" | "text-fragment";
|
|
6
7
|
readonly reports: string | undefined;
|
|
7
8
|
}, {
|
|
8
9
|
readonly audiobook: _optique_core.ValueParserResult<string> | undefined;
|
|
9
10
|
readonly epub: _optique_core.ValueParserResult<string> | undefined;
|
|
11
|
+
readonly textRef: [_optique_core.ValueParserResult<"id-fragment" | "text-fragment"> | undefined] | undefined;
|
|
10
12
|
readonly reports: [_optique_core.ValueParserResult<string> | undefined] | undefined;
|
|
11
13
|
}>;
|
|
12
14
|
declare const alignCommand: _optique_core.Parser<"sync", {
|
|
@@ -16,6 +18,7 @@ declare const alignCommand: _optique_core.Parser<"sync", {
|
|
|
16
18
|
} & {
|
|
17
19
|
readonly audiobook: string;
|
|
18
20
|
readonly epub: string;
|
|
21
|
+
readonly textRef: "id-fragment" | "text-fragment";
|
|
19
22
|
readonly reports: string | undefined;
|
|
20
23
|
} & {
|
|
21
24
|
readonly noProgress: boolean;
|
package/dist/align/parse.js
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
import "../chunk-BIEQXUOY.js";
|
|
2
2
|
import {
|
|
3
|
+
choice,
|
|
3
4
|
command,
|
|
4
5
|
constant,
|
|
5
6
|
merge,
|
|
6
7
|
message,
|
|
7
8
|
object,
|
|
8
9
|
option,
|
|
9
|
-
optional
|
|
10
|
+
optional,
|
|
11
|
+
withDefault
|
|
10
12
|
} from "@optique/core";
|
|
11
13
|
import { path } from "@optique/run/valueparser";
|
|
12
14
|
import {
|
|
@@ -23,6 +25,12 @@ const alignParser = object("Alignment", {
|
|
|
23
25
|
"--epub",
|
|
24
26
|
path({ mustExist: true, type: "file", extensions: [".epub"] })
|
|
25
27
|
),
|
|
28
|
+
textRef: withDefault(
|
|
29
|
+
option("--text-ref", choice(["id-fragment", "text-fragment"]), {
|
|
30
|
+
description: message`Whether to use text fragments rather than element id fragments to identify text ranges in generated media overlays.`
|
|
31
|
+
}),
|
|
32
|
+
"id-fragment"
|
|
33
|
+
),
|
|
26
34
|
reports: optional(option("--reports", path({ type: "directory" })))
|
|
27
35
|
});
|
|
28
36
|
const alignCommand = command(
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
// Bundler-emitted helper: for every key in `all`, defines an enumerable
// accessor on `target` whose getter is the function stored at `all[name]`.
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
// Bundler-emitted helper: when `from` is an object or function, defines a
// getter on `to` for each own property name of `from`, skipping names `to`
// already owns and the single name passed as `except`. The getter reads
// `from[key]` at access time; enumerability follows the source descriptor
// (enumerable when no descriptor is found). Always returns `to`.
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
// Bundler-emitted helper: creates a fresh object flagged with a
// (non-enumerable, by defineProperty defaults) `__esModule: true` property and
// copies the properties of `mod` onto it via __copyProps.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var textFragments_exports = {};
|
|
20
|
+
__export(textFragments_exports, {
|
|
21
|
+
TextFragmentTrie: () => TextFragmentTrie
|
|
22
|
+
});
|
|
23
|
+
module.exports = __toCommonJS(textFragments_exports);
|
|
24
|
+
var import_itertools = require("itertools");
|
|
25
|
+
var import_runes2 = require("runes2");
|
|
26
|
+
/**
 * Trie holding every substring of every span (one node per rune), used to
 * derive URL text-fragment directives of the form
 * `:~:text=[prefix-,]start[,end]` for an individual span.
 *
 * Each node records, in `indices`, every `{ span, pos }` at which the path
 * from the root to that node ends, where `span` is the index into `spans` and
 * `pos` is the rune index of the node's character inside that span.
 */
class TextFragmentTrie {
  /** Sentinel root: no parent, empty value, no indices of its own. */
  root = new Node(null, "");
  /** Locale-lower-cased copies of the constructor input, index-aligned with it. */
  spans;

  /**
   * @param casedSpans Spans in their original casing; they are lower-cased
   *   with `locale` before being indexed.
   * @param locale Locale used for lower-casing. Defaults to `en-Latn-US`.
   */
  constructor(casedSpans, locale = new Intl.Locale("en-Latn-US")) {
    this.spans = casedSpans.map((span) => span.toLocaleLowerCase(locale));
    for (const [i, span] of (0, import_itertools.enumerate)(this.spans)) {
      // Every entry of `parents` is the tip of a path that began at an earlier
      // rune of this span. Appending the root after each rune opens a new path
      // at the next rune, so all substrings of the span end up in the trie.
      const parents = [this.root];
      for (const [j, char] of (0, import_itertools.enumerate)((0, import_runes2.runes)(span))) {
        for (const [k, parent] of (0, import_itertools.enumerate)(parents)) {
          const newNode = new Node(parent, char, { span: i, pos: j });
          let node = parent.children.find((child) => child.eq(newNode));
          if (!node) {
            node = newNode;
            parent.children.push(node);
          } else {
            node.indices.push({ span: i, pos: j });
          }
          parents[k] = node;
        }
        parents.push(this.root);
      }
    }
  }

  /**
   * Walks down the trie along the earliest occurrence of each rune of the
   * span and stops at the first node that is referenced by exactly one index.
   *
   * @param spanIndex Index of the span (as passed to the constructor).
   * @returns A `:~:text=` fragment string. When no uniquely-referenced node is
   *   found, the fragment is built from the last node reached and a prefix is
   *   requested from `nodeToFragment`.
   */
  findMinimalFragment(spanIndex) {
    let node = this.root;
    while (node.children.length) {
      const candidates = node.children.filter(
        (candidate) => candidate.indices.some(({ span }) => span === spanIndex)
      );
      // Choose the child whose first recorded position in this span is lowest.
      const child = (0, import_itertools.min)(
        candidates,
        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
        (c) => c.indices.find((i) => i.span === spanIndex).pos
      );
      if (!child) {
        return this.nodeToFragment(node, spanIndex, true);
      }
      if (child.indices.length === 1) {
        return this.nodeToFragment(child, spanIndex);
      }
      node = child;
    }
    return this.nodeToFragment(node, spanIndex, true);
  }

  /**
   * Builds the fragment string for the path ending at `node`.
   *
   * @param node Trie node whose root-to-node path is used as the `start` text.
   * @param spanIndex Index of the span the fragment is for.
   * @param findPrefix When truthy and a previous span exists, a `prefix-,`
   *   part is added, taken from the end of the previous span and grown until
   *   no other occurrence of the same path is preceded by the same text.
   * @returns The `:~:text=…` fragment string.
   */
  nodeToFragment(node, spanIndex, findPrefix) {
    const span = this.spans[spanIndex];
    let fragment = ":~:text=";
    let prefix = "";
    if (findPrefix) {
      const prev = this.spans[spanIndex - 1];
      if (prev) {
        // For each occurrence of this path in another span, collect the text
        // that precedes it (previous span + leading part of that span),
        // reversed so index 0 is the rune immediately before the occurrence.
        let rivals = node.indices
          .filter(({ span: s }) => s !== spanIndex)
          .map(({ span: otherIndex, pos }) => {
            // `pos` is a rune index and every trie level is one rune, so step
            // back one position per level to reach the occurrence's start.
            let cursor = node;
            let startPos = pos;
            while (cursor.parent && cursor.parent !== this.root) {
              startPos -= 1;
              cursor = cursor.parent;
            }
            const before = this.spans[otherIndex - 1] ?? "";
            // Slice by runes, not UTF-16 units, to stay consistent with `pos`.
            const leading = (0, import_runes2.runes)(this.spans[otherIndex])
              .slice(0, startPos)
              .join("");
            return (0, import_runes2.runes)(before + leading).toReversed();
          });
        for (const [offset, char] of (0, import_itertools.enumerate)(
          (0, import_runes2.runes)(prev).toReversed()
        )) {
          prefix = char + prefix;
          // Keep only rivals that still match. Filtering avoids index drift
          // from removing elements of the list while walking a copy of it.
          rivals = rivals.filter((rival) => rival[offset] === char);
          if (rivals.length === 0) {
            break;
          }
        }
      }
    }
    if (prefix) {
      fragment += `${encodeTextFragmentPart(prefix)}-,`;
    }
    // Concatenate node values from `node` up to the root to get the start text.
    let startNode = node;
    let start = "";
    while (startNode) {
      start = startNode.value + start;
      startNode = startNode.parent;
    }
    fragment += encodeTextFragmentPart(start);
    // NOTE(review): `start` already ends with `node.value`, so this skips one
    // additional value-length beyond the start text — confirm this is intended.
    const remainingSentence = span.slice(start.length + node.value.length);
    // Grow `end` backwards from the tail of the remainder until its first
    // occurrence in the remainder is at its own (suffix) position.
    let end = "";
    let i = remainingSentence.length - 1;
    while (remainingSentence.indexOf(end) !== i + 1 && i >= node.value.length) {
      end = remainingSentence.slice(i);
      i--;
    }
    if (end) {
      fragment += `,${encodeTextFragmentPart(end)}`;
    }
    return fragment;
  }
}
|
|
127
|
+
/**
 * Percent-encodes one part of a `:~:text=` directive.
 *
 * Applies `encodeURIComponent`, then rewrites any remaining `-` as `%2d` and
 * any remaining `,` as `%2c`, since both characters act as separators inside
 * the directive.
 */
function encodeTextFragmentPart(part) {
  const uriEncoded = encodeURIComponent(part);
  const dashEscaped = uriEncoded.split("-").join("%2d");
  return dashEscaped.split(",").join("%2c");
}
|
|
130
|
+
class Node {
|
|
131
|
+
constructor(parent, value, firstIndex) {
|
|
132
|
+
this.parent = parent;
|
|
133
|
+
this.value = value;
|
|
134
|
+
if (firstIndex !== void 0) {
|
|
135
|
+
this.indices.push(firstIndex);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
children = [];
|
|
139
|
+
indices = [];
|
|
140
|
+
eq(other) {
|
|
141
|
+
return this.value === other.value;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
145
|
+
0 && (module.exports = {
|
|
146
|
+
TextFragmentTrie
|
|
147
|
+
});
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Trie built from a list of text spans, used to produce `:~:text=` URL
 * fragment strings for individual spans.
 */
declare class TextFragmentTrie {
|
|
2
|
+
private root;
|
|
3
|
+
private spans;
|
|
4
|
+
/**
 * @param casedSpans Spans in their original casing; they are lower-cased with `locale` before indexing.
 * @param locale Locale used for lower-casing; the implementation defaults to `en-Latn-US`.
 */
constructor(casedSpans: string[], locale?: Intl.Locale);
|
|
5
|
+
/**
 * @param spanIndex Index into the spans passed to the constructor.
 * @returns A fragment string beginning with `:~:text=`.
 */
findMinimalFragment(spanIndex: number): string;
|
|
6
|
+
/**
 * Builds the fragment string for the trie path ending at `node`.
 * @param findPrefix When true, a `prefix-,` part taken from the previous span may be added.
 */
nodeToFragment(node: Node, spanIndex: number, findPrefix?: boolean): string;
|
|
7
|
+
}
|
|
8
|
+
/** A single trie node. Not exported from this module. */
declare class Node {
|
|
9
|
+
/** Parent node, or `null` when there is none. */
parent: Node | null;
|
|
10
|
+
/** Value stored at this node. */
value: string;
|
|
11
|
+
/** Child nodes. */
children: Node[];
|
|
12
|
+
/** Entries recorded for this node: a span index and a position within that span. */
indices: {
|
|
13
|
+
span: number;
|
|
14
|
+
pos: number;
|
|
15
|
+
}[];
|
|
16
|
+
/** @param firstIndex Optional initial entry for `indices`. */
constructor(parent: Node | null, value: string, firstIndex?: {
|
|
17
|
+
span: number;
|
|
18
|
+
pos: number;
|
|
19
|
+
});
|
|
20
|
+
/** Compares this node with `other`; the implementation compares their `value` fields. */
eq(other: Node): boolean;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export { TextFragmentTrie };
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Trie built from a list of text spans, used to produce `:~:text=` URL
 * fragment strings for individual spans.
 */
declare class TextFragmentTrie {
|
|
2
|
+
private root;
|
|
3
|
+
private spans;
|
|
4
|
+
/**
 * @param casedSpans Spans in their original casing; they are lower-cased with `locale` before indexing.
 * @param locale Locale used for lower-casing; the implementation defaults to `en-Latn-US`.
 */
constructor(casedSpans: string[], locale?: Intl.Locale);
|
|
5
|
+
/**
 * @param spanIndex Index into the spans passed to the constructor.
 * @returns A fragment string beginning with `:~:text=`.
 */
findMinimalFragment(spanIndex: number): string;
|
|
6
|
+
/**
 * Builds the fragment string for the trie path ending at `node`.
 * @param findPrefix When true, a `prefix-,` part taken from the previous span may be added.
 */
nodeToFragment(node: Node, spanIndex: number, findPrefix?: boolean): string;
|
|
7
|
+
}
|
|
8
|
+
/** A single trie node. Not exported from this module. */
declare class Node {
|
|
9
|
+
/** Parent node, or `null` when there is none. */
parent: Node | null;
|
|
10
|
+
/** Value stored at this node. */
value: string;
|
|
11
|
+
/** Child nodes. */
children: Node[];
|
|
12
|
+
/** Entries recorded for this node: a span index and a position within that span. */
indices: {
|
|
13
|
+
span: number;
|
|
14
|
+
pos: number;
|
|
15
|
+
}[];
|
|
16
|
+
/** @param firstIndex Optional initial entry for `indices`. */
constructor(parent: Node | null, value: string, firstIndex?: {
|
|
17
|
+
span: number;
|
|
18
|
+
pos: number;
|
|
19
|
+
});
|
|
20
|
+
/** Compares this node with `other`; the implementation compares their `value` fields. */
eq(other: Node): boolean;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export { TextFragmentTrie };
|