@storyteller-platform/align 0.1.18 → 0.1.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/align.cjs +107 -45
- package/dist/align/align.d.cts +3 -0
- package/dist/align/align.d.ts +3 -0
- package/dist/align/align.js +110 -46
- package/dist/align/getSentenceRanges.cjs +116 -68
- package/dist/align/getSentenceRanges.d.cts +35 -5
- package/dist/align/getSentenceRanges.d.ts +35 -5
- package/dist/align/getSentenceRanges.js +113 -67
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/markup/markup.cjs +18 -1
- package/dist/markup/markup.d.cts +1 -1
- package/dist/markup/markup.d.ts +1 -1
- package/dist/markup/markup.js +18 -1
- package/dist/markup/serializeDom.cjs +80 -53
- package/dist/markup/serializeDom.d.cts +3 -4
- package/dist/markup/serializeDom.d.ts +3 -4
- package/dist/markup/serializeDom.js +79 -51
- package/dist/markup/transform.cjs +6 -1
- package/dist/markup/transform.js +6 -1
- package/package.json +3 -3
|
@@ -5,9 +5,9 @@ import { getTrackDuration } from "../common/ffmpeg.js";
|
|
|
5
5
|
import { errorAlign } from "../errorAlign/errorAlign.js";
|
|
6
6
|
import { Alignment, reversed } from "../errorAlign/utils.js";
|
|
7
7
|
import { slugify } from "./slugify.js";
|
|
8
|
-
function findStartTimestamp(matchStartIndex,
|
|
9
|
-
const entry =
|
|
10
|
-
(entry2) =>
|
|
8
|
+
function findStartTimestamp(matchStartIndex, timeline) {
|
|
9
|
+
const entry = timeline.find(
|
|
10
|
+
(entry2) => entry2.mappedEndOffsetUtf16 > matchStartIndex
|
|
11
11
|
);
|
|
12
12
|
if (!entry) return null;
|
|
13
13
|
return {
|
|
@@ -16,9 +16,9 @@ function findStartTimestamp(matchStartIndex, transcription) {
|
|
|
16
16
|
audiofile: entry.audiofile
|
|
17
17
|
};
|
|
18
18
|
}
|
|
19
|
-
function findEndTimestamp(matchEndIndex,
|
|
20
|
-
const entry =
|
|
21
|
-
(entry2) =>
|
|
19
|
+
function findEndTimestamp(matchEndIndex, timeline) {
|
|
20
|
+
const entry = timeline.findLast(
|
|
21
|
+
(entry2) => entry2.mappedStartOffsetUtf16 < matchEndIndex
|
|
22
22
|
);
|
|
23
23
|
if (!entry) return null;
|
|
24
24
|
return {
|
|
@@ -109,17 +109,23 @@ function errorAlignWithNarrowing(refSentences, hyp, narrowStart, narrowEnd) {
|
|
|
109
109
|
);
|
|
110
110
|
return { alignments, slice: [slice[0] + narrowed[0], slice[0] + narrowed[1]] };
|
|
111
111
|
}
|
|
112
|
-
|
|
112
|
+
function mapTranscriptionTimeline(transcription, mapping) {
|
|
113
|
+
return transcription.timeline.map((entry) => ({
|
|
114
|
+
...entry,
|
|
115
|
+
mappedStartOffsetUtf16: mapping.map(entry.startOffsetUtf16 ?? 0, 1),
|
|
116
|
+
mappedEndOffsetUtf16: mapping.map(entry.endOffsetUtf16 ?? 0, -1)
|
|
117
|
+
}));
|
|
118
|
+
}
|
|
119
|
+
async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, chapterId, chapterOffset, chapterEndOffset, granularity, locale) {
|
|
113
120
|
const sentenceRanges = [];
|
|
114
|
-
const
|
|
115
|
-
const
|
|
121
|
+
const wordRanges = [];
|
|
122
|
+
const slugifiedChapterTranscript = transcriptionText.slice(
|
|
116
123
|
chapterOffset,
|
|
117
124
|
chapterEndOffset
|
|
118
125
|
);
|
|
119
|
-
const { result: slugifiedChapterTranscript, mapping: transcriptMapping } = await slugify(chapterTranscript, locale);
|
|
120
126
|
const slugifiedChapterSentences = [];
|
|
121
127
|
for (const s of sentences) {
|
|
122
|
-
const { result } = await slugify(s, locale);
|
|
128
|
+
const { result } = await slugify(s.text, locale);
|
|
123
129
|
slugifiedChapterSentences.push(result);
|
|
124
130
|
}
|
|
125
131
|
let firstFoundSentence = 0;
|
|
@@ -188,21 +194,16 @@ async function getSentenceRanges(transcription, sentences, chapterOffset, chapte
|
|
|
188
194
|
const sentenceLengthInSlugifiedTranscript = sentenceAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
|
|
189
195
|
if (score > 0) {
|
|
190
196
|
const start = findStartTimestamp(
|
|
191
|
-
chapterOffset +
|
|
192
|
-
|
|
193
|
-
1
|
|
194
|
-
),
|
|
195
|
-
transcription
|
|
196
|
-
);
|
|
197
|
-
chapterTranscriptEndIndex = chapterOffset + transcriptMapping.invert().map(
|
|
198
|
-
slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex + sentenceLengthInSlugifiedTranscript,
|
|
199
|
-
-1
|
|
197
|
+
chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
|
|
198
|
+
mappedTimeline
|
|
200
199
|
);
|
|
201
|
-
|
|
200
|
+
chapterTranscriptEndIndex = chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex + sentenceLengthInSlugifiedTranscript;
|
|
201
|
+
const end = findEndTimestamp(chapterTranscriptEndIndex, mappedTimeline);
|
|
202
202
|
if (start && end) {
|
|
203
203
|
if (start.audiofile !== end.audiofile) {
|
|
204
204
|
sentenceRanges.push({
|
|
205
205
|
id: j + chapterSentenceIndex + slice[0],
|
|
206
|
+
chapterId,
|
|
206
207
|
start: 0,
|
|
207
208
|
audiofile: end.audiofile,
|
|
208
209
|
end: end.end
|
|
@@ -210,12 +211,56 @@ async function getSentenceRanges(transcription, sentences, chapterOffset, chapte
|
|
|
210
211
|
} else {
|
|
211
212
|
sentenceRanges.push({
|
|
212
213
|
id: j + chapterSentenceIndex + slice[0],
|
|
214
|
+
chapterId,
|
|
213
215
|
start: start.start,
|
|
214
216
|
audiofile: start.audiofile,
|
|
215
217
|
end: end.end
|
|
216
218
|
});
|
|
217
219
|
}
|
|
218
220
|
}
|
|
221
|
+
if (granularity === "word") {
|
|
222
|
+
const sentenceSegmentation = sentences[j + chapterSentenceIndex + slice[0]];
|
|
223
|
+
const words = [];
|
|
224
|
+
for (const entry of sentenceSegmentation.words.entries) {
|
|
225
|
+
if (!entry.text.match(/\S/)) continue;
|
|
226
|
+
const { result } = await slugify(entry.text, locale);
|
|
227
|
+
words.push(result);
|
|
228
|
+
}
|
|
229
|
+
let currentTranscriptWordWindowIndex = currentTranscriptWindowIndex;
|
|
230
|
+
let sentenceAlignmentIndex = 0;
|
|
231
|
+
const perSentenceWordRanges = [];
|
|
232
|
+
for (const [k, word] of enumerate(words)) {
|
|
233
|
+
if (!word) continue;
|
|
234
|
+
const { alignments: wordAlignments } = getAlignmentsForSentence(
|
|
235
|
+
word,
|
|
236
|
+
sentenceAlignments.slice(sentenceAlignmentIndex)
|
|
237
|
+
);
|
|
238
|
+
const wordLengthInSlugifiedTranscript = wordAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
|
|
239
|
+
const start2 = findStartTimestamp(
|
|
240
|
+
chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWordWindowIndex,
|
|
241
|
+
mappedTimeline
|
|
242
|
+
);
|
|
243
|
+
const end2 = findEndTimestamp(
|
|
244
|
+
chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWordWindowIndex + wordLengthInSlugifiedTranscript,
|
|
245
|
+
mappedTimeline
|
|
246
|
+
);
|
|
247
|
+
if (start2 && end2) {
|
|
248
|
+
perSentenceWordRanges.push({
|
|
249
|
+
id: k,
|
|
250
|
+
sentenceId: j + chapterSentenceIndex + slice[0],
|
|
251
|
+
start: end2.audiofile === start2.audiofile ? start2.start : 0,
|
|
252
|
+
audiofile: end2.audiofile,
|
|
253
|
+
end: end2.end
|
|
254
|
+
});
|
|
255
|
+
}
|
|
256
|
+
sentenceAlignmentIndex += wordAlignments.length;
|
|
257
|
+
currentTranscriptWordWindowIndex += wordLengthInSlugifiedTranscript;
|
|
258
|
+
if (slugifiedChapterTranscriptWindow[currentTranscriptWordWindowIndex] === "-") {
|
|
259
|
+
currentTranscriptWordWindowIndex++;
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
wordRanges.push(perSentenceWordRanges);
|
|
263
|
+
}
|
|
219
264
|
}
|
|
220
265
|
alignmentIndex += sentenceAlignments.length;
|
|
221
266
|
currentTranscriptWindowIndex += sentenceLengthInSlugifiedTranscript;
|
|
@@ -231,6 +276,7 @@ async function getSentenceRanges(transcription, sentences, chapterOffset, chapte
|
|
|
231
276
|
}
|
|
232
277
|
return {
|
|
233
278
|
sentenceRanges,
|
|
279
|
+
wordRanges,
|
|
234
280
|
transcriptionOffset: chapterTranscriptEndIndex,
|
|
235
281
|
firstFoundSentence,
|
|
236
282
|
lastFoundSentence
|
|
@@ -242,66 +288,53 @@ async function getLargestGap(trailing, leading) {
|
|
|
242
288
|
if (trailingGap > leadingGap) return [trailingGap, trailing.audiofile];
|
|
243
289
|
return [leadingGap, leading.audiofile];
|
|
244
290
|
}
|
|
245
|
-
async function interpolateSentenceRanges(sentenceRanges,
|
|
291
|
+
async function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts) {
|
|
246
292
|
const interpolated = [];
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
const
|
|
258
|
-
const
|
|
259
|
-
for (let i = 0; i < count; i++) {
|
|
260
|
-
interpolated.push({
|
|
261
|
-
id: i,
|
|
262
|
-
start: start + interpolatedLength * i,
|
|
263
|
-
end: start + interpolatedLength * (i + 1),
|
|
264
|
-
audiofile: first.audiofile
|
|
265
|
-
});
|
|
266
|
-
}
|
|
267
|
-
interpolated.push(first);
|
|
268
|
-
} else {
|
|
269
|
-
rest.unshift(first);
|
|
270
|
-
}
|
|
271
|
-
for (const sentenceRange of rest) {
|
|
272
|
-
if (interpolated.length === 0) {
|
|
273
|
-
interpolated.push(sentenceRange);
|
|
274
|
-
continue;
|
|
275
|
-
}
|
|
276
|
-
const lastSentenceRange2 = interpolated[interpolated.length - 1];
|
|
277
|
-
const count = sentenceRange.id - lastSentenceRange2.id - 1;
|
|
293
|
+
for (let i = 0; i < sentenceRanges.length; i++) {
|
|
294
|
+
const endRange = sentenceRanges[i];
|
|
295
|
+
const startRange = sentenceRanges[i - 1] ?? {
|
|
296
|
+
id: 0,
|
|
297
|
+
audiofile: endRange.audiofile,
|
|
298
|
+
chapterId: endRange.chapterId,
|
|
299
|
+
start: 0,
|
|
300
|
+
end: 0
|
|
301
|
+
};
|
|
302
|
+
const newChapter = startRange.chapterId !== endRange.chapterId;
|
|
303
|
+
const newAudiofile = startRange.audiofile !== endRange.audiofile;
|
|
304
|
+
const count = newChapter ? chapterSentenceCounts[startRange.chapterId] - startRange.id - 1 : endRange.id - startRange.id - 1;
|
|
278
305
|
if (count === 0) {
|
|
279
|
-
interpolated.push(
|
|
306
|
+
interpolated.push(endRange);
|
|
280
307
|
continue;
|
|
281
308
|
}
|
|
282
|
-
|
|
283
|
-
let [diff, audiofile] = crossesAudioBoundary ? await getLargestGap(lastSentenceRange2, sentenceRange) : [sentenceRange.start - lastSentenceRange2.end, sentenceRange.audiofile];
|
|
309
|
+
let [diff, audiofile] = newAudiofile ? await getLargestGap(startRange, endRange) : [endRange.start - startRange.end, endRange.audiofile];
|
|
284
310
|
if (diff <= 0) {
|
|
285
|
-
if (
|
|
286
|
-
const rangeLength =
|
|
311
|
+
if (newAudiofile) {
|
|
312
|
+
const rangeLength = endRange.end - endRange.start;
|
|
287
313
|
diff = rangeLength < 0.5 ? rangeLength / 2 : 0.25;
|
|
288
|
-
|
|
314
|
+
endRange.start = diff;
|
|
289
315
|
} else {
|
|
290
316
|
diff = 0.25;
|
|
291
|
-
|
|
317
|
+
startRange.end = startRange.start - diff;
|
|
292
318
|
}
|
|
293
319
|
}
|
|
294
320
|
const interpolatedLength = diff / count;
|
|
295
|
-
const start =
|
|
296
|
-
for (let
|
|
321
|
+
const start = newAudiofile ? 0 : startRange.end;
|
|
322
|
+
for (let i2 = 0; i2 < count; i2++) {
|
|
323
|
+
let id = startRange.id + i2 + 1;
|
|
324
|
+
let chapterId = startRange.chapterId;
|
|
325
|
+
if (newChapter && i2 > chapterSentenceCounts[startRange.chapterId] - startRange.id) {
|
|
326
|
+
id = i2;
|
|
327
|
+
chapterId = endRange.chapterId;
|
|
328
|
+
}
|
|
297
329
|
interpolated.push({
|
|
298
|
-
id
|
|
299
|
-
|
|
300
|
-
|
|
330
|
+
id,
|
|
331
|
+
chapterId,
|
|
332
|
+
start: start + interpolatedLength * i2,
|
|
333
|
+
end: start + interpolatedLength * (i2 + 1),
|
|
301
334
|
audiofile
|
|
302
335
|
});
|
|
303
336
|
}
|
|
304
|
-
interpolated.push(
|
|
337
|
+
interpolated.push(endRange);
|
|
305
338
|
}
|
|
306
339
|
return interpolated;
|
|
307
340
|
}
|
|
@@ -319,6 +352,17 @@ function expandEmptySentenceRanges(sentenceRanges) {
|
|
|
319
352
|
}
|
|
320
353
|
return expandedRanges;
|
|
321
354
|
}
|
|
355
|
+
async function collapseSentenceRangeGaps(sentenceRanges) {
|
|
356
|
+
const collapsed = [];
|
|
357
|
+
for (const [i, sentenceRange] of enumerate(sentenceRanges)) {
|
|
358
|
+
const nextSentence = sentenceRanges[i + 1];
|
|
359
|
+
const prevSentence = sentenceRanges[i - 1];
|
|
360
|
+
const start = prevSentence?.audiofile !== sentenceRange.audiofile ? 0 : sentenceRange.start;
|
|
361
|
+
const end = nextSentence?.audiofile !== sentenceRange.audiofile ? await getTrackDuration(sentenceRange.audiofile) : nextSentence.start;
|
|
362
|
+
collapsed.push({ ...sentenceRange, start, end });
|
|
363
|
+
}
|
|
364
|
+
return collapsed;
|
|
365
|
+
}
|
|
322
366
|
function getChapterDuration(sentenceRanges) {
|
|
323
367
|
let i = 0;
|
|
324
368
|
let duration = 0;
|
|
@@ -339,9 +383,11 @@ function getChapterDuration(sentenceRanges) {
|
|
|
339
383
|
return duration;
|
|
340
384
|
}
|
|
341
385
|
export {
|
|
386
|
+
collapseSentenceRangeGaps,
|
|
342
387
|
expandEmptySentenceRanges,
|
|
343
388
|
findEndTimestamp,
|
|
344
389
|
getChapterDuration,
|
|
345
390
|
getSentenceRanges,
|
|
346
|
-
interpolateSentenceRanges
|
|
391
|
+
interpolateSentenceRanges,
|
|
392
|
+
mapTranscriptionTimeline
|
|
347
393
|
};
|
package/dist/index.d.cts
CHANGED
package/dist/index.d.ts
CHANGED
package/dist/markup/markup.cjs
CHANGED
|
@@ -104,6 +104,7 @@ async function markup(input, output, options) {
|
|
|
104
104
|
const { markedUp, timing: chapterTiming } = markupChapter(
|
|
105
105
|
chapterId,
|
|
106
106
|
chapterXml,
|
|
107
|
+
options.granularity ?? "sentence",
|
|
107
108
|
segmentation,
|
|
108
109
|
mapping
|
|
109
110
|
);
|
|
@@ -118,7 +119,7 @@ async function markup(input, output, options) {
|
|
|
118
119
|
__callDispose(_stack, _error, _hasError);
|
|
119
120
|
}
|
|
120
121
|
}
|
|
121
|
-
function markupChapter(chapterId, chapterXml, segmentation, mapping) {
|
|
122
|
+
function markupChapter(chapterId, chapterXml, granularity, segmentation, mapping) {
|
|
122
123
|
const timing = (0, import_ghost_story.createTiming)();
|
|
123
124
|
const html = import_epub.Epub.findXmlChildByName("html", chapterXml);
|
|
124
125
|
if (!html) throw new Error("Invalid XHTML document: no html element");
|
|
@@ -132,6 +133,22 @@ function markupChapter(chapterId, chapterXml, segmentation, mapping) {
|
|
|
132
133
|
let pos = 0;
|
|
133
134
|
let i = 0;
|
|
134
135
|
for (const sentence of segmentation) {
|
|
136
|
+
if (granularity === "word") {
|
|
137
|
+
let j = 0;
|
|
138
|
+
let wordPos = pos;
|
|
139
|
+
for (const word of sentence.words.entries) {
|
|
140
|
+
if (word.text.match(/\S/)) {
|
|
141
|
+
root = (0, import_transform.addMark)(
|
|
142
|
+
root,
|
|
143
|
+
mapping.invert().map(wordPos),
|
|
144
|
+
mapping.invert().map(wordPos + word.text.replace(/\n$/, "").length, -1),
|
|
145
|
+
new import_model.Mark("span", { id: `${chapterId}-s${i}-w${j}` })
|
|
146
|
+
);
|
|
147
|
+
j++;
|
|
148
|
+
}
|
|
149
|
+
wordPos += word.text.replace(/\n$/, "").length;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
135
152
|
if (sentence.text.match(/\S/)) {
|
|
136
153
|
root = (0, import_transform.addMark)(
|
|
137
154
|
root,
|
package/dist/markup/markup.d.cts
CHANGED
|
@@ -12,7 +12,7 @@ interface MarkupOptions {
|
|
|
12
12
|
logger?: Logger;
|
|
13
13
|
}
|
|
14
14
|
declare function markup(input: string, output: string, options: MarkupOptions): Promise<TimingAggregator>;
|
|
15
|
-
declare function markupChapter(chapterId: string, chapterXml: ParsedXml, segmentation: Sentence[], mapping: Mapping): {
|
|
15
|
+
declare function markupChapter(chapterId: string, chapterXml: ParsedXml, granularity: "word" | "sentence", segmentation: Sentence[], mapping: Mapping): {
|
|
16
16
|
markedUp: ParsedXml;
|
|
17
17
|
timing: _storyteller_platform_ghost_story.Timing;
|
|
18
18
|
};
|
package/dist/markup/markup.d.ts
CHANGED
|
@@ -12,7 +12,7 @@ interface MarkupOptions {
|
|
|
12
12
|
logger?: Logger;
|
|
13
13
|
}
|
|
14
14
|
declare function markup(input: string, output: string, options: MarkupOptions): Promise<TimingAggregator>;
|
|
15
|
-
declare function markupChapter(chapterId: string, chapterXml: ParsedXml, segmentation: Sentence[], mapping: Mapping): {
|
|
15
|
+
declare function markupChapter(chapterId: string, chapterXml: ParsedXml, granularity: "word" | "sentence", segmentation: Sentence[], mapping: Mapping): {
|
|
16
16
|
markedUp: ParsedXml;
|
|
17
17
|
timing: _storyteller_platform_ghost_story.Timing;
|
|
18
18
|
};
|
package/dist/markup/markup.js
CHANGED
|
@@ -42,6 +42,7 @@ async function markup(input, output, options) {
|
|
|
42
42
|
const { markedUp, timing: chapterTiming } = markupChapter(
|
|
43
43
|
chapterId,
|
|
44
44
|
chapterXml,
|
|
45
|
+
options.granularity ?? "sentence",
|
|
45
46
|
segmentation,
|
|
46
47
|
mapping
|
|
47
48
|
);
|
|
@@ -56,7 +57,7 @@ async function markup(input, output, options) {
|
|
|
56
57
|
__callDispose(_stack, _error, _hasError);
|
|
57
58
|
}
|
|
58
59
|
}
|
|
59
|
-
function markupChapter(chapterId, chapterXml, segmentation, mapping) {
|
|
60
|
+
function markupChapter(chapterId, chapterXml, granularity, segmentation, mapping) {
|
|
60
61
|
const timing = createTiming();
|
|
61
62
|
const html = Epub.findXmlChildByName("html", chapterXml);
|
|
62
63
|
if (!html) throw new Error("Invalid XHTML document: no html element");
|
|
@@ -70,6 +71,22 @@ function markupChapter(chapterId, chapterXml, segmentation, mapping) {
|
|
|
70
71
|
let pos = 0;
|
|
71
72
|
let i = 0;
|
|
72
73
|
for (const sentence of segmentation) {
|
|
74
|
+
if (granularity === "word") {
|
|
75
|
+
let j = 0;
|
|
76
|
+
let wordPos = pos;
|
|
77
|
+
for (const word of sentence.words.entries) {
|
|
78
|
+
if (word.text.match(/\S/)) {
|
|
79
|
+
root = addMark(
|
|
80
|
+
root,
|
|
81
|
+
mapping.invert().map(wordPos),
|
|
82
|
+
mapping.invert().map(wordPos + word.text.replace(/\n$/, "").length, -1),
|
|
83
|
+
new Mark("span", { id: `${chapterId}-s${i}-w${j}` })
|
|
84
|
+
);
|
|
85
|
+
j++;
|
|
86
|
+
}
|
|
87
|
+
wordPos += word.text.replace(/\n$/, "").length;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
73
90
|
if (sentence.text.match(/\S/)) {
|
|
74
91
|
root = addMark(
|
|
75
92
|
root,
|
|
@@ -18,70 +18,97 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
18
18
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
19
|
var serializeDom_exports = {};
|
|
20
20
|
__export(serializeDom_exports, {
|
|
21
|
-
serializeDom: () => serializeDom
|
|
22
|
-
serializeDomNode: () => serializeDomNode
|
|
21
|
+
serializeDom: () => serializeDom
|
|
23
22
|
});
|
|
24
23
|
module.exports = __toCommonJS(serializeDom_exports);
|
|
25
24
|
var import_epub = require("@storyteller-platform/epub");
|
|
26
25
|
var import_model = require("./model.cjs");
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
function serializeDomNode(node) {
|
|
31
|
-
if (node instanceof import_model.TextNode) {
|
|
32
|
-
return import_epub.Epub.createXmlTextNode(node.text);
|
|
26
|
+
class Serializer {
|
|
27
|
+
constructor(doc) {
|
|
28
|
+
this.doc = doc;
|
|
33
29
|
}
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
const partitioned = nodes.reduce((acc, child) => {
|
|
42
|
-
const lastPartition = acc.at(-1);
|
|
43
|
-
if (!lastPartition) {
|
|
44
|
-
return [[child]];
|
|
45
|
-
}
|
|
46
|
-
const lastChild = lastPartition.at(-1);
|
|
47
|
-
if (!lastChild) {
|
|
48
|
-
return [...acc.slice(0, acc.length), [child]];
|
|
30
|
+
serializedIds = /* @__PURE__ */ new Set();
|
|
31
|
+
serialize() {
|
|
32
|
+
return this.doc.children.map((child) => this.serializeDomNode(child));
|
|
33
|
+
}
|
|
34
|
+
serializeDomNode(node) {
|
|
35
|
+
if (node instanceof import_model.TextNode) {
|
|
36
|
+
return import_epub.Epub.createXmlTextNode(node.text);
|
|
49
37
|
}
|
|
50
|
-
|
|
51
|
-
const
|
|
52
|
-
if (
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
38
|
+
let attrs = node.attrs;
|
|
39
|
+
const id = node.attrs["id"];
|
|
40
|
+
if (id) {
|
|
41
|
+
if (this.serializedIds.has(id)) {
|
|
42
|
+
const { id: _id, ...remaining } = node.attrs;
|
|
43
|
+
attrs = remaining;
|
|
44
|
+
} else {
|
|
45
|
+
this.serializedIds.add(id);
|
|
46
|
+
}
|
|
57
47
|
}
|
|
58
|
-
return
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
48
|
+
return import_epub.Epub.createXmlElement(
|
|
49
|
+
node.tagName,
|
|
50
|
+
attrs,
|
|
51
|
+
this.serializeDomNodes(node.children)
|
|
52
|
+
);
|
|
63
53
|
}
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
54
|
+
serializeDomNodes(nodes) {
|
|
55
|
+
const partitioned = nodes.reduce((acc, child) => {
|
|
56
|
+
const lastPartition = acc.at(-1);
|
|
57
|
+
if (!lastPartition) {
|
|
58
|
+
return [[child]];
|
|
59
|
+
}
|
|
60
|
+
const lastChild = lastPartition.at(-1);
|
|
61
|
+
if (!lastChild) {
|
|
62
|
+
return [...acc.slice(0, acc.length), [child]];
|
|
63
|
+
}
|
|
64
|
+
const childFirstMark = child.marks[0];
|
|
65
|
+
const lastChildFirstMark = lastChild.marks[0];
|
|
66
|
+
if (childFirstMark === lastChildFirstMark || childFirstMark?.eq(lastChildFirstMark)) {
|
|
67
|
+
return [
|
|
68
|
+
...acc.slice(0, acc.length - 1),
|
|
69
|
+
[...lastPartition.slice(0, lastPartition.length), child]
|
|
70
|
+
];
|
|
71
|
+
}
|
|
72
|
+
return [...acc, [child]];
|
|
73
|
+
}, []);
|
|
74
|
+
const xmlChildren = [];
|
|
75
|
+
for (const partition of partitioned) {
|
|
76
|
+
xmlChildren.push(...this.serializePartition(partition));
|
|
77
|
+
}
|
|
78
|
+
return xmlChildren;
|
|
72
79
|
}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
80
|
+
serializePartition(nodes) {
|
|
81
|
+
const firstChild = nodes[0];
|
|
82
|
+
if (!firstChild) return [];
|
|
83
|
+
const firstMark = firstChild.marks[0];
|
|
84
|
+
if (!firstMark) {
|
|
85
|
+
return nodes.map((child) => this.serializeDomNode(child));
|
|
86
|
+
}
|
|
87
|
+
let attrs = firstMark.attrs;
|
|
88
|
+
const id = firstMark.attrs["id"];
|
|
89
|
+
if (id) {
|
|
90
|
+
if (this.serializedIds.has(id)) {
|
|
91
|
+
const { id: _id, ...remaining } = firstMark.attrs;
|
|
92
|
+
attrs = remaining;
|
|
93
|
+
} else {
|
|
94
|
+
this.serializedIds.add(id);
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
return [
|
|
98
|
+
import_epub.Epub.createXmlElement(
|
|
99
|
+
firstMark.tagName,
|
|
100
|
+
attrs,
|
|
101
|
+
this.serializeDomNodes(
|
|
102
|
+
nodes.map((node) => node.copy({ marks: node.marks.slice(1) }))
|
|
103
|
+
)
|
|
79
104
|
)
|
|
80
|
-
|
|
81
|
-
|
|
105
|
+
];
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
function serializeDom(doc) {
|
|
109
|
+
return new Serializer(doc).serialize();
|
|
82
110
|
}
|
|
83
111
|
// Annotate the CommonJS export names for ESM import in node:
|
|
84
112
|
0 && (module.exports = {
|
|
85
|
-
serializeDom
|
|
86
|
-
serializeDomNode
|
|
113
|
+
serializeDom
|
|
87
114
|
});
|
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import { ParsedXml
|
|
2
|
-
import { Root
|
|
1
|
+
import { ParsedXml } from '@storyteller-platform/epub';
|
|
2
|
+
import { Root } from './model.cjs';
|
|
3
3
|
|
|
4
4
|
declare function serializeDom(doc: Root): ParsedXml;
|
|
5
|
-
declare function serializeDomNode(node: Node | TextNode): XmlNode;
|
|
6
5
|
|
|
7
|
-
export { serializeDom
|
|
6
|
+
export { serializeDom };
|
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import { ParsedXml
|
|
2
|
-
import { Root
|
|
1
|
+
import { ParsedXml } from '@storyteller-platform/epub';
|
|
2
|
+
import { Root } from './model.js';
|
|
3
3
|
|
|
4
4
|
declare function serializeDom(doc: Root): ParsedXml;
|
|
5
|
-
declare function serializeDomNode(node: Node | TextNode): XmlNode;
|
|
6
5
|
|
|
7
|
-
export { serializeDom
|
|
6
|
+
export { serializeDom };
|