@storyteller-platform/align 0.1.25 → 0.1.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. package/dist/align/align.cjs +21 -9
  2. package/dist/align/align.js +22 -11
  3. package/dist/align/getSentenceRanges.cjs +21 -60
  4. package/dist/align/getSentenceRanges.d.cts +1 -2
  5. package/dist/align/getSentenceRanges.d.ts +1 -2
  6. package/dist/align/getSentenceRanges.js +21 -59
  7. package/dist/align/interpolateSentenceRanges.cjs +124 -0
  8. package/dist/align/interpolateSentenceRanges.d.cts +23 -0
  9. package/dist/align/interpolateSentenceRanges.d.ts +23 -0
  10. package/dist/align/interpolateSentenceRanges.js +101 -0
  11. package/dist/align/search.cjs +18 -7
  12. package/dist/align/search.js +18 -7
  13. package/dist/index.d.cts +1 -2
  14. package/dist/index.d.ts +1 -2
  15. package/dist/markup/markup.cjs +21 -14
  16. package/dist/markup/markup.d.cts +2 -4
  17. package/dist/markup/markup.d.ts +2 -4
  18. package/dist/markup/markup.js +28 -16
  19. package/dist/markup/model.cjs +138 -5
  20. package/dist/markup/model.d.cts +2 -57
  21. package/dist/markup/model.d.ts +2 -57
  22. package/dist/markup/model.js +136 -5
  23. package/dist/markup/parseDom.cjs +80 -25
  24. package/dist/markup/parseDom.d.cts +4 -4
  25. package/dist/markup/parseDom.d.ts +4 -4
  26. package/dist/markup/parseDom.js +87 -24
  27. package/dist/markup/resolvedPos.cjs +85 -0
  28. package/dist/markup/resolvedPos.d.cts +2 -0
  29. package/dist/markup/resolvedPos.d.ts +2 -0
  30. package/dist/markup/resolvedPos.js +62 -0
  31. package/dist/markup/segmentation.cjs +4 -8
  32. package/dist/markup/segmentation.d.cts +3 -8
  33. package/dist/markup/segmentation.d.ts +3 -8
  34. package/dist/markup/segmentation.js +3 -7
  35. package/dist/markup/serializeDom.d.cts +1 -1
  36. package/dist/markup/serializeDom.d.ts +1 -1
  37. package/dist/markup/transform.cjs +59 -2
  38. package/dist/markup/transform.d.cts +8 -2
  39. package/dist/markup/transform.d.ts +8 -2
  40. package/dist/markup/transform.js +58 -1
  41. package/dist/model-Bv3yPEdd.d.cts +96 -0
  42. package/dist/model-Bv3yPEdd.d.ts +96 -0
  43. package/dist/snapshot/snapshot.cjs +8 -6
  44. package/dist/snapshot/snapshot.js +9 -7
  45. package/package.json +2 -1
@@ -87,8 +87,11 @@ var import_audiobook = require("@storyteller-platform/audiobook");
87
87
  var import_epub = require("@storyteller-platform/epub");
88
88
  var import_ghost_story = require("@storyteller-platform/ghost-story");
89
89
  var import_ffmpeg = require("../common/ffmpeg.cjs");
90
+ var import_parseDom = require("../markup/parseDom.cjs");
90
91
  var import_segmentation = require("../markup/segmentation.cjs");
92
+ var import_transform = require("../markup/transform.cjs");
91
93
  var import_getSentenceRanges = require("./getSentenceRanges.cjs");
94
+ var import_interpolateSentenceRanges = require("./interpolateSentenceRanges.cjs");
92
95
  var import_search = require("./search.cjs");
93
96
  var import_slugify = require("./slugify.cjs");
94
97
  var import_textFragments = require("./textFragments.cjs");
@@ -172,12 +175,12 @@ class Aligner {
172
175
  };
173
176
  async getChapterSentences(chapterId) {
174
177
  const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
175
- const { result: segmentation } = await (0, import_segmentation.getXhtmlSegmentation)(
176
- import_epub.Epub.getXhtmlBody(chapterXml),
177
- {
178
- primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
179
- }
180
- );
178
+ const original = (0, import_parseDom.parseDom)(import_epub.Epub.getXhtmlBody(chapterXml));
179
+ const inlined = (0, import_transform.inlineFootnotes)(original);
180
+ const lifted = (0, import_transform.liftText)(inlined.root);
181
+ const segmentation = await (0, import_segmentation.segmentChapter)(lifted.result, {
182
+ primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
183
+ });
181
184
  return segmentation.filter((s) => s.text.match(/\S/));
182
185
  }
183
186
  async writeAlignedChapter(alignedChapter) {
@@ -505,16 +508,25 @@ class Aligner {
505
508
  });
506
509
  const sentenceRanges = [];
507
510
  const chapterSentenceCounts = {};
511
+ const audioFileDurations = {};
508
512
  for (const alignedChapter of audioOrderedChapters) {
509
513
  sentenceRanges.push(...alignedChapter.sentenceRanges);
514
+ for (const sentenceRange of sentenceRanges) {
515
+ if (!(sentenceRange.audiofile in audioFileDurations)) {
516
+ audioFileDurations[sentenceRange.audiofile] = await (0, import_ffmpeg.getTrackDuration)(
517
+ sentenceRange.audiofile
518
+ );
519
+ }
520
+ }
510
521
  const sentences = await this.getChapterSentences(
511
522
  alignedChapter.chapter.id
512
523
  );
513
524
  chapterSentenceCounts[alignedChapter.chapter.id] = sentences.length;
514
525
  }
515
- const interpolated = await (0, import_getSentenceRanges.interpolateSentenceRanges)(
526
+ const interpolated = (0, import_interpolateSentenceRanges.interpolateSentenceRanges)(
516
527
  sentenceRanges,
517
- chapterSentenceCounts
528
+ chapterSentenceCounts,
529
+ audioFileDurations
518
530
  );
519
531
  const expanded = (0, import_getSentenceRanges.expandEmptySentenceRanges)(interpolated);
520
532
  const collapsed = await (0, import_getSentenceRanges.collapseSentenceRangeGaps)(expanded);
@@ -525,7 +537,7 @@ class Aligner {
525
537
  );
526
538
  const finalSentenceRanges = collapsed.slice(
527
539
  collapsedStart,
528
- collapsedStart + sentences.length - 1
540
+ collapsedStart + sentences.length
529
541
  );
530
542
  alignedChapter.sentenceRanges = finalSentenceRanges;
531
543
  for (const [i, wordRanges] of (0, import_itertools.enumerate)(alignedChapter.wordRanges)) {
@@ -16,15 +16,17 @@ import {
16
16
  createTiming
17
17
  } from "@storyteller-platform/ghost-story";
18
18
  import { getTrackDuration } from "../common/ffmpeg.js";
19
- import { getXhtmlSegmentation } from "../markup/segmentation.js";
19
+ import { parseDom } from "../markup/parseDom.js";
20
+ import { segmentChapter } from "../markup/segmentation.js";
21
+ import { inlineFootnotes, liftText } from "../markup/transform.js";
20
22
  import {
21
23
  collapseSentenceRangeGaps,
22
24
  expandEmptySentenceRanges,
23
25
  getChapterDuration,
24
26
  getSentenceRanges,
25
- interpolateSentenceRanges,
26
27
  mapTranscriptionTimeline
27
28
  } from "./getSentenceRanges.js";
29
+ import { interpolateSentenceRanges } from "./interpolateSentenceRanges.js";
28
30
  import { findBoundaries } from "./search.js";
29
31
  import { slugify } from "./slugify.js";
30
32
  import { TextFragmentTrie } from "./textFragments.js";
@@ -108,12 +110,12 @@ class Aligner {
108
110
  };
109
111
  async getChapterSentences(chapterId) {
110
112
  const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
111
- const { result: segmentation } = await getXhtmlSegmentation(
112
- Epub.getXhtmlBody(chapterXml),
113
- {
114
- primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
115
- }
116
- );
113
+ const original = parseDom(Epub.getXhtmlBody(chapterXml));
114
+ const inlined = inlineFootnotes(original);
115
+ const lifted = liftText(inlined.root);
116
+ const segmentation = await segmentChapter(lifted.result, {
117
+ primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
118
+ });
117
119
  return segmentation.filter((s) => s.text.match(/\S/));
118
120
  }
119
121
  async writeAlignedChapter(alignedChapter) {
@@ -441,16 +443,25 @@ class Aligner {
441
443
  });
442
444
  const sentenceRanges = [];
443
445
  const chapterSentenceCounts = {};
446
+ const audioFileDurations = {};
444
447
  for (const alignedChapter of audioOrderedChapters) {
445
448
  sentenceRanges.push(...alignedChapter.sentenceRanges);
449
+ for (const sentenceRange of sentenceRanges) {
450
+ if (!(sentenceRange.audiofile in audioFileDurations)) {
451
+ audioFileDurations[sentenceRange.audiofile] = await getTrackDuration(
452
+ sentenceRange.audiofile
453
+ );
454
+ }
455
+ }
446
456
  const sentences = await this.getChapterSentences(
447
457
  alignedChapter.chapter.id
448
458
  );
449
459
  chapterSentenceCounts[alignedChapter.chapter.id] = sentences.length;
450
460
  }
451
- const interpolated = await interpolateSentenceRanges(
461
+ const interpolated = interpolateSentenceRanges(
452
462
  sentenceRanges,
453
- chapterSentenceCounts
463
+ chapterSentenceCounts,
464
+ audioFileDurations
454
465
  );
455
466
  const expanded = expandEmptySentenceRanges(interpolated);
456
467
  const collapsed = await collapseSentenceRangeGaps(expanded);
@@ -461,7 +472,7 @@ class Aligner {
461
472
  );
462
473
  const finalSentenceRanges = collapsed.slice(
463
474
  collapsedStart,
464
- collapsedStart + sentences.length - 1
475
+ collapsedStart + sentences.length
465
476
  );
466
477
  alignedChapter.sentenceRanges = finalSentenceRanges;
467
478
  for (const [i, wordRanges] of enumerate(alignedChapter.wordRanges)) {
@@ -23,10 +23,10 @@ __export(getSentenceRanges_exports, {
23
23
  findEndTimestamp: () => findEndTimestamp,
24
24
  getChapterDuration: () => getChapterDuration,
25
25
  getSentenceRanges: () => getSentenceRanges,
26
- interpolateSentenceRanges: () => interpolateSentenceRanges,
27
26
  mapTranscriptionTimeline: () => mapTranscriptionTimeline
28
27
  });
29
28
  module.exports = __toCommonJS(getSentenceRanges_exports);
29
+ var import_fastest_levenshtein = require("fastest-levenshtein");
30
30
  var import_itertools = require("itertools");
31
31
  var import_runes2 = require("runes2");
32
32
  var import_ffmpeg = require("../common/ffmpeg.cjs");
@@ -67,6 +67,9 @@ function getAlignmentsForSentence(sentence, alignments) {
67
67
  if (alignment.opType === "DELETE" || alignment.opType === "INSERT" && sentenceIndex > 0) {
68
68
  score -= (alignment.ref ?? alignment.hyp).length + 1;
69
69
  }
70
+ if (alignment.opType === "SUBSTITUTE") {
71
+ score -= (0, import_fastest_levenshtein.distance)(alignment.ref, alignment.hyp);
72
+ }
70
73
  result.push(alignment);
71
74
  }
72
75
  return {
@@ -211,6 +214,8 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
211
214
  }
212
215
  let alignmentIndex = 0;
213
216
  let currentTranscriptWindowIndex = 0;
217
+ let lastGoodSentenceIndex = slice[0] - 1;
218
+ let lastGoodTranscriptWindowIndex = -1;
214
219
  for (const [j, slugifiedSentence] of (0, import_itertools.enumerate)(
215
220
  slugifiedChapterSentenceWindowList
216
221
  )) {
@@ -221,6 +226,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
221
226
  );
222
227
  const sentenceLengthInSlugifiedTranscript = sentenceAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
223
228
  if (score > 0) {
229
+ lastGoodSentenceIndex = j + slice[0];
224
230
  const start = findStartTimestamp(
225
231
  chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
226
232
  mappedTimeline
@@ -296,9 +302,21 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
296
302
  if (slugifiedChapterTranscriptWindow[currentTranscriptWindowIndex] === "-") {
297
303
  currentTranscriptWindowIndex++;
298
304
  }
305
+ if (score > 0) {
306
+ lastGoodTranscriptWindowIndex = currentTranscriptWindowIndex;
307
+ }
308
+ }
309
+ if (lastGoodSentenceIndex === -1) {
310
+ return {
311
+ sentenceRanges,
312
+ wordRanges,
313
+ transcriptionOffset: chapterTranscriptEndIndex,
314
+ firstFoundSentence,
315
+ lastFoundSentence: chapterSentenceIndex - 1
316
+ };
299
317
  }
300
- chapterSentenceIndex = i;
301
- slugifiedChapterTranscriptWindowStartIndex += currentTranscriptWindowIndex;
318
+ chapterSentenceIndex += lastGoodSentenceIndex + 1;
319
+ slugifiedChapterTranscriptWindowStartIndex += lastGoodTranscriptWindowIndex;
302
320
  if (slugifiedChapterTranscript[slugifiedChapterTranscriptWindowStartIndex] === "-") {
303
321
  slugifiedChapterTranscriptWindowStartIndex++;
304
322
  }
@@ -311,62 +329,6 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
311
329
  lastFoundSentence
312
330
  };
313
331
  }
314
- async function getLargestGap(trailing, leading) {
315
- const leadingGap = leading.start;
316
- const trailingGap = await (0, import_ffmpeg.getTrackDuration)(trailing.audiofile) - trailing.end;
317
- if (trailingGap > leadingGap) return [trailingGap, trailing.audiofile];
318
- return [leadingGap, leading.audiofile];
319
- }
320
- async function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts) {
321
- const interpolated = [];
322
- for (let i = 0; i < sentenceRanges.length; i++) {
323
- const endRange = sentenceRanges[i];
324
- const startRange = sentenceRanges[i - 1] ?? {
325
- id: 0,
326
- audiofile: endRange.audiofile,
327
- chapterId: endRange.chapterId,
328
- start: 0,
329
- end: 0
330
- };
331
- const newChapter = startRange.chapterId !== endRange.chapterId;
332
- const newAudiofile = startRange.audiofile !== endRange.audiofile;
333
- const count = newChapter ? chapterSentenceCounts[startRange.chapterId] - startRange.id - 1 : endRange.id - startRange.id - 1;
334
- if (count === 0) {
335
- interpolated.push(endRange);
336
- continue;
337
- }
338
- let [diff, audiofile] = newAudiofile ? await getLargestGap(startRange, endRange) : [endRange.start - startRange.end, endRange.audiofile];
339
- if (diff <= 0) {
340
- if (newAudiofile) {
341
- const rangeLength = endRange.end - endRange.start;
342
- diff = rangeLength < 0.5 ? rangeLength / 2 : 0.25;
343
- endRange.start = diff;
344
- } else {
345
- diff = 0.25;
346
- startRange.end = startRange.start - diff;
347
- }
348
- }
349
- const interpolatedLength = diff / count;
350
- const start = newAudiofile ? 0 : startRange.end;
351
- for (let i2 = 0; i2 < count; i2++) {
352
- let id = startRange.id + i2 + 1;
353
- let chapterId = startRange.chapterId;
354
- if (newChapter && i2 > chapterSentenceCounts[startRange.chapterId] - startRange.id) {
355
- id = i2;
356
- chapterId = endRange.chapterId;
357
- }
358
- interpolated.push({
359
- id,
360
- chapterId,
361
- start: start + interpolatedLength * i2,
362
- end: start + interpolatedLength * (i2 + 1),
363
- audiofile
364
- });
365
- }
366
- interpolated.push(endRange);
367
- }
368
- return interpolated;
369
- }
370
332
  function expandEmptySentenceRanges(sentenceRanges) {
371
333
  const expandedRanges = [];
372
334
  for (const sentenceRange of sentenceRanges) {
@@ -418,6 +380,5 @@ function getChapterDuration(sentenceRanges) {
418
380
  findEndTimestamp,
419
381
  getChapterDuration,
420
382
  getSentenceRanges,
421
- interpolateSentenceRanges,
422
383
  mapTranscriptionTimeline
423
384
  });
@@ -54,7 +54,6 @@ declare function getSentenceRanges(transcriptionText: string, mappedTimeline: Ma
54
54
  firstFoundSentence: number;
55
55
  lastFoundSentence: number;
56
56
  }>;
57
- declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>): Promise<SentenceRange[]>;
58
57
  /**
59
58
  * Whisper sometimes provides words with no time information,
60
59
  * or start and end timestamps that are equal. EpubCheck complains
@@ -65,4 +64,4 @@ declare function expandEmptySentenceRanges<Range extends SentenceRange | WordRan
65
64
  declare function collapseSentenceRangeGaps(sentenceRanges: SentenceRange[]): Promise<SentenceRange[]>;
66
65
  declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;
67
66
 
68
- export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges, mapTranscriptionTimeline };
67
+ export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, mapTranscriptionTimeline };
@@ -54,7 +54,6 @@ declare function getSentenceRanges(transcriptionText: string, mappedTimeline: Ma
54
54
  firstFoundSentence: number;
55
55
  lastFoundSentence: number;
56
56
  }>;
57
- declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>): Promise<SentenceRange[]>;
58
57
  /**
59
58
  * Whisper sometimes provides words with no time information,
60
59
  * or start and end timestamps that are equal. EpubCheck complains
@@ -65,4 +64,4 @@ declare function expandEmptySentenceRanges<Range extends SentenceRange | WordRan
65
64
  declare function collapseSentenceRangeGaps(sentenceRanges: SentenceRange[]): Promise<SentenceRange[]>;
66
65
  declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;
67
66
 
68
- export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges, mapTranscriptionTimeline };
67
+ export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, mapTranscriptionTimeline };
@@ -1,4 +1,5 @@
1
1
  import "../chunk-BIEQXUOY.js";
2
+ import { distance } from "fastest-levenshtein";
2
3
  import { enumerate } from "itertools";
3
4
  import { runes } from "runes2";
4
5
  import { getTrackDuration } from "../common/ffmpeg.js";
@@ -39,6 +40,9 @@ function getAlignmentsForSentence(sentence, alignments) {
39
40
  if (alignment.opType === "DELETE" || alignment.opType === "INSERT" && sentenceIndex > 0) {
40
41
  score -= (alignment.ref ?? alignment.hyp).length + 1;
41
42
  }
43
+ if (alignment.opType === "SUBSTITUTE") {
44
+ score -= distance(alignment.ref, alignment.hyp);
45
+ }
42
46
  result.push(alignment);
43
47
  }
44
48
  return {
@@ -183,6 +187,8 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
183
187
  }
184
188
  let alignmentIndex = 0;
185
189
  let currentTranscriptWindowIndex = 0;
190
+ let lastGoodSentenceIndex = slice[0] - 1;
191
+ let lastGoodTranscriptWindowIndex = -1;
186
192
  for (const [j, slugifiedSentence] of enumerate(
187
193
  slugifiedChapterSentenceWindowList
188
194
  )) {
@@ -193,6 +199,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
193
199
  );
194
200
  const sentenceLengthInSlugifiedTranscript = sentenceAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
195
201
  if (score > 0) {
202
+ lastGoodSentenceIndex = j + slice[0];
196
203
  const start = findStartTimestamp(
197
204
  chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
198
205
  mappedTimeline
@@ -268,9 +275,21 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
268
275
  if (slugifiedChapterTranscriptWindow[currentTranscriptWindowIndex] === "-") {
269
276
  currentTranscriptWindowIndex++;
270
277
  }
278
+ if (score > 0) {
279
+ lastGoodTranscriptWindowIndex = currentTranscriptWindowIndex;
280
+ }
281
+ }
282
+ if (lastGoodSentenceIndex === -1) {
283
+ return {
284
+ sentenceRanges,
285
+ wordRanges,
286
+ transcriptionOffset: chapterTranscriptEndIndex,
287
+ firstFoundSentence,
288
+ lastFoundSentence: chapterSentenceIndex - 1
289
+ };
271
290
  }
272
- chapterSentenceIndex = i;
273
- slugifiedChapterTranscriptWindowStartIndex += currentTranscriptWindowIndex;
291
+ chapterSentenceIndex += lastGoodSentenceIndex + 1;
292
+ slugifiedChapterTranscriptWindowStartIndex += lastGoodTranscriptWindowIndex;
274
293
  if (slugifiedChapterTranscript[slugifiedChapterTranscriptWindowStartIndex] === "-") {
275
294
  slugifiedChapterTranscriptWindowStartIndex++;
276
295
  }
@@ -283,62 +302,6 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
283
302
  lastFoundSentence
284
303
  };
285
304
  }
286
- async function getLargestGap(trailing, leading) {
287
- const leadingGap = leading.start;
288
- const trailingGap = await getTrackDuration(trailing.audiofile) - trailing.end;
289
- if (trailingGap > leadingGap) return [trailingGap, trailing.audiofile];
290
- return [leadingGap, leading.audiofile];
291
- }
292
- async function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts) {
293
- const interpolated = [];
294
- for (let i = 0; i < sentenceRanges.length; i++) {
295
- const endRange = sentenceRanges[i];
296
- const startRange = sentenceRanges[i - 1] ?? {
297
- id: 0,
298
- audiofile: endRange.audiofile,
299
- chapterId: endRange.chapterId,
300
- start: 0,
301
- end: 0
302
- };
303
- const newChapter = startRange.chapterId !== endRange.chapterId;
304
- const newAudiofile = startRange.audiofile !== endRange.audiofile;
305
- const count = newChapter ? chapterSentenceCounts[startRange.chapterId] - startRange.id - 1 : endRange.id - startRange.id - 1;
306
- if (count === 0) {
307
- interpolated.push(endRange);
308
- continue;
309
- }
310
- let [diff, audiofile] = newAudiofile ? await getLargestGap(startRange, endRange) : [endRange.start - startRange.end, endRange.audiofile];
311
- if (diff <= 0) {
312
- if (newAudiofile) {
313
- const rangeLength = endRange.end - endRange.start;
314
- diff = rangeLength < 0.5 ? rangeLength / 2 : 0.25;
315
- endRange.start = diff;
316
- } else {
317
- diff = 0.25;
318
- startRange.end = startRange.start - diff;
319
- }
320
- }
321
- const interpolatedLength = diff / count;
322
- const start = newAudiofile ? 0 : startRange.end;
323
- for (let i2 = 0; i2 < count; i2++) {
324
- let id = startRange.id + i2 + 1;
325
- let chapterId = startRange.chapterId;
326
- if (newChapter && i2 > chapterSentenceCounts[startRange.chapterId] - startRange.id) {
327
- id = i2;
328
- chapterId = endRange.chapterId;
329
- }
330
- interpolated.push({
331
- id,
332
- chapterId,
333
- start: start + interpolatedLength * i2,
334
- end: start + interpolatedLength * (i2 + 1),
335
- audiofile
336
- });
337
- }
338
- interpolated.push(endRange);
339
- }
340
- return interpolated;
341
- }
342
305
  function expandEmptySentenceRanges(sentenceRanges) {
343
306
  const expandedRanges = [];
344
307
  for (const sentenceRange of sentenceRanges) {
@@ -389,6 +352,5 @@ export {
389
352
  findEndTimestamp,
390
353
  getChapterDuration,
391
354
  getSentenceRanges,
392
- interpolateSentenceRanges,
393
355
  mapTranscriptionTimeline
394
356
  };
@@ -0,0 +1,124 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var interpolateSentenceRanges_exports = {};
20
+ __export(interpolateSentenceRanges_exports, {
21
+ interpolateSentenceRanges: () => interpolateSentenceRanges
22
+ });
23
+ module.exports = __toCommonJS(interpolateSentenceRanges_exports);
24
+ function buildGapRanges(slots, left, right, audioFileDurations) {
25
+ const n = slots.length;
26
+ if (n === 0) return [];
27
+ if (left.audiofile === right.audiofile) {
28
+ const span = right.time - left.time;
29
+ return slots.map((slot, i) => ({
30
+ ...slot,
31
+ audiofile: left.audiofile,
32
+ start: left.time + span * i / n,
33
+ end: left.time + span * (i + 1) / n
34
+ }));
35
+ }
36
+ const leftDuration = audioFileDurations[left.audiofile] ?? left.time;
37
+ const leftAvail = leftDuration - left.time;
38
+ const rightAvail = right.time;
39
+ const total = leftAvail + rightAvail;
40
+ let n1 = total > 0 ? Math.round(n * (leftAvail / total)) : n;
41
+ let n2 = n - n1;
42
+ n1 = Math.max(0, n1);
43
+ n2 = n - n1;
44
+ const result = [];
45
+ if (n1 > 0) {
46
+ for (let i = 0; i < n1; i++) {
47
+ result.push({
48
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
49
+ ...slots[i],
50
+ audiofile: left.audiofile,
51
+ start: left.time + leftAvail * i / n1,
52
+ end: left.time + leftAvail * (i + 1) / n1
53
+ });
54
+ }
55
+ }
56
+ if (n2 > 0) {
57
+ for (let i = 0; i < n2; i++) {
58
+ result.push({
59
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
60
+ ...slots[n1 + i],
61
+ audiofile: right.audiofile,
62
+ start: rightAvail * i / n2,
63
+ end: rightAvail * (i + 1) / n2
64
+ });
65
+ }
66
+ }
67
+ return result;
68
+ }
69
+ function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts, audioFileDurations) {
70
+ if (sentenceRanges.length === 0) return [];
71
+ const result = [];
72
+ const first = sentenceRanges[0];
73
+ if (first.id > 0) {
74
+ const slots = Array.from({ length: first.id }, (_, i) => ({
75
+ chapterId: first.chapterId,
76
+ id: i
77
+ }));
78
+ const left = { time: 0, audiofile: first.audiofile };
79
+ const right = { time: first.start, audiofile: first.audiofile };
80
+ result.push(...buildGapRanges(slots, left, right, audioFileDurations));
81
+ }
82
+ result.push(first);
83
+ for (let idx = 1; idx < sentenceRanges.length; idx++) {
84
+ const prev = sentenceRanges[idx - 1];
85
+ const curr = sentenceRanges[idx];
86
+ const left = { time: prev.end, audiofile: prev.audiofile };
87
+ const right = { time: curr.start, audiofile: curr.audiofile };
88
+ const gapSlots = [];
89
+ if (prev.chapterId === curr.chapterId) {
90
+ for (let id = prev.id + 1; id < curr.id; id++) {
91
+ gapSlots.push({ chapterId: prev.chapterId, id });
92
+ }
93
+ } else {
94
+ const prevTotal = chapterSentenceCounts[prev.chapterId] ?? prev.id + 1;
95
+ for (let id = prev.id + 1; id < prevTotal; id++) {
96
+ gapSlots.push({ chapterId: prev.chapterId, id });
97
+ }
98
+ for (let id = 0; id < curr.id; id++) {
99
+ gapSlots.push({ chapterId: curr.chapterId, id });
100
+ }
101
+ }
102
+ if (gapSlots.length > 0) {
103
+ result.push(...buildGapRanges(gapSlots, left, right, audioFileDurations));
104
+ }
105
+ result.push(curr);
106
+ }
107
+ const last = sentenceRanges[sentenceRanges.length - 1];
108
+ const lastTotal = chapterSentenceCounts[last.chapterId] ?? last.id + 1;
109
+ if (last.id < lastTotal - 1) {
110
+ const slots = Array.from(
111
+ { length: lastTotal - 1 - last.id },
112
+ (_, i) => ({ chapterId: last.chapterId, id: last.id + 1 + i })
113
+ );
114
+ const fileEnd = audioFileDurations[last.audiofile] ?? last.end;
115
+ const left = { time: last.end, audiofile: last.audiofile };
116
+ const right = { time: fileEnd, audiofile: last.audiofile };
117
+ result.push(...buildGapRanges(slots, left, right, audioFileDurations));
118
+ }
119
+ return result;
120
+ }
121
+ // Annotate the CommonJS export names for ESM import in node:
122
+ 0 && (module.exports = {
123
+ interpolateSentenceRanges
124
+ });
@@ -0,0 +1,23 @@
1
+ import { SentenceRange } from './getSentenceRanges.cjs';
2
+ import '@storyteller-platform/ghost-story';
3
+ import '@echogarden/text-segmentation';
4
+ import '@storyteller-platform/transliteration';
5
+
6
+ /**
7
+ * Given a sequence of sentence ranges from an entire book,
8
+ * ordered by occurrence in audio, interpolates sentence ranges
9
+ * to fill any gaps.
10
+ *
11
+ * A gap may be:
12
+ * - A non-linearity between two sequential sentence ranges
13
+ * in the same chapter, e.g. chapter001#0 -> chapter001#3
14
+ * - A chapter whose sentence ranges start at a number greater
15
+ * than 0, e.g. chapter001#330 -> chapter002#2
16
+ * - A chapter whose sentence ranges end at a number lower
17
+ * than the total number of sentences in that chapter,
18
+ * e.g. chapter001#325 -> chapter002#0, where
19
+ * chapterSentenceCounts["chapter001"] === 330
20
+ */
21
+ declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>, audioFileDurations: Record<string, number>): SentenceRange[];
22
+
23
+ export { interpolateSentenceRanges };
@@ -0,0 +1,23 @@
1
+ import { SentenceRange } from './getSentenceRanges.js';
2
+ import '@storyteller-platform/ghost-story';
3
+ import '@echogarden/text-segmentation';
4
+ import '@storyteller-platform/transliteration';
5
+
6
+ /**
7
+ * Given a sequence of sentence ranges from an entire book,
8
+ * ordered by occurrence in audio, interpolates sentence ranges
9
+ * to fill any gaps.
10
+ *
11
+ * A gap may be:
12
+ * - A non-linearity between two sequential sentence ranges
13
+ * in the same chapter, e.g. chapter001#0 -> chapter001#3
14
+ * - A chapter whose sentence ranges start at a number greater
15
+ * than 0, e.g. chapter001#330 -> chapter002#2
16
+ * - A chapter whose sentence ranges end at a number lower
17
+ * than the total number of sentences in that chapter,
18
+ * e.g. chapter001#325 -> chapter002#0, where
19
+ * chapterSentenceCounts["chapter001"] === 330
20
+ */
21
+ declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>, audioFileDurations: Record<string, number>): SentenceRange[];
22
+
23
+ export { interpolateSentenceRanges };