@storyteller-platform/align 0.1.24 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/align/align.cjs +21 -9
  2. package/dist/align/align.js +22 -11
  3. package/dist/align/getSentenceRanges.cjs +0 -58
  4. package/dist/align/getSentenceRanges.d.cts +1 -2
  5. package/dist/align/getSentenceRanges.d.ts +1 -2
  6. package/dist/align/getSentenceRanges.js +0 -57
  7. package/dist/align/interpolateSentenceRanges.cjs +124 -0
  8. package/dist/align/interpolateSentenceRanges.d.cts +23 -0
  9. package/dist/align/interpolateSentenceRanges.d.ts +23 -0
  10. package/dist/align/interpolateSentenceRanges.js +101 -0
  11. package/dist/align/search.cjs +18 -7
  12. package/dist/align/search.js +18 -7
  13. package/dist/align/slugify.cjs +31 -23
  14. package/dist/align/slugify.js +31 -23
  15. package/dist/index.d.cts +1 -2
  16. package/dist/index.d.ts +1 -2
  17. package/dist/markup/markup.cjs +21 -14
  18. package/dist/markup/markup.d.cts +2 -4
  19. package/dist/markup/markup.d.ts +2 -4
  20. package/dist/markup/markup.js +28 -16
  21. package/dist/markup/model.cjs +138 -5
  22. package/dist/markup/model.d.cts +2 -57
  23. package/dist/markup/model.d.ts +2 -57
  24. package/dist/markup/model.js +136 -5
  25. package/dist/markup/parseDom.cjs +80 -25
  26. package/dist/markup/parseDom.d.cts +4 -4
  27. package/dist/markup/parseDom.d.ts +4 -4
  28. package/dist/markup/parseDom.js +87 -24
  29. package/dist/markup/resolvedPos.cjs +85 -0
  30. package/dist/markup/resolvedPos.d.cts +2 -0
  31. package/dist/markup/resolvedPos.d.ts +2 -0
  32. package/dist/markup/resolvedPos.js +62 -0
  33. package/dist/markup/segmentation.cjs +4 -8
  34. package/dist/markup/segmentation.d.cts +3 -8
  35. package/dist/markup/segmentation.d.ts +3 -8
  36. package/dist/markup/segmentation.js +3 -7
  37. package/dist/markup/serializeDom.d.cts +1 -1
  38. package/dist/markup/serializeDom.d.ts +1 -1
  39. package/dist/markup/transform.cjs +59 -2
  40. package/dist/markup/transform.d.cts +8 -2
  41. package/dist/markup/transform.d.ts +8 -2
  42. package/dist/markup/transform.js +58 -1
  43. package/dist/model-Bv3yPEdd.d.cts +96 -0
  44. package/dist/model-Bv3yPEdd.d.ts +96 -0
  45. package/dist/snapshot/snapshot.cjs +8 -6
  46. package/dist/snapshot/snapshot.js +9 -7
  47. package/package.json +4 -4
@@ -87,8 +87,11 @@ var import_audiobook = require("@storyteller-platform/audiobook");
87
87
  var import_epub = require("@storyteller-platform/epub");
88
88
  var import_ghost_story = require("@storyteller-platform/ghost-story");
89
89
  var import_ffmpeg = require("../common/ffmpeg.cjs");
90
+ var import_parseDom = require("../markup/parseDom.cjs");
90
91
  var import_segmentation = require("../markup/segmentation.cjs");
92
+ var import_transform = require("../markup/transform.cjs");
91
93
  var import_getSentenceRanges = require("./getSentenceRanges.cjs");
94
+ var import_interpolateSentenceRanges = require("./interpolateSentenceRanges.cjs");
92
95
  var import_search = require("./search.cjs");
93
96
  var import_slugify = require("./slugify.cjs");
94
97
  var import_textFragments = require("./textFragments.cjs");
@@ -172,12 +175,12 @@ class Aligner {
172
175
  };
173
176
  async getChapterSentences(chapterId) {
174
177
  const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
175
- const { result: segmentation } = await (0, import_segmentation.getXhtmlSegmentation)(
176
- import_epub.Epub.getXhtmlBody(chapterXml),
177
- {
178
- primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
179
- }
180
- );
178
+ const original = (0, import_parseDom.parseDom)(import_epub.Epub.getXhtmlBody(chapterXml));
179
+ const inlined = (0, import_transform.inlineFootnotes)(original);
180
+ const lifted = (0, import_transform.liftText)(inlined.root);
181
+ const segmentation = await (0, import_segmentation.segmentChapter)(lifted.result, {
182
+ primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
183
+ });
181
184
  return segmentation.filter((s) => s.text.match(/\S/));
182
185
  }
183
186
  async writeAlignedChapter(alignedChapter) {
@@ -505,16 +508,25 @@ class Aligner {
505
508
  });
506
509
  const sentenceRanges = [];
507
510
  const chapterSentenceCounts = {};
511
+ const audioFileDurations = {};
508
512
  for (const alignedChapter of audioOrderedChapters) {
509
513
  sentenceRanges.push(...alignedChapter.sentenceRanges);
514
+ for (const sentenceRange of sentenceRanges) {
515
+ if (!(sentenceRange.audiofile in audioFileDurations)) {
516
+ audioFileDurations[sentenceRange.audiofile] = await (0, import_ffmpeg.getTrackDuration)(
517
+ sentenceRange.audiofile
518
+ );
519
+ }
520
+ }
510
521
  const sentences = await this.getChapterSentences(
511
522
  alignedChapter.chapter.id
512
523
  );
513
524
  chapterSentenceCounts[alignedChapter.chapter.id] = sentences.length;
514
525
  }
515
- const interpolated = await (0, import_getSentenceRanges.interpolateSentenceRanges)(
526
+ const interpolated = (0, import_interpolateSentenceRanges.interpolateSentenceRanges)(
516
527
  sentenceRanges,
517
- chapterSentenceCounts
528
+ chapterSentenceCounts,
529
+ audioFileDurations
518
530
  );
519
531
  const expanded = (0, import_getSentenceRanges.expandEmptySentenceRanges)(interpolated);
520
532
  const collapsed = await (0, import_getSentenceRanges.collapseSentenceRangeGaps)(expanded);
@@ -525,7 +537,7 @@ class Aligner {
525
537
  );
526
538
  const finalSentenceRanges = collapsed.slice(
527
539
  collapsedStart,
528
- collapsedStart + sentences.length - 1
540
+ collapsedStart + sentences.length
529
541
  );
530
542
  alignedChapter.sentenceRanges = finalSentenceRanges;
531
543
  for (const [i, wordRanges] of (0, import_itertools.enumerate)(alignedChapter.wordRanges)) {
@@ -16,15 +16,17 @@ import {
16
16
  createTiming
17
17
  } from "@storyteller-platform/ghost-story";
18
18
  import { getTrackDuration } from "../common/ffmpeg.js";
19
- import { getXhtmlSegmentation } from "../markup/segmentation.js";
19
+ import { parseDom } from "../markup/parseDom.js";
20
+ import { segmentChapter } from "../markup/segmentation.js";
21
+ import { inlineFootnotes, liftText } from "../markup/transform.js";
20
22
  import {
21
23
  collapseSentenceRangeGaps,
22
24
  expandEmptySentenceRanges,
23
25
  getChapterDuration,
24
26
  getSentenceRanges,
25
- interpolateSentenceRanges,
26
27
  mapTranscriptionTimeline
27
28
  } from "./getSentenceRanges.js";
29
+ import { interpolateSentenceRanges } from "./interpolateSentenceRanges.js";
28
30
  import { findBoundaries } from "./search.js";
29
31
  import { slugify } from "./slugify.js";
30
32
  import { TextFragmentTrie } from "./textFragments.js";
@@ -108,12 +110,12 @@ class Aligner {
108
110
  };
109
111
  async getChapterSentences(chapterId) {
110
112
  const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
111
- const { result: segmentation } = await getXhtmlSegmentation(
112
- Epub.getXhtmlBody(chapterXml),
113
- {
114
- primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
115
- }
116
- );
113
+ const original = parseDom(Epub.getXhtmlBody(chapterXml));
114
+ const inlined = inlineFootnotes(original);
115
+ const lifted = liftText(inlined.root);
116
+ const segmentation = await segmentChapter(lifted.result, {
117
+ primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
118
+ });
117
119
  return segmentation.filter((s) => s.text.match(/\S/));
118
120
  }
119
121
  async writeAlignedChapter(alignedChapter) {
@@ -441,16 +443,25 @@ class Aligner {
441
443
  });
442
444
  const sentenceRanges = [];
443
445
  const chapterSentenceCounts = {};
446
+ const audioFileDurations = {};
444
447
  for (const alignedChapter of audioOrderedChapters) {
445
448
  sentenceRanges.push(...alignedChapter.sentenceRanges);
449
+ for (const sentenceRange of sentenceRanges) {
450
+ if (!(sentenceRange.audiofile in audioFileDurations)) {
451
+ audioFileDurations[sentenceRange.audiofile] = await getTrackDuration(
452
+ sentenceRange.audiofile
453
+ );
454
+ }
455
+ }
446
456
  const sentences = await this.getChapterSentences(
447
457
  alignedChapter.chapter.id
448
458
  );
449
459
  chapterSentenceCounts[alignedChapter.chapter.id] = sentences.length;
450
460
  }
451
- const interpolated = await interpolateSentenceRanges(
461
+ const interpolated = interpolateSentenceRanges(
452
462
  sentenceRanges,
453
- chapterSentenceCounts
463
+ chapterSentenceCounts,
464
+ audioFileDurations
454
465
  );
455
466
  const expanded = expandEmptySentenceRanges(interpolated);
456
467
  const collapsed = await collapseSentenceRangeGaps(expanded);
@@ -461,7 +472,7 @@ class Aligner {
461
472
  );
462
473
  const finalSentenceRanges = collapsed.slice(
463
474
  collapsedStart,
464
- collapsedStart + sentences.length - 1
475
+ collapsedStart + sentences.length
465
476
  );
466
477
  alignedChapter.sentenceRanges = finalSentenceRanges;
467
478
  for (const [i, wordRanges] of enumerate(alignedChapter.wordRanges)) {
@@ -23,7 +23,6 @@ __export(getSentenceRanges_exports, {
23
23
  findEndTimestamp: () => findEndTimestamp,
24
24
  getChapterDuration: () => getChapterDuration,
25
25
  getSentenceRanges: () => getSentenceRanges,
26
- interpolateSentenceRanges: () => interpolateSentenceRanges,
27
26
  mapTranscriptionTimeline: () => mapTranscriptionTimeline
28
27
  });
29
28
  module.exports = __toCommonJS(getSentenceRanges_exports);
@@ -311,62 +310,6 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
311
310
  lastFoundSentence
312
311
  };
313
312
  }
314
- async function getLargestGap(trailing, leading) {
315
- const leadingGap = leading.start;
316
- const trailingGap = await (0, import_ffmpeg.getTrackDuration)(trailing.audiofile) - trailing.end;
317
- if (trailingGap > leadingGap) return [trailingGap, trailing.audiofile];
318
- return [leadingGap, leading.audiofile];
319
- }
320
- async function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts) {
321
- const interpolated = [];
322
- for (let i = 0; i < sentenceRanges.length; i++) {
323
- const endRange = sentenceRanges[i];
324
- const startRange = sentenceRanges[i - 1] ?? {
325
- id: 0,
326
- audiofile: endRange.audiofile,
327
- chapterId: endRange.chapterId,
328
- start: 0,
329
- end: 0
330
- };
331
- const newChapter = startRange.chapterId !== endRange.chapterId;
332
- const newAudiofile = startRange.audiofile !== endRange.audiofile;
333
- const count = newChapter ? chapterSentenceCounts[startRange.chapterId] - startRange.id - 1 : endRange.id - startRange.id - 1;
334
- if (count === 0) {
335
- interpolated.push(endRange);
336
- continue;
337
- }
338
- let [diff, audiofile] = newAudiofile ? await getLargestGap(startRange, endRange) : [endRange.start - startRange.end, endRange.audiofile];
339
- if (diff <= 0) {
340
- if (newAudiofile) {
341
- const rangeLength = endRange.end - endRange.start;
342
- diff = rangeLength < 0.5 ? rangeLength / 2 : 0.25;
343
- endRange.start = diff;
344
- } else {
345
- diff = 0.25;
346
- startRange.end = startRange.start - diff;
347
- }
348
- }
349
- const interpolatedLength = diff / count;
350
- const start = newAudiofile ? 0 : startRange.end;
351
- for (let i2 = 0; i2 < count; i2++) {
352
- let id = startRange.id + i2 + 1;
353
- let chapterId = startRange.chapterId;
354
- if (newChapter && i2 > chapterSentenceCounts[startRange.chapterId] - startRange.id) {
355
- id = i2;
356
- chapterId = endRange.chapterId;
357
- }
358
- interpolated.push({
359
- id,
360
- chapterId,
361
- start: start + interpolatedLength * i2,
362
- end: start + interpolatedLength * (i2 + 1),
363
- audiofile
364
- });
365
- }
366
- interpolated.push(endRange);
367
- }
368
- return interpolated;
369
- }
370
313
  function expandEmptySentenceRanges(sentenceRanges) {
371
314
  const expandedRanges = [];
372
315
  for (const sentenceRange of sentenceRanges) {
@@ -418,6 +361,5 @@ function getChapterDuration(sentenceRanges) {
418
361
  findEndTimestamp,
419
362
  getChapterDuration,
420
363
  getSentenceRanges,
421
- interpolateSentenceRanges,
422
364
  mapTranscriptionTimeline
423
365
  });
@@ -54,7 +54,6 @@ declare function getSentenceRanges(transcriptionText: string, mappedTimeline: Ma
54
54
  firstFoundSentence: number;
55
55
  lastFoundSentence: number;
56
56
  }>;
57
- declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>): Promise<SentenceRange[]>;
58
57
  /**
59
58
  * Whisper sometimes provides words with no time information,
60
59
  * or start and end timestamps that are equal. EpubCheck complains
@@ -65,4 +64,4 @@ declare function expandEmptySentenceRanges<Range extends SentenceRange | WordRan
65
64
  declare function collapseSentenceRangeGaps(sentenceRanges: SentenceRange[]): Promise<SentenceRange[]>;
66
65
  declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;
67
66
 
68
- export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges, mapTranscriptionTimeline };
67
+ export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, mapTranscriptionTimeline };
@@ -54,7 +54,6 @@ declare function getSentenceRanges(transcriptionText: string, mappedTimeline: Ma
54
54
  firstFoundSentence: number;
55
55
  lastFoundSentence: number;
56
56
  }>;
57
- declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>): Promise<SentenceRange[]>;
58
57
  /**
59
58
  * Whisper sometimes provides words with no time information,
60
59
  * or start and end timestamps that are equal. EpubCheck complains
@@ -65,4 +64,4 @@ declare function expandEmptySentenceRanges<Range extends SentenceRange | WordRan
65
64
  declare function collapseSentenceRangeGaps(sentenceRanges: SentenceRange[]): Promise<SentenceRange[]>;
66
65
  declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;
67
66
 
68
- export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges, mapTranscriptionTimeline };
67
+ export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, mapTranscriptionTimeline };
@@ -283,62 +283,6 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
283
283
  lastFoundSentence
284
284
  };
285
285
  }
286
- async function getLargestGap(trailing, leading) {
287
- const leadingGap = leading.start;
288
- const trailingGap = await getTrackDuration(trailing.audiofile) - trailing.end;
289
- if (trailingGap > leadingGap) return [trailingGap, trailing.audiofile];
290
- return [leadingGap, leading.audiofile];
291
- }
292
- async function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts) {
293
- const interpolated = [];
294
- for (let i = 0; i < sentenceRanges.length; i++) {
295
- const endRange = sentenceRanges[i];
296
- const startRange = sentenceRanges[i - 1] ?? {
297
- id: 0,
298
- audiofile: endRange.audiofile,
299
- chapterId: endRange.chapterId,
300
- start: 0,
301
- end: 0
302
- };
303
- const newChapter = startRange.chapterId !== endRange.chapterId;
304
- const newAudiofile = startRange.audiofile !== endRange.audiofile;
305
- const count = newChapter ? chapterSentenceCounts[startRange.chapterId] - startRange.id - 1 : endRange.id - startRange.id - 1;
306
- if (count === 0) {
307
- interpolated.push(endRange);
308
- continue;
309
- }
310
- let [diff, audiofile] = newAudiofile ? await getLargestGap(startRange, endRange) : [endRange.start - startRange.end, endRange.audiofile];
311
- if (diff <= 0) {
312
- if (newAudiofile) {
313
- const rangeLength = endRange.end - endRange.start;
314
- diff = rangeLength < 0.5 ? rangeLength / 2 : 0.25;
315
- endRange.start = diff;
316
- } else {
317
- diff = 0.25;
318
- startRange.end = startRange.start - diff;
319
- }
320
- }
321
- const interpolatedLength = diff / count;
322
- const start = newAudiofile ? 0 : startRange.end;
323
- for (let i2 = 0; i2 < count; i2++) {
324
- let id = startRange.id + i2 + 1;
325
- let chapterId = startRange.chapterId;
326
- if (newChapter && i2 > chapterSentenceCounts[startRange.chapterId] - startRange.id) {
327
- id = i2;
328
- chapterId = endRange.chapterId;
329
- }
330
- interpolated.push({
331
- id,
332
- chapterId,
333
- start: start + interpolatedLength * i2,
334
- end: start + interpolatedLength * (i2 + 1),
335
- audiofile
336
- });
337
- }
338
- interpolated.push(endRange);
339
- }
340
- return interpolated;
341
- }
342
286
  function expandEmptySentenceRanges(sentenceRanges) {
343
287
  const expandedRanges = [];
344
288
  for (const sentenceRange of sentenceRanges) {
@@ -389,6 +333,5 @@ export {
389
333
  findEndTimestamp,
390
334
  getChapterDuration,
391
335
  getSentenceRanges,
392
- interpolateSentenceRanges,
393
336
  mapTranscriptionTimeline
394
337
  };
@@ -0,0 +1,124 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var interpolateSentenceRanges_exports = {};
20
+ __export(interpolateSentenceRanges_exports, {
21
+ interpolateSentenceRanges: () => interpolateSentenceRanges
22
+ });
23
+ module.exports = __toCommonJS(interpolateSentenceRanges_exports);
24
+ function buildGapRanges(slots, left, right, audioFileDurations) {
25
+ const n = slots.length;
26
+ if (n === 0) return [];
27
+ if (left.audiofile === right.audiofile) {
28
+ const span = right.time - left.time;
29
+ return slots.map((slot, i) => ({
30
+ ...slot,
31
+ audiofile: left.audiofile,
32
+ start: left.time + span * i / n,
33
+ end: left.time + span * (i + 1) / n
34
+ }));
35
+ }
36
+ const leftDuration = audioFileDurations[left.audiofile] ?? left.time;
37
+ const leftAvail = leftDuration - left.time;
38
+ const rightAvail = right.time;
39
+ const total = leftAvail + rightAvail;
40
+ let n1 = total > 0 ? Math.round(n * (leftAvail / total)) : n;
41
+ let n2 = n - n1;
42
+ n1 = Math.max(0, n1);
43
+ n2 = n - n1;
44
+ const result = [];
45
+ if (n1 > 0) {
46
+ for (let i = 0; i < n1; i++) {
47
+ result.push({
48
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
49
+ ...slots[i],
50
+ audiofile: left.audiofile,
51
+ start: left.time + leftAvail * i / n1,
52
+ end: left.time + leftAvail * (i + 1) / n1
53
+ });
54
+ }
55
+ }
56
+ if (n2 > 0) {
57
+ for (let i = 0; i < n2; i++) {
58
+ result.push({
59
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
60
+ ...slots[n1 + i],
61
+ audiofile: right.audiofile,
62
+ start: rightAvail * i / n2,
63
+ end: rightAvail * (i + 1) / n2
64
+ });
65
+ }
66
+ }
67
+ return result;
68
+ }
69
+ function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts, audioFileDurations) {
70
+ if (sentenceRanges.length === 0) return [];
71
+ const result = [];
72
+ const first = sentenceRanges[0];
73
+ if (first.id > 0) {
74
+ const slots = Array.from({ length: first.id }, (_, i) => ({
75
+ chapterId: first.chapterId,
76
+ id: i
77
+ }));
78
+ const left = { time: 0, audiofile: first.audiofile };
79
+ const right = { time: first.start, audiofile: first.audiofile };
80
+ result.push(...buildGapRanges(slots, left, right, audioFileDurations));
81
+ }
82
+ result.push(first);
83
+ for (let idx = 1; idx < sentenceRanges.length; idx++) {
84
+ const prev = sentenceRanges[idx - 1];
85
+ const curr = sentenceRanges[idx];
86
+ const left = { time: prev.end, audiofile: prev.audiofile };
87
+ const right = { time: curr.start, audiofile: curr.audiofile };
88
+ const gapSlots = [];
89
+ if (prev.chapterId === curr.chapterId) {
90
+ for (let id = prev.id + 1; id < curr.id; id++) {
91
+ gapSlots.push({ chapterId: prev.chapterId, id });
92
+ }
93
+ } else {
94
+ const prevTotal = chapterSentenceCounts[prev.chapterId] ?? prev.id + 1;
95
+ for (let id = prev.id + 1; id < prevTotal; id++) {
96
+ gapSlots.push({ chapterId: prev.chapterId, id });
97
+ }
98
+ for (let id = 0; id < curr.id; id++) {
99
+ gapSlots.push({ chapterId: curr.chapterId, id });
100
+ }
101
+ }
102
+ if (gapSlots.length > 0) {
103
+ result.push(...buildGapRanges(gapSlots, left, right, audioFileDurations));
104
+ }
105
+ result.push(curr);
106
+ }
107
+ const last = sentenceRanges[sentenceRanges.length - 1];
108
+ const lastTotal = chapterSentenceCounts[last.chapterId] ?? last.id + 1;
109
+ if (last.id < lastTotal - 1) {
110
+ const slots = Array.from(
111
+ { length: lastTotal - 1 - last.id },
112
+ (_, i) => ({ chapterId: last.chapterId, id: last.id + 1 + i })
113
+ );
114
+ const fileEnd = audioFileDurations[last.audiofile] ?? last.end;
115
+ const left = { time: last.end, audiofile: last.audiofile };
116
+ const right = { time: fileEnd, audiofile: last.audiofile };
117
+ result.push(...buildGapRanges(slots, left, right, audioFileDurations));
118
+ }
119
+ return result;
120
+ }
121
+ // Annotate the CommonJS export names for ESM import in node:
122
+ 0 && (module.exports = {
123
+ interpolateSentenceRanges
124
+ });
@@ -0,0 +1,23 @@
1
+ import { SentenceRange } from './getSentenceRanges.cjs';
2
+ import '@storyteller-platform/ghost-story';
3
+ import '@echogarden/text-segmentation';
4
+ import '@storyteller-platform/transliteration';
5
+
6
+ /**
7
+ * Given a sequence of sentence ranges from an entire book,
8
+ * ordered by occurrence in audio, interpolates sentence ranges
9
+ * to fill any gaps.
10
+ *
11
+ * A gap may be:
12
+ * - A non-linearity between two sequential sentence ranges
13
+ * in the same chapter, e.g. chapter001#0 -> chapter001#3
14
+ * - A chapter whose sentence ranges start at a number greater
15
+ * than 0, e.g. chapter001#330 -> chapter002#2
16
+ * - A chapter whose sentence ranges end at a number lower
17
+ * than the total number of sentences in that chapter,
18
+ * e.g. chapter001#325 -> chapter002#0, where
19
+ * chapterSentenceCounts["chapter001"] === 330
20
+ */
21
+ declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>, audioFileDurations: Record<string, number>): SentenceRange[];
22
+
23
+ export { interpolateSentenceRanges };
@@ -0,0 +1,23 @@
1
+ import { SentenceRange } from './getSentenceRanges.js';
2
+ import '@storyteller-platform/ghost-story';
3
+ import '@echogarden/text-segmentation';
4
+ import '@storyteller-platform/transliteration';
5
+
6
+ /**
7
+ * Given a sequence of sentence ranges from an entire book,
8
+ * ordered by occurrence in audio, interpolates sentence ranges
9
+ * to fill any gaps.
10
+ *
11
+ * A gap may be:
12
+ * - A non-linearity between two sequential sentence ranges
13
+ * in the same chapter, e.g. chapter001#0 -> chapter001#3
14
+ * - A chapter whose sentence ranges start at a number greater
15
+ * than 0, e.g. chapter001#330 -> chapter002#2
16
+ * - A chapter whose sentence ranges end at a number lower
17
+ * than the total number of sentences in that chapter,
18
+ * e.g. chapter001#325 -> chapter002#0, where
19
+ * chapterSentenceCounts["chapter001"] === 330
20
+ */
21
+ declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>, audioFileDurations: Record<string, number>): SentenceRange[];
22
+
23
+ export { interpolateSentenceRanges };
@@ -0,0 +1,101 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ function buildGapRanges(slots, left, right, audioFileDurations) {
3
+ const n = slots.length;
4
+ if (n === 0) return [];
5
+ if (left.audiofile === right.audiofile) {
6
+ const span = right.time - left.time;
7
+ return slots.map((slot, i) => ({
8
+ ...slot,
9
+ audiofile: left.audiofile,
10
+ start: left.time + span * i / n,
11
+ end: left.time + span * (i + 1) / n
12
+ }));
13
+ }
14
+ const leftDuration = audioFileDurations[left.audiofile] ?? left.time;
15
+ const leftAvail = leftDuration - left.time;
16
+ const rightAvail = right.time;
17
+ const total = leftAvail + rightAvail;
18
+ let n1 = total > 0 ? Math.round(n * (leftAvail / total)) : n;
19
+ let n2 = n - n1;
20
+ n1 = Math.max(0, n1);
21
+ n2 = n - n1;
22
+ const result = [];
23
+ if (n1 > 0) {
24
+ for (let i = 0; i < n1; i++) {
25
+ result.push({
26
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
27
+ ...slots[i],
28
+ audiofile: left.audiofile,
29
+ start: left.time + leftAvail * i / n1,
30
+ end: left.time + leftAvail * (i + 1) / n1
31
+ });
32
+ }
33
+ }
34
+ if (n2 > 0) {
35
+ for (let i = 0; i < n2; i++) {
36
+ result.push({
37
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
38
+ ...slots[n1 + i],
39
+ audiofile: right.audiofile,
40
+ start: rightAvail * i / n2,
41
+ end: rightAvail * (i + 1) / n2
42
+ });
43
+ }
44
+ }
45
+ return result;
46
+ }
47
+ function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts, audioFileDurations) {
48
+ if (sentenceRanges.length === 0) return [];
49
+ const result = [];
50
+ const first = sentenceRanges[0];
51
+ if (first.id > 0) {
52
+ const slots = Array.from({ length: first.id }, (_, i) => ({
53
+ chapterId: first.chapterId,
54
+ id: i
55
+ }));
56
+ const left = { time: 0, audiofile: first.audiofile };
57
+ const right = { time: first.start, audiofile: first.audiofile };
58
+ result.push(...buildGapRanges(slots, left, right, audioFileDurations));
59
+ }
60
+ result.push(first);
61
+ for (let idx = 1; idx < sentenceRanges.length; idx++) {
62
+ const prev = sentenceRanges[idx - 1];
63
+ const curr = sentenceRanges[idx];
64
+ const left = { time: prev.end, audiofile: prev.audiofile };
65
+ const right = { time: curr.start, audiofile: curr.audiofile };
66
+ const gapSlots = [];
67
+ if (prev.chapterId === curr.chapterId) {
68
+ for (let id = prev.id + 1; id < curr.id; id++) {
69
+ gapSlots.push({ chapterId: prev.chapterId, id });
70
+ }
71
+ } else {
72
+ const prevTotal = chapterSentenceCounts[prev.chapterId] ?? prev.id + 1;
73
+ for (let id = prev.id + 1; id < prevTotal; id++) {
74
+ gapSlots.push({ chapterId: prev.chapterId, id });
75
+ }
76
+ for (let id = 0; id < curr.id; id++) {
77
+ gapSlots.push({ chapterId: curr.chapterId, id });
78
+ }
79
+ }
80
+ if (gapSlots.length > 0) {
81
+ result.push(...buildGapRanges(gapSlots, left, right, audioFileDurations));
82
+ }
83
+ result.push(curr);
84
+ }
85
+ const last = sentenceRanges[sentenceRanges.length - 1];
86
+ const lastTotal = chapterSentenceCounts[last.chapterId] ?? last.id + 1;
87
+ if (last.id < lastTotal - 1) {
88
+ const slots = Array.from(
89
+ { length: lastTotal - 1 - last.id },
90
+ (_, i) => ({ chapterId: last.chapterId, id: last.id + 1 + i })
91
+ );
92
+ const fileEnd = audioFileDurations[last.audiofile] ?? last.end;
93
+ const left = { time: last.end, audiofile: last.audiofile };
94
+ const right = { time: fileEnd, audiofile: last.audiofile };
95
+ result.push(...buildGapRanges(slots, left, right, audioFileDurations));
96
+ }
97
+ return result;
98
+ }
99
+ export {
100
+ interpolateSentenceRanges
101
+ };
@@ -37,16 +37,16 @@ function buildNgramIndex(text) {
37
37
  }
38
38
  return index;
39
39
  }
40
+ const NGRAM_SIZE = 5;
40
41
  function* ngrams(text) {
41
42
  const words = text.split("-");
42
- let pos = 0;
43
- for (const i of (0, import_itertools.range)(words.length - 4)) {
44
- const ngram = words.slice(i, i + 5).join("-");
45
- yield [ngram, pos];
46
- pos += words[i].length + 1;
43
+ for (const i of (0, import_itertools.range)(words.length - NGRAM_SIZE - 1)) {
44
+ const ngram = words.slice(i, i + NGRAM_SIZE).join("-");
45
+ yield [ngram, i];
47
46
  }
48
47
  }
49
48
  function collectBoundaryVotes(query, document) {
49
+ const queryWords = query.split("-");
50
50
  const documentIndex = buildNgramIndex(document);
51
51
  let skippedNgrams = 0;
52
52
  let totalNgrams = 0;
@@ -61,7 +61,7 @@ function collectBoundaryVotes(query, document) {
61
61
  }
62
62
  for (const documentStart of documentStarts) {
63
63
  startVotes.push(documentStart - start);
64
- endVotes.push(documentStart + (query.length - start));
64
+ endVotes.push(documentStart + (queryWords.length - start));
65
65
  }
66
66
  }
67
67
  if (skippedNgrams > totalNgrams / 2) {
@@ -97,6 +97,14 @@ function chooseBestFromBins(bins, dir) {
97
97
  }
98
98
  return dir > 0 ? (0, import_itertools.max)(best) ?? null : (0, import_itertools.min)(best) ?? null;
99
99
  }
100
+ function getOffsetFromWordIndex(wordIndex, document) {
101
+ const words = document.split("-");
102
+ let offset = 0;
103
+ for (const i of (0, import_itertools.range)(Math.min(words.length, Math.max(0, wordIndex)))) {
104
+ offset += words[i].length + 1;
105
+ }
106
+ return offset;
107
+ }
100
108
  function findBoundaries(query, document) {
101
109
  const boundaryVotes = collectBoundaryVotes(query, document);
102
110
  if (!boundaryVotes) return null;
@@ -111,7 +119,10 @@ function findBoundaries(query, document) {
111
119
  if (bestEnd === null) {
112
120
  return null;
113
121
  }
114
- return { start: bestStart, end: bestEnd };
122
+ return {
123
+ start: getOffsetFromWordIndex(bestStart, document),
124
+ end: getOffsetFromWordIndex(bestEnd, document)
125
+ };
115
126
  }
116
127
  // Annotate the CommonJS export names for ESM import in node:
117
128
  0 && (module.exports = {