npm - @storyteller-platform/align - Versions diffs - 0.1.24 → 0.1.26 - Mend

@storyteller-platform/align 0.1.24 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

package/dist/align/align.cjs +21 -9
package/dist/align/align.js +22 -11
package/dist/align/getSentenceRanges.cjs +0 -58
package/dist/align/getSentenceRanges.d.cts +1 -2
package/dist/align/getSentenceRanges.d.ts +1 -2
package/dist/align/getSentenceRanges.js +0 -57
package/dist/align/interpolateSentenceRanges.cjs +124 -0
package/dist/align/interpolateSentenceRanges.d.cts +23 -0
package/dist/align/interpolateSentenceRanges.d.ts +23 -0
package/dist/align/interpolateSentenceRanges.js +101 -0
package/dist/align/search.cjs +18 -7
package/dist/align/search.js +18 -7
package/dist/align/slugify.cjs +31 -23
package/dist/align/slugify.js +31 -23
package/dist/index.d.cts +1 -2
package/dist/index.d.ts +1 -2
package/dist/markup/markup.cjs +21 -14
package/dist/markup/markup.d.cts +2 -4
package/dist/markup/markup.d.ts +2 -4
package/dist/markup/markup.js +28 -16
package/dist/markup/model.cjs +138 -5
package/dist/markup/model.d.cts +2 -57
package/dist/markup/model.d.ts +2 -57
package/dist/markup/model.js +136 -5
package/dist/markup/parseDom.cjs +80 -25
package/dist/markup/parseDom.d.cts +4 -4
package/dist/markup/parseDom.d.ts +4 -4
package/dist/markup/parseDom.js +87 -24
package/dist/markup/resolvedPos.cjs +85 -0
package/dist/markup/resolvedPos.d.cts +2 -0
package/dist/markup/resolvedPos.d.ts +2 -0
package/dist/markup/resolvedPos.js +62 -0
package/dist/markup/segmentation.cjs +4 -8
package/dist/markup/segmentation.d.cts +3 -8
package/dist/markup/segmentation.d.ts +3 -8
package/dist/markup/segmentation.js +3 -7
package/dist/markup/serializeDom.d.cts +1 -1
package/dist/markup/serializeDom.d.ts +1 -1
package/dist/markup/transform.cjs +59 -2
package/dist/markup/transform.d.cts +8 -2
package/dist/markup/transform.d.ts +8 -2
package/dist/markup/transform.js +58 -1
package/dist/model-Bv3yPEdd.d.cts +96 -0
package/dist/model-Bv3yPEdd.d.ts +96 -0
package/dist/snapshot/snapshot.cjs +8 -6
package/dist/snapshot/snapshot.js +9 -7
package/package.json +4 -4

package/dist/align/align.cjs CHANGED

@@ -87,8 +87,11 @@ var import_audiobook = require("@storyteller-platform/audiobook");
 var import_epub = require("@storyteller-platform/epub");
 var import_ghost_story = require("@storyteller-platform/ghost-story");
 var import_ffmpeg = require("../common/ffmpeg.cjs");
+var import_parseDom = require("../markup/parseDom.cjs");
 var import_segmentation = require("../markup/segmentation.cjs");
+var import_transform = require("../markup/transform.cjs");
 var import_getSentenceRanges = require("./getSentenceRanges.cjs");
+var import_interpolateSentenceRanges = require("./interpolateSentenceRanges.cjs");
 var import_search = require("./search.cjs");
 var import_slugify = require("./slugify.cjs");
 var import_textFragments = require("./textFragments.cjs");
@@ -172,12 +175,12 @@ class Aligner {
   };
   async getChapterSentences(chapterId) {
     const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
-    const { result: segmentation } = await (0, import_segmentation.getXhtmlSegmentation)(
-      import_epub.Epub.getXhtmlBody(chapterXml),
-      {
-        primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
-      }
-    );
+    const original = (0, import_parseDom.parseDom)(import_epub.Epub.getXhtmlBody(chapterXml));
+    const inlined = (0, import_transform.inlineFootnotes)(original);
+    const lifted = (0, import_transform.liftText)(inlined.root);
+    const segmentation = await (0, import_segmentation.segmentChapter)(lifted.result, {
+      primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
+    });
     return segmentation.filter((s) => s.text.match(/\S/));
   }
   async writeAlignedChapter(alignedChapter) {
@@ -505,16 +508,25 @@ class Aligner {
     });
     const sentenceRanges = [];
     const chapterSentenceCounts = {};
+    const audioFileDurations = {};
     for (const alignedChapter of audioOrderedChapters) {
       sentenceRanges.push(...alignedChapter.sentenceRanges);
+      for (const sentenceRange of sentenceRanges) {
+        if (!(sentenceRange.audiofile in audioFileDurations)) {
+          audioFileDurations[sentenceRange.audiofile] = await (0, import_ffmpeg.getTrackDuration)(
+            sentenceRange.audiofile
+          );
+        }
+      }
       const sentences = await this.getChapterSentences(
         alignedChapter.chapter.id
       );
       chapterSentenceCounts[alignedChapter.chapter.id] = sentences.length;
     }
-    const interpolated = await (0, import_getSentenceRanges.interpolateSentenceRanges)(
+    const interpolated = (0, import_interpolateSentenceRanges.interpolateSentenceRanges)(
       sentenceRanges,
-      chapterSentenceCounts
+      chapterSentenceCounts,
+      audioFileDurations
     );
     const expanded = (0, import_getSentenceRanges.expandEmptySentenceRanges)(interpolated);
     const collapsed = await (0, import_getSentenceRanges.collapseSentenceRangeGaps)(expanded);
@@ -525,7 +537,7 @@ class Aligner {
       );
       const finalSentenceRanges = collapsed.slice(
         collapsedStart,
-        collapsedStart + sentences.length - 1
+        collapsedStart + sentences.length
       );
       alignedChapter.sentenceRanges = finalSentenceRanges;
       for (const [i, wordRanges] of (0, import_itertools.enumerate)(alignedChapter.wordRanges)) {

package/dist/align/align.js CHANGED

@@ -16,15 +16,17 @@ import {
   createTiming
 } from "@storyteller-platform/ghost-story";
 import { getTrackDuration } from "../common/ffmpeg.js";
-import { getXhtmlSegmentation } from "../markup/segmentation.js";
+import { parseDom } from "../markup/parseDom.js";
+import { segmentChapter } from "../markup/segmentation.js";
+import { inlineFootnotes, liftText } from "../markup/transform.js";
 import {
   collapseSentenceRangeGaps,
   expandEmptySentenceRanges,
   getChapterDuration,
   getSentenceRanges,
-  interpolateSentenceRanges,
   mapTranscriptionTimeline
 } from "./getSentenceRanges.js";
+import { interpolateSentenceRanges } from "./interpolateSentenceRanges.js";
 import { findBoundaries } from "./search.js";
 import { slugify } from "./slugify.js";
 import { TextFragmentTrie } from "./textFragments.js";
@@ -108,12 +110,12 @@ class Aligner {
   };
   async getChapterSentences(chapterId) {
     const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
-    const { result: segmentation } = await getXhtmlSegmentation(
-      Epub.getXhtmlBody(chapterXml),
-      {
-        primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
-      }
-    );
+    const original = parseDom(Epub.getXhtmlBody(chapterXml));
+    const inlined = inlineFootnotes(original);
+    const lifted = liftText(inlined.root);
+    const segmentation = await segmentChapter(lifted.result, {
+      primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
+    });
     return segmentation.filter((s) => s.text.match(/\S/));
   }
   async writeAlignedChapter(alignedChapter) {
@@ -441,16 +443,25 @@ class Aligner {
     });
     const sentenceRanges = [];
     const chapterSentenceCounts = {};
+    const audioFileDurations = {};
     for (const alignedChapter of audioOrderedChapters) {
       sentenceRanges.push(...alignedChapter.sentenceRanges);
+      for (const sentenceRange of sentenceRanges) {
+        if (!(sentenceRange.audiofile in audioFileDurations)) {
+          audioFileDurations[sentenceRange.audiofile] = await getTrackDuration(
+            sentenceRange.audiofile
+          );
+        }
+      }
       const sentences = await this.getChapterSentences(
         alignedChapter.chapter.id
       );
       chapterSentenceCounts[alignedChapter.chapter.id] = sentences.length;
     }
-    const interpolated = await interpolateSentenceRanges(
+    const interpolated = interpolateSentenceRanges(
       sentenceRanges,
-      chapterSentenceCounts
+      chapterSentenceCounts,
+      audioFileDurations
     );
     const expanded = expandEmptySentenceRanges(interpolated);
     const collapsed = await collapseSentenceRangeGaps(expanded);
@@ -461,7 +472,7 @@ class Aligner {
       );
       const finalSentenceRanges = collapsed.slice(
         collapsedStart,
-        collapsedStart + sentences.length - 1
+        collapsedStart + sentences.length
       );
       alignedChapter.sentenceRanges = finalSentenceRanges;
       for (const [i, wordRanges] of enumerate(alignedChapter.wordRanges)) {

package/dist/align/getSentenceRanges.cjs CHANGED

@@ -23,7 +23,6 @@ __export(getSentenceRanges_exports, {
   findEndTimestamp: () => findEndTimestamp,
   getChapterDuration: () => getChapterDuration,
   getSentenceRanges: () => getSentenceRanges,
-  interpolateSentenceRanges: () => interpolateSentenceRanges,
   mapTranscriptionTimeline: () => mapTranscriptionTimeline
 });
 module.exports = __toCommonJS(getSentenceRanges_exports);
@@ -311,62 +310,6 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
     lastFoundSentence
   };
 }
-async function getLargestGap(trailing, leading) {
-  const leadingGap = leading.start;
-  const trailingGap = await (0, import_ffmpeg.getTrackDuration)(trailing.audiofile) - trailing.end;
-  if (trailingGap > leadingGap) return [trailingGap, trailing.audiofile];
-  return [leadingGap, leading.audiofile];
-}
-async function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts) {
-  const interpolated = [];
-  for (let i = 0; i < sentenceRanges.length; i++) {
-    const endRange = sentenceRanges[i];
-    const startRange = sentenceRanges[i - 1] ?? {
-      id: 0,
-      audiofile: endRange.audiofile,
-      chapterId: endRange.chapterId,
-      start: 0,
-      end: 0
-    };
-    const newChapter = startRange.chapterId !== endRange.chapterId;
-    const newAudiofile = startRange.audiofile !== endRange.audiofile;
-    const count = newChapter ? chapterSentenceCounts[startRange.chapterId] - startRange.id - 1 : endRange.id - startRange.id - 1;
-    if (count === 0) {
-      interpolated.push(endRange);
-      continue;
-    }
-    let [diff, audiofile] = newAudiofile ? await getLargestGap(startRange, endRange) : [endRange.start - startRange.end, endRange.audiofile];
-    if (diff <= 0) {
-      if (newAudiofile) {
-        const rangeLength = endRange.end - endRange.start;
-        diff = rangeLength < 0.5 ? rangeLength / 2 : 0.25;
-        endRange.start = diff;
-      } else {
-        diff = 0.25;
-        startRange.end = startRange.start - diff;
-      }
-    }
-    const interpolatedLength = diff / count;
-    const start = newAudiofile ? 0 : startRange.end;
-    for (let i2 = 0; i2 < count; i2++) {
-      let id = startRange.id + i2 + 1;
-      let chapterId = startRange.chapterId;
-      if (newChapter && i2 > chapterSentenceCounts[startRange.chapterId] - startRange.id) {
-        id = i2;
-        chapterId = endRange.chapterId;
-      }
-      interpolated.push({
-        id,
-        chapterId,
-        start: start + interpolatedLength * i2,
-        end: start + interpolatedLength * (i2 + 1),
-        audiofile
-      });
-    }
-    interpolated.push(endRange);
-  }
-  return interpolated;
-}
 function expandEmptySentenceRanges(sentenceRanges) {
   const expandedRanges = [];
   for (const sentenceRange of sentenceRanges) {
@@ -418,6 +361,5 @@ function getChapterDuration(sentenceRanges) {
   findEndTimestamp,
   getChapterDuration,
   getSentenceRanges,
-  interpolateSentenceRanges,
   mapTranscriptionTimeline
 });

package/dist/align/getSentenceRanges.d.cts CHANGED

@@ -54,7 +54,6 @@ declare function getSentenceRanges(transcriptionText: string, mappedTimeline: Ma
     firstFoundSentence: number;
     lastFoundSentence: number;
 }>;
-declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>): Promise<SentenceRange[]>;
 /**
  * Whisper sometimes provides words with no time information,
  * or start and end timestamps that are equal. EpubCheck complains
@@ -65,4 +64,4 @@ declare function expandEmptySentenceRanges<Range extends SentenceRange | WordRan
 declare function collapseSentenceRangeGaps(sentenceRanges: SentenceRange[]): Promise<SentenceRange[]>;
 declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;
-export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges, mapTranscriptionTimeline };
+export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, mapTranscriptionTimeline };

package/dist/align/getSentenceRanges.d.ts CHANGED

@@ -54,7 +54,6 @@ declare function getSentenceRanges(transcriptionText: string, mappedTimeline: Ma
     firstFoundSentence: number;
     lastFoundSentence: number;
 }>;
-declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>): Promise<SentenceRange[]>;
 /**
  * Whisper sometimes provides words with no time information,
  * or start and end timestamps that are equal. EpubCheck complains
@@ -65,4 +64,4 @@ declare function expandEmptySentenceRanges<Range extends SentenceRange | WordRan
 declare function collapseSentenceRangeGaps(sentenceRanges: SentenceRange[]): Promise<SentenceRange[]>;
 declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;
-export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges, mapTranscriptionTimeline };
+export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, mapTranscriptionTimeline };

package/dist/align/getSentenceRanges.js CHANGED

@@ -283,62 +283,6 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
     lastFoundSentence
   };
 }
-async function getLargestGap(trailing, leading) {
-  const leadingGap = leading.start;
-  const trailingGap = await getTrackDuration(trailing.audiofile) - trailing.end;
-  if (trailingGap > leadingGap) return [trailingGap, trailing.audiofile];
-  return [leadingGap, leading.audiofile];
-}
-async function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts) {
-  const interpolated = [];
-  for (let i = 0; i < sentenceRanges.length; i++) {
-    const endRange = sentenceRanges[i];
-    const startRange = sentenceRanges[i - 1] ?? {
-      id: 0,
-      audiofile: endRange.audiofile,
-      chapterId: endRange.chapterId,
-      start: 0,
-      end: 0
-    };
-    const newChapter = startRange.chapterId !== endRange.chapterId;
-    const newAudiofile = startRange.audiofile !== endRange.audiofile;
-    const count = newChapter ? chapterSentenceCounts[startRange.chapterId] - startRange.id - 1 : endRange.id - startRange.id - 1;
-    if (count === 0) {
-      interpolated.push(endRange);
-      continue;
-    }
-    let [diff, audiofile] = newAudiofile ? await getLargestGap(startRange, endRange) : [endRange.start - startRange.end, endRange.audiofile];
-    if (diff <= 0) {
-      if (newAudiofile) {
-        const rangeLength = endRange.end - endRange.start;
-        diff = rangeLength < 0.5 ? rangeLength / 2 : 0.25;
-        endRange.start = diff;
-      } else {
-        diff = 0.25;
-        startRange.end = startRange.start - diff;
-      }
-    }
-    const interpolatedLength = diff / count;
-    const start = newAudiofile ? 0 : startRange.end;
-    for (let i2 = 0; i2 < count; i2++) {
-      let id = startRange.id + i2 + 1;
-      let chapterId = startRange.chapterId;
-      if (newChapter && i2 > chapterSentenceCounts[startRange.chapterId] - startRange.id) {
-        id = i2;
-        chapterId = endRange.chapterId;
-      }
-      interpolated.push({
-        id,
-        chapterId,
-        start: start + interpolatedLength * i2,
-        end: start + interpolatedLength * (i2 + 1),
-        audiofile
-      });
-    }
-    interpolated.push(endRange);
-  }
-  return interpolated;
-}
 function expandEmptySentenceRanges(sentenceRanges) {
   const expandedRanges = [];
   for (const sentenceRange of sentenceRanges) {
@@ -389,6 +333,5 @@ export {
   findEndTimestamp,
   getChapterDuration,
   getSentenceRanges,
-  interpolateSentenceRanges,
   mapTranscriptionTimeline
 };

package/dist/align/interpolateSentenceRanges.cjs ADDED

@@ -0,0 +1,124 @@
+"use strict";
+var __defProp = Object.defineProperty;
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __export = (target, all) => {
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+var __copyProps = (to, from, except, desc) => {
+  if (from && typeof from === "object" || typeof from === "function") {
+    for (let key of __getOwnPropNames(from))
+      if (!__hasOwnProp.call(to, key) && key !== except)
+        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+  }
+  return to;
+};
+var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+var interpolateSentenceRanges_exports = {};
+__export(interpolateSentenceRanges_exports, {
+  interpolateSentenceRanges: () => interpolateSentenceRanges
+});
+module.exports = __toCommonJS(interpolateSentenceRanges_exports);
+function buildGapRanges(slots, left, right, audioFileDurations) {
+  const n = slots.length;
+  if (n === 0) return [];
+  if (left.audiofile === right.audiofile) {
+    const span = right.time - left.time;
+    return slots.map((slot, i) => ({
+      ...slot,
+      audiofile: left.audiofile,
+      start: left.time + span * i / n,
+      end: left.time + span * (i + 1) / n
+    }));
+  }
+  const leftDuration = audioFileDurations[left.audiofile] ?? left.time;
+  const leftAvail = leftDuration - left.time;
+  const rightAvail = right.time;
+  const total = leftAvail + rightAvail;
+  let n1 = total > 0 ? Math.round(n * (leftAvail / total)) : n;
+  let n2 = n - n1;
+  n1 = Math.max(0, n1);
+  n2 = n - n1;
+  const result = [];
+  if (n1 > 0) {
+    for (let i = 0; i < n1; i++) {
+      result.push({
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        ...slots[i],
+        audiofile: left.audiofile,
+        start: left.time + leftAvail * i / n1,
+        end: left.time + leftAvail * (i + 1) / n1
+      });
+    }
+  }
+  if (n2 > 0) {
+    for (let i = 0; i < n2; i++) {
+      result.push({
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        ...slots[n1 + i],
+        audiofile: right.audiofile,
+        start: rightAvail * i / n2,
+        end: rightAvail * (i + 1) / n2
+      });
+    }
+  }
+  return result;
+}
+function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts, audioFileDurations) {
+  if (sentenceRanges.length === 0) return [];
+  const result = [];
+  const first = sentenceRanges[0];
+  if (first.id > 0) {
+    const slots = Array.from({ length: first.id }, (_, i) => ({
+      chapterId: first.chapterId,
+      id: i
+    }));
+    const left = { time: 0, audiofile: first.audiofile };
+    const right = { time: first.start, audiofile: first.audiofile };
+    result.push(...buildGapRanges(slots, left, right, audioFileDurations));
+  }
+  result.push(first);
+  for (let idx = 1; idx < sentenceRanges.length; idx++) {
+    const prev = sentenceRanges[idx - 1];
+    const curr = sentenceRanges[idx];
+    const left = { time: prev.end, audiofile: prev.audiofile };
+    const right = { time: curr.start, audiofile: curr.audiofile };
+    const gapSlots = [];
+    if (prev.chapterId === curr.chapterId) {
+      for (let id = prev.id + 1; id < curr.id; id++) {
+        gapSlots.push({ chapterId: prev.chapterId, id });
+      }
+    } else {
+      const prevTotal = chapterSentenceCounts[prev.chapterId] ?? prev.id + 1;
+      for (let id = prev.id + 1; id < prevTotal; id++) {
+        gapSlots.push({ chapterId: prev.chapterId, id });
+      }
+      for (let id = 0; id < curr.id; id++) {
+        gapSlots.push({ chapterId: curr.chapterId, id });
+      }
+    }
+    if (gapSlots.length > 0) {
+      result.push(...buildGapRanges(gapSlots, left, right, audioFileDurations));
+    }
+    result.push(curr);
+  }
+  const last = sentenceRanges[sentenceRanges.length - 1];
+  const lastTotal = chapterSentenceCounts[last.chapterId] ?? last.id + 1;
+  if (last.id < lastTotal - 1) {
+    const slots = Array.from(
+      { length: lastTotal - 1 - last.id },
+      (_, i) => ({ chapterId: last.chapterId, id: last.id + 1 + i })
+    );
+    const fileEnd = audioFileDurations[last.audiofile] ?? last.end;
+    const left = { time: last.end, audiofile: last.audiofile };
+    const right = { time: fileEnd, audiofile: last.audiofile };
+    result.push(...buildGapRanges(slots, left, right, audioFileDurations));
+  }
+  return result;
+}
+// Annotate the CommonJS export names for ESM import in node:
+0 && (module.exports = {
+  interpolateSentenceRanges
+});

package/dist/align/interpolateSentenceRanges.d.cts ADDED

@@ -0,0 +1,23 @@
+import { SentenceRange } from './getSentenceRanges.cjs';
+import '@storyteller-platform/ghost-story';
+import '@echogarden/text-segmentation';
+import '@storyteller-platform/transliteration';
+/**
+ * Given a sequence of sentence ranges from an entire book,
+ * ordered by occurrence in audio, interpolates sentence ranges
+ * to fill any gaps.
+ *
+ * A gap may be:
+ *   - A non-linearity between two sequential sentence ranges
+ *     in the same chapter, e.g. chapter001#0 -> chapter001#3
+ *   - A chapter whose sentence ranges start at a number greater
+ *     than 0, e.g. chapter001#330 -> chapter002#2
+ *   - A chapter whose sentence ranges end at a number lower
+ *     than the total number of sentences in that chapter,
+ *     e.g. chapter001#325 -> chapter002#0, where
+ *     chapterSentenceCounts["chapter001"] === 330
+ */
+declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>, audioFileDurations: Record<string, number>): SentenceRange[];
+export { interpolateSentenceRanges };

package/dist/align/interpolateSentenceRanges.d.ts ADDED

@@ -0,0 +1,23 @@
+import { SentenceRange } from './getSentenceRanges.js';
+import '@storyteller-platform/ghost-story';
+import '@echogarden/text-segmentation';
+import '@storyteller-platform/transliteration';
+/**
+ * Given a sequence of sentence ranges from an entire book,
+ * ordered by occurrence in audio, interpolates sentence ranges
+ * to fill any gaps.
+ *
+ * A gap may be:
+ *   - A non-linearity between two sequential sentence ranges
+ *     in the same chapter, e.g. chapter001#0 -> chapter001#3
+ *   - A chapter whose sentence ranges start at a number greater
+ *     than 0, e.g. chapter001#330 -> chapter002#2
+ *   - A chapter whose sentence ranges end at a number lower
+ *     than the total number of sentences in that chapter,
+ *     e.g. chapter001#325 -> chapter002#0, where
+ *     chapterSentenceCounts["chapter001"] === 330
+ */
+declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>, audioFileDurations: Record<string, number>): SentenceRange[];
+export { interpolateSentenceRanges };

package/dist/align/interpolateSentenceRanges.js ADDED

@@ -0,0 +1,101 @@
+import "../chunk-BIEQXUOY.js";
+function buildGapRanges(slots, left, right, audioFileDurations) {
+  const n = slots.length;
+  if (n === 0) return [];
+  if (left.audiofile === right.audiofile) {
+    const span = right.time - left.time;
+    return slots.map((slot, i) => ({
+      ...slot,
+      audiofile: left.audiofile,
+      start: left.time + span * i / n,
+      end: left.time + span * (i + 1) / n
+    }));
+  }
+  const leftDuration = audioFileDurations[left.audiofile] ?? left.time;
+  const leftAvail = leftDuration - left.time;
+  const rightAvail = right.time;
+  const total = leftAvail + rightAvail;
+  let n1 = total > 0 ? Math.round(n * (leftAvail / total)) : n;
+  let n2 = n - n1;
+  n1 = Math.max(0, n1);
+  n2 = n - n1;
+  const result = [];
+  if (n1 > 0) {
+    for (let i = 0; i < n1; i++) {
+      result.push({
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        ...slots[i],
+        audiofile: left.audiofile,
+        start: left.time + leftAvail * i / n1,
+        end: left.time + leftAvail * (i + 1) / n1
+      });
+    }
+  }
+  if (n2 > 0) {
+    for (let i = 0; i < n2; i++) {
+      result.push({
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        ...slots[n1 + i],
+        audiofile: right.audiofile,
+        start: rightAvail * i / n2,
+        end: rightAvail * (i + 1) / n2
+      });
+    }
+  }
+  return result;
+}
+function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts, audioFileDurations) {
+  if (sentenceRanges.length === 0) return [];
+  const result = [];
+  const first = sentenceRanges[0];
+  if (first.id > 0) {
+    const slots = Array.from({ length: first.id }, (_, i) => ({
+      chapterId: first.chapterId,
+      id: i
+    }));
+    const left = { time: 0, audiofile: first.audiofile };
+    const right = { time: first.start, audiofile: first.audiofile };
+    result.push(...buildGapRanges(slots, left, right, audioFileDurations));
+  }
+  result.push(first);
+  for (let idx = 1; idx < sentenceRanges.length; idx++) {
+    const prev = sentenceRanges[idx - 1];
+    const curr = sentenceRanges[idx];
+    const left = { time: prev.end, audiofile: prev.audiofile };
+    const right = { time: curr.start, audiofile: curr.audiofile };
+    const gapSlots = [];
+    if (prev.chapterId === curr.chapterId) {
+      for (let id = prev.id + 1; id < curr.id; id++) {
+        gapSlots.push({ chapterId: prev.chapterId, id });
+      }
+    } else {
+      const prevTotal = chapterSentenceCounts[prev.chapterId] ?? prev.id + 1;
+      for (let id = prev.id + 1; id < prevTotal; id++) {
+        gapSlots.push({ chapterId: prev.chapterId, id });
+      }
+      for (let id = 0; id < curr.id; id++) {
+        gapSlots.push({ chapterId: curr.chapterId, id });
+      }
+    }
+    if (gapSlots.length > 0) {
+      result.push(...buildGapRanges(gapSlots, left, right, audioFileDurations));
+    }
+    result.push(curr);
+  }
+  const last = sentenceRanges[sentenceRanges.length - 1];
+  const lastTotal = chapterSentenceCounts[last.chapterId] ?? last.id + 1;
+  if (last.id < lastTotal - 1) {
+    const slots = Array.from(
+      { length: lastTotal - 1 - last.id },
+      (_, i) => ({ chapterId: last.chapterId, id: last.id + 1 + i })
+    );
+    const fileEnd = audioFileDurations[last.audiofile] ?? last.end;
+    const left = { time: last.end, audiofile: last.audiofile };
+    const right = { time: fileEnd, audiofile: last.audiofile };
+    result.push(...buildGapRanges(slots, left, right, audioFileDurations));
+  }
+  return result;
+}
+export {
+  interpolateSentenceRanges
+};

package/dist/align/search.cjs CHANGED

@@ -37,16 +37,16 @@ function buildNgramIndex(text) {
   }
   return index;
 }
+const NGRAM_SIZE = 5;
 function* ngrams(text) {
   const words = text.split("-");
-  let pos = 0;
-  for (const i of (0, import_itertools.range)(words.length - 4)) {
-    const ngram = words.slice(i, i + 5).join("-");
-    yield [ngram, pos];
-    pos += words[i].length + 1;
+  for (const i of (0, import_itertools.range)(words.length - NGRAM_SIZE - 1)) {
+    const ngram = words.slice(i, i + NGRAM_SIZE).join("-");
+    yield [ngram, i];
   }
 }
 function collectBoundaryVotes(query, document) {
+  const queryWords = query.split("-");
   const documentIndex = buildNgramIndex(document);
   let skippedNgrams = 0;
   let totalNgrams = 0;
@@ -61,7 +61,7 @@ function collectBoundaryVotes(query, document) {
     }
     for (const documentStart of documentStarts) {
       startVotes.push(documentStart - start);
-      endVotes.push(documentStart + (query.length - start));
+      endVotes.push(documentStart + (queryWords.length - start));
     }
   }
   if (skippedNgrams > totalNgrams / 2) {
@@ -97,6 +97,14 @@ function chooseBestFromBins(bins, dir) {
   }
   return dir > 0 ? (0, import_itertools.max)(best) ?? null : (0, import_itertools.min)(best) ?? null;
 }
+function getOffsetFromWordIndex(wordIndex, document) {
+  const words = document.split("-");
+  let offset = 0;
+  for (const i of (0, import_itertools.range)(Math.min(words.length, Math.max(0, wordIndex)))) {
+    offset += words[i].length + 1;
+  }
+  return offset;
+}
 function findBoundaries(query, document) {
   const boundaryVotes = collectBoundaryVotes(query, document);
   if (!boundaryVotes) return null;
@@ -111,7 +119,10 @@ function findBoundaries(query, document) {
   if (bestEnd === null) {
     return null;
   }
-  return { start: bestStart, end: bestEnd };
+  return {
+    start: getOffsetFromWordIndex(bestStart, document),
+    end: getOffsetFromWordIndex(bestEnd, document)
+  };
 }
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {