npm - @storyteller-platform/align - Versions diffs - 0.1.13 → 0.1.14 - Mend

@storyteller-platform/align 0.1.13 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/dist/align/align.cjs +42 -117
package/dist/align/align.d.cts +14 -1
package/dist/align/align.d.ts +14 -1
package/dist/align/align.js +42 -117
package/dist/align/getSentenceRanges.cjs +165 -36
package/dist/align/getSentenceRanges.d.cts +8 -2
package/dist/align/getSentenceRanges.d.ts +8 -2
package/dist/align/getSentenceRanges.js +165 -36
package/dist/align/search.cjs +122 -0
package/dist/align/search.d.cts +12 -0
package/dist/align/search.d.ts +12 -0
package/dist/align/search.js +96 -0
package/dist/errorAlign/utils.d.cts +1 -1
package/dist/errorAlign/utils.d.ts +1 -1
package/package.json +3 -3
package/dist/align/fuzzy.cjs +0 -164
package/dist/align/fuzzy.d.cts +0 -6
package/dist/align/fuzzy.d.ts +0 -6
package/dist/align/fuzzy.js +0 -141

package/dist/align/align.cjs CHANGED

@@ -81,16 +81,14 @@ module.exports = __toCommonJS(align_exports);
 var import_promises = require("node:fs/promises");
 var import_node_path = require("node:path");
 var import_posix = require("node:path/posix");
-var import_itertools = require("itertools");
 var import_memoize = __toESM(require("memoize"), 1);
-var import_runes2 = require("runes2");
 var import_audiobook = require("@storyteller-platform/audiobook");
 var import_epub = require("@storyteller-platform/epub");
 var import_ghost_story = require("@storyteller-platform/ghost-story");
 var import_ffmpeg = require("../common/ffmpeg.cjs");
 var import_segmentation = require("../markup/segmentation.cjs");
-var import_fuzzy = require("./fuzzy.cjs");
 var import_getSentenceRanges = require("./getSentenceRanges.cjs");
+var import_search = require("./search.cjs");
 var import_slugify = require("./slugify.cjs");
 async function align(input, output, transcriptionsDir, audiobookDir, options) {
   var _stack = [];
@@ -167,83 +165,6 @@ class Aligner {
   report = {
     chapters: []
   };
-  findBestOffset(epubSentences, transcriptionText, lastMatchOffset, dir = 1) {
-    const reverse = dir < 0;
-    if (dir < 0) {
-      epubSentences = epubSentences.toReversed().map((s) => (0, import_runes2.runes)(s).toReversed().join(""));
-      transcriptionText = (0, import_runes2.runes)(transcriptionText).toReversed().join("");
-      lastMatchOffset = transcriptionText.length - lastMatchOffset;
-    }
-    const flatSliceIndices = [
-      0,
-      ...this.alignedChapters.toSorted(
-        (a, b) => reverse ? transcriptionText.length - a.endOffset - (transcriptionText.length - b.endOffset) : a.startOffset - b.startOffset
-      ).flatMap((aligned) => [
-        reverse ? transcriptionText.length - aligned.endOffset : aligned.startOffset,
-        reverse ? transcriptionText.length - aligned.startOffset : aligned.endOffset
-      ]),
-      transcriptionText.length
-    ];
-    const sliceIndices = [];
-    for (let i = 0; i < flatSliceIndices.length - 1; i += 2) {
-      sliceIndices.push([flatSliceIndices[i], flatSliceIndices[i + 1]]);
-    }
-    const allSlices = [];
-    let startSlice = 0;
-    for (const [i, [start, end]] of (0, import_itertools.enumerate)(sliceIndices)) {
-      if (lastMatchOffset >= start && lastMatchOffset < end) {
-        if (!reverse) {
-          startSlice = i + 1;
-          allSlices.push({
-            start,
-            text: transcriptionText.slice(start, lastMatchOffset)
-          });
-        }
-        allSlices.push({
-          start: lastMatchOffset,
-          text: transcriptionText.slice(lastMatchOffset, end)
-        });
-      } else if (!reverse) {
-        allSlices.push({ start, text: transcriptionText.slice(start, end) });
-      }
-    }
-    const slices = allSlices.filter((slice) => slice.text.length);
-    if (reverse && !slices.length) {
-      const indices = sliceIndices.find(([start]) => start > lastMatchOffset);
-      if (indices) {
-        slices.push({
-          start: indices[0],
-          text: transcriptionText.slice(...indices)
-        });
-      }
-    }
-    for (const slice of slices.slice(startSlice).concat(slices.slice(0, startSlice))) {
-      let startSentence = 0;
-      while (startSentence < epubSentences.length) {
-        const needle = epubSentences.slice(startSentence, startSentence + 6).join("-");
-        const firstMatch = (0, import_fuzzy.findNearestMatch)(
-          needle,
-          slice.text,
-          Math.max(Math.floor(0.1 * needle.length), 1)
-        );
-        if (firstMatch) {
-          const start = reverse ? transcriptionText.length - (slice.start + firstMatch.index) : slice.start + firstMatch.index;
-          return {
-            startSentence: reverse ? epubSentences.length - startSentence : startSentence,
-            transcriptionOffset: start
-          };
-        }
-        startSentence += 3;
-      }
-    }
-    if (reverse) {
-      return {
-        startSentence: epubSentences.length,
-        transcriptionOffset: slices[0] ? transcriptionText.length - slices[0].start : null
-      };
-    }
-    return { startSentence: 0, transcriptionOffset: null };
-  }
   async getChapterSentences(chapterId) {
     const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
     const { result: segmentation } = await (0, import_segmentation.getXhtmlSegmentation)(
@@ -306,7 +227,7 @@ class Aligner {
       value: import_epub.Epub.formatSmilDuration(chapterDuration)
     });
   }
-  addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, transcriptionOffset) {
+  addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, endSentence, transcriptionOffset) {
     this.report.chapters.push({
       href: chapter.href,
       transcriptionOffset,
@@ -330,6 +251,14 @@ class Aligner {
         matchedSentence: chapterSentences[startSentence],
         nextSentence: chapterSentences[startSentence + 1] ?? null
       },
+      lastMatchedSentenceId: endSentence,
+      lastMatchedSentenceContext: {
+        prevSentence: chapterSentences[endSentence - 1] ?? null,
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        matchedSentence: chapterSentences[endSentence],
+        nextSentence: chapterSentences[endSentence + 1] ?? null
+      },
+      chapterSentenceCount: sentenceRanges.length,
       audioFiles: sentenceRanges.reduce((acc, range) => {
         const existing = acc.find(
           (context) => context.filepath === range.audiofile
@@ -347,7 +276,7 @@ class Aligner {
       }, [])
     });
   }
-  async alignChapter(startSentence, endSentence, chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
+  async alignChapter(chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
     const timing = (0, import_ghost_story.createTiming)();
     timing.start("read contents");
     const manifest = await this.epub.getManifest();
@@ -362,9 +291,12 @@ class Aligner {
     const chapterSentences = await this.getChapterSentences(chapterId);
     timing.end("split to sentences");
     timing.start("align sentences");
-    const { sentenceRanges, transcriptionOffset: endTranscriptionOffset } = await (0, import_getSentenceRanges.getSentenceRanges)(
-      startSentence,
-      endSentence,
+    const {
+      sentenceRanges,
+      transcriptionOffset: endTranscriptionOffset,
+      firstFoundSentence,
+      lastFoundSentence
+    } = await (0, import_getSentenceRanges.getSentenceRanges)(
       this.transcription,
       chapterSentences,
       transcriptionOffset,
@@ -392,7 +324,8 @@ class Aligner {
       chapter,
       chapterSentences,
       sentenceRanges,
-      startSentence,
+      firstFoundSentence,
+      lastFoundSentence,
       transcriptionOffset
     );
     return {
@@ -401,8 +334,20 @@ class Aligner {
       timing
     };
   }
+  narrowToAvailableBoundary(boundary) {
+    const narrowed = { ...boundary };
+    for (const chapter of this.alignedChapters) {
+      if (chapter.startOffset > narrowed.start && chapter.startOffset <= narrowed.end) {
+        narrowed.end = chapter.startOffset - 1;
+      }
+      if (chapter.endOffset < narrowed.end && chapter.endOffset >= narrowed.start) {
+        narrowed.start = chapter.endOffset + 1;
+      }
+    }
+    return narrowed;
+  }
   async alignBook(onProgress) {
-    var _a, _b, _c, _d, _e, _f, _g, _h;
+    var _a, _b, _c, _d, _e, _f;
     const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
     this.timing.setMetadata("language", locale.toString());
     this.timing.setMetadata("granularity", this.granularity);
@@ -412,7 +357,6 @@ class Aligner {
       this.transcription.transcript,
       locale
     );
-    let lastTranscriptionOffset = 0;
     for (let index = 0; index < spine.length; index++) {
       onProgress == null ? void 0 : onProgress(index / spine.length);
       const spineItem = spine[index];
@@ -441,48 +385,29 @@ class Aligner {
         );
         continue;
       }
-      const { startSentence, transcriptionOffset: slugifiedOffset } = this.findBestOffset(
-        slugifiedChapterSentences,
-        transcriptionText,
-        mapping.map(lastTranscriptionOffset, -1)
+      const boundaries = (0, import_search.findBoundaries)(
+        slugifiedChapterSentences.join("-"),
+        transcriptionText
       );
-      if (slugifiedOffset === null) {
+      if (!boundaries) {
         (_f = this.logger) == null ? void 0 : _f.info(
-          `Couldn't find matching transcription for chapter #${index}`
+          `Could not find chapter #${index} in the transcripton`
         );
         continue;
       }
-      const transcriptionOffset = mapping.invert().map(slugifiedOffset, -1);
-      const {
-        startSentence: startEndSentence,
-        transcriptionOffset: slugifiedEndOffset
-      } = this.findBestOffset(
-        slugifiedChapterSentences,
-        transcriptionText,
-        Math.min(
-          transcriptionText.length,
-          slugifiedOffset + Math.round(slugifiedChapterSentences.join("-").length * 1.2)
-        ),
-        -1
-      );
-      const endSentence = startEndSentence;
-      const endOffset = slugifiedEndOffset === null ? this.transcription.transcript.length : mapping.invert().map(slugifiedEndOffset, 1);
-      if (endSentence - startSentence < slugifiedChapterSentences.length / 2) {
-        (_g = this.logger) == null ? void 0 : _g.info(`Found less than half of chapter #${index}, skipping`);
+      const { start, end } = this.narrowToAvailableBoundary(boundaries);
+      if (start === end) {
+        continue;
       }
-      (_h = this.logger) == null ? void 0 : _h.info(
-        `Chapter #${index} best matches transcription from ${transcriptionOffset} to ${endOffset}, from sentence ${startSentence} to ${endSentence} (of ${slugifiedChapterSentences.length}) in the book`
-      );
+      const transcriptionOffset = mapping.invert().map(Math.max(start, 0), -1);
+      const endOffset = mapping.invert().map(Math.min(end, transcriptionText.length), 1);
       const result = await this.alignChapter(
-        startSentence,
-        endSentence,
         chapterId,
         transcriptionOffset,
         endOffset,
         locale,
         mapping
       );
-      lastTranscriptionOffset = result.endTranscriptionOffset;
       this.timing.add(result.timing.summary());
     }
     const audioOrderedChapters = this.alignedChapters.toSorted((a, b) => {

package/dist/align/align.d.cts CHANGED

@@ -22,6 +22,13 @@ interface ChapterReport {
         matchedSentence: string;
         nextSentence: string | null;
     };
+    lastMatchedSentenceId: number;
+    lastMatchedSentenceContext: {
+        prevSentence: string | null;
+        matchedSentence: string;
+        nextSentence: string | null;
+    };
+    chapterSentenceCount: number;
     audioFiles: AudioFileContext[];
 }
 interface Report {
@@ -47,11 +54,17 @@ declare class Aligner {
     private granularity;
     report: Report;
     constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
-    private findBestOffset;
     private getChapterSentences;
     private writeAlignedChapter;
     private addChapterReport;
     private alignChapter;
+    narrowToAvailableBoundary(boundary: {
+        start: number;
+        end: number;
+    }): {
+        start: number;
+        end: number;
+    };
     alignBook(onProgress?: ((progress: number) => void) | null): Promise<_storyteller_platform_ghost_story.TimingAggregator>;
 }
 declare function concatTranscriptions(transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], audiofiles: string[]): StorytellerTranscription;

package/dist/align/align.d.ts CHANGED

@@ -22,6 +22,13 @@ interface ChapterReport {
         matchedSentence: string;
         nextSentence: string | null;
     };
+    lastMatchedSentenceId: number;
+    lastMatchedSentenceContext: {
+        prevSentence: string | null;
+        matchedSentence: string;
+        nextSentence: string | null;
+    };
+    chapterSentenceCount: number;
     audioFiles: AudioFileContext[];
 }
 interface Report {
@@ -47,11 +54,17 @@ declare class Aligner {
     private granularity;
     report: Report;
     constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
-    private findBestOffset;
     private getChapterSentences;
     private writeAlignedChapter;
     private addChapterReport;
     private alignChapter;
+    narrowToAvailableBoundary(boundary: {
+        start: number;
+        end: number;
+    }): {
+        start: number;
+        end: number;
+    };
     alignBook(onProgress?: ((progress: number) => void) | null): Promise<_storyteller_platform_ghost_story.TimingAggregator>;
 }
 declare function concatTranscriptions(transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], audiofiles: string[]): StorytellerTranscription;

package/dist/align/align.js CHANGED

@@ -5,9 +5,7 @@ import {
 import { copyFile, mkdir, readFile, readdir, writeFile } from "node:fs/promises";
 import { dirname as autoDirname, join as autoJoin } from "node:path";
 import { basename, dirname, parse, relative } from "node:path/posix";
-import { enumerate } from "itertools";
 import memoize from "memoize";
-import { runes } from "runes2";
 import { isAudioFile, lookupAudioMime } from "@storyteller-platform/audiobook";
 import {
   Epub
@@ -18,13 +16,13 @@ import {
 } from "@storyteller-platform/ghost-story";
 import { getTrackDuration } from "../common/ffmpeg.js";
 import { getXhtmlSegmentation } from "../markup/segmentation.js";
-import { findNearestMatch } from "./fuzzy.js";
 import {
   expandEmptySentenceRanges,
   getChapterDuration,
   getSentenceRanges,
   interpolateSentenceRanges
 } from "./getSentenceRanges.js";
+import { findBoundaries } from "./search.js";
 import { slugify } from "./slugify.js";
 async function align(input, output, transcriptionsDir, audiobookDir, options) {
   var _stack = [];
@@ -101,83 +99,6 @@ class Aligner {
   report = {
     chapters: []
   };
-  findBestOffset(epubSentences, transcriptionText, lastMatchOffset, dir = 1) {
-    const reverse = dir < 0;
-    if (dir < 0) {
-      epubSentences = epubSentences.toReversed().map((s) => runes(s).toReversed().join(""));
-      transcriptionText = runes(transcriptionText).toReversed().join("");
-      lastMatchOffset = transcriptionText.length - lastMatchOffset;
-    }
-    const flatSliceIndices = [
-      0,
-      ...this.alignedChapters.toSorted(
-        (a, b) => reverse ? transcriptionText.length - a.endOffset - (transcriptionText.length - b.endOffset) : a.startOffset - b.startOffset
-      ).flatMap((aligned) => [
-        reverse ? transcriptionText.length - aligned.endOffset : aligned.startOffset,
-        reverse ? transcriptionText.length - aligned.startOffset : aligned.endOffset
-      ]),
-      transcriptionText.length
-    ];
-    const sliceIndices = [];
-    for (let i = 0; i < flatSliceIndices.length - 1; i += 2) {
-      sliceIndices.push([flatSliceIndices[i], flatSliceIndices[i + 1]]);
-    }
-    const allSlices = [];
-    let startSlice = 0;
-    for (const [i, [start, end]] of enumerate(sliceIndices)) {
-      if (lastMatchOffset >= start && lastMatchOffset < end) {
-        if (!reverse) {
-          startSlice = i + 1;
-          allSlices.push({
-            start,
-            text: transcriptionText.slice(start, lastMatchOffset)
-          });
-        }
-        allSlices.push({
-          start: lastMatchOffset,
-          text: transcriptionText.slice(lastMatchOffset, end)
-        });
-      } else if (!reverse) {
-        allSlices.push({ start, text: transcriptionText.slice(start, end) });
-      }
-    }
-    const slices = allSlices.filter((slice) => slice.text.length);
-    if (reverse && !slices.length) {
-      const indices = sliceIndices.find(([start]) => start > lastMatchOffset);
-      if (indices) {
-        slices.push({
-          start: indices[0],
-          text: transcriptionText.slice(...indices)
-        });
-      }
-    }
-    for (const slice of slices.slice(startSlice).concat(slices.slice(0, startSlice))) {
-      let startSentence = 0;
-      while (startSentence < epubSentences.length) {
-        const needle = epubSentences.slice(startSentence, startSentence + 6).join("-");
-        const firstMatch = findNearestMatch(
-          needle,
-          slice.text,
-          Math.max(Math.floor(0.1 * needle.length), 1)
-        );
-        if (firstMatch) {
-          const start = reverse ? transcriptionText.length - (slice.start + firstMatch.index) : slice.start + firstMatch.index;
-          return {
-            startSentence: reverse ? epubSentences.length - startSentence : startSentence,
-            transcriptionOffset: start
-          };
-        }
-        startSentence += 3;
-      }
-    }
-    if (reverse) {
-      return {
-        startSentence: epubSentences.length,
-        transcriptionOffset: slices[0] ? transcriptionText.length - slices[0].start : null
-      };
-    }
-    return { startSentence: 0, transcriptionOffset: null };
-  }
   async getChapterSentences(chapterId) {
     const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
     const { result: segmentation } = await getXhtmlSegmentation(
@@ -240,7 +161,7 @@ class Aligner {
       value: Epub.formatSmilDuration(chapterDuration)
     });
   }
-  addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, transcriptionOffset) {
+  addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, endSentence, transcriptionOffset) {
     this.report.chapters.push({
       href: chapter.href,
       transcriptionOffset,
@@ -264,6 +185,14 @@ class Aligner {
         matchedSentence: chapterSentences[startSentence],
         nextSentence: chapterSentences[startSentence + 1] ?? null
       },
+      lastMatchedSentenceId: endSentence,
+      lastMatchedSentenceContext: {
+        prevSentence: chapterSentences[endSentence - 1] ?? null,
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        matchedSentence: chapterSentences[endSentence],
+        nextSentence: chapterSentences[endSentence + 1] ?? null
+      },
+      chapterSentenceCount: sentenceRanges.length,
       audioFiles: sentenceRanges.reduce((acc, range) => {
         const existing = acc.find(
           (context) => context.filepath === range.audiofile
@@ -281,7 +210,7 @@ class Aligner {
       }, [])
     });
   }
-  async alignChapter(startSentence, endSentence, chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
+  async alignChapter(chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
     const timing = createTiming();
     timing.start("read contents");
     const manifest = await this.epub.getManifest();
@@ -296,9 +225,12 @@ class Aligner {
     const chapterSentences = await this.getChapterSentences(chapterId);
     timing.end("split to sentences");
     timing.start("align sentences");
-    const { sentenceRanges, transcriptionOffset: endTranscriptionOffset } = await getSentenceRanges(
-      startSentence,
-      endSentence,
+    const {
+      sentenceRanges,
+      transcriptionOffset: endTranscriptionOffset,
+      firstFoundSentence,
+      lastFoundSentence
+    } = await getSentenceRanges(
       this.transcription,
       chapterSentences,
       transcriptionOffset,
@@ -326,7 +258,8 @@ class Aligner {
       chapter,
       chapterSentences,
       sentenceRanges,
-      startSentence,
+      firstFoundSentence,
+      lastFoundSentence,
       transcriptionOffset
     );
     return {
@@ -335,8 +268,20 @@ class Aligner {
       timing
     };
   }
+  narrowToAvailableBoundary(boundary) {
+    const narrowed = { ...boundary };
+    for (const chapter of this.alignedChapters) {
+      if (chapter.startOffset > narrowed.start && chapter.startOffset <= narrowed.end) {
+        narrowed.end = chapter.startOffset - 1;
+      }
+      if (chapter.endOffset < narrowed.end && chapter.endOffset >= narrowed.start) {
+        narrowed.start = chapter.endOffset + 1;
+      }
+    }
+    return narrowed;
+  }
   async alignBook(onProgress) {
-    var _a, _b, _c, _d, _e, _f, _g, _h;
+    var _a, _b, _c, _d, _e, _f;
     const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
     this.timing.setMetadata("language", locale.toString());
     this.timing.setMetadata("granularity", this.granularity);
@@ -346,7 +291,6 @@ class Aligner {
       this.transcription.transcript,
       locale
     );
-    let lastTranscriptionOffset = 0;
     for (let index = 0; index < spine.length; index++) {
       onProgress == null ? void 0 : onProgress(index / spine.length);
       const spineItem = spine[index];
@@ -375,48 +319,29 @@ class Aligner {
         );
         continue;
       }
-      const { startSentence, transcriptionOffset: slugifiedOffset } = this.findBestOffset(
-        slugifiedChapterSentences,
-        transcriptionText,
-        mapping.map(lastTranscriptionOffset, -1)
+      const boundaries = findBoundaries(
+        slugifiedChapterSentences.join("-"),
+        transcriptionText
       );
-      if (slugifiedOffset === null) {
+      if (!boundaries) {
         (_f = this.logger) == null ? void 0 : _f.info(
-          `Couldn't find matching transcription for chapter #${index}`
+          `Could not find chapter #${index} in the transcripton`
         );
         continue;
       }
-      const transcriptionOffset = mapping.invert().map(slugifiedOffset, -1);
-      const {
-        startSentence: startEndSentence,
-        transcriptionOffset: slugifiedEndOffset
-      } = this.findBestOffset(
-        slugifiedChapterSentences,
-        transcriptionText,
-        Math.min(
-          transcriptionText.length,
-          slugifiedOffset + Math.round(slugifiedChapterSentences.join("-").length * 1.2)
-        ),
-        -1
-      );
-      const endSentence = startEndSentence;
-      const endOffset = slugifiedEndOffset === null ? this.transcription.transcript.length : mapping.invert().map(slugifiedEndOffset, 1);
-      if (endSentence - startSentence < slugifiedChapterSentences.length / 2) {
-        (_g = this.logger) == null ? void 0 : _g.info(`Found less than half of chapter #${index}, skipping`);
+      const { start, end } = this.narrowToAvailableBoundary(boundaries);
+      if (start === end) {
+        continue;
       }
-      (_h = this.logger) == null ? void 0 : _h.info(
-        `Chapter #${index} best matches transcription from ${transcriptionOffset} to ${endOffset}, from sentence ${startSentence} to ${endSentence} (of ${slugifiedChapterSentences.length}) in the book`
-      );
+      const transcriptionOffset = mapping.invert().map(Math.max(start, 0), -1);
+      const endOffset = mapping.invert().map(Math.min(end, transcriptionText.length), 1);
       const result = await this.alignChapter(
-        startSentence,
-        endSentence,
         chapterId,
         transcriptionOffset,
         endOffset,
         locale,
         mapping
       );
-      lastTranscriptionOffset = result.endTranscriptionOffset;
       this.timing.add(result.timing.summary());
     }
     const audioOrderedChapters = this.alignedChapters.toSorted((a, b) => {