@storyteller-platform/align 0.1.12 → 0.1.14

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -81,16 +81,14 @@ module.exports = __toCommonJS(align_exports);
81
81
  var import_promises = require("node:fs/promises");
82
82
  var import_node_path = require("node:path");
83
83
  var import_posix = require("node:path/posix");
84
- var import_itertools = require("itertools");
85
84
  var import_memoize = __toESM(require("memoize"), 1);
86
- var import_runes2 = require("runes2");
87
85
  var import_audiobook = require("@storyteller-platform/audiobook");
88
86
  var import_epub = require("@storyteller-platform/epub");
89
87
  var import_ghost_story = require("@storyteller-platform/ghost-story");
90
88
  var import_ffmpeg = require("../common/ffmpeg.cjs");
91
89
  var import_segmentation = require("../markup/segmentation.cjs");
92
- var import_fuzzy = require("./fuzzy.cjs");
93
90
  var import_getSentenceRanges = require("./getSentenceRanges.cjs");
91
+ var import_search = require("./search.cjs");
94
92
  var import_slugify = require("./slugify.cjs");
95
93
  async function align(input, output, transcriptionsDir, audiobookDir, options) {
96
94
  var _stack = [];
@@ -111,7 +109,17 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
111
109
  (contents) => contents.map(
112
110
  (c) => JSON.parse(c)
113
111
  )
114
- );
112
+ ).then((transcriptions2) => {
113
+ return transcriptions2.map((transcription) => {
114
+ if ("wordTimeline" in transcription) {
115
+ return {
116
+ ...transcription,
117
+ timeline: transcription.wordTimeline
118
+ };
119
+ }
120
+ return transcription;
121
+ });
122
+ });
115
123
  const aligner = new Aligner(
116
124
  epub,
117
125
  audiobookFiles,
@@ -157,83 +165,6 @@ class Aligner {
157
165
  report = {
158
166
  chapters: []
159
167
  };
160
- findBestOffset(epubSentences, transcriptionText, lastMatchOffset, dir = 1) {
161
- const reverse = dir < 0;
162
- if (dir < 0) {
163
- epubSentences = epubSentences.toReversed().map((s) => (0, import_runes2.runes)(s).toReversed().join(""));
164
- transcriptionText = (0, import_runes2.runes)(transcriptionText).toReversed().join("");
165
- lastMatchOffset = transcriptionText.length - lastMatchOffset;
166
- }
167
- const flatSliceIndices = [
168
- 0,
169
- ...this.alignedChapters.toSorted(
170
- (a, b) => reverse ? transcriptionText.length - a.endOffset - (transcriptionText.length - b.endOffset) : a.startOffset - b.startOffset
171
- ).flatMap((aligned) => [
172
- reverse ? transcriptionText.length - aligned.endOffset : aligned.startOffset,
173
- reverse ? transcriptionText.length - aligned.startOffset : aligned.endOffset
174
- ]),
175
- transcriptionText.length
176
- ];
177
- const sliceIndices = [];
178
- for (let i = 0; i < flatSliceIndices.length - 1; i += 2) {
179
- sliceIndices.push([flatSliceIndices[i], flatSliceIndices[i + 1]]);
180
- }
181
- const allSlices = [];
182
- let startSlice = 0;
183
- for (const [i, [start, end]] of (0, import_itertools.enumerate)(sliceIndices)) {
184
- if (lastMatchOffset >= start && lastMatchOffset < end) {
185
- if (!reverse) {
186
- startSlice = i + 1;
187
- allSlices.push({
188
- start,
189
- text: transcriptionText.slice(start, lastMatchOffset)
190
- });
191
- }
192
- allSlices.push({
193
- start: lastMatchOffset,
194
- text: transcriptionText.slice(lastMatchOffset, end)
195
- });
196
- } else if (!reverse) {
197
- allSlices.push({ start, text: transcriptionText.slice(start, end) });
198
- }
199
- }
200
- const slices = allSlices.filter((slice) => slice.text.length);
201
- if (reverse && !slices.length) {
202
- const indices = sliceIndices.find(([start]) => start > lastMatchOffset);
203
- if (indices) {
204
- slices.push({
205
- start: indices[0],
206
- text: transcriptionText.slice(...indices)
207
- });
208
- }
209
- }
210
- for (const slice of slices.slice(startSlice).concat(slices.slice(0, startSlice))) {
211
- let startSentence = 0;
212
- while (startSentence < epubSentences.length) {
213
- const needle = epubSentences.slice(startSentence, startSentence + 6).join("-");
214
- const firstMatch = (0, import_fuzzy.findNearestMatch)(
215
- needle,
216
- slice.text,
217
- Math.max(Math.floor(0.1 * needle.length), 1)
218
- );
219
- if (firstMatch) {
220
- const start = reverse ? transcriptionText.length - (slice.start + firstMatch.index) : slice.start + firstMatch.index;
221
- return {
222
- startSentence: reverse ? epubSentences.length - startSentence : startSentence,
223
- transcriptionOffset: start
224
- };
225
- }
226
- startSentence += 3;
227
- }
228
- }
229
- if (reverse) {
230
- return {
231
- startSentence: epubSentences.length,
232
- transcriptionOffset: slices[0] ? transcriptionText.length - slices[0].start : null
233
- };
234
- }
235
- return { startSentence: 0, transcriptionOffset: null };
236
- }
237
168
  async getChapterSentences(chapterId) {
238
169
  const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
239
170
  const { result: segmentation } = await (0, import_segmentation.getXhtmlSegmentation)(
@@ -296,7 +227,7 @@ class Aligner {
296
227
  value: import_epub.Epub.formatSmilDuration(chapterDuration)
297
228
  });
298
229
  }
299
- addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, transcriptionOffset) {
230
+ addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, endSentence, transcriptionOffset) {
300
231
  this.report.chapters.push({
301
232
  href: chapter.href,
302
233
  transcriptionOffset,
@@ -320,6 +251,14 @@ class Aligner {
320
251
  matchedSentence: chapterSentences[startSentence],
321
252
  nextSentence: chapterSentences[startSentence + 1] ?? null
322
253
  },
254
+ lastMatchedSentenceId: endSentence,
255
+ lastMatchedSentenceContext: {
256
+ prevSentence: chapterSentences[endSentence - 1] ?? null,
257
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
258
+ matchedSentence: chapterSentences[endSentence],
259
+ nextSentence: chapterSentences[endSentence + 1] ?? null
260
+ },
261
+ chapterSentenceCount: sentenceRanges.length,
323
262
  audioFiles: sentenceRanges.reduce((acc, range) => {
324
263
  const existing = acc.find(
325
264
  (context) => context.filepath === range.audiofile
@@ -337,7 +276,7 @@ class Aligner {
337
276
  }, [])
338
277
  });
339
278
  }
340
- async alignChapter(startSentence, endSentence, chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
279
+ async alignChapter(chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
341
280
  const timing = (0, import_ghost_story.createTiming)();
342
281
  timing.start("read contents");
343
282
  const manifest = await this.epub.getManifest();
@@ -352,9 +291,12 @@ class Aligner {
352
291
  const chapterSentences = await this.getChapterSentences(chapterId);
353
292
  timing.end("split to sentences");
354
293
  timing.start("align sentences");
355
- const { sentenceRanges, transcriptionOffset: endTranscriptionOffset } = await (0, import_getSentenceRanges.getSentenceRanges)(
356
- startSentence,
357
- endSentence,
294
+ const {
295
+ sentenceRanges,
296
+ transcriptionOffset: endTranscriptionOffset,
297
+ firstFoundSentence,
298
+ lastFoundSentence
299
+ } = await (0, import_getSentenceRanges.getSentenceRanges)(
358
300
  this.transcription,
359
301
  chapterSentences,
360
302
  transcriptionOffset,
@@ -382,7 +324,8 @@ class Aligner {
382
324
  chapter,
383
325
  chapterSentences,
384
326
  sentenceRanges,
385
- startSentence,
327
+ firstFoundSentence,
328
+ lastFoundSentence,
386
329
  transcriptionOffset
387
330
  );
388
331
  return {
@@ -391,8 +334,20 @@ class Aligner {
391
334
  timing
392
335
  };
393
336
  }
337
+ narrowToAvailableBoundary(boundary) {
338
+ const narrowed = { ...boundary };
339
+ for (const chapter of this.alignedChapters) {
340
+ if (chapter.startOffset > narrowed.start && chapter.startOffset <= narrowed.end) {
341
+ narrowed.end = chapter.startOffset - 1;
342
+ }
343
+ if (chapter.endOffset < narrowed.end && chapter.endOffset >= narrowed.start) {
344
+ narrowed.start = chapter.endOffset + 1;
345
+ }
346
+ }
347
+ return narrowed;
348
+ }
394
349
  async alignBook(onProgress) {
395
- var _a, _b, _c, _d, _e, _f, _g, _h;
350
+ var _a, _b, _c, _d, _e, _f;
396
351
  const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
397
352
  this.timing.setMetadata("language", locale.toString());
398
353
  this.timing.setMetadata("granularity", this.granularity);
@@ -402,7 +357,6 @@ class Aligner {
402
357
  this.transcription.transcript,
403
358
  locale
404
359
  );
405
- let lastTranscriptionOffset = 0;
406
360
  for (let index = 0; index < spine.length; index++) {
407
361
  onProgress == null ? void 0 : onProgress(index / spine.length);
408
362
  const spineItem = spine[index];
@@ -431,48 +385,29 @@ class Aligner {
431
385
  );
432
386
  continue;
433
387
  }
434
- const { startSentence, transcriptionOffset: slugifiedOffset } = this.findBestOffset(
435
- slugifiedChapterSentences,
436
- transcriptionText,
437
- mapping.map(lastTranscriptionOffset, -1)
388
+ const boundaries = (0, import_search.findBoundaries)(
389
+ slugifiedChapterSentences.join("-"),
390
+ transcriptionText
438
391
  );
439
- if (slugifiedOffset === null) {
392
+ if (!boundaries) {
440
393
  (_f = this.logger) == null ? void 0 : _f.info(
441
- `Couldn't find matching transcription for chapter #${index}`
394
+ `Could not find chapter #${index} in the transcripton`
442
395
  );
443
396
  continue;
444
397
  }
445
- const transcriptionOffset = mapping.invert().map(slugifiedOffset, -1);
446
- const {
447
- startSentence: startEndSentence,
448
- transcriptionOffset: slugifiedEndOffset
449
- } = this.findBestOffset(
450
- slugifiedChapterSentences,
451
- transcriptionText,
452
- Math.min(
453
- transcriptionText.length,
454
- slugifiedOffset + Math.round(slugifiedChapterSentences.join("-").length * 1.2)
455
- ),
456
- -1
457
- );
458
- const endSentence = startEndSentence;
459
- const endOffset = slugifiedEndOffset === null ? this.transcription.transcript.length : mapping.invert().map(slugifiedEndOffset, 1);
460
- if (endSentence - startSentence < slugifiedChapterSentences.length / 2) {
461
- (_g = this.logger) == null ? void 0 : _g.info(`Found less than half of chapter #${index}, skipping`);
398
+ const { start, end } = this.narrowToAvailableBoundary(boundaries);
399
+ if (start === end) {
400
+ continue;
462
401
  }
463
- (_h = this.logger) == null ? void 0 : _h.info(
464
- `Chapter #${index} best matches transcription from ${transcriptionOffset} to ${endOffset}, from sentence ${startSentence} to ${endSentence} (of ${slugifiedChapterSentences.length}) in the book`
465
- );
402
+ const transcriptionOffset = mapping.invert().map(Math.max(start, 0), -1);
403
+ const endOffset = mapping.invert().map(Math.min(end, transcriptionText.length), 1);
466
404
  const result = await this.alignChapter(
467
- startSentence,
468
- endSentence,
469
405
  chapterId,
470
406
  transcriptionOffset,
471
407
  endOffset,
472
408
  locale,
473
409
  mapping
474
410
  );
475
- lastTranscriptionOffset = result.endTranscriptionOffset;
476
411
  this.timing.add(result.timing.summary());
477
412
  }
478
413
  const audioOrderedChapters = this.alignedChapters.toSorted((a, b) => {
@@ -22,6 +22,13 @@ interface ChapterReport {
22
22
  matchedSentence: string;
23
23
  nextSentence: string | null;
24
24
  };
25
+ lastMatchedSentenceId: number;
26
+ lastMatchedSentenceContext: {
27
+ prevSentence: string | null;
28
+ matchedSentence: string;
29
+ nextSentence: string | null;
30
+ };
31
+ chapterSentenceCount: number;
25
32
  audioFiles: AudioFileContext[];
26
33
  }
27
34
  interface Report {
@@ -47,11 +54,17 @@ declare class Aligner {
47
54
  private granularity;
48
55
  report: Report;
49
56
  constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
50
- private findBestOffset;
51
57
  private getChapterSentences;
52
58
  private writeAlignedChapter;
53
59
  private addChapterReport;
54
60
  private alignChapter;
61
+ narrowToAvailableBoundary(boundary: {
62
+ start: number;
63
+ end: number;
64
+ }): {
65
+ start: number;
66
+ end: number;
67
+ };
55
68
  alignBook(onProgress?: ((progress: number) => void) | null): Promise<_storyteller_platform_ghost_story.TimingAggregator>;
56
69
  }
57
70
  declare function concatTranscriptions(transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], audiofiles: string[]): StorytellerTranscription;
@@ -22,6 +22,13 @@ interface ChapterReport {
22
22
  matchedSentence: string;
23
23
  nextSentence: string | null;
24
24
  };
25
+ lastMatchedSentenceId: number;
26
+ lastMatchedSentenceContext: {
27
+ prevSentence: string | null;
28
+ matchedSentence: string;
29
+ nextSentence: string | null;
30
+ };
31
+ chapterSentenceCount: number;
25
32
  audioFiles: AudioFileContext[];
26
33
  }
27
34
  interface Report {
@@ -47,11 +54,17 @@ declare class Aligner {
47
54
  private granularity;
48
55
  report: Report;
49
56
  constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
50
- private findBestOffset;
51
57
  private getChapterSentences;
52
58
  private writeAlignedChapter;
53
59
  private addChapterReport;
54
60
  private alignChapter;
61
+ narrowToAvailableBoundary(boundary: {
62
+ start: number;
63
+ end: number;
64
+ }): {
65
+ start: number;
66
+ end: number;
67
+ };
55
68
  alignBook(onProgress?: ((progress: number) => void) | null): Promise<_storyteller_platform_ghost_story.TimingAggregator>;
56
69
  }
57
70
  declare function concatTranscriptions(transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], audiofiles: string[]): StorytellerTranscription;
@@ -5,9 +5,7 @@ import {
5
5
  import { copyFile, mkdir, readFile, readdir, writeFile } from "node:fs/promises";
6
6
  import { dirname as autoDirname, join as autoJoin } from "node:path";
7
7
  import { basename, dirname, parse, relative } from "node:path/posix";
8
- import { enumerate } from "itertools";
9
8
  import memoize from "memoize";
10
- import { runes } from "runes2";
11
9
  import { isAudioFile, lookupAudioMime } from "@storyteller-platform/audiobook";
12
10
  import {
13
11
  Epub
@@ -18,13 +16,13 @@ import {
18
16
  } from "@storyteller-platform/ghost-story";
19
17
  import { getTrackDuration } from "../common/ffmpeg.js";
20
18
  import { getXhtmlSegmentation } from "../markup/segmentation.js";
21
- import { findNearestMatch } from "./fuzzy.js";
22
19
  import {
23
20
  expandEmptySentenceRanges,
24
21
  getChapterDuration,
25
22
  getSentenceRanges,
26
23
  interpolateSentenceRanges
27
24
  } from "./getSentenceRanges.js";
25
+ import { findBoundaries } from "./search.js";
28
26
  import { slugify } from "./slugify.js";
29
27
  async function align(input, output, transcriptionsDir, audiobookDir, options) {
30
28
  var _stack = [];
@@ -45,7 +43,17 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
45
43
  (contents) => contents.map(
46
44
  (c) => JSON.parse(c)
47
45
  )
48
- );
46
+ ).then((transcriptions2) => {
47
+ return transcriptions2.map((transcription) => {
48
+ if ("wordTimeline" in transcription) {
49
+ return {
50
+ ...transcription,
51
+ timeline: transcription.wordTimeline
52
+ };
53
+ }
54
+ return transcription;
55
+ });
56
+ });
49
57
  const aligner = new Aligner(
50
58
  epub,
51
59
  audiobookFiles,
@@ -91,83 +99,6 @@ class Aligner {
91
99
  report = {
92
100
  chapters: []
93
101
  };
94
- findBestOffset(epubSentences, transcriptionText, lastMatchOffset, dir = 1) {
95
- const reverse = dir < 0;
96
- if (dir < 0) {
97
- epubSentences = epubSentences.toReversed().map((s) => runes(s).toReversed().join(""));
98
- transcriptionText = runes(transcriptionText).toReversed().join("");
99
- lastMatchOffset = transcriptionText.length - lastMatchOffset;
100
- }
101
- const flatSliceIndices = [
102
- 0,
103
- ...this.alignedChapters.toSorted(
104
- (a, b) => reverse ? transcriptionText.length - a.endOffset - (transcriptionText.length - b.endOffset) : a.startOffset - b.startOffset
105
- ).flatMap((aligned) => [
106
- reverse ? transcriptionText.length - aligned.endOffset : aligned.startOffset,
107
- reverse ? transcriptionText.length - aligned.startOffset : aligned.endOffset
108
- ]),
109
- transcriptionText.length
110
- ];
111
- const sliceIndices = [];
112
- for (let i = 0; i < flatSliceIndices.length - 1; i += 2) {
113
- sliceIndices.push([flatSliceIndices[i], flatSliceIndices[i + 1]]);
114
- }
115
- const allSlices = [];
116
- let startSlice = 0;
117
- for (const [i, [start, end]] of enumerate(sliceIndices)) {
118
- if (lastMatchOffset >= start && lastMatchOffset < end) {
119
- if (!reverse) {
120
- startSlice = i + 1;
121
- allSlices.push({
122
- start,
123
- text: transcriptionText.slice(start, lastMatchOffset)
124
- });
125
- }
126
- allSlices.push({
127
- start: lastMatchOffset,
128
- text: transcriptionText.slice(lastMatchOffset, end)
129
- });
130
- } else if (!reverse) {
131
- allSlices.push({ start, text: transcriptionText.slice(start, end) });
132
- }
133
- }
134
- const slices = allSlices.filter((slice) => slice.text.length);
135
- if (reverse && !slices.length) {
136
- const indices = sliceIndices.find(([start]) => start > lastMatchOffset);
137
- if (indices) {
138
- slices.push({
139
- start: indices[0],
140
- text: transcriptionText.slice(...indices)
141
- });
142
- }
143
- }
144
- for (const slice of slices.slice(startSlice).concat(slices.slice(0, startSlice))) {
145
- let startSentence = 0;
146
- while (startSentence < epubSentences.length) {
147
- const needle = epubSentences.slice(startSentence, startSentence + 6).join("-");
148
- const firstMatch = findNearestMatch(
149
- needle,
150
- slice.text,
151
- Math.max(Math.floor(0.1 * needle.length), 1)
152
- );
153
- if (firstMatch) {
154
- const start = reverse ? transcriptionText.length - (slice.start + firstMatch.index) : slice.start + firstMatch.index;
155
- return {
156
- startSentence: reverse ? epubSentences.length - startSentence : startSentence,
157
- transcriptionOffset: start
158
- };
159
- }
160
- startSentence += 3;
161
- }
162
- }
163
- if (reverse) {
164
- return {
165
- startSentence: epubSentences.length,
166
- transcriptionOffset: slices[0] ? transcriptionText.length - slices[0].start : null
167
- };
168
- }
169
- return { startSentence: 0, transcriptionOffset: null };
170
- }
171
102
  async getChapterSentences(chapterId) {
172
103
  const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
173
104
  const { result: segmentation } = await getXhtmlSegmentation(
@@ -230,7 +161,7 @@ class Aligner {
230
161
  value: Epub.formatSmilDuration(chapterDuration)
231
162
  });
232
163
  }
233
- addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, transcriptionOffset) {
164
+ addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, endSentence, transcriptionOffset) {
234
165
  this.report.chapters.push({
235
166
  href: chapter.href,
236
167
  transcriptionOffset,
@@ -254,6 +185,14 @@ class Aligner {
254
185
  matchedSentence: chapterSentences[startSentence],
255
186
  nextSentence: chapterSentences[startSentence + 1] ?? null
256
187
  },
188
+ lastMatchedSentenceId: endSentence,
189
+ lastMatchedSentenceContext: {
190
+ prevSentence: chapterSentences[endSentence - 1] ?? null,
191
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
192
+ matchedSentence: chapterSentences[endSentence],
193
+ nextSentence: chapterSentences[endSentence + 1] ?? null
194
+ },
195
+ chapterSentenceCount: sentenceRanges.length,
257
196
  audioFiles: sentenceRanges.reduce((acc, range) => {
258
197
  const existing = acc.find(
259
198
  (context) => context.filepath === range.audiofile
@@ -271,7 +210,7 @@ class Aligner {
271
210
  }, [])
272
211
  });
273
212
  }
274
- async alignChapter(startSentence, endSentence, chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
213
+ async alignChapter(chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
275
214
  const timing = createTiming();
276
215
  timing.start("read contents");
277
216
  const manifest = await this.epub.getManifest();
@@ -286,9 +225,12 @@ class Aligner {
286
225
  const chapterSentences = await this.getChapterSentences(chapterId);
287
226
  timing.end("split to sentences");
288
227
  timing.start("align sentences");
289
- const { sentenceRanges, transcriptionOffset: endTranscriptionOffset } = await getSentenceRanges(
290
- startSentence,
291
- endSentence,
228
+ const {
229
+ sentenceRanges,
230
+ transcriptionOffset: endTranscriptionOffset,
231
+ firstFoundSentence,
232
+ lastFoundSentence
233
+ } = await getSentenceRanges(
292
234
  this.transcription,
293
235
  chapterSentences,
294
236
  transcriptionOffset,
@@ -316,7 +258,8 @@ class Aligner {
316
258
  chapter,
317
259
  chapterSentences,
318
260
  sentenceRanges,
319
- startSentence,
261
+ firstFoundSentence,
262
+ lastFoundSentence,
320
263
  transcriptionOffset
321
264
  );
322
265
  return {
@@ -325,8 +268,20 @@ class Aligner {
325
268
  timing
326
269
  };
327
270
  }
271
+ narrowToAvailableBoundary(boundary) {
272
+ const narrowed = { ...boundary };
273
+ for (const chapter of this.alignedChapters) {
274
+ if (chapter.startOffset > narrowed.start && chapter.startOffset <= narrowed.end) {
275
+ narrowed.end = chapter.startOffset - 1;
276
+ }
277
+ if (chapter.endOffset < narrowed.end && chapter.endOffset >= narrowed.start) {
278
+ narrowed.start = chapter.endOffset + 1;
279
+ }
280
+ }
281
+ return narrowed;
282
+ }
328
283
  async alignBook(onProgress) {
329
- var _a, _b, _c, _d, _e, _f, _g, _h;
284
+ var _a, _b, _c, _d, _e, _f;
330
285
  const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
331
286
  this.timing.setMetadata("language", locale.toString());
332
287
  this.timing.setMetadata("granularity", this.granularity);
@@ -336,7 +291,6 @@ class Aligner {
336
291
  this.transcription.transcript,
337
292
  locale
338
293
  );
339
- let lastTranscriptionOffset = 0;
340
294
  for (let index = 0; index < spine.length; index++) {
341
295
  onProgress == null ? void 0 : onProgress(index / spine.length);
342
296
  const spineItem = spine[index];
@@ -365,48 +319,29 @@ class Aligner {
365
319
  );
366
320
  continue;
367
321
  }
368
- const { startSentence, transcriptionOffset: slugifiedOffset } = this.findBestOffset(
369
- slugifiedChapterSentences,
370
- transcriptionText,
371
- mapping.map(lastTranscriptionOffset, -1)
322
+ const boundaries = findBoundaries(
323
+ slugifiedChapterSentences.join("-"),
324
+ transcriptionText
372
325
  );
373
- if (slugifiedOffset === null) {
326
+ if (!boundaries) {
374
327
  (_f = this.logger) == null ? void 0 : _f.info(
375
- `Couldn't find matching transcription for chapter #${index}`
328
+ `Could not find chapter #${index} in the transcripton`
376
329
  );
377
330
  continue;
378
331
  }
379
- const transcriptionOffset = mapping.invert().map(slugifiedOffset, -1);
380
- const {
381
- startSentence: startEndSentence,
382
- transcriptionOffset: slugifiedEndOffset
383
- } = this.findBestOffset(
384
- slugifiedChapterSentences,
385
- transcriptionText,
386
- Math.min(
387
- transcriptionText.length,
388
- slugifiedOffset + Math.round(slugifiedChapterSentences.join("-").length * 1.2)
389
- ),
390
- -1
391
- );
392
- const endSentence = startEndSentence;
393
- const endOffset = slugifiedEndOffset === null ? this.transcription.transcript.length : mapping.invert().map(slugifiedEndOffset, 1);
394
- if (endSentence - startSentence < slugifiedChapterSentences.length / 2) {
395
- (_g = this.logger) == null ? void 0 : _g.info(`Found less than half of chapter #${index}, skipping`);
332
+ const { start, end } = this.narrowToAvailableBoundary(boundaries);
333
+ if (start === end) {
334
+ continue;
396
335
  }
397
- (_h = this.logger) == null ? void 0 : _h.info(
398
- `Chapter #${index} best matches transcription from ${transcriptionOffset} to ${endOffset}, from sentence ${startSentence} to ${endSentence} (of ${slugifiedChapterSentences.length}) in the book`
399
- );
336
+ const transcriptionOffset = mapping.invert().map(Math.max(start, 0), -1);
337
+ const endOffset = mapping.invert().map(Math.min(end, transcriptionText.length), 1);
400
338
  const result = await this.alignChapter(
401
- startSentence,
402
- endSentence,
403
339
  chapterId,
404
340
  transcriptionOffset,
405
341
  endOffset,
406
342
  locale,
407
343
  mapping
408
344
  );
409
- lastTranscriptionOffset = result.endTranscriptionOffset;
410
345
  this.timing.add(result.timing.summary());
411
346
  }
412
347
  const audioOrderedChapters = this.alignedChapters.toSorted((a, b) => {