@storyteller-platform/align 0.1.13 → 0.1.15

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -81,16 +81,14 @@ module.exports = __toCommonJS(align_exports);
81
81
  var import_promises = require("node:fs/promises");
82
82
  var import_node_path = require("node:path");
83
83
  var import_posix = require("node:path/posix");
84
- var import_itertools = require("itertools");
85
84
  var import_memoize = __toESM(require("memoize"), 1);
86
- var import_runes2 = require("runes2");
87
85
  var import_audiobook = require("@storyteller-platform/audiobook");
88
86
  var import_epub = require("@storyteller-platform/epub");
89
87
  var import_ghost_story = require("@storyteller-platform/ghost-story");
90
88
  var import_ffmpeg = require("../common/ffmpeg.cjs");
91
89
  var import_segmentation = require("../markup/segmentation.cjs");
92
- var import_fuzzy = require("./fuzzy.cjs");
93
90
  var import_getSentenceRanges = require("./getSentenceRanges.cjs");
91
+ var import_search = require("./search.cjs");
94
92
  var import_slugify = require("./slugify.cjs");
95
93
  async function align(input, output, transcriptionsDir, audiobookDir, options) {
96
94
  var _stack = [];
@@ -167,83 +165,6 @@ class Aligner {
167
165
  report = {
168
166
  chapters: []
169
167
  };
170
- findBestOffset(epubSentences, transcriptionText, lastMatchOffset, dir = 1) {
171
- const reverse = dir < 0;
172
- if (dir < 0) {
173
- epubSentences = epubSentences.toReversed().map((s) => (0, import_runes2.runes)(s).toReversed().join(""));
174
- transcriptionText = (0, import_runes2.runes)(transcriptionText).toReversed().join("");
175
- lastMatchOffset = transcriptionText.length - lastMatchOffset;
176
- }
177
- const flatSliceIndices = [
178
- 0,
179
- ...this.alignedChapters.toSorted(
180
- (a, b) => reverse ? transcriptionText.length - a.endOffset - (transcriptionText.length - b.endOffset) : a.startOffset - b.startOffset
181
- ).flatMap((aligned) => [
182
- reverse ? transcriptionText.length - aligned.endOffset : aligned.startOffset,
183
- reverse ? transcriptionText.length - aligned.startOffset : aligned.endOffset
184
- ]),
185
- transcriptionText.length
186
- ];
187
- const sliceIndices = [];
188
- for (let i = 0; i < flatSliceIndices.length - 1; i += 2) {
189
- sliceIndices.push([flatSliceIndices[i], flatSliceIndices[i + 1]]);
190
- }
191
- const allSlices = [];
192
- let startSlice = 0;
193
- for (const [i, [start, end]] of (0, import_itertools.enumerate)(sliceIndices)) {
194
- if (lastMatchOffset >= start && lastMatchOffset < end) {
195
- if (!reverse) {
196
- startSlice = i + 1;
197
- allSlices.push({
198
- start,
199
- text: transcriptionText.slice(start, lastMatchOffset)
200
- });
201
- }
202
- allSlices.push({
203
- start: lastMatchOffset,
204
- text: transcriptionText.slice(lastMatchOffset, end)
205
- });
206
- } else if (!reverse) {
207
- allSlices.push({ start, text: transcriptionText.slice(start, end) });
208
- }
209
- }
210
- const slices = allSlices.filter((slice) => slice.text.length);
211
- if (reverse && !slices.length) {
212
- const indices = sliceIndices.find(([start]) => start > lastMatchOffset);
213
- if (indices) {
214
- slices.push({
215
- start: indices[0],
216
- text: transcriptionText.slice(...indices)
217
- });
218
- }
219
- }
220
- for (const slice of slices.slice(startSlice).concat(slices.slice(0, startSlice))) {
221
- let startSentence = 0;
222
- while (startSentence < epubSentences.length) {
223
- const needle = epubSentences.slice(startSentence, startSentence + 6).join("-");
224
- const firstMatch = (0, import_fuzzy.findNearestMatch)(
225
- needle,
226
- slice.text,
227
- Math.max(Math.floor(0.1 * needle.length), 1)
228
- );
229
- if (firstMatch) {
230
- const start = reverse ? transcriptionText.length - (slice.start + firstMatch.index) : slice.start + firstMatch.index;
231
- return {
232
- startSentence: reverse ? epubSentences.length - startSentence : startSentence,
233
- transcriptionOffset: start
234
- };
235
- }
236
- startSentence += 3;
237
- }
238
- }
239
- if (reverse) {
240
- return {
241
- startSentence: epubSentences.length,
242
- transcriptionOffset: slices[0] ? transcriptionText.length - slices[0].start : null
243
- };
244
- }
245
- return { startSentence: 0, transcriptionOffset: null };
246
- }
247
168
  async getChapterSentences(chapterId) {
248
169
  const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
249
170
  const { result: segmentation } = await (0, import_segmentation.getXhtmlSegmentation)(
@@ -306,7 +227,7 @@ class Aligner {
306
227
  value: import_epub.Epub.formatSmilDuration(chapterDuration)
307
228
  });
308
229
  }
309
- addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, transcriptionOffset) {
230
+ addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, endSentence, transcriptionOffset) {
310
231
  this.report.chapters.push({
311
232
  href: chapter.href,
312
233
  transcriptionOffset,
@@ -330,6 +251,14 @@ class Aligner {
330
251
  matchedSentence: chapterSentences[startSentence],
331
252
  nextSentence: chapterSentences[startSentence + 1] ?? null
332
253
  },
254
+ lastMatchedSentenceId: endSentence,
255
+ lastMatchedSentenceContext: {
256
+ prevSentence: chapterSentences[endSentence - 1] ?? null,
257
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
258
+ matchedSentence: chapterSentences[endSentence],
259
+ nextSentence: chapterSentences[endSentence + 1] ?? null
260
+ },
261
+ chapterSentenceCount: sentenceRanges.length,
333
262
  audioFiles: sentenceRanges.reduce((acc, range) => {
334
263
  const existing = acc.find(
335
264
  (context) => context.filepath === range.audiofile
@@ -347,7 +276,7 @@ class Aligner {
347
276
  }, [])
348
277
  });
349
278
  }
350
- async alignChapter(startSentence, endSentence, chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
279
+ async alignChapter(chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
351
280
  const timing = (0, import_ghost_story.createTiming)();
352
281
  timing.start("read contents");
353
282
  const manifest = await this.epub.getManifest();
@@ -362,9 +291,12 @@ class Aligner {
362
291
  const chapterSentences = await this.getChapterSentences(chapterId);
363
292
  timing.end("split to sentences");
364
293
  timing.start("align sentences");
365
- const { sentenceRanges, transcriptionOffset: endTranscriptionOffset } = await (0, import_getSentenceRanges.getSentenceRanges)(
366
- startSentence,
367
- endSentence,
294
+ const {
295
+ sentenceRanges,
296
+ transcriptionOffset: endTranscriptionOffset,
297
+ firstFoundSentence,
298
+ lastFoundSentence
299
+ } = await (0, import_getSentenceRanges.getSentenceRanges)(
368
300
  this.transcription,
369
301
  chapterSentences,
370
302
  transcriptionOffset,
@@ -392,7 +324,8 @@ class Aligner {
392
324
  chapter,
393
325
  chapterSentences,
394
326
  sentenceRanges,
395
- startSentence,
327
+ firstFoundSentence,
328
+ lastFoundSentence,
396
329
  transcriptionOffset
397
330
  );
398
331
  return {
@@ -401,8 +334,20 @@ class Aligner {
401
334
  timing
402
335
  };
403
336
  }
337
+ narrowToAvailableBoundary(boundary) {
338
+ const narrowed = { ...boundary };
339
+ for (const chapter of this.alignedChapters) {
340
+ if (chapter.startOffset > narrowed.start && chapter.startOffset <= narrowed.end) {
341
+ narrowed.end = chapter.startOffset - 1;
342
+ }
343
+ if (chapter.endOffset < narrowed.end && chapter.endOffset >= narrowed.start) {
344
+ narrowed.start = chapter.endOffset + 1;
345
+ }
346
+ }
347
+ return narrowed;
348
+ }
404
349
  async alignBook(onProgress) {
405
- var _a, _b, _c, _d, _e, _f, _g, _h;
350
+ var _a, _b, _c, _d, _e, _f;
406
351
  const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
407
352
  this.timing.setMetadata("language", locale.toString());
408
353
  this.timing.setMetadata("granularity", this.granularity);
@@ -412,7 +357,6 @@ class Aligner {
412
357
  this.transcription.transcript,
413
358
  locale
414
359
  );
415
- let lastTranscriptionOffset = 0;
416
360
  for (let index = 0; index < spine.length; index++) {
417
361
  onProgress == null ? void 0 : onProgress(index / spine.length);
418
362
  const spineItem = spine[index];
@@ -441,48 +385,29 @@ class Aligner {
441
385
  );
442
386
  continue;
443
387
  }
444
- const { startSentence, transcriptionOffset: slugifiedOffset } = this.findBestOffset(
445
- slugifiedChapterSentences,
446
- transcriptionText,
447
- mapping.map(lastTranscriptionOffset, -1)
388
+ const boundaries = (0, import_search.findBoundaries)(
389
+ slugifiedChapterSentences.join("-"),
390
+ transcriptionText
448
391
  );
449
- if (slugifiedOffset === null) {
392
+ if (!boundaries) {
450
393
  (_f = this.logger) == null ? void 0 : _f.info(
451
- `Couldn't find matching transcription for chapter #${index}`
394
+ `Could not find chapter #${index} in the transcripton`
452
395
  );
453
396
  continue;
454
397
  }
455
- const transcriptionOffset = mapping.invert().map(slugifiedOffset, -1);
456
- const {
457
- startSentence: startEndSentence,
458
- transcriptionOffset: slugifiedEndOffset
459
- } = this.findBestOffset(
460
- slugifiedChapterSentences,
461
- transcriptionText,
462
- Math.min(
463
- transcriptionText.length,
464
- slugifiedOffset + Math.round(slugifiedChapterSentences.join("-").length * 1.2)
465
- ),
466
- -1
467
- );
468
- const endSentence = startEndSentence;
469
- const endOffset = slugifiedEndOffset === null ? this.transcription.transcript.length : mapping.invert().map(slugifiedEndOffset, 1);
470
- if (endSentence - startSentence < slugifiedChapterSentences.length / 2) {
471
- (_g = this.logger) == null ? void 0 : _g.info(`Found less than half of chapter #${index}, skipping`);
398
+ const { start, end } = this.narrowToAvailableBoundary(boundaries);
399
+ if (start === end) {
400
+ continue;
472
401
  }
473
- (_h = this.logger) == null ? void 0 : _h.info(
474
- `Chapter #${index} best matches transcription from ${transcriptionOffset} to ${endOffset}, from sentence ${startSentence} to ${endSentence} (of ${slugifiedChapterSentences.length}) in the book`
475
- );
402
+ const transcriptionOffset = mapping.invert().map(Math.max(start, 0), -1);
403
+ const endOffset = mapping.invert().map(Math.min(end, transcriptionText.length), 1);
476
404
  const result = await this.alignChapter(
477
- startSentence,
478
- endSentence,
479
405
  chapterId,
480
406
  transcriptionOffset,
481
407
  endOffset,
482
408
  locale,
483
409
  mapping
484
410
  );
485
- lastTranscriptionOffset = result.endTranscriptionOffset;
486
411
  this.timing.add(result.timing.summary());
487
412
  }
488
413
  const audioOrderedChapters = this.alignedChapters.toSorted((a, b) => {
@@ -22,6 +22,13 @@ interface ChapterReport {
22
22
  matchedSentence: string;
23
23
  nextSentence: string | null;
24
24
  };
25
+ lastMatchedSentenceId: number;
26
+ lastMatchedSentenceContext: {
27
+ prevSentence: string | null;
28
+ matchedSentence: string;
29
+ nextSentence: string | null;
30
+ };
31
+ chapterSentenceCount: number;
25
32
  audioFiles: AudioFileContext[];
26
33
  }
27
34
  interface Report {
@@ -47,11 +54,17 @@ declare class Aligner {
47
54
  private granularity;
48
55
  report: Report;
49
56
  constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
50
- private findBestOffset;
51
57
  private getChapterSentences;
52
58
  private writeAlignedChapter;
53
59
  private addChapterReport;
54
60
  private alignChapter;
61
+ narrowToAvailableBoundary(boundary: {
62
+ start: number;
63
+ end: number;
64
+ }): {
65
+ start: number;
66
+ end: number;
67
+ };
55
68
  alignBook(onProgress?: ((progress: number) => void) | null): Promise<_storyteller_platform_ghost_story.TimingAggregator>;
56
69
  }
57
70
  declare function concatTranscriptions(transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], audiofiles: string[]): StorytellerTranscription;
@@ -22,6 +22,13 @@ interface ChapterReport {
22
22
  matchedSentence: string;
23
23
  nextSentence: string | null;
24
24
  };
25
+ lastMatchedSentenceId: number;
26
+ lastMatchedSentenceContext: {
27
+ prevSentence: string | null;
28
+ matchedSentence: string;
29
+ nextSentence: string | null;
30
+ };
31
+ chapterSentenceCount: number;
25
32
  audioFiles: AudioFileContext[];
26
33
  }
27
34
  interface Report {
@@ -47,11 +54,17 @@ declare class Aligner {
47
54
  private granularity;
48
55
  report: Report;
49
56
  constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
50
- private findBestOffset;
51
57
  private getChapterSentences;
52
58
  private writeAlignedChapter;
53
59
  private addChapterReport;
54
60
  private alignChapter;
61
+ narrowToAvailableBoundary(boundary: {
62
+ start: number;
63
+ end: number;
64
+ }): {
65
+ start: number;
66
+ end: number;
67
+ };
55
68
  alignBook(onProgress?: ((progress: number) => void) | null): Promise<_storyteller_platform_ghost_story.TimingAggregator>;
56
69
  }
57
70
  declare function concatTranscriptions(transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], audiofiles: string[]): StorytellerTranscription;
@@ -5,9 +5,7 @@ import {
5
5
  import { copyFile, mkdir, readFile, readdir, writeFile } from "node:fs/promises";
6
6
  import { dirname as autoDirname, join as autoJoin } from "node:path";
7
7
  import { basename, dirname, parse, relative } from "node:path/posix";
8
- import { enumerate } from "itertools";
9
8
  import memoize from "memoize";
10
- import { runes } from "runes2";
11
9
  import { isAudioFile, lookupAudioMime } from "@storyteller-platform/audiobook";
12
10
  import {
13
11
  Epub
@@ -18,13 +16,13 @@ import {
18
16
  } from "@storyteller-platform/ghost-story";
19
17
  import { getTrackDuration } from "../common/ffmpeg.js";
20
18
  import { getXhtmlSegmentation } from "../markup/segmentation.js";
21
- import { findNearestMatch } from "./fuzzy.js";
22
19
  import {
23
20
  expandEmptySentenceRanges,
24
21
  getChapterDuration,
25
22
  getSentenceRanges,
26
23
  interpolateSentenceRanges
27
24
  } from "./getSentenceRanges.js";
25
+ import { findBoundaries } from "./search.js";
28
26
  import { slugify } from "./slugify.js";
29
27
  async function align(input, output, transcriptionsDir, audiobookDir, options) {
30
28
  var _stack = [];
@@ -101,83 +99,6 @@ class Aligner {
101
99
  report = {
102
100
  chapters: []
103
101
  };
104
- findBestOffset(epubSentences, transcriptionText, lastMatchOffset, dir = 1) {
105
- const reverse = dir < 0;
106
- if (dir < 0) {
107
- epubSentences = epubSentences.toReversed().map((s) => runes(s).toReversed().join(""));
108
- transcriptionText = runes(transcriptionText).toReversed().join("");
109
- lastMatchOffset = transcriptionText.length - lastMatchOffset;
110
- }
111
- const flatSliceIndices = [
112
- 0,
113
- ...this.alignedChapters.toSorted(
114
- (a, b) => reverse ? transcriptionText.length - a.endOffset - (transcriptionText.length - b.endOffset) : a.startOffset - b.startOffset
115
- ).flatMap((aligned) => [
116
- reverse ? transcriptionText.length - aligned.endOffset : aligned.startOffset,
117
- reverse ? transcriptionText.length - aligned.startOffset : aligned.endOffset
118
- ]),
119
- transcriptionText.length
120
- ];
121
- const sliceIndices = [];
122
- for (let i = 0; i < flatSliceIndices.length - 1; i += 2) {
123
- sliceIndices.push([flatSliceIndices[i], flatSliceIndices[i + 1]]);
124
- }
125
- const allSlices = [];
126
- let startSlice = 0;
127
- for (const [i, [start, end]] of enumerate(sliceIndices)) {
128
- if (lastMatchOffset >= start && lastMatchOffset < end) {
129
- if (!reverse) {
130
- startSlice = i + 1;
131
- allSlices.push({
132
- start,
133
- text: transcriptionText.slice(start, lastMatchOffset)
134
- });
135
- }
136
- allSlices.push({
137
- start: lastMatchOffset,
138
- text: transcriptionText.slice(lastMatchOffset, end)
139
- });
140
- } else if (!reverse) {
141
- allSlices.push({ start, text: transcriptionText.slice(start, end) });
142
- }
143
- }
144
- const slices = allSlices.filter((slice) => slice.text.length);
145
- if (reverse && !slices.length) {
146
- const indices = sliceIndices.find(([start]) => start > lastMatchOffset);
147
- if (indices) {
148
- slices.push({
149
- start: indices[0],
150
- text: transcriptionText.slice(...indices)
151
- });
152
- }
153
- }
154
- for (const slice of slices.slice(startSlice).concat(slices.slice(0, startSlice))) {
155
- let startSentence = 0;
156
- while (startSentence < epubSentences.length) {
157
- const needle = epubSentences.slice(startSentence, startSentence + 6).join("-");
158
- const firstMatch = findNearestMatch(
159
- needle,
160
- slice.text,
161
- Math.max(Math.floor(0.1 * needle.length), 1)
162
- );
163
- if (firstMatch) {
164
- const start = reverse ? transcriptionText.length - (slice.start + firstMatch.index) : slice.start + firstMatch.index;
165
- return {
166
- startSentence: reverse ? epubSentences.length - startSentence : startSentence,
167
- transcriptionOffset: start
168
- };
169
- }
170
- startSentence += 3;
171
- }
172
- }
173
- if (reverse) {
174
- return {
175
- startSentence: epubSentences.length,
176
- transcriptionOffset: slices[0] ? transcriptionText.length - slices[0].start : null
177
- };
178
- }
179
- return { startSentence: 0, transcriptionOffset: null };
180
- }
181
102
  async getChapterSentences(chapterId) {
182
103
  const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
183
104
  const { result: segmentation } = await getXhtmlSegmentation(
@@ -240,7 +161,7 @@ class Aligner {
240
161
  value: Epub.formatSmilDuration(chapterDuration)
241
162
  });
242
163
  }
243
- addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, transcriptionOffset) {
164
+ addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, endSentence, transcriptionOffset) {
244
165
  this.report.chapters.push({
245
166
  href: chapter.href,
246
167
  transcriptionOffset,
@@ -264,6 +185,14 @@ class Aligner {
264
185
  matchedSentence: chapterSentences[startSentence],
265
186
  nextSentence: chapterSentences[startSentence + 1] ?? null
266
187
  },
188
+ lastMatchedSentenceId: endSentence,
189
+ lastMatchedSentenceContext: {
190
+ prevSentence: chapterSentences[endSentence - 1] ?? null,
191
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
192
+ matchedSentence: chapterSentences[endSentence],
193
+ nextSentence: chapterSentences[endSentence + 1] ?? null
194
+ },
195
+ chapterSentenceCount: sentenceRanges.length,
267
196
  audioFiles: sentenceRanges.reduce((acc, range) => {
268
197
  const existing = acc.find(
269
198
  (context) => context.filepath === range.audiofile
@@ -281,7 +210,7 @@ class Aligner {
281
210
  }, [])
282
211
  });
283
212
  }
284
- async alignChapter(startSentence, endSentence, chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
213
+ async alignChapter(chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
285
214
  const timing = createTiming();
286
215
  timing.start("read contents");
287
216
  const manifest = await this.epub.getManifest();
@@ -296,9 +225,12 @@ class Aligner {
296
225
  const chapterSentences = await this.getChapterSentences(chapterId);
297
226
  timing.end("split to sentences");
298
227
  timing.start("align sentences");
299
- const { sentenceRanges, transcriptionOffset: endTranscriptionOffset } = await getSentenceRanges(
300
- startSentence,
301
- endSentence,
228
+ const {
229
+ sentenceRanges,
230
+ transcriptionOffset: endTranscriptionOffset,
231
+ firstFoundSentence,
232
+ lastFoundSentence
233
+ } = await getSentenceRanges(
302
234
  this.transcription,
303
235
  chapterSentences,
304
236
  transcriptionOffset,
@@ -326,7 +258,8 @@ class Aligner {
326
258
  chapter,
327
259
  chapterSentences,
328
260
  sentenceRanges,
329
- startSentence,
261
+ firstFoundSentence,
262
+ lastFoundSentence,
330
263
  transcriptionOffset
331
264
  );
332
265
  return {
@@ -335,8 +268,20 @@ class Aligner {
335
268
  timing
336
269
  };
337
270
  }
271
+ narrowToAvailableBoundary(boundary) {
272
+ const narrowed = { ...boundary };
273
+ for (const chapter of this.alignedChapters) {
274
+ if (chapter.startOffset > narrowed.start && chapter.startOffset <= narrowed.end) {
275
+ narrowed.end = chapter.startOffset - 1;
276
+ }
277
+ if (chapter.endOffset < narrowed.end && chapter.endOffset >= narrowed.start) {
278
+ narrowed.start = chapter.endOffset + 1;
279
+ }
280
+ }
281
+ return narrowed;
282
+ }
338
283
  async alignBook(onProgress) {
339
- var _a, _b, _c, _d, _e, _f, _g, _h;
284
+ var _a, _b, _c, _d, _e, _f;
340
285
  const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
341
286
  this.timing.setMetadata("language", locale.toString());
342
287
  this.timing.setMetadata("granularity", this.granularity);
@@ -346,7 +291,6 @@ class Aligner {
346
291
  this.transcription.transcript,
347
292
  locale
348
293
  );
349
- let lastTranscriptionOffset = 0;
350
294
  for (let index = 0; index < spine.length; index++) {
351
295
  onProgress == null ? void 0 : onProgress(index / spine.length);
352
296
  const spineItem = spine[index];
@@ -375,48 +319,29 @@ class Aligner {
375
319
  );
376
320
  continue;
377
321
  }
378
- const { startSentence, transcriptionOffset: slugifiedOffset } = this.findBestOffset(
379
- slugifiedChapterSentences,
380
- transcriptionText,
381
- mapping.map(lastTranscriptionOffset, -1)
322
+ const boundaries = findBoundaries(
323
+ slugifiedChapterSentences.join("-"),
324
+ transcriptionText
382
325
  );
383
- if (slugifiedOffset === null) {
326
+ if (!boundaries) {
384
327
  (_f = this.logger) == null ? void 0 : _f.info(
385
- `Couldn't find matching transcription for chapter #${index}`
328
+ `Could not find chapter #${index} in the transcripton`
386
329
  );
387
330
  continue;
388
331
  }
389
- const transcriptionOffset = mapping.invert().map(slugifiedOffset, -1);
390
- const {
391
- startSentence: startEndSentence,
392
- transcriptionOffset: slugifiedEndOffset
393
- } = this.findBestOffset(
394
- slugifiedChapterSentences,
395
- transcriptionText,
396
- Math.min(
397
- transcriptionText.length,
398
- slugifiedOffset + Math.round(slugifiedChapterSentences.join("-").length * 1.2)
399
- ),
400
- -1
401
- );
402
- const endSentence = startEndSentence;
403
- const endOffset = slugifiedEndOffset === null ? this.transcription.transcript.length : mapping.invert().map(slugifiedEndOffset, 1);
404
- if (endSentence - startSentence < slugifiedChapterSentences.length / 2) {
405
- (_g = this.logger) == null ? void 0 : _g.info(`Found less than half of chapter #${index}, skipping`);
332
+ const { start, end } = this.narrowToAvailableBoundary(boundaries);
333
+ if (start === end) {
334
+ continue;
406
335
  }
407
- (_h = this.logger) == null ? void 0 : _h.info(
408
- `Chapter #${index} best matches transcription from ${transcriptionOffset} to ${endOffset}, from sentence ${startSentence} to ${endSentence} (of ${slugifiedChapterSentences.length}) in the book`
409
- );
336
+ const transcriptionOffset = mapping.invert().map(Math.max(start, 0), -1);
337
+ const endOffset = mapping.invert().map(Math.min(end, transcriptionText.length), 1);
410
338
  const result = await this.alignChapter(
411
- startSentence,
412
- endSentence,
413
339
  chapterId,
414
340
  transcriptionOffset,
415
341
  endOffset,
416
342
  locale,
417
343
  mapping
418
344
  );
419
- lastTranscriptionOffset = result.endTranscriptionOffset;
420
345
  this.timing.add(result.timing.summary());
421
346
  }
422
347
  const audioOrderedChapters = this.alignedChapters.toSorted((a, b) => {