@storyteller-platform/align 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/dist/align/__tests__/align.test.cjs +6 -5
  2. package/dist/align/__tests__/align.test.js +6 -5
  3. package/dist/align/align.cjs +133 -81
  4. package/dist/align/align.d.cts +1 -0
  5. package/dist/align/align.d.ts +1 -0
  6. package/dist/align/align.js +133 -81
  7. package/dist/align/getSentenceRanges.cjs +78 -149
  8. package/dist/align/getSentenceRanges.d.cts +1 -1
  9. package/dist/align/getSentenceRanges.d.ts +1 -1
  10. package/dist/align/getSentenceRanges.js +78 -149
  11. package/dist/align/slugify.cjs +16 -8
  12. package/dist/align/slugify.js +16 -8
  13. package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
  14. package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
  15. package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
  16. package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
  17. package/dist/errorAlign/__tests__/native.test.cjs +118 -0
  18. package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
  19. package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
  20. package/dist/errorAlign/__tests__/native.test.js +107 -0
  21. package/dist/errorAlign/backtraceGraph.cjs +298 -0
  22. package/dist/errorAlign/backtraceGraph.d.cts +103 -0
  23. package/dist/errorAlign/backtraceGraph.d.ts +103 -0
  24. package/dist/errorAlign/backtraceGraph.js +270 -0
  25. package/dist/errorAlign/beamSearch.cjs +302 -0
  26. package/dist/errorAlign/beamSearch.d.cts +53 -0
  27. package/dist/errorAlign/beamSearch.d.ts +53 -0
  28. package/dist/errorAlign/beamSearch.js +268 -0
  29. package/dist/errorAlign/core.cjs +33 -0
  30. package/dist/errorAlign/core.d.cts +5 -0
  31. package/dist/errorAlign/core.d.ts +5 -0
  32. package/dist/errorAlign/core.js +11 -0
  33. package/dist/errorAlign/editDistance.cjs +115 -0
  34. package/dist/errorAlign/editDistance.d.cts +46 -0
  35. package/dist/errorAlign/editDistance.d.ts +46 -0
  36. package/dist/errorAlign/editDistance.js +90 -0
  37. package/dist/errorAlign/errorAlign.cjs +159 -0
  38. package/dist/errorAlign/errorAlign.d.cts +15 -0
  39. package/dist/errorAlign/errorAlign.d.ts +15 -0
  40. package/dist/errorAlign/errorAlign.js +145 -0
  41. package/dist/errorAlign/graphMetadata.cjs +97 -0
  42. package/dist/errorAlign/graphMetadata.d.cts +44 -0
  43. package/dist/errorAlign/graphMetadata.d.ts +44 -0
  44. package/dist/errorAlign/graphMetadata.js +64 -0
  45. package/dist/errorAlign/hash.cjs +173 -0
  46. package/dist/errorAlign/hash.d.cts +28 -0
  47. package/dist/errorAlign/hash.d.ts +28 -0
  48. package/dist/errorAlign/hash.js +150 -0
  49. package/dist/errorAlign/native.cjs +60 -0
  50. package/dist/errorAlign/native.d.cts +18 -0
  51. package/dist/errorAlign/native.d.ts +18 -0
  52. package/dist/errorAlign/native.js +24 -0
  53. package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
  54. package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
  55. package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
  56. package/dist/errorAlign/node-gyp-build.d.js +0 -0
  57. package/dist/errorAlign/pathToAlignment.cjs +122 -0
  58. package/dist/errorAlign/pathToAlignment.d.cts +11 -0
  59. package/dist/errorAlign/pathToAlignment.d.ts +11 -0
  60. package/dist/errorAlign/pathToAlignment.js +89 -0
  61. package/dist/errorAlign/utils.cjs +301 -0
  62. package/dist/errorAlign/utils.d.cts +107 -0
  63. package/dist/errorAlign/utils.d.ts +107 -0
  64. package/dist/errorAlign/utils.js +248 -0
  65. package/dist/index.d.cts +1 -0
  66. package/dist/index.d.ts +1 -0
  67. package/dist/markup/__tests__/markup.test.cjs +108 -81
  68. package/dist/markup/__tests__/markup.test.js +109 -82
  69. package/dist/markup/__tests__/parseDom.test.cjs +112 -0
  70. package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
  71. package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
  72. package/dist/markup/__tests__/parseDom.test.js +89 -0
  73. package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
  74. package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
  75. package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
  76. package/dist/markup/__tests__/serializeDom.test.js +97 -0
  77. package/dist/markup/__tests__/transform.test.cjs +122 -0
  78. package/dist/markup/__tests__/transform.test.d.cts +2 -0
  79. package/dist/markup/__tests__/transform.test.d.ts +2 -0
  80. package/dist/markup/__tests__/transform.test.js +99 -0
  81. package/dist/markup/map.cjs +261 -0
  82. package/dist/markup/map.d.cts +50 -0
  83. package/dist/markup/map.d.ts +50 -0
  84. package/dist/markup/map.js +236 -0
  85. package/dist/markup/markup.cjs +23 -201
  86. package/dist/markup/markup.d.cts +5 -9
  87. package/dist/markup/markup.d.ts +5 -9
  88. package/dist/markup/markup.js +24 -203
  89. package/dist/markup/model.cjs +172 -0
  90. package/dist/markup/model.d.cts +57 -0
  91. package/dist/markup/model.d.ts +57 -0
  92. package/dist/markup/model.js +145 -0
  93. package/dist/markup/parseDom.cjs +59 -0
  94. package/dist/markup/parseDom.d.cts +7 -0
  95. package/dist/markup/parseDom.d.ts +7 -0
  96. package/dist/markup/parseDom.js +35 -0
  97. package/dist/markup/segmentation.cjs +11 -57
  98. package/dist/markup/segmentation.d.cts +6 -2
  99. package/dist/markup/segmentation.d.ts +6 -2
  100. package/dist/markup/segmentation.js +11 -58
  101. package/dist/markup/serializeDom.cjs +87 -0
  102. package/dist/markup/serializeDom.d.cts +7 -0
  103. package/dist/markup/serializeDom.d.ts +7 -0
  104. package/dist/markup/serializeDom.js +63 -0
  105. package/dist/markup/transform.cjs +92 -0
  106. package/dist/markup/transform.d.cts +11 -0
  107. package/dist/markup/transform.d.ts +11 -0
  108. package/dist/markup/transform.js +71 -0
  109. package/dist/types/node-gyp-build.d.cjs +1 -0
  110. package/dist/types/node-gyp-build.d.d.cts +3 -0
  111. package/dist/types/node-gyp-build.d.d.ts +3 -0
  112. package/dist/types/node-gyp-build.d.js +0 -0
  113. package/package.json +11 -4
@@ -131,13 +131,13 @@ async function assertAlignSnapshot(context, epub, transcriptionFilepaths) {
131
131
  "utf-8"
132
132
  );
133
133
  const chapterXml = import_epub.Epub.xhtmlParser.parse(chapterContents);
134
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
134
+ const { result: segmentation } = await (0, import_segmentation.getXhtmlSegmentation)(
135
135
  import_epub.Epub.getXhtmlBody(chapterXml),
136
136
  {
137
137
  primaryLocale: new Intl.Locale("en-US")
138
138
  }
139
139
  );
140
- const chapterSentences = segmentation.sentences.map((s) => s.text);
140
+ const chapterSentences = segmentation.map((s) => s.text).filter((s) => s.match(/\S/));
141
141
  for (const par of import_epub.Epub.getXmlChildren(seq)) {
142
142
  newSnapshot += `
143
143
  `;
@@ -151,14 +151,14 @@ async function assertAlignSnapshot(context, epub, transcriptionFilepaths) {
151
151
  if (sentenceId === void 0) continue;
152
152
  const textSentence = chapterSentences[parseInt(sentenceId)];
153
153
  if (!textSentence) continue;
154
- newSnapshot += `Text: ${textSentence}
154
+ newSnapshot += `Text: ${textSentence.replace(/\n/, "")}
155
155
  `;
156
156
  const audioSrc = (_d = audio[":@"]) == null ? void 0 : _d["@_src"];
157
157
  if (!audioSrc) continue;
158
158
  const audioStart = (_e = audio[":@"]) == null ? void 0 : _e["@_clipBegin"];
159
159
  const audioEnd = (_f = audio[":@"]) == null ? void 0 : _f["@_clipEnd"];
160
160
  if (!audioStart || !audioEnd) continue;
161
- const audioStartTime = parseFloat(audioStart.slice(0, -1));
161
+ const audioStartTime = parseFloat(audioStart.slice(0, -1)) - 2e-3;
162
162
  const audioEndTime = parseFloat(audioEnd.slice(0, -1));
163
163
  const audioFilename = (0, import_posix.basename)(audioSrc, (0, import_node_path.extname)(audioSrc));
164
164
  const transcriptionFilepath = transcriptionFilepaths.find(
@@ -271,7 +271,8 @@ void (0, import_node_test.describe)("align", () => {
271
271
  void 0,
272
272
  createTestLogger()
273
273
  );
274
- await aligner.alignBook();
274
+ const timing = await aligner.alignBook();
275
+ if (!process.env["CI"]) timing.print();
275
276
  await assertAlignSnapshot(context, epub, transcriptionFilepaths);
276
277
  } catch (_) {
277
278
  var _error = _, _hasError = true;
@@ -67,13 +67,13 @@ async function assertAlignSnapshot(context, epub, transcriptionFilepaths) {
67
67
  "utf-8"
68
68
  );
69
69
  const chapterXml = Epub.xhtmlParser.parse(chapterContents);
70
- const segmentation = await getXhtmlSegmentation(
70
+ const { result: segmentation } = await getXhtmlSegmentation(
71
71
  Epub.getXhtmlBody(chapterXml),
72
72
  {
73
73
  primaryLocale: new Intl.Locale("en-US")
74
74
  }
75
75
  );
76
- const chapterSentences = segmentation.sentences.map((s) => s.text);
76
+ const chapterSentences = segmentation.map((s) => s.text).filter((s) => s.match(/\S/));
77
77
  for (const par of Epub.getXmlChildren(seq)) {
78
78
  newSnapshot += `
79
79
  `;
@@ -87,14 +87,14 @@ async function assertAlignSnapshot(context, epub, transcriptionFilepaths) {
87
87
  if (sentenceId === void 0) continue;
88
88
  const textSentence = chapterSentences[parseInt(sentenceId)];
89
89
  if (!textSentence) continue;
90
- newSnapshot += `Text: ${textSentence}
90
+ newSnapshot += `Text: ${textSentence.replace(/\n/, "")}
91
91
  `;
92
92
  const audioSrc = (_d = audio[":@"]) == null ? void 0 : _d["@_src"];
93
93
  if (!audioSrc) continue;
94
94
  const audioStart = (_e = audio[":@"]) == null ? void 0 : _e["@_clipBegin"];
95
95
  const audioEnd = (_f = audio[":@"]) == null ? void 0 : _f["@_clipEnd"];
96
96
  if (!audioStart || !audioEnd) continue;
97
- const audioStartTime = parseFloat(audioStart.slice(0, -1));
97
+ const audioStartTime = parseFloat(audioStart.slice(0, -1)) - 2e-3;
98
98
  const audioEndTime = parseFloat(audioEnd.slice(0, -1));
99
99
  const audioFilename = posixBasename(audioSrc, extname(audioSrc));
100
100
  const transcriptionFilepath = transcriptionFilepaths.find(
@@ -207,7 +207,8 @@ void describe("align", () => {
207
207
  void 0,
208
208
  createTestLogger()
209
209
  );
210
- await aligner.alignBook();
210
+ const timing = await aligner.alignBook();
211
+ if (!process.env["CI"]) timing.print();
211
212
  await assertAlignSnapshot(context, epub, transcriptionFilepaths);
212
213
  } catch (_) {
213
214
  var _error = _, _hasError = true;
@@ -81,7 +81,9 @@ module.exports = __toCommonJS(align_exports);
81
81
  var import_promises = require("node:fs/promises");
82
82
  var import_node_path = require("node:path");
83
83
  var import_posix = require("node:path/posix");
84
+ var import_itertools = require("itertools");
84
85
  var import_memoize = __toESM(require("memoize"), 1);
86
+ var import_runes2 = require("runes2");
85
87
  var import_audiobook = require("@storyteller-platform/audiobook");
86
88
  var import_epub = require("@storyteller-platform/epub");
87
89
  var import_ghost_story = require("@storyteller-platform/ghost-story");
@@ -90,7 +92,6 @@ var import_segmentation = require("../markup/segmentation.cjs");
90
92
  var import_fuzzy = require("./fuzzy.cjs");
91
93
  var import_getSentenceRanges = require("./getSentenceRanges.cjs");
92
94
  var import_slugify = require("./slugify.cjs");
93
- const OFFSET_SEARCH_WINDOW_SIZE = 5e3;
94
95
  async function align(input, output, transcriptionsDir, audiobookDir, options) {
95
96
  var _stack = [];
96
97
  try {
@@ -141,6 +142,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
141
142
  class Aligner {
142
143
  constructor(epub, audiofiles, transcriptions, granularity, languageOverride, logger) {
143
144
  this.epub = epub;
145
+ this.audiofiles = audiofiles;
144
146
  this.languageOverride = languageOverride;
145
147
  this.logger = logger;
146
148
  this.transcription = concatTranscriptions(transcriptions, audiofiles);
@@ -155,71 +157,92 @@ class Aligner {
155
157
  report = {
156
158
  chapters: []
157
159
  };
158
- findBestOffset(epubSentences, transcriptionText, lastMatchOffset, mapping) {
159
- let i = 0;
160
- while (i < transcriptionText.length) {
161
- let startSentence = 0;
162
- const proposedStartIndex = (lastMatchOffset + i) % transcriptionText.length;
163
- const proposedEndIndex = (proposedStartIndex + OFFSET_SEARCH_WINDOW_SIZE) % transcriptionText.length;
164
- const wrapping = proposedEndIndex < proposedStartIndex;
165
- let endIndex = wrapping ? transcriptionText.length : proposedEndIndex;
166
- let startIndex = proposedStartIndex;
167
- let startSeen = null;
168
- let endSeen = null;
169
- for (const aligned of this.alignedChapters) {
170
- const alignedStart = mapping.map(aligned.startOffset, -1);
171
- const alignedEnd = mapping.map(aligned.endOffset, -1);
172
- if (startSeen !== null && endSeen === alignedStart) {
173
- endSeen = alignedEnd;
174
- } else {
175
- startSeen = alignedStart;
176
- endSeen = alignedEnd;
177
- }
178
- if (startIndex >= startSeen && startIndex < endSeen) {
179
- startIndex = endSeen;
180
- }
181
- if (endIndex >= startSeen && endIndex <= endSeen) {
182
- endIndex = startSeen;
160
+ findBestOffset(epubSentences, transcriptionText, lastMatchOffset, dir = 1) {
161
+ const reverse = dir < 0;
162
+ if (dir < 0) {
163
+ epubSentences = epubSentences.toReversed().map((s) => (0, import_runes2.runes)(s).toReversed().join(""));
164
+ transcriptionText = (0, import_runes2.runes)(transcriptionText).toReversed().join("");
165
+ lastMatchOffset = transcriptionText.length - lastMatchOffset;
166
+ }
167
+ const flatSliceIndices = [
168
+ 0,
169
+ ...this.alignedChapters.toSorted(
170
+ (a, b) => reverse ? transcriptionText.length - a.endOffset - (transcriptionText.length - b.endOffset) : a.startOffset - b.startOffset
171
+ ).flatMap((aligned) => [
172
+ reverse ? transcriptionText.length - aligned.endOffset : aligned.startOffset,
173
+ reverse ? transcriptionText.length - aligned.startOffset : aligned.endOffset
174
+ ]),
175
+ transcriptionText.length
176
+ ];
177
+ const sliceIndices = [];
178
+ for (let i = 0; i < flatSliceIndices.length - 1; i += 2) {
179
+ sliceIndices.push([flatSliceIndices[i], flatSliceIndices[i + 1]]);
180
+ }
181
+ const allSlices = [];
182
+ let startSlice = 0;
183
+ for (const [i, [start, end]] of (0, import_itertools.enumerate)(sliceIndices)) {
184
+ if (lastMatchOffset >= start && lastMatchOffset < end) {
185
+ if (!reverse) {
186
+ startSlice = i + 1;
187
+ allSlices.push({
188
+ start,
189
+ text: transcriptionText.slice(start, lastMatchOffset)
190
+ });
183
191
  }
192
+ allSlices.push({
193
+ start: lastMatchOffset,
194
+ text: transcriptionText.slice(lastMatchOffset, end)
195
+ });
196
+ } else if (!reverse) {
197
+ allSlices.push({ start, text: transcriptionText.slice(start, end) });
198
+ }
199
+ }
200
+ const slices = allSlices.filter((slice) => slice.text.length);
201
+ if (reverse && !slices.length) {
202
+ const indices = sliceIndices.find(([start]) => start > lastMatchOffset);
203
+ if (indices) {
204
+ slices.push({
205
+ start: indices[0],
206
+ text: transcriptionText.slice(...indices)
207
+ });
184
208
  }
185
- if (startIndex < endIndex) {
186
- const transcriptionTextSlice = transcriptionText.slice(
187
- startIndex,
188
- endIndex
209
+ }
210
+ for (const slice of slices.slice(startSlice).concat(slices.slice(0, startSlice))) {
211
+ let startSentence = 0;
212
+ while (startSentence < epubSentences.length) {
213
+ const needle = epubSentences.slice(startSentence, startSentence + 6).join("-");
214
+ const firstMatch = (0, import_fuzzy.findNearestMatch)(
215
+ needle,
216
+ slice.text,
217
+ Math.max(Math.floor(0.1 * needle.length), 1)
189
218
  );
190
- while (startSentence < epubSentences.length) {
191
- const queryString = epubSentences.slice(startSentence, startSentence + 6).join("-");
192
- const firstMatch = (0, import_fuzzy.findNearestMatch)(
193
- queryString.toLowerCase(),
194
- transcriptionTextSlice.toLowerCase(),
195
- Math.max(Math.floor(0.1 * queryString.length), 1)
196
- );
197
- if (firstMatch) {
198
- return {
199
- startSentence,
200
- transcriptionOffset: (firstMatch.index + startIndex) % transcriptionText.length
201
- };
202
- }
203
- startSentence += 3;
219
+ if (firstMatch) {
220
+ const start = reverse ? transcriptionText.length - (slice.start + firstMatch.index) : slice.start + firstMatch.index;
221
+ return {
222
+ startSentence: reverse ? epubSentences.length - startSentence : startSentence,
223
+ transcriptionOffset: start
224
+ };
204
225
  }
226
+ startSentence += 3;
205
227
  }
206
- if (wrapping) {
207
- i += transcriptionText.length - proposedStartIndex;
208
- } else {
209
- i += Math.floor(OFFSET_SEARCH_WINDOW_SIZE / 2);
210
- }
228
+ }
229
+ if (reverse) {
230
+ return {
231
+ startSentence: epubSentences.length,
232
+ transcriptionOffset: slices[0] ? transcriptionText.length - slices[0].start : null
233
+ };
211
234
  }
212
235
  return { startSentence: 0, transcriptionOffset: null };
213
236
  }
214
237
  async getChapterSentences(chapterId) {
215
238
  const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
216
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
239
+ const { result: segmentation } = await (0, import_segmentation.getXhtmlSegmentation)(
217
240
  import_epub.Epub.getXhtmlBody(chapterXml),
218
241
  {
219
242
  primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
220
243
  }
221
244
  );
222
- return segmentation.sentences.map((s) => s.text);
245
+ return segmentation.map((s) => s.text).filter((s) => s.match(/\S/));
223
246
  }
224
247
  async writeAlignedChapter(alignedChapter) {
225
248
  const { chapter, sentenceRanges, xml } = alignedChapter;
@@ -314,7 +337,7 @@ class Aligner {
314
337
  }, [])
315
338
  });
316
339
  }
317
- async alignChapter(startSentence, chapterId, transcriptionOffset, locale, lastSentenceRange) {
340
+ async alignChapter(startSentence, endSentence, chapterId, transcriptionOffset, transcriptionEndOffset, locale, mapping) {
318
341
  const timing = (0, import_ghost_story.createTiming)();
319
342
  timing.start("read contents");
320
343
  const manifest = await this.epub.getManifest();
@@ -331,20 +354,14 @@ class Aligner {
331
354
  timing.start("align sentences");
332
355
  const { sentenceRanges, transcriptionOffset: endTranscriptionOffset } = await (0, import_getSentenceRanges.getSentenceRanges)(
333
356
  startSentence,
357
+ endSentence,
334
358
  this.transcription,
335
359
  chapterSentences,
336
360
  transcriptionOffset,
337
- locale,
338
- lastSentenceRange
361
+ transcriptionEndOffset,
362
+ locale
339
363
  );
340
364
  timing.end("align sentences");
341
- timing.start("expand ranges");
342
- const interpolated = await (0, import_getSentenceRanges.interpolateSentenceRanges)(
343
- sentenceRanges,
344
- lastSentenceRange
345
- );
346
- const expanded = (0, import_getSentenceRanges.expandEmptySentenceRanges)(interpolated);
347
- timing.end("expand ranges");
348
365
  const storytellerStylesheetUrl = (0, import_posix.relative)(
349
366
  (0, import_posix.dirname)(chapter.href),
350
367
  "Styles/storyteller-readaloud.css"
@@ -357,25 +374,25 @@ class Aligner {
357
374
  this.alignedChapters.push({
358
375
  chapter,
359
376
  xml: chapterXml,
360
- sentenceRanges: expanded,
361
- startOffset: transcriptionOffset,
362
- endOffset: endTranscriptionOffset
377
+ sentenceRanges,
378
+ startOffset: mapping.map(transcriptionOffset),
379
+ endOffset: mapping.map(endTranscriptionOffset, -1)
363
380
  });
364
381
  this.addChapterReport(
365
382
  chapter,
366
383
  chapterSentences,
367
- expanded,
384
+ sentenceRanges,
368
385
  startSentence,
369
386
  transcriptionOffset
370
387
  );
371
388
  return {
372
- lastSentenceRange: expanded[expanded.length - 1] ?? null,
389
+ lastSentenceRange: sentenceRanges.at(-1) ?? null,
373
390
  endTranscriptionOffset,
374
391
  timing
375
392
  };
376
393
  }
377
394
  async alignBook(onProgress) {
378
- var _a, _b, _c, _d, _e, _f, _g;
395
+ var _a, _b, _c, _d, _e, _f, _g, _h;
379
396
  const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
380
397
  this.timing.setMetadata("language", locale.toString());
381
398
  this.timing.setMetadata("granularity", this.granularity);
@@ -386,7 +403,6 @@ class Aligner {
386
403
  locale
387
404
  );
388
405
  let lastTranscriptionOffset = 0;
389
- let lastSentenceRange = null;
390
406
  for (let index = 0; index < spine.length; index++) {
391
407
  onProgress == null ? void 0 : onProgress(index / spine.length);
392
408
  const spineItem = spine[index];
@@ -418,36 +434,72 @@ class Aligner {
418
434
  const { startSentence, transcriptionOffset: slugifiedOffset } = this.findBestOffset(
419
435
  slugifiedChapterSentences,
420
436
  transcriptionText,
421
- mapping.map(lastTranscriptionOffset, -1),
422
- mapping
437
+ mapping.map(lastTranscriptionOffset, -1)
423
438
  );
424
- const transcriptionOffset = slugifiedOffset && mapping.invert().map(slugifiedOffset, -1);
425
- if (transcriptionOffset === null) {
439
+ if (slugifiedOffset === null) {
426
440
  (_f = this.logger) == null ? void 0 : _f.info(
427
441
  `Couldn't find matching transcription for chapter #${index}`
428
442
  );
429
443
  continue;
430
444
  }
431
- (_g = this.logger) == null ? void 0 : _g.info(
432
- `Chapter #${index} best matches transcription at offset ${transcriptionOffset}, starting at sentence ${startSentence}`
445
+ const transcriptionOffset = mapping.invert().map(slugifiedOffset, -1);
446
+ const {
447
+ startSentence: startEndSentence,
448
+ transcriptionOffset: slugifiedEndOffset
449
+ } = this.findBestOffset(
450
+ slugifiedChapterSentences,
451
+ transcriptionText,
452
+ Math.min(
453
+ transcriptionText.length,
454
+ slugifiedOffset + Math.round(slugifiedChapterSentences.join("-").length * 1.2)
455
+ ),
456
+ -1
457
+ );
458
+ const endSentence = startEndSentence;
459
+ const endOffset = slugifiedEndOffset === null ? this.transcription.transcript.length : mapping.invert().map(slugifiedEndOffset, 1);
460
+ if (endSentence - startSentence < slugifiedChapterSentences.length / 2) {
461
+ (_g = this.logger) == null ? void 0 : _g.info(`Found less than half of chapter #${index}, skipping`);
462
+ }
463
+ (_h = this.logger) == null ? void 0 : _h.info(
464
+ `Chapter #${index} best matches transcription from ${transcriptionOffset} to ${endOffset}, from sentence ${startSentence} to ${endSentence} (of ${slugifiedChapterSentences.length}) in the book`
433
465
  );
434
466
  const result = await this.alignChapter(
435
467
  startSentence,
468
+ endSentence,
436
469
  chapterId,
437
470
  transcriptionOffset,
471
+ endOffset,
438
472
  locale,
439
- lastSentenceRange
473
+ mapping
440
474
  );
441
- lastSentenceRange = result.lastSentenceRange;
442
475
  lastTranscriptionOffset = result.endTranscriptionOffset;
443
476
  this.timing.add(result.timing.summary());
444
477
  }
445
- if (lastSentenceRange) {
446
- lastSentenceRange.end = await (0, import_ffmpeg.getTrackDuration)(
447
- lastSentenceRange.audiofile
478
+ const audioOrderedChapters = this.alignedChapters.toSorted((a, b) => {
479
+ const firstRangeA = a.sentenceRanges[0];
480
+ const firstRangeB = b.sentenceRanges[0];
481
+ if (!firstRangeA) return 1;
482
+ if (!firstRangeB) return -1;
483
+ const firstAudiofileIndexA = this.audiofiles.indexOf(
484
+ firstRangeA.audiofile
448
485
  );
449
- }
450
- for (const alignedChapter of this.alignedChapters) {
486
+ const firstAudiofileIndexB = this.audiofiles.indexOf(
487
+ firstRangeB.audiofile
488
+ );
489
+ if (firstAudiofileIndexA === firstAudiofileIndexB) {
490
+ return firstRangeA.start - firstRangeB.start;
491
+ }
492
+ return firstAudiofileIndexA - firstAudiofileIndexB;
493
+ });
494
+ let lastSentenceRange = null;
495
+ for (const alignedChapter of audioOrderedChapters) {
496
+ const interpolated = await (0, import_getSentenceRanges.interpolateSentenceRanges)(
497
+ alignedChapter.sentenceRanges,
498
+ lastSentenceRange
499
+ );
500
+ const expanded = (0, import_getSentenceRanges.expandEmptySentenceRanges)(interpolated);
501
+ alignedChapter.sentenceRanges = expanded;
502
+ lastSentenceRange = expanded.at(-1) ?? null;
451
503
  await this.writeAlignedChapter(alignedChapter);
452
504
  }
453
505
  await this.epub.addMetadata({
@@ -37,6 +37,7 @@ interface AlignOptions {
37
37
  declare function align(input: string, output: string, transcriptionsDir: string, audiobookDir: string, options: AlignOptions): Promise<_storyteller_platform_ghost_story.TimingAggregator>;
38
38
  declare class Aligner {
39
39
  epub: Epub;
40
+ private audiofiles;
40
41
  private languageOverride?;
41
42
  private logger?;
42
43
  private transcription;
@@ -37,6 +37,7 @@ interface AlignOptions {
37
37
  declare function align(input: string, output: string, transcriptionsDir: string, audiobookDir: string, options: AlignOptions): Promise<_storyteller_platform_ghost_story.TimingAggregator>;
38
38
  declare class Aligner {
39
39
  epub: Epub;
40
+ private audiofiles;
40
41
  private languageOverride?;
41
42
  private logger?;
42
43
  private transcription;