@storyteller-platform/align 0.1.26 → 0.1.27

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -26,6 +26,7 @@ __export(getSentenceRanges_exports, {
26
26
  mapTranscriptionTimeline: () => mapTranscriptionTimeline
27
27
  });
28
28
  module.exports = __toCommonJS(getSentenceRanges_exports);
29
+ var import_fastest_levenshtein = require("fastest-levenshtein");
29
30
  var import_itertools = require("itertools");
30
31
  var import_runes2 = require("runes2");
31
32
  var import_ffmpeg = require("../common/ffmpeg.cjs");
@@ -66,6 +67,9 @@ function getAlignmentsForSentence(sentence, alignments) {
66
67
  if (alignment.opType === "DELETE" || alignment.opType === "INSERT" && sentenceIndex > 0) {
67
68
  score -= (alignment.ref ?? alignment.hyp).length + 1;
68
69
  }
70
+ if (alignment.opType === "SUBSTITUTE") {
71
+ score -= (0, import_fastest_levenshtein.distance)(alignment.ref, alignment.hyp);
72
+ }
69
73
  result.push(alignment);
70
74
  }
71
75
  return {
@@ -210,6 +214,8 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
210
214
  }
211
215
  let alignmentIndex = 0;
212
216
  let currentTranscriptWindowIndex = 0;
217
+ let lastGoodSentenceIndex = slice[0] - 1;
218
+ let lastGoodTranscriptWindowIndex = -1;
213
219
  for (const [j, slugifiedSentence] of (0, import_itertools.enumerate)(
214
220
  slugifiedChapterSentenceWindowList
215
221
  )) {
@@ -220,6 +226,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
220
226
  );
221
227
  const sentenceLengthInSlugifiedTranscript = sentenceAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
222
228
  if (score > 0) {
229
+ lastGoodSentenceIndex = j + slice[0];
223
230
  const start = findStartTimestamp(
224
231
  chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
225
232
  mappedTimeline
@@ -295,9 +302,21 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
295
302
  if (slugifiedChapterTranscriptWindow[currentTranscriptWindowIndex] === "-") {
296
303
  currentTranscriptWindowIndex++;
297
304
  }
305
+ if (score > 0) {
306
+ lastGoodTranscriptWindowIndex = currentTranscriptWindowIndex;
307
+ }
308
+ }
309
+ if (lastGoodSentenceIndex === -1) {
310
+ return {
311
+ sentenceRanges,
312
+ wordRanges,
313
+ transcriptionOffset: chapterTranscriptEndIndex,
314
+ firstFoundSentence,
315
+ lastFoundSentence: chapterSentenceIndex - 1
316
+ };
298
317
  }
299
- chapterSentenceIndex = i;
300
- slugifiedChapterTranscriptWindowStartIndex += currentTranscriptWindowIndex;
318
+ chapterSentenceIndex += lastGoodSentenceIndex + 1;
319
+ slugifiedChapterTranscriptWindowStartIndex += lastGoodTranscriptWindowIndex;
301
320
  if (slugifiedChapterTranscript[slugifiedChapterTranscriptWindowStartIndex] === "-") {
302
321
  slugifiedChapterTranscriptWindowStartIndex++;
303
322
  }
@@ -1,4 +1,5 @@
1
1
  import "../chunk-BIEQXUOY.js";
2
+ import { distance } from "fastest-levenshtein";
2
3
  import { enumerate } from "itertools";
3
4
  import { runes } from "runes2";
4
5
  import { getTrackDuration } from "../common/ffmpeg.js";
@@ -39,6 +40,9 @@ function getAlignmentsForSentence(sentence, alignments) {
39
40
  if (alignment.opType === "DELETE" || alignment.opType === "INSERT" && sentenceIndex > 0) {
40
41
  score -= (alignment.ref ?? alignment.hyp).length + 1;
41
42
  }
43
+ if (alignment.opType === "SUBSTITUTE") {
44
+ score -= distance(alignment.ref, alignment.hyp);
45
+ }
42
46
  result.push(alignment);
43
47
  }
44
48
  return {
@@ -183,6 +187,8 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
183
187
  }
184
188
  let alignmentIndex = 0;
185
189
  let currentTranscriptWindowIndex = 0;
190
+ let lastGoodSentenceIndex = slice[0] - 1;
191
+ let lastGoodTranscriptWindowIndex = -1;
186
192
  for (const [j, slugifiedSentence] of enumerate(
187
193
  slugifiedChapterSentenceWindowList
188
194
  )) {
@@ -193,6 +199,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
193
199
  );
194
200
  const sentenceLengthInSlugifiedTranscript = sentenceAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
195
201
  if (score > 0) {
202
+ lastGoodSentenceIndex = j + slice[0];
196
203
  const start = findStartTimestamp(
197
204
  chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
198
205
  mappedTimeline
@@ -268,9 +275,21 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
268
275
  if (slugifiedChapterTranscriptWindow[currentTranscriptWindowIndex] === "-") {
269
276
  currentTranscriptWindowIndex++;
270
277
  }
278
+ if (score > 0) {
279
+ lastGoodTranscriptWindowIndex = currentTranscriptWindowIndex;
280
+ }
281
+ }
282
+ if (lastGoodSentenceIndex === -1) {
283
+ return {
284
+ sentenceRanges,
285
+ wordRanges,
286
+ transcriptionOffset: chapterTranscriptEndIndex,
287
+ firstFoundSentence,
288
+ lastFoundSentence: chapterSentenceIndex - 1
289
+ };
271
290
  }
272
- chapterSentenceIndex = i;
273
- slugifiedChapterTranscriptWindowStartIndex += currentTranscriptWindowIndex;
291
+ chapterSentenceIndex += lastGoodSentenceIndex + 1;
292
+ slugifiedChapterTranscriptWindowStartIndex += lastGoodTranscriptWindowIndex;
274
293
  if (slugifiedChapterTranscript[slugifiedChapterTranscriptWindowStartIndex] === "-") {
275
294
  slugifiedChapterTranscriptWindowStartIndex++;
276
295
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@storyteller-platform/align",
3
- "version": "0.1.26",
3
+ "version": "0.1.27",
4
4
  "description": "A library and CLI for automatically aligning audiobooks and EPUBs to produce Media Overlays",
5
5
  "author": "Shane Friedman",
6
6
  "license": "MIT",
@@ -66,6 +66,7 @@
66
66
  "chalk": "^5.4.1",
67
67
  "cli-progress": "^3.12.0",
68
68
  "esbuild": "^0.27.3",
69
+ "fastest-levenshtein": "^1.0.16",
69
70
  "itertools": "^2.6.0",
70
71
  "locale-currency": "^1.0.0",
71
72
  "memoize": "^10.2.0",