@storyteller-platform/align 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,11 +18,13 @@ var __copyProps = (to, from, except, desc) => {
18
18
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
19
  var getSentenceRanges_exports = {};
20
20
  __export(getSentenceRanges_exports, {
21
+ collapseSentenceRangeGaps: () => collapseSentenceRangeGaps,
21
22
  expandEmptySentenceRanges: () => expandEmptySentenceRanges,
22
23
  findEndTimestamp: () => findEndTimestamp,
23
24
  getChapterDuration: () => getChapterDuration,
24
25
  getSentenceRanges: () => getSentenceRanges,
25
- interpolateSentenceRanges: () => interpolateSentenceRanges
26
+ interpolateSentenceRanges: () => interpolateSentenceRanges,
27
+ mapTranscriptionTimeline: () => mapTranscriptionTimeline
26
28
  });
27
29
  module.exports = __toCommonJS(getSentenceRanges_exports);
28
30
  var import_itertools = require("itertools");
@@ -31,9 +33,9 @@ var import_ffmpeg = require("../common/ffmpeg.cjs");
31
33
  var import_errorAlign = require("../errorAlign/errorAlign.cjs");
32
34
  var import_utils = require("../errorAlign/utils.cjs");
33
35
  var import_slugify = require("./slugify.cjs");
34
- function findStartTimestamp(matchStartIndex, transcription) {
35
- const entry = transcription.timeline.find(
36
- (entry2) => (entry2.endOffsetUtf16 ?? 0) > matchStartIndex
36
+ function findStartTimestamp(matchStartIndex, timeline) {
37
+ const entry = timeline.find(
38
+ (entry2) => entry2.mappedEndOffsetUtf16 > matchStartIndex
37
39
  );
38
40
  if (!entry) return null;
39
41
  return {
@@ -42,9 +44,9 @@ function findStartTimestamp(matchStartIndex, transcription) {
42
44
  audiofile: entry.audiofile
43
45
  };
44
46
  }
45
- function findEndTimestamp(matchEndIndex, transcription) {
46
- const entry = transcription.timeline.findLast(
47
- (entry2) => (entry2.startOffsetUtf16 ?? 0) < matchEndIndex
47
+ function findEndTimestamp(matchEndIndex, timeline) {
48
+ const entry = timeline.findLast(
49
+ (entry2) => entry2.mappedStartOffsetUtf16 < matchEndIndex
48
50
  );
49
51
  if (!entry) return null;
50
52
  return {
@@ -135,17 +137,23 @@ function errorAlignWithNarrowing(refSentences, hyp, narrowStart, narrowEnd) {
135
137
  );
136
138
  return { alignments, slice: [slice[0] + narrowed[0], slice[0] + narrowed[1]] };
137
139
  }
138
- async function getSentenceRanges(transcription, sentences, chapterOffset, chapterEndOffset, locale) {
140
+ function mapTranscriptionTimeline(transcription, mapping) {
141
+ return transcription.timeline.map((entry) => ({
142
+ ...entry,
143
+ mappedStartOffsetUtf16: mapping.map(entry.startOffsetUtf16 ?? 0, 1),
144
+ mappedEndOffsetUtf16: mapping.map(entry.endOffsetUtf16 ?? 0, -1)
145
+ }));
146
+ }
147
+ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, chapterId, chapterOffset, chapterEndOffset, granularity, locale) {
139
148
  const sentenceRanges = [];
140
- const fullTranscript = transcription.transcript;
141
- const chapterTranscript = fullTranscript.slice(
149
+ const wordRanges = [];
150
+ const slugifiedChapterTranscript = transcriptionText.slice(
142
151
  chapterOffset,
143
152
  chapterEndOffset
144
153
  );
145
- const { result: slugifiedChapterTranscript, mapping: transcriptMapping } = await (0, import_slugify.slugify)(chapterTranscript, locale);
146
154
  const slugifiedChapterSentences = [];
147
155
  for (const s of sentences) {
148
- const { result } = await (0, import_slugify.slugify)(s, locale);
156
+ const { result } = await (0, import_slugify.slugify)(s.text, locale);
149
157
  slugifiedChapterSentences.push(result);
150
158
  }
151
159
  let firstFoundSentence = 0;
@@ -214,21 +222,16 @@ async function getSentenceRanges(transcription, sentences, chapterOffset, chapte
214
222
  const sentenceLengthInSlugifiedTranscript = sentenceAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
215
223
  if (score > 0) {
216
224
  const start = findStartTimestamp(
217
- chapterOffset + transcriptMapping.invert().map(
218
- slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
219
- 1
220
- ),
221
- transcription
222
- );
223
- chapterTranscriptEndIndex = chapterOffset + transcriptMapping.invert().map(
224
- slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex + sentenceLengthInSlugifiedTranscript,
225
- -1
225
+ chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
226
+ mappedTimeline
226
227
  );
227
- const end = findEndTimestamp(chapterTranscriptEndIndex, transcription);
228
+ chapterTranscriptEndIndex = chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex + sentenceLengthInSlugifiedTranscript;
229
+ const end = findEndTimestamp(chapterTranscriptEndIndex, mappedTimeline);
228
230
  if (start && end) {
229
231
  if (start.audiofile !== end.audiofile) {
230
232
  sentenceRanges.push({
231
233
  id: j + chapterSentenceIndex + slice[0],
234
+ chapterId,
232
235
  start: 0,
233
236
  audiofile: end.audiofile,
234
237
  end: end.end
@@ -236,12 +239,56 @@ async function getSentenceRanges(transcription, sentences, chapterOffset, chapte
236
239
  } else {
237
240
  sentenceRanges.push({
238
241
  id: j + chapterSentenceIndex + slice[0],
242
+ chapterId,
239
243
  start: start.start,
240
244
  audiofile: start.audiofile,
241
245
  end: end.end
242
246
  });
243
247
  }
244
248
  }
249
+ if (granularity === "word") {
250
+ const sentenceSegmentation = sentences[j + chapterSentenceIndex + slice[0]];
251
+ const words = [];
252
+ for (const entry of sentenceSegmentation.words.entries) {
253
+ if (!entry.text.match(/\S/)) continue;
254
+ const { result } = await (0, import_slugify.slugify)(entry.text, locale);
255
+ words.push(result);
256
+ }
257
+ let currentTranscriptWordWindowIndex = currentTranscriptWindowIndex;
258
+ let sentenceAlignmentIndex = 0;
259
+ const perSentenceWordRanges = [];
260
+ for (const [k, word] of (0, import_itertools.enumerate)(words)) {
261
+ if (!word) continue;
262
+ const { alignments: wordAlignments } = getAlignmentsForSentence(
263
+ word,
264
+ sentenceAlignments.slice(sentenceAlignmentIndex)
265
+ );
266
+ const wordLengthInSlugifiedTranscript = wordAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
267
+ const start2 = findStartTimestamp(
268
+ chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWordWindowIndex,
269
+ mappedTimeline
270
+ );
271
+ const end2 = findEndTimestamp(
272
+ chapterOffset + slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWordWindowIndex + wordLengthInSlugifiedTranscript,
273
+ mappedTimeline
274
+ );
275
+ if (start2 && end2) {
276
+ perSentenceWordRanges.push({
277
+ id: k,
278
+ sentenceId: j + chapterSentenceIndex + slice[0],
279
+ start: end2.audiofile === start2.audiofile ? start2.start : 0,
280
+ audiofile: end2.audiofile,
281
+ end: end2.end
282
+ });
283
+ }
284
+ sentenceAlignmentIndex += wordAlignments.length;
285
+ currentTranscriptWordWindowIndex += wordLengthInSlugifiedTranscript;
286
+ if (slugifiedChapterTranscriptWindow[currentTranscriptWordWindowIndex] === "-") {
287
+ currentTranscriptWordWindowIndex++;
288
+ }
289
+ }
290
+ wordRanges.push(perSentenceWordRanges);
291
+ }
245
292
  }
246
293
  alignmentIndex += sentenceAlignments.length;
247
294
  currentTranscriptWindowIndex += sentenceLengthInSlugifiedTranscript;
@@ -257,6 +304,7 @@ async function getSentenceRanges(transcription, sentences, chapterOffset, chapte
257
304
  }
258
305
  return {
259
306
  sentenceRanges,
307
+ wordRanges,
260
308
  transcriptionOffset: chapterTranscriptEndIndex,
261
309
  firstFoundSentence,
262
310
  lastFoundSentence
@@ -268,66 +316,53 @@ async function getLargestGap(trailing, leading) {
268
316
  if (trailingGap > leadingGap) return [trailingGap, trailing.audiofile];
269
317
  return [leadingGap, leading.audiofile];
270
318
  }
271
- async function interpolateSentenceRanges(sentenceRanges, lastSentenceRange) {
319
+ async function interpolateSentenceRanges(sentenceRanges, chapterSentenceCounts) {
272
320
  const interpolated = [];
273
- const [first, ...rest] = sentenceRanges;
274
- if (!first) return interpolated;
275
- if (first.id !== 0) {
276
- const count = first.id;
277
- const crossesAudioBoundary = !lastSentenceRange || first.audiofile !== lastSentenceRange.audiofile;
278
- let diff = crossesAudioBoundary ? first.start : first.start - lastSentenceRange.end;
279
- if (!crossesAudioBoundary && diff <= 0) {
280
- diff = 0.25;
281
- lastSentenceRange.end = first.start - diff;
282
- }
283
- const interpolatedLength = diff / count;
284
- const start = crossesAudioBoundary ? 0 : lastSentenceRange.end;
285
- for (let i = 0; i < count; i++) {
286
- interpolated.push({
287
- id: i,
288
- start: start + interpolatedLength * i,
289
- end: start + interpolatedLength * (i + 1),
290
- audiofile: first.audiofile
291
- });
292
- }
293
- interpolated.push(first);
294
- } else {
295
- rest.unshift(first);
296
- }
297
- for (const sentenceRange of rest) {
298
- if (interpolated.length === 0) {
299
- interpolated.push(sentenceRange);
300
- continue;
301
- }
302
- const lastSentenceRange2 = interpolated[interpolated.length - 1];
303
- const count = sentenceRange.id - lastSentenceRange2.id - 1;
321
+ for (let i = 0; i < sentenceRanges.length; i++) {
322
+ const endRange = sentenceRanges[i];
323
+ const startRange = sentenceRanges[i - 1] ?? {
324
+ id: 0,
325
+ audiofile: endRange.audiofile,
326
+ chapterId: endRange.chapterId,
327
+ start: 0,
328
+ end: 0
329
+ };
330
+ const newChapter = startRange.chapterId !== endRange.chapterId;
331
+ const newAudiofile = startRange.audiofile !== endRange.audiofile;
332
+ const count = newChapter ? chapterSentenceCounts[startRange.chapterId] - startRange.id - 1 : endRange.id - startRange.id - 1;
304
333
  if (count === 0) {
305
- interpolated.push(sentenceRange);
334
+ interpolated.push(endRange);
306
335
  continue;
307
336
  }
308
- const crossesAudioBoundary = sentenceRange.audiofile !== lastSentenceRange2.audiofile;
309
- let [diff, audiofile] = crossesAudioBoundary ? await getLargestGap(lastSentenceRange2, sentenceRange) : [sentenceRange.start - lastSentenceRange2.end, sentenceRange.audiofile];
337
+ let [diff, audiofile] = newAudiofile ? await getLargestGap(startRange, endRange) : [endRange.start - startRange.end, endRange.audiofile];
310
338
  if (diff <= 0) {
311
- if (crossesAudioBoundary) {
312
- const rangeLength = sentenceRange.end - sentenceRange.start;
339
+ if (newAudiofile) {
340
+ const rangeLength = endRange.end - endRange.start;
313
341
  diff = rangeLength < 0.5 ? rangeLength / 2 : 0.25;
314
- sentenceRange.start = diff;
342
+ endRange.start = diff;
315
343
  } else {
316
344
  diff = 0.25;
317
- lastSentenceRange2.end = sentenceRange.start - diff;
345
+ startRange.end = startRange.start - diff;
318
346
  }
319
347
  }
320
348
  const interpolatedLength = diff / count;
321
- const start = crossesAudioBoundary ? 0 : lastSentenceRange2.end;
322
- for (let i = 0; i < count; i++) {
349
+ const start = newAudiofile ? 0 : startRange.end;
350
+ for (let i2 = 0; i2 < count; i2++) {
351
+ let id = startRange.id + i2 + 1;
352
+ let chapterId = startRange.chapterId;
353
+ if (newChapter && i2 > chapterSentenceCounts[startRange.chapterId] - startRange.id) {
354
+ id = i2;
355
+ chapterId = endRange.chapterId;
356
+ }
323
357
  interpolated.push({
324
- id: lastSentenceRange2.id + i + 1,
325
- start: start + interpolatedLength * i,
326
- end: start + interpolatedLength * (i + 1),
358
+ id,
359
+ chapterId,
360
+ start: start + interpolatedLength * i2,
361
+ end: start + interpolatedLength * (i2 + 1),
327
362
  audiofile
328
363
  });
329
364
  }
330
- interpolated.push(sentenceRange);
365
+ interpolated.push(endRange);
331
366
  }
332
367
  return interpolated;
333
368
  }
@@ -345,6 +380,17 @@ function expandEmptySentenceRanges(sentenceRanges) {
345
380
  }
346
381
  return expandedRanges;
347
382
  }
383
+ async function collapseSentenceRangeGaps(sentenceRanges) {
384
+ const collapsed = [];
385
+ for (const [i, sentenceRange] of (0, import_itertools.enumerate)(sentenceRanges)) {
386
+ const nextSentence = sentenceRanges[i + 1];
387
+ const prevSentence = sentenceRanges[i - 1];
388
+ const start = prevSentence?.audiofile !== sentenceRange.audiofile ? 0 : sentenceRange.start;
389
+ const end = nextSentence?.audiofile !== sentenceRange.audiofile ? await (0, import_ffmpeg.getTrackDuration)(sentenceRange.audiofile) : nextSentence.start;
390
+ collapsed.push({ ...sentenceRange, start, end });
391
+ }
392
+ return collapsed;
393
+ }
348
394
  function getChapterDuration(sentenceRanges) {
349
395
  let i = 0;
350
396
  let duration = 0;
@@ -366,9 +412,11 @@ function getChapterDuration(sentenceRanges) {
366
412
  }
367
413
  // Annotate the CommonJS export names for ESM import in node:
368
414
  0 && (module.exports = {
415
+ collapseSentenceRangeGaps,
369
416
  expandEmptySentenceRanges,
370
417
  findEndTimestamp,
371
418
  getChapterDuration,
372
419
  getSentenceRanges,
373
- interpolateSentenceRanges
420
+ interpolateSentenceRanges,
421
+ mapTranscriptionTimeline
374
422
  });
@@ -1,4 +1,7 @@
1
+ import * as _storyteller_platform_ghost_story from '@storyteller-platform/ghost-story';
1
2
  import { TimelineEntry } from '@storyteller-platform/ghost-story';
3
+ import { SegmentationResult } from '@echogarden/text-segmentation';
4
+ import { Mapping } from '@storyteller-platform/transliteration';
2
5
 
3
6
  type StorytellerTimelineEntry = TimelineEntry & {
4
7
  audiofile: string;
@@ -9,29 +12,56 @@ type StorytellerTranscription = {
9
12
  };
10
13
  type SentenceRange = {
11
14
  id: number;
15
+ chapterId: string;
12
16
  start: number;
13
17
  end: number;
14
18
  audiofile: string;
15
19
  };
16
- declare function findEndTimestamp(matchEndIndex: number, transcription: StorytellerTranscription): {
20
+ type WordRange = {
21
+ id: number;
22
+ sentenceId: number;
23
+ start: number;
24
+ end: number;
25
+ audiofile: string;
26
+ };
27
+ declare function findEndTimestamp(matchEndIndex: number, timeline: MappedTimeline): {
17
28
  start: number;
18
29
  end: number;
19
30
  audiofile: string;
20
31
  } | null;
21
- declare function getSentenceRanges(transcription: StorytellerTranscription, sentences: string[], chapterOffset: number, chapterEndOffset: number, locale: Intl.Locale): Promise<{
32
+ declare function mapTranscriptionTimeline(transcription: StorytellerTranscription, mapping: Mapping): {
33
+ mappedStartOffsetUtf16: number;
34
+ mappedEndOffsetUtf16: number;
35
+ type: _storyteller_platform_ghost_story.TimelineEntryType;
36
+ text: string;
37
+ startTime: number;
38
+ endTime: number;
39
+ startOffsetUtf16?: number;
40
+ endOffsetUtf16?: number;
41
+ startOffsetUtf32?: number;
42
+ endOffsetUtf32?: number;
43
+ confidence?: number;
44
+ id?: number;
45
+ timeline?: _storyteller_platform_ghost_story.Timeline;
46
+ audiofile: string;
47
+ }[];
48
+ type MappedTimeline = ReturnType<typeof mapTranscriptionTimeline>;
49
+ declare function getSentenceRanges(transcriptionText: string, mappedTimeline: MappedTimeline, sentences: SegmentationResult["sentences"], chapterId: string, chapterOffset: number, chapterEndOffset: number, granularity: "sentence" | "word", locale: Intl.Locale): Promise<{
22
50
  sentenceRanges: SentenceRange[];
51
+ wordRanges: WordRange[][];
23
52
  transcriptionOffset: number;
24
53
  firstFoundSentence: number;
25
54
  lastFoundSentence: number;
26
55
  }>;
27
- declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], lastSentenceRange: SentenceRange | null): Promise<SentenceRange[]>;
56
+ declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>): Promise<SentenceRange[]>;
28
57
  /**
29
58
  * Whisper sometimes provides words with no time information,
30
59
  * or start and end timestamps that are equal. EpubCheck complains
31
60
  * about these, so we nudge them out a bit to make sure that they're
32
61
  * not truly equal.
33
62
  */
34
- declare function expandEmptySentenceRanges(sentenceRanges: SentenceRange[]): SentenceRange[];
63
+ declare function expandEmptySentenceRanges<Range extends SentenceRange | WordRange>(sentenceRanges: Range[]): Range[];
64
+ declare function collapseSentenceRangeGaps(sentenceRanges: SentenceRange[]): Promise<SentenceRange[]>;
35
65
  declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;
36
66
 
37
- export { type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges };
67
+ export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges, mapTranscriptionTimeline };
@@ -1,4 +1,7 @@
1
+ import * as _storyteller_platform_ghost_story from '@storyteller-platform/ghost-story';
1
2
  import { TimelineEntry } from '@storyteller-platform/ghost-story';
3
+ import { SegmentationResult } from '@echogarden/text-segmentation';
4
+ import { Mapping } from '@storyteller-platform/transliteration';
2
5
 
3
6
  type StorytellerTimelineEntry = TimelineEntry & {
4
7
  audiofile: string;
@@ -9,29 +12,56 @@ type StorytellerTranscription = {
9
12
  };
10
13
  type SentenceRange = {
11
14
  id: number;
15
+ chapterId: string;
12
16
  start: number;
13
17
  end: number;
14
18
  audiofile: string;
15
19
  };
16
- declare function findEndTimestamp(matchEndIndex: number, transcription: StorytellerTranscription): {
20
+ type WordRange = {
21
+ id: number;
22
+ sentenceId: number;
23
+ start: number;
24
+ end: number;
25
+ audiofile: string;
26
+ };
27
+ declare function findEndTimestamp(matchEndIndex: number, timeline: MappedTimeline): {
17
28
  start: number;
18
29
  end: number;
19
30
  audiofile: string;
20
31
  } | null;
21
- declare function getSentenceRanges(transcription: StorytellerTranscription, sentences: string[], chapterOffset: number, chapterEndOffset: number, locale: Intl.Locale): Promise<{
32
+ declare function mapTranscriptionTimeline(transcription: StorytellerTranscription, mapping: Mapping): {
33
+ mappedStartOffsetUtf16: number;
34
+ mappedEndOffsetUtf16: number;
35
+ type: _storyteller_platform_ghost_story.TimelineEntryType;
36
+ text: string;
37
+ startTime: number;
38
+ endTime: number;
39
+ startOffsetUtf16?: number;
40
+ endOffsetUtf16?: number;
41
+ startOffsetUtf32?: number;
42
+ endOffsetUtf32?: number;
43
+ confidence?: number;
44
+ id?: number;
45
+ timeline?: _storyteller_platform_ghost_story.Timeline;
46
+ audiofile: string;
47
+ }[];
48
+ type MappedTimeline = ReturnType<typeof mapTranscriptionTimeline>;
49
+ declare function getSentenceRanges(transcriptionText: string, mappedTimeline: MappedTimeline, sentences: SegmentationResult["sentences"], chapterId: string, chapterOffset: number, chapterEndOffset: number, granularity: "sentence" | "word", locale: Intl.Locale): Promise<{
22
50
  sentenceRanges: SentenceRange[];
51
+ wordRanges: WordRange[][];
23
52
  transcriptionOffset: number;
24
53
  firstFoundSentence: number;
25
54
  lastFoundSentence: number;
26
55
  }>;
27
- declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], lastSentenceRange: SentenceRange | null): Promise<SentenceRange[]>;
56
+ declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], chapterSentenceCounts: Record<string, number>): Promise<SentenceRange[]>;
28
57
  /**
29
58
  * Whisper sometimes provides words with no time information,
30
59
  * or start and end timestamps that are equal. EpubCheck complains
31
60
  * about these, so we nudge them out a bit to make sure that they're
32
61
  * not truly equal.
33
62
  */
34
- declare function expandEmptySentenceRanges(sentenceRanges: SentenceRange[]): SentenceRange[];
63
+ declare function expandEmptySentenceRanges<Range extends SentenceRange | WordRange>(sentenceRanges: Range[]): Range[];
64
+ declare function collapseSentenceRangeGaps(sentenceRanges: SentenceRange[]): Promise<SentenceRange[]>;
35
65
  declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;
36
66
 
37
- export { type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges };
67
+ export { type MappedTimeline, type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, type WordRange, collapseSentenceRangeGaps, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges, mapTranscriptionTimeline };