@storyteller-platform/align 0.1.26 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -78,11 +78,16 @@ __export(align_exports, {
78
78
  concatTranscriptions: () => concatTranscriptions
79
79
  });
80
80
  module.exports = __toCommonJS(align_exports);
81
+ var import_node_crypto = require("node:crypto");
82
+ var import_node_fs = require("node:fs");
81
83
  var import_promises = require("node:fs/promises");
84
+ var import_node_os = require("node:os");
82
85
  var import_node_path = require("node:path");
83
86
  var import_posix = require("node:path/posix");
87
+ var import_shared = require("@readium/shared");
84
88
  var import_itertools = require("itertools");
85
89
  var import_memoize = __toESM(require("memoize"), 1);
90
+ var import_yazl = require("yazl");
86
91
  var import_audiobook = require("@storyteller-platform/audiobook");
87
92
  var import_epub = require("@storyteller-platform/epub");
88
93
  var import_ghost_story = require("@storyteller-platform/ghost-story");
@@ -90,20 +95,24 @@ var import_ffmpeg = require("../common/ffmpeg.cjs");
90
95
  var import_parseDom = require("../markup/parseDom.cjs");
91
96
  var import_segmentation = require("../markup/segmentation.cjs");
92
97
  var import_transform = require("../markup/transform.cjs");
98
+ var import_guidedNavigation = require("../readium/guidedNavigation.cjs");
93
99
  var import_getSentenceRanges = require("./getSentenceRanges.cjs");
94
100
  var import_interpolateSentenceRanges = require("./interpolateSentenceRanges.cjs");
95
101
  var import_search = require("./search.cjs");
96
102
  var import_slugify = require("./slugify.cjs");
97
103
  var import_textFragments = require("./textFragments.cjs");
98
104
  async function align(input, output, transcriptionsDir, audiobookDir, options) {
99
- var _stack = [];
105
+ var _stack2 = [];
100
106
  try {
101
- await (0, import_promises.mkdir)((0, import_posix.dirname)(output), { recursive: true });
102
- await (0, import_promises.copyFile)(input, output);
107
+ const outFormat = options.outFormat ?? "epub";
108
+ if (outFormat === "epub") {
109
+ await (0, import_promises.mkdir)((0, import_posix.dirname)(output), { recursive: true });
110
+ await (0, import_promises.copyFile)(input, output);
111
+ }
103
112
  const audiobookFiles = await (0, import_promises.readdir)(audiobookDir).then(
104
113
  (filenames) => filenames.filter((f) => (0, import_audiobook.isAudioFile)(f)).map((f) => (0, import_node_path.join)(audiobookDir, f))
105
114
  );
106
- const epub = __using(_stack, await import_epub.Epub.from(output));
115
+ const epub = __using(_stack2, await import_epub.Epub.from(outFormat === "epub" ? output : input));
107
116
  const transcriptions = await (0, import_promises.readdir)(transcriptionsDir).then(
108
117
  (filenames) => filenames.filter((f) => f.endsWith(".json")).map((f) => (0, import_node_path.join)(transcriptionsDir, f))
109
118
  ).then(
@@ -135,7 +144,57 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
135
144
  options.logger
136
145
  );
137
146
  const timing = await aligner.alignBook(options.onProgress);
138
- await epub.saveAndClose();
147
+ if (outFormat === "epub") {
148
+ await epub.saveAndClose();
149
+ } else {
150
+ var _stack = [];
151
+ try {
152
+ const guidedNavigationDocuments = await (0, import_guidedNavigation.generateGuidedNavigationDocuments)(epub);
153
+ const manifest = (0, import_guidedNavigation.generateGuidedNavigationManifest)(
154
+ new import_shared.LocalizedString(
155
+ await epub.getTitle() ?? (0, import_posix.basename)(input, (0, import_node_path.extname)(input))
156
+ ),
157
+ guidedNavigationDocuments
158
+ );
159
+ const tmpArchivePath = (0, import_node_path.join)(
160
+ (0, import_node_os.tmpdir)(),
161
+ `storyteller-platform-epub-${(0, import_node_crypto.randomUUID)()}`
162
+ );
163
+ const { promise, resolve } = Promise.withResolvers();
164
+ const zipfile = new import_yazl.ZipFile();
165
+ const writeStream = (0, import_node_fs.createWriteStream)(tmpArchivePath);
166
+ writeStream.on("close", () => {
167
+ resolve();
168
+ });
169
+ const stack = __using(_stack, new AsyncDisposableStack(), true);
170
+ stack.defer(async () => {
171
+ writeStream.close();
172
+ await (0, import_promises.rm)(tmpArchivePath, { force: true });
173
+ });
174
+ zipfile.outputStream.pipe(writeStream);
175
+ zipfile.addBuffer(
176
+ Buffer.from(JSON.stringify(manifest.serialize())),
177
+ "manifest.json"
178
+ );
179
+ for (const doc of guidedNavigationDocuments) {
180
+ const selfLink = doc.links?.findWithRel("self");
181
+ if (!selfLink) continue;
182
+ zipfile.addBuffer(
183
+ Buffer.from(JSON.stringify(doc.serialize())),
184
+ selfLink.href
185
+ );
186
+ }
187
+ zipfile.end();
188
+ await promise;
189
+ await (0, import_promises.cp)(tmpArchivePath, output);
190
+ epub.discardAndClose();
191
+ } catch (_) {
192
+ var _error = _, _hasError = true;
193
+ } finally {
194
+ var _promise = __callDispose(_stack, _error, _hasError);
195
+ _promise && await _promise;
196
+ }
197
+ }
139
198
  if (options.reportsPath) {
140
199
  await (0, import_promises.mkdir)((0, import_node_path.dirname)(options.reportsPath), { recursive: true });
141
200
  await (0, import_promises.writeFile)(
@@ -147,10 +206,10 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
147
206
  );
148
207
  }
149
208
  return timing;
150
- } catch (_) {
151
- var _error = _, _hasError = true;
209
+ } catch (_2) {
210
+ var _error2 = _2, _hasError2 = true;
152
211
  } finally {
153
- __callDispose(_stack, _error, _hasError);
212
+ __callDispose(_stack2, _error2, _hasError2);
154
213
  }
155
214
  }
156
215
  class Aligner {
@@ -187,6 +246,7 @@ class Aligner {
187
246
  const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
188
247
  const { chapter, sentenceRanges, wordRanges, xml } = alignedChapter;
189
248
  const sentences = await this.getChapterSentences(chapter.id);
249
+ let sentenceIdToBlockFragment = null;
190
250
  const sentenceIdToFragment = new Map(
191
251
  sentenceRanges.map((range) => [
192
252
  range.id,
@@ -207,34 +267,67 @@ class Aligner {
207
267
  );
208
268
  const wordRangeMap = new Map(wordRanges.map((w) => [w[0].sentenceId, w]));
209
269
  if (this.textRef === "text-fragment") {
210
- const trie = new import_textFragments.TextFragmentTrie(
211
- sentences.map((s) => s.text.replace("\n", " ")),
270
+ sentenceIdToBlockFragment = /* @__PURE__ */ new Map();
271
+ const blocks = [[]];
272
+ for (const [i, sentence] of (0, import_itertools.enumerate)(sentences)) {
273
+ const text = sentence.text;
274
+ blocks.at(-1)?.push(text);
275
+ if (text.includes("\n") && i < sentences.length - 1) {
276
+ blocks.push([]);
277
+ }
278
+ }
279
+ const blockFactory = new import_textFragments.TextFragmentFactory(
280
+ blocks.map((block) => block.join("")),
212
281
  locale
213
282
  );
214
- for (const range of sentenceRanges) {
215
- const sentence = sentences[range.id];
216
- sentenceIdToFragment.set(
217
- range.id,
218
- trie.findMinimalFragment(
219
- // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
220
- range.id
221
- )
283
+ let sentenceRangeIndex = 0;
284
+ for (const [i, block] of (0, import_itertools.enumerate)(blocks)) {
285
+ sentenceIdToBlockFragment.set(
286
+ sentenceRangeIndex,
287
+ blockFactory.findMinimalFragment(i)
288
+ );
289
+ const sentenceFactory = new import_textFragments.TextFragmentFactory(
290
+ block.map((s) => s.replace("\n", "")),
291
+ locale
222
292
  );
293
+ const blockRanges = sentenceRanges.slice(
294
+ sentenceRangeIndex,
295
+ sentenceRangeIndex + block.length
296
+ );
297
+ for (const [j, range] of (0, import_itertools.enumerate)(blockRanges)) {
298
+ sentenceIdToFragment.set(
299
+ range.id,
300
+ sentenceFactory.findMinimalFragment(j)
301
+ );
302
+ }
223
303
  if (this.granularity === "word") {
224
- const wordRanges2 = wordRangeMap.get(range.id);
225
- const toFragment = wordIdToFragment.get(range.id);
226
- const words = sentence.words.entries.filter((w) => w.text.match(/\S/));
227
- const wordTrie = new import_textFragments.TextFragmentTrie(
228
- words.map((w) => w.text.replace("\n", " ")),
229
- locale
304
+ const wordFactory = new import_textFragments.TextFragmentFactory(
305
+ blockRanges.flatMap((range) => {
306
+ const sentence = sentences[range.id];
307
+ const wordRanges2 = wordRangeMap.get(range.id);
308
+ const toFragment = wordIdToFragment.get(range.id);
309
+ if (!wordRanges2 || !toFragment) return [];
310
+ const words = sentence.words.entries.filter(
311
+ (w) => w.text.match(/\S/)
312
+ );
313
+ return words.map((w) => w.text.replace("\n", ""));
314
+ })
230
315
  );
231
- for (const wordRange of wordRanges2) {
232
- toFragment.set(
233
- wordRange.id,
234
- wordTrie.findMinimalFragment(wordRange.id)
235
- );
316
+ let wordRangeIndex = 0;
317
+ for (const range of blockRanges) {
318
+ const wordRanges2 = wordRangeMap.get(range.id);
319
+ const toFragment = wordIdToFragment.get(range.id);
320
+ if (!wordRanges2 || !toFragment) continue;
321
+ for (const [k, wordRange] of (0, import_itertools.enumerate)(wordRanges2)) {
322
+ toFragment.set(
323
+ wordRange.id,
324
+ wordFactory.findMinimalFragment(k + wordRangeIndex)
325
+ );
326
+ }
327
+ wordRangeIndex += wordRanges2.length;
236
328
  }
237
329
  }
330
+ sentenceRangeIndex += block.length;
238
331
  }
239
332
  }
240
333
  const audiofiles = Array.from(
@@ -274,6 +367,7 @@ class Aligner {
274
367
  this.granularity,
275
368
  sentenceRanges,
276
369
  wordRangeMap,
370
+ sentenceIdToBlockFragment,
277
371
  sentenceIdToFragment,
278
372
  wordIdToFragment
279
373
  ),
@@ -572,7 +666,23 @@ class Aligner {
572
666
  return this.timing;
573
667
  }
574
668
  }
575
- function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
669
+ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToBlockFragment, sentenceIdToFragment, wordIdToFragment) {
670
+ const subSequences = sentenceIdToBlockFragment ? createTextRangeLargeSequences(
671
+ chapter,
672
+ granularity,
673
+ sentenceRanges,
674
+ wordRanges,
675
+ sentenceIdToBlockFragment,
676
+ sentenceIdToFragment,
677
+ wordIdToFragment
678
+ ) : createTextRangeSmallSequences(
679
+ chapter,
680
+ granularity,
681
+ sentenceRanges,
682
+ wordRanges,
683
+ sentenceIdToFragment,
684
+ wordIdToFragment
685
+ );
576
686
  return [
577
687
  import_epub.Epub.createXmlElement(
578
688
  "smil",
@@ -590,60 +700,89 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, se
590
700
  "epub:textref": `../${chapter.href}`,
591
701
  "epub:type": "chapter"
592
702
  },
593
- sentenceRanges.map((sentenceRange) => {
594
- if (granularity === "sentence" || !wordRanges.has(sentenceRange.id)) {
595
- return import_epub.Epub.createXmlElement(
596
- "par",
597
- {
598
- id: `${chapter.id}-s${sentenceRange.id}`
599
- },
600
- [
601
- import_epub.Epub.createXmlElement("text", {
602
- src: `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
603
- }),
604
- import_epub.Epub.createXmlElement("audio", {
605
- src: `../Audio/${(0, import_posix.basename)(sentenceRange.audiofile)}`,
606
- clipBegin: `${sentenceRange.start.toFixed(3)}s`,
607
- clipEnd: `${sentenceRange.end.toFixed(3)}s`
608
- })
609
- ]
610
- );
611
- }
612
- const words = wordRanges.get(sentenceRange.id);
613
- const wordToFragment = wordIdToFragment.get(sentenceRange.id);
614
- return import_epub.Epub.createXmlElement(
615
- "seq",
616
- {
617
- id: `${chapter.id}-s${sentenceRange.id}`,
618
- "epub:type": "text-range-small",
619
- "epub:textref": `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
620
- },
621
- words.map(
622
- (word) => import_epub.Epub.createXmlElement(
623
- "par",
624
- {
625
- id: `${chapter.id}-s${sentenceRange.id}-w${word.id}`
626
- },
627
- [
628
- import_epub.Epub.createXmlElement("text", {
629
- src: `../${chapter.href}#${wordToFragment.get(word.id)}`
630
- }),
631
- import_epub.Epub.createXmlElement("audio", {
632
- src: `../Audio/${(0, import_posix.basename)(word.audiofile)}`,
633
- clipBegin: `${word.start.toFixed(3)}s`,
634
- clipEnd: `${word.end.toFixed(3)}s`
635
- })
636
- ]
637
- )
638
- )
639
- );
640
- })
703
+ subSequences
641
704
  )
642
705
  ])
643
706
  ]
644
707
  )
645
708
  ];
646
709
  }
710
+ function createTextRangeLargeSequences(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToBlockFragment, sentenceIdToFragment, wordIdToFragment) {
711
+ const blockStarts = sentenceIdToBlockFragment.entries().toArray().toSorted(([a], [b]) => a - b);
712
+ return blockStarts.map(([sentenceId, fragment], index) => {
713
+ const blockEnd = index === blockStarts.length - 1 ? sentenceRanges.length - 1 : (
714
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
715
+ blockStarts[index + 1][0]
716
+ );
717
+ const sentences = sentenceRanges.slice(sentenceId, blockEnd);
718
+ return import_epub.Epub.createXmlElement(
719
+ "seq",
720
+ {
721
+ id: `${chapter.id}-b${index}`,
722
+ "epub:type": "text-range-large",
723
+ "epub:textref": `../${chapter.href}#${fragment}`
724
+ },
725
+ createTextRangeSmallSequences(
726
+ chapter,
727
+ granularity,
728
+ sentences,
729
+ wordRanges,
730
+ sentenceIdToFragment,
731
+ wordIdToFragment
732
+ )
733
+ );
734
+ });
735
+ }
736
+ function createTextRangeSmallSequences(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
737
+ return sentenceRanges.map((sentenceRange) => {
738
+ if (granularity === "sentence" || !wordRanges.has(sentenceRange.id)) {
739
+ return import_epub.Epub.createXmlElement(
740
+ "par",
741
+ {
742
+ id: `${chapter.id}-s${sentenceRange.id}`
743
+ },
744
+ [
745
+ import_epub.Epub.createXmlElement("text", {
746
+ src: `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
747
+ }),
748
+ import_epub.Epub.createXmlElement("audio", {
749
+ src: `../Audio/${(0, import_posix.basename)(sentenceRange.audiofile)}`,
750
+ clipBegin: `${sentenceRange.start.toFixed(3)}s`,
751
+ clipEnd: `${sentenceRange.end.toFixed(3)}s`
752
+ })
753
+ ]
754
+ );
755
+ }
756
+ const words = wordRanges.get(sentenceRange.id);
757
+ const wordToFragment = wordIdToFragment.get(sentenceRange.id);
758
+ return import_epub.Epub.createXmlElement(
759
+ "seq",
760
+ {
761
+ id: `${chapter.id}-s${sentenceRange.id}`,
762
+ "epub:type": "text-range-small",
763
+ "epub:textref": `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
764
+ },
765
+ words.map(
766
+ (word) => import_epub.Epub.createXmlElement(
767
+ "par",
768
+ {
769
+ id: `${chapter.id}-s${sentenceRange.id}-w${word.id}`
770
+ },
771
+ [
772
+ import_epub.Epub.createXmlElement("text", {
773
+ src: `../${chapter.href}#${wordToFragment.get(word.id)}`
774
+ }),
775
+ import_epub.Epub.createXmlElement("audio", {
776
+ src: `../Audio/${(0, import_posix.basename)(word.audiofile)}`,
777
+ clipBegin: `${word.start.toFixed(3)}s`,
778
+ clipEnd: `${word.end.toFixed(3)}s`
779
+ })
780
+ ]
781
+ )
782
+ )
783
+ );
784
+ });
785
+ }
647
786
  function concatTranscriptions(transcriptions, audiofiles) {
648
787
  return transcriptions.reduce(
649
788
  (acc, transcription, index) => ({
@@ -41,6 +41,7 @@ interface AlignOptions {
41
41
  reportsPath?: string | null | undefined;
42
42
  granularity?: "sentence" | "word" | null | undefined;
43
43
  textRef?: "id-fragment" | "text-fragment" | null | undefined;
44
+ outFormat?: "epub" | "gnp" | null | undefined;
44
45
  primaryLocale?: Intl.Locale | null | undefined;
45
46
  logger?: Logger | null | undefined;
46
47
  onProgress?: ((progress: number) => void) | null | undefined;
@@ -41,6 +41,7 @@ interface AlignOptions {
41
41
  reportsPath?: string | null | undefined;
42
42
  granularity?: "sentence" | "word" | null | undefined;
43
43
  textRef?: "id-fragment" | "text-fragment" | null | undefined;
44
+ outFormat?: "epub" | "gnp" | null | undefined;
44
45
  primaryLocale?: Intl.Locale | null | undefined;
45
46
  logger?: Logger | null | undefined;
46
47
  onProgress?: ((progress: number) => void) | null | undefined;