@storyteller-platform/align 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -91,6 +91,7 @@ var import_segmentation = require("../markup/segmentation.cjs");
91
91
  var import_getSentenceRanges = require("./getSentenceRanges.cjs");
92
92
  var import_search = require("./search.cjs");
93
93
  var import_slugify = require("./slugify.cjs");
94
+ var import_textFragments = require("./textFragments.cjs");
94
95
  async function align(input, output, transcriptionsDir, audiobookDir, options) {
95
96
  var _stack = [];
96
97
  try {
@@ -126,6 +127,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
126
127
  audiobookFiles,
127
128
  transcriptions,
128
129
  options.granularity,
130
+ options.textRef,
129
131
  options.primaryLocale,
130
132
  options.logger
131
133
  );
@@ -149,7 +151,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
149
151
  }
150
152
  }
151
153
  class Aligner {
152
- constructor(epub, audiofiles, transcriptions, granularity, languageOverride, logger) {
154
+ constructor(epub, audiofiles, transcriptions, granularity, textRef, languageOverride, logger) {
153
155
  this.epub = epub;
154
156
  this.audiofiles = audiofiles;
155
157
  this.languageOverride = languageOverride;
@@ -157,12 +159,14 @@ class Aligner {
157
159
  this.transcription = concatTranscriptions(transcriptions, audiofiles);
158
160
  this.getChapterSentences = (0, import_memoize.default)(this.getChapterSentences.bind(this));
159
161
  this.granularity = granularity ?? "sentence";
162
+ this.textRef = textRef ?? "id-fragment";
160
163
  }
161
164
  transcription;
162
165
  totalDuration = 0;
163
166
  alignedChapters = [];
164
167
  timing = (0, import_ghost_story.createAggregator)();
165
168
  granularity;
169
+ textRef;
166
170
  report = {
167
171
  chapters: []
168
172
  };
@@ -177,8 +181,59 @@ class Aligner {
177
181
  return segmentation.filter((s) => s.text.match(/\S/));
178
182
  }
179
183
  async writeAlignedChapter(alignedChapter) {
184
+ const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
180
185
  const { chapter, sentenceRanges, wordRanges, xml } = alignedChapter;
186
+ const sentences = await this.getChapterSentences(chapter.id);
187
+ const sentenceIdToFragment = new Map(
188
+ sentenceRanges.map((range) => [
189
+ range.id,
190
+ `${range.chapterId}-s${range.id}`
191
+ ])
192
+ );
193
+ const wordIdToFragment = new Map(
194
+ wordRanges.map((ranges) => [
195
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
196
+ ranges[0].sentenceId,
197
+ new Map(
198
+ ranges.map((range) => [
199
+ range.id,
200
+ `${range.chapterId}-s${range.sentenceId}-w${range.id}`
201
+ ])
202
+ )
203
+ ])
204
+ );
181
205
  const wordRangeMap = new Map(wordRanges.map((w) => [w[0].sentenceId, w]));
206
+ if (this.textRef === "text-fragment") {
207
+ const trie = new import_textFragments.TextFragmentTrie(
208
+ sentences.map((s) => s.text.replace("\n", " ")),
209
+ locale
210
+ );
211
+ for (const range of sentenceRanges) {
212
+ const sentence = sentences[range.id];
213
+ sentenceIdToFragment.set(
214
+ range.id,
215
+ trie.findMinimalFragment(
216
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
217
+ range.id
218
+ )
219
+ );
220
+ if (this.granularity === "word") {
221
+ const wordRanges2 = wordRangeMap.get(range.id);
222
+ const toFragment = wordIdToFragment.get(range.id);
223
+ const words = sentence.words.entries.filter((w) => w.text.match(/\S/));
224
+ const wordTrie = new import_textFragments.TextFragmentTrie(
225
+ words.map((w) => w.text.replace("\n", " ")),
226
+ locale
227
+ );
228
+ for (const wordRange of wordRanges2) {
229
+ toFragment.set(
230
+ wordRange.id,
231
+ wordTrie.findMinimalFragment(wordRange.id)
232
+ );
233
+ }
234
+ }
235
+ }
236
+ }
182
237
  const audiofiles = Array.from(
183
238
  new Set(sentenceRanges.map(({ audiofile }) => audiofile))
184
239
  );
@@ -215,7 +270,9 @@ class Aligner {
215
270
  chapter,
216
271
  this.granularity,
217
272
  sentenceRanges,
218
- wordRangeMap
273
+ wordRangeMap,
274
+ sentenceIdToFragment,
275
+ wordIdToFragment
219
276
  ),
220
277
  "xml"
221
278
  );
@@ -475,7 +532,7 @@ class Aligner {
475
532
  alignedChapter.wordRanges[i] = (0, import_getSentenceRanges.expandEmptySentenceRanges)(wordRanges);
476
533
  }
477
534
  await this.writeAlignedChapter(alignedChapter);
478
- collapsedStart += sentences.length - 1;
535
+ collapsedStart += sentences.length;
479
536
  }
480
537
  await this.epub.addMetadata({
481
538
  type: "meta",
@@ -503,7 +560,7 @@ class Aligner {
503
560
  return this.timing;
504
561
  }
505
562
  }
506
- function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
563
+ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
507
564
  return [
508
565
  import_epub.Epub.createXmlElement(
509
566
  "smil",
@@ -530,7 +587,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
530
587
  },
531
588
  [
532
589
  import_epub.Epub.createXmlElement("text", {
533
- src: `../${chapter.href}#${chapter.id}-s${sentenceRange.id}`
590
+ src: `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
534
591
  }),
535
592
  import_epub.Epub.createXmlElement("audio", {
536
593
  src: `../Audio/${(0, import_posix.basename)(sentenceRange.audiofile)}`,
@@ -541,12 +598,13 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
541
598
  );
542
599
  }
543
600
  const words = wordRanges.get(sentenceRange.id);
601
+ const wordToFragment = wordIdToFragment.get(sentenceRange.id);
544
602
  return import_epub.Epub.createXmlElement(
545
603
  "seq",
546
604
  {
547
605
  id: `${chapter.id}-s${sentenceRange.id}`,
548
606
  "epub:type": "text-range-small",
549
- "epub:textref": `../${chapter.href}#${chapter.id}-s${sentenceRange.id}`
607
+ "epub:textref": `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
550
608
  },
551
609
  words.map(
552
610
  (word) => import_epub.Epub.createXmlElement(
@@ -556,7 +614,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
556
614
  },
557
615
  [
558
616
  import_epub.Epub.createXmlElement("text", {
559
- src: `../${chapter.href}#${chapter.id}-s${sentenceRange.id}-w${word.id}`
617
+ src: `../${chapter.href}#${wordToFragment.get(word.id)}`
560
618
  }),
561
619
  import_epub.Epub.createXmlElement("audio", {
562
620
  src: `../Audio/${(0, import_posix.basename)(word.audiofile)}`,
@@ -39,7 +39,8 @@ interface Report {
39
39
  }
40
40
  interface AlignOptions {
41
41
  reportsPath?: string | null | undefined;
42
- granularity: "sentence" | "word" | null | undefined;
42
+ granularity?: "sentence" | "word" | null | undefined;
43
+ textRef?: "id-fragment" | "text-fragment" | null | undefined;
43
44
  primaryLocale?: Intl.Locale | null | undefined;
44
45
  logger?: Logger | null | undefined;
45
46
  onProgress?: ((progress: number) => void) | null | undefined;
@@ -55,8 +56,9 @@ declare class Aligner {
55
56
  private alignedChapters;
56
57
  private timing;
57
58
  private granularity;
59
+ private textRef;
58
60
  report: Report;
59
- constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
61
+ constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, textRef: "id-fragment" | "text-fragment" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
60
62
  private getChapterSentences;
61
63
  private writeAlignedChapter;
62
64
  private addChapterReport;
@@ -39,7 +39,8 @@ interface Report {
39
39
  }
40
40
  interface AlignOptions {
41
41
  reportsPath?: string | null | undefined;
42
- granularity: "sentence" | "word" | null | undefined;
42
+ granularity?: "sentence" | "word" | null | undefined;
43
+ textRef?: "id-fragment" | "text-fragment" | null | undefined;
43
44
  primaryLocale?: Intl.Locale | null | undefined;
44
45
  logger?: Logger | null | undefined;
45
46
  onProgress?: ((progress: number) => void) | null | undefined;
@@ -55,8 +56,9 @@ declare class Aligner {
55
56
  private alignedChapters;
56
57
  private timing;
57
58
  private granularity;
59
+ private textRef;
58
60
  report: Report;
59
- constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
61
+ constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, textRef: "id-fragment" | "text-fragment" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
60
62
  private getChapterSentences;
61
63
  private writeAlignedChapter;
62
64
  private addChapterReport;
@@ -27,6 +27,7 @@ import {
27
27
  } from "./getSentenceRanges.js";
28
28
  import { findBoundaries } from "./search.js";
29
29
  import { slugify } from "./slugify.js";
30
+ import { TextFragmentTrie } from "./textFragments.js";
30
31
  async function align(input, output, transcriptionsDir, audiobookDir, options) {
31
32
  var _stack = [];
32
33
  try {
@@ -62,6 +63,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
62
63
  audiobookFiles,
63
64
  transcriptions,
64
65
  options.granularity,
66
+ options.textRef,
65
67
  options.primaryLocale,
66
68
  options.logger
67
69
  );
@@ -85,7 +87,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
85
87
  }
86
88
  }
87
89
  class Aligner {
88
- constructor(epub, audiofiles, transcriptions, granularity, languageOverride, logger) {
90
+ constructor(epub, audiofiles, transcriptions, granularity, textRef, languageOverride, logger) {
89
91
  this.epub = epub;
90
92
  this.audiofiles = audiofiles;
91
93
  this.languageOverride = languageOverride;
@@ -93,12 +95,14 @@ class Aligner {
93
95
  this.transcription = concatTranscriptions(transcriptions, audiofiles);
94
96
  this.getChapterSentences = memoize(this.getChapterSentences.bind(this));
95
97
  this.granularity = granularity ?? "sentence";
98
+ this.textRef = textRef ?? "id-fragment";
96
99
  }
97
100
  transcription;
98
101
  totalDuration = 0;
99
102
  alignedChapters = [];
100
103
  timing = createAggregator();
101
104
  granularity;
105
+ textRef;
102
106
  report = {
103
107
  chapters: []
104
108
  };
@@ -113,8 +117,59 @@ class Aligner {
113
117
  return segmentation.filter((s) => s.text.match(/\S/));
114
118
  }
115
119
  async writeAlignedChapter(alignedChapter) {
120
+ const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
116
121
  const { chapter, sentenceRanges, wordRanges, xml } = alignedChapter;
122
+ const sentences = await this.getChapterSentences(chapter.id);
123
+ const sentenceIdToFragment = new Map(
124
+ sentenceRanges.map((range) => [
125
+ range.id,
126
+ `${range.chapterId}-s${range.id}`
127
+ ])
128
+ );
129
+ const wordIdToFragment = new Map(
130
+ wordRanges.map((ranges) => [
131
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
132
+ ranges[0].sentenceId,
133
+ new Map(
134
+ ranges.map((range) => [
135
+ range.id,
136
+ `${range.chapterId}-s${range.sentenceId}-w${range.id}`
137
+ ])
138
+ )
139
+ ])
140
+ );
117
141
  const wordRangeMap = new Map(wordRanges.map((w) => [w[0].sentenceId, w]));
142
+ if (this.textRef === "text-fragment") {
143
+ const trie = new TextFragmentTrie(
144
+ sentences.map((s) => s.text.replace("\n", " ")),
145
+ locale
146
+ );
147
+ for (const range of sentenceRanges) {
148
+ const sentence = sentences[range.id];
149
+ sentenceIdToFragment.set(
150
+ range.id,
151
+ trie.findMinimalFragment(
152
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
153
+ range.id
154
+ )
155
+ );
156
+ if (this.granularity === "word") {
157
+ const wordRanges2 = wordRangeMap.get(range.id);
158
+ const toFragment = wordIdToFragment.get(range.id);
159
+ const words = sentence.words.entries.filter((w) => w.text.match(/\S/));
160
+ const wordTrie = new TextFragmentTrie(
161
+ words.map((w) => w.text.replace("\n", " ")),
162
+ locale
163
+ );
164
+ for (const wordRange of wordRanges2) {
165
+ toFragment.set(
166
+ wordRange.id,
167
+ wordTrie.findMinimalFragment(wordRange.id)
168
+ );
169
+ }
170
+ }
171
+ }
172
+ }
118
173
  const audiofiles = Array.from(
119
174
  new Set(sentenceRanges.map(({ audiofile }) => audiofile))
120
175
  );
@@ -151,7 +206,9 @@ class Aligner {
151
206
  chapter,
152
207
  this.granularity,
153
208
  sentenceRanges,
154
- wordRangeMap
209
+ wordRangeMap,
210
+ sentenceIdToFragment,
211
+ wordIdToFragment
155
212
  ),
156
213
  "xml"
157
214
  );
@@ -411,7 +468,7 @@ class Aligner {
411
468
  alignedChapter.wordRanges[i] = expandEmptySentenceRanges(wordRanges);
412
469
  }
413
470
  await this.writeAlignedChapter(alignedChapter);
414
- collapsedStart += sentences.length - 1;
471
+ collapsedStart += sentences.length;
415
472
  }
416
473
  await this.epub.addMetadata({
417
474
  type: "meta",
@@ -439,7 +496,7 @@ class Aligner {
439
496
  return this.timing;
440
497
  }
441
498
  }
442
- function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
499
+ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
443
500
  return [
444
501
  Epub.createXmlElement(
445
502
  "smil",
@@ -466,7 +523,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
466
523
  },
467
524
  [
468
525
  Epub.createXmlElement("text", {
469
- src: `../${chapter.href}#${chapter.id}-s${sentenceRange.id}`
526
+ src: `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
470
527
  }),
471
528
  Epub.createXmlElement("audio", {
472
529
  src: `../Audio/${basename(sentenceRange.audiofile)}`,
@@ -477,12 +534,13 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
477
534
  );
478
535
  }
479
536
  const words = wordRanges.get(sentenceRange.id);
537
+ const wordToFragment = wordIdToFragment.get(sentenceRange.id);
480
538
  return Epub.createXmlElement(
481
539
  "seq",
482
540
  {
483
541
  id: `${chapter.id}-s${sentenceRange.id}`,
484
542
  "epub:type": "text-range-small",
485
- "epub:textref": `../${chapter.href}#${chapter.id}-s${sentenceRange.id}`
543
+ "epub:textref": `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
486
544
  },
487
545
  words.map(
488
546
  (word) => Epub.createXmlElement(
@@ -492,7 +550,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
492
550
  },
493
551
  [
494
552
  Epub.createXmlElement("text", {
495
- src: `../${chapter.href}#${chapter.id}-s${sentenceRange.id}-w${word.id}`
553
+ src: `../${chapter.href}#${wordToFragment.get(word.id)}`
496
554
  }),
497
555
  Epub.createXmlElement("audio", {
498
556
  src: `../Audio/${basename(word.audiofile)}`,
@@ -275,6 +275,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
275
275
  if (start2 && end2) {
276
276
  perSentenceWordRanges.push({
277
277
  id: k,
278
+ chapterId,
278
279
  sentenceId: j + chapterSentenceIndex + slice[0],
279
280
  start: end2.audiofile === start2.audiofile ? start2.start : 0,
280
281
  audiofile: end2.audiofile,
@@ -19,6 +19,7 @@ type SentenceRange = {
19
19
  };
20
20
  type WordRange = {
21
21
  id: number;
22
+ chapterId: string;
22
23
  sentenceId: number;
23
24
  start: number;
24
25
  end: number;
@@ -19,6 +19,7 @@ type SentenceRange = {
19
19
  };
20
20
  type WordRange = {
21
21
  id: number;
22
+ chapterId: string;
22
23
  sentenceId: number;
23
24
  start: number;
24
25
  end: number;
@@ -247,6 +247,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
247
247
  if (start2 && end2) {
248
248
  perSentenceWordRanges.push({
249
249
  id: k,
250
+ chapterId,
250
251
  sentenceId: j + chapterSentenceIndex + slice[0],
251
252
  start: end2.audiofile === start2.audiofile ? start2.start : 0,
252
253
  audiofile: end2.audiofile,
@@ -34,6 +34,12 @@ const alignParser = (0, import_core.object)("Alignment", {
34
34
  "--epub",
35
35
  (0, import_valueparser.path)({ mustExist: true, type: "file", extensions: [".epub"] })
36
36
  ),
37
+ textRef: (0, import_core.withDefault)(
38
+ (0, import_core.option)("--text-ref", (0, import_core.choice)(["id-fragment", "text-fragment"]), {
39
+ description: import_core.message`Whether to use text fragments rather than element id fragments to identify text ranges in generated media overlays.`
40
+ }),
41
+ "id-fragment"
42
+ ),
37
43
  reports: (0, import_core.optional)((0, import_core.option)("--reports", (0, import_valueparser.path)({ type: "directory" })))
38
44
  });
39
45
  const alignCommand = (0, import_core.command)(
@@ -3,10 +3,12 @@ import * as _optique_core from '@optique/core';
3
3
  declare const alignParser: _optique_core.Parser<"sync", {
4
4
  readonly audiobook: string;
5
5
  readonly epub: string;
6
+ readonly textRef: "id-fragment" | "text-fragment";
6
7
  readonly reports: string | undefined;
7
8
  }, {
8
9
  readonly audiobook: _optique_core.ValueParserResult<string> | undefined;
9
10
  readonly epub: _optique_core.ValueParserResult<string> | undefined;
11
+ readonly textRef: [_optique_core.ValueParserResult<"id-fragment" | "text-fragment"> | undefined] | undefined;
10
12
  readonly reports: [_optique_core.ValueParserResult<string> | undefined] | undefined;
11
13
  }>;
12
14
  declare const alignCommand: _optique_core.Parser<"sync", {
@@ -16,6 +18,7 @@ declare const alignCommand: _optique_core.Parser<"sync", {
16
18
  } & {
17
19
  readonly audiobook: string;
18
20
  readonly epub: string;
21
+ readonly textRef: "id-fragment" | "text-fragment";
19
22
  readonly reports: string | undefined;
20
23
  } & {
21
24
  readonly noProgress: boolean;
@@ -3,10 +3,12 @@ import * as _optique_core from '@optique/core';
3
3
  declare const alignParser: _optique_core.Parser<"sync", {
4
4
  readonly audiobook: string;
5
5
  readonly epub: string;
6
+ readonly textRef: "id-fragment" | "text-fragment";
6
7
  readonly reports: string | undefined;
7
8
  }, {
8
9
  readonly audiobook: _optique_core.ValueParserResult<string> | undefined;
9
10
  readonly epub: _optique_core.ValueParserResult<string> | undefined;
11
+ readonly textRef: [_optique_core.ValueParserResult<"id-fragment" | "text-fragment"> | undefined] | undefined;
10
12
  readonly reports: [_optique_core.ValueParserResult<string> | undefined] | undefined;
11
13
  }>;
12
14
  declare const alignCommand: _optique_core.Parser<"sync", {
@@ -16,6 +18,7 @@ declare const alignCommand: _optique_core.Parser<"sync", {
16
18
  } & {
17
19
  readonly audiobook: string;
18
20
  readonly epub: string;
21
+ readonly textRef: "id-fragment" | "text-fragment";
19
22
  readonly reports: string | undefined;
20
23
  } & {
21
24
  readonly noProgress: boolean;
@@ -1,12 +1,14 @@
1
1
  import "../chunk-BIEQXUOY.js";
2
2
  import {
3
+ choice,
3
4
  command,
4
5
  constant,
5
6
  merge,
6
7
  message,
7
8
  object,
8
9
  option,
9
- optional
10
+ optional,
11
+ withDefault
10
12
  } from "@optique/core";
11
13
  import { path } from "@optique/run/valueparser";
12
14
  import {
@@ -23,6 +25,12 @@ const alignParser = object("Alignment", {
23
25
  "--epub",
24
26
  path({ mustExist: true, type: "file", extensions: [".epub"] })
25
27
  ),
28
+ textRef: withDefault(
29
+ option("--text-ref", choice(["id-fragment", "text-fragment"]), {
30
+ description: message`Whether to use text fragments rather than element id fragments to identify text ranges in generated media overlays.`
31
+ }),
32
+ "id-fragment"
33
+ ),
26
34
  reports: optional(option("--reports", path({ type: "directory" })))
27
35
  });
28
36
  const alignCommand = command(
@@ -0,0 +1,147 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
// Bundler-emitted helper: for every key in `all`, defines an enumerable
// getter of the same name on `target`, using `all[name]` as the getter.
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
// Bundler-emitted helper: when `from` is an object or function, re-exposes
// each of its own property names on `to` as a getter that reads `from[key]`.
// Keys already present on `to`, and the key equal to `except`, are skipped.
// Always returns `to`.
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
// `desc` is assigned inside the expression: the copy is enumerable when the
// source has no descriptor for `key`, otherwise it mirrors the source's flag.
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
// Bundler-emitted helper: builds a fresh object carrying `__esModule: true`
// and copies the properties of `mod` onto it via __copyProps.
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
// Export table for this module; the getter defers reading TextFragmentTrie
// until the property is accessed.
+ var textFragments_exports = {};
20
+ __export(textFragments_exports, {
21
+ TextFragmentTrie: () => TextFragmentTrie
22
+ });
23
+ module.exports = __toCommonJS(textFragments_exports);
24
+ var import_itertools = require("itertools");
25
+ var import_runes2 = require("runes2");
/**
 * Trie over every substring of a list of text spans, used to build a short
 * `:~:text=` fragment directive for one span.
 *
 * Spans are lowercased with `toLocaleLowerCase(locale)` before indexing, so
 * all generated fragments are lowercase. Each trie node records every
 * `{ span, pos }` at which the path ending in that node occurs, where `pos`
 * is the index of the node's character in `runes(span)`.
 */
class TextFragmentTrie {
  root = new Node(null, "");
  spans;
  /**
   * @param casedSpans Array of strings to index, in document order.
   * @param locale Locale used for lowercasing; defaults to `en-Latn-US`.
   */
  constructor(casedSpans, locale = new Intl.Locale("en-Latn-US")) {
    this.spans = casedSpans.map((span) => span.toLocaleLowerCase(locale));
    for (const [i, span] of (0, import_itertools.enumerate)(this.spans)) {
      // One cursor per substring start seen so far in this span. Every cursor
      // advances by the current character, then a new cursor is added at the
      // root so the next character also starts its own path.
      const parents = [this.root];
      for (const [j, char] of (0, import_itertools.enumerate)((0, import_runes2.runes)(span))) {
        for (const [k, parent] of (0, import_itertools.enumerate)(parents)) {
          const newNode = new Node(parent, char, { span: i, pos: j });
          let node = parent.children.find((child) => child.eq(newNode));
          if (!node) {
            node = newNode;
            parent.children.push(node);
          } else {
            // Existing path: just record this additional occurrence.
            node.indices.push({ span: i, pos: j });
          }
          parents[k] = node;
        }
        parents.push(this.root);
      }
    }
  }
  /**
   * Walks down from the root, at each level choosing the child that occurs in
   * span `spanIndex` at the smallest recorded position, and stops as soon as
   * the chosen node has exactly one recorded occurrence overall.
   *
   * If the walk runs out of children for this span before reaching such a
   * node, the fragment is built from the last node with prefix search enabled.
   *
   * @param spanIndex Index into the spans passed to the constructor.
   * @returns A string beginning with `:~:text=`.
   */
  findMinimalFragment(spanIndex) {
    let node = this.root;
    while (node.children.length) {
      const candidates = node.children.filter(
        (child2) => child2.indices.some(
          ({ span: childSpanIndex }) => childSpanIndex === spanIndex
        )
      );
      const child = (0, import_itertools.min)(
        candidates,
        // Candidates were filtered to contain spanIndex, so find() succeeds.
        (c) => c.indices.find((i) => i.span === spanIndex).pos
      );
      if (!child) {
        return this.nodeToFragment(node, spanIndex, true);
      }
      if (child.indices.length === 1) {
        return this.nodeToFragment(child, spanIndex);
      }
      node = child;
    }
    return this.nodeToFragment(node, spanIndex, true);
  }
  /**
   * Serialises the path ending at `node` as `:~:text=[prefix-,]start[,end]`.
   *
   * @param node Trie node whose root-to-node path is used as the start text.
   * @param spanIndex Index of the span the fragment should identify.
   * @param findPrefix When truthy, a disambiguating prefix is derived from the
   *   preceding span (if there is a non-empty one).
   * @returns The fragment directive string.
   */
  nodeToFragment(node, spanIndex, findPrefix) {
    const span = this.spans[spanIndex];
    let fragment = ":~:text=";
    let prefix = "";
    if (findPrefix) {
      const prev = this.spans[spanIndex - 1];
      if (prev) {
        // For each occurrence of this path in a *different* span, collect the
        // text that precedes it: the previous span plus the part of that span
        // before the occurrence.
        const prefixes = node.indices.filter(({ span: s }) => s !== spanIndex).map(({ span: spanIndex2, pos }) => {
          let startNode2 = node;
          let startPos = pos;
          // Step back from the path's last character to its first one.
          // NOTE(review): `pos` counts runes() items while `.length` and
          // `slice()` count UTF-16 code units — presumably equal for the
          // expected input; verify for text with astral characters.
          while (startNode2.parent && startNode2.parent !== this.root) {
            startPos -= startNode2.value.length;
            startNode2 = startNode2.parent;
          }
          const prev2 = this.spans[spanIndex2 - 1];
          const span2 = this.spans[spanIndex2];
          return (prev2 ?? "") + span2.slice(0, startPos);
        });
        const reversedPrefixes = prefixes.map((p) => (0, import_runes2.runes)(p).toReversed());
        // Grow the prefix backwards through the preceding span, discarding
        // competing contexts as soon as they stop matching.
        for (const [i2, char] of (0, import_itertools.enumerate)((0, import_runes2.runes)(prev).toReversed())) {
          prefix = char + prefix;
          // Prune in place from the end so earlier indices stay valid. The
          // previous form computed the splice index from the already-shrunk
          // array length and could remove the wrong entries.
          for (let idx = reversedPrefixes.length - 1; idx >= 0; idx--) {
            if (reversedPrefixes[idx][i2] !== char) {
              reversedPrefixes.splice(idx, 1);
            }
          }
          if (reversedPrefixes.length === 0) {
            break;
          }
        }
      }
    }
    if (prefix) {
      fragment += `${encodeTextFragmentPart(prefix)}-,`;
    }
    // Concatenate node values from `node` up to the root (whose value is "").
    let startNode = node;
    let start = "";
    while (startNode) {
      start = startNode.value + start;
      startNode = startNode.parent;
    }
    fragment += encodeTextFragmentPart(start);
    // NOTE(review): `start` already ends with `node.value`, so this skips one
    // extra character beyond the start text — confirm this is intended.
    const remainingSentence = span.slice(start.length + node.value.length);
    // Extend `end` leftwards from the last character until its first
    // occurrence in the remainder is the one at the very end.
    let end = "";
    let i = remainingSentence.length - 1;
    while (remainingSentence.indexOf(end) !== i + 1 && i >= node.value.length) {
      end = remainingSentence.slice(i);
      i--;
    }
    if (end) {
      fragment += `,${encodeTextFragmentPart(end)}`;
    }
    return fragment;
  }
}
/**
 * Percent-encodes one component of a `:~:text=` directive.
 *
 * `encodeURIComponent` already escapes `,` (as `%2C`) and `&`, but leaves `-`
 * untouched, so only the dash needs an extra pass.
 *
 * @param part Raw text of a prefix, start or end component.
 * @returns The encoded component.
 */
function encodeTextFragmentPart(part) {
  return encodeURIComponent(part).replace(/-/g, "%2d");
}
130
+ class Node {
131
+ constructor(parent, value, firstIndex) {
132
+ this.parent = parent;
133
+ this.value = value;
134
+ if (firstIndex !== void 0) {
135
+ this.indices.push(firstIndex);
136
+ }
137
+ }
138
+ children = [];
139
+ indices = [];
140
+ eq(other) {
141
+ return this.value === other.value;
142
+ }
143
+ }
144
+ // Annotate the CommonJS export names for ESM import in node:
145
+ 0 && (module.exports = {
146
+ TextFragmentTrie
147
+ });
@@ -0,0 +1,23 @@
1
/**
 * Index over the substrings of a list of text spans that produces
 * `:~:text=` fragment directives for individual spans.
 */
+ declare class TextFragmentTrie {
2
+ private root;
3
+ private spans;
4
/**
 * @param casedSpans Text spans to index; the implementation works on a
 *   locale-lowercased copy of each span.
 * @param locale Locale used for lowercasing; the implementation defaults to
 *   `en-Latn-US` when omitted.
 */
+ constructor(casedSpans: string[], locale?: Intl.Locale);
5
/**
 * Builds a fragment directive for the span at `spanIndex`.
 * @returns A string beginning with `:~:text=`.
 */
+ findMinimalFragment(spanIndex: number): string;
6
/**
 * Serialises the trie path ending at `node` as a fragment directive for the
 * span at `spanIndex`. When `findPrefix` is truthy the implementation also
 * derives a prefix component from the preceding span.
 */
+ nodeToFragment(node: Node, spanIndex: number, findPrefix?: boolean): string;
7
+ }
8
/** One node of the trie built by TextFragmentTrie. */
+ declare class Node {
9
/** Parent node; `null` for the root. */
+ parent: Node | null;
10
/** Character stored at this node (the root is created with an empty string). */
+ value: string;
11
+ children: Node[];
12
/** Occurrences recorded for this node: span index and position within that span. */
+ indices: {
13
+ span: number;
14
+ pos: number;
15
+ }[];
16
/** `firstIndex`, when provided, becomes the first entry of `indices`. */
+ constructor(parent: Node | null, value: string, firstIndex?: {
17
+ span: number;
18
+ pos: number;
19
+ });
20
/** Compares nodes by `value` only. */
+ eq(other: Node): boolean;
21
+ }
22
+
23
+ export { TextFragmentTrie };
@@ -0,0 +1,23 @@
1
/**
 * Index over the substrings of a list of text spans that produces
 * `:~:text=` fragment directives for individual spans.
 */
+ declare class TextFragmentTrie {
2
+ private root;
3
+ private spans;
4
/**
 * @param casedSpans Text spans to index; the implementation works on a
 *   locale-lowercased copy of each span.
 * @param locale Locale used for lowercasing; the implementation defaults to
 *   `en-Latn-US` when omitted.
 */
+ constructor(casedSpans: string[], locale?: Intl.Locale);
5
/**
 * Builds a fragment directive for the span at `spanIndex`.
 * @returns A string beginning with `:~:text=`.
 */
+ findMinimalFragment(spanIndex: number): string;
6
/**
 * Serialises the trie path ending at `node` as a fragment directive for the
 * span at `spanIndex`. When `findPrefix` is truthy the implementation also
 * derives a prefix component from the preceding span.
 */
+ nodeToFragment(node: Node, spanIndex: number, findPrefix?: boolean): string;
7
+ }
8
/** One node of the trie built by TextFragmentTrie. */
+ declare class Node {
9
/** Parent node; `null` for the root. */
+ parent: Node | null;
10
/** Character stored at this node (the root is created with an empty string). */
+ value: string;
11
+ children: Node[];
12
/** Occurrences recorded for this node: span index and position within that span. */
+ indices: {
13
+ span: number;
14
+ pos: number;
15
+ }[];
16
/** `firstIndex`, when provided, becomes the first entry of `indices`. */
+ constructor(parent: Node | null, value: string, firstIndex?: {
17
+ span: number;
18
+ pos: number;
19
+ });
20
/** Compares nodes by `value` only. */
+ eq(other: Node): boolean;
21
+ }
22
+
23
+ export { TextFragmentTrie };