npm - @storyteller-platform/align - Versions diffs - 0.1.21 → 0.1.22 - Mend

@storyteller-platform/align 0.1.21 → 0.1.22

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (35)

package/dist/align/align.cjs +65 -7
package/dist/align/align.d.cts +4 -2
package/dist/align/align.d.ts +4 -2
package/dist/align/align.js +65 -7
package/dist/align/getSentenceRanges.cjs +1 -0
package/dist/align/getSentenceRanges.d.cts +1 -0
package/dist/align/getSentenceRanges.d.ts +1 -0
package/dist/align/getSentenceRanges.js +1 -0
package/dist/align/parse.cjs +6 -0
package/dist/align/parse.d.cts +3 -0
package/dist/align/parse.d.ts +3 -0
package/dist/align/parse.js +9 -1
package/dist/align/textFragments.cjs +147 -0
package/dist/align/textFragments.d.cts +23 -0
package/dist/align/textFragments.d.ts +23 -0
package/dist/align/textFragments.js +124 -0
package/dist/cli/bin.cjs +38 -24
package/dist/cli/bin.js +35 -21
package/dist/index.d.cts +1 -0
package/dist/index.d.ts +1 -0
package/dist/snapshot/parse.cjs +61 -0
package/dist/snapshot/parse.d.cts +24 -0
package/dist/snapshot/parse.d.ts +24 -0
package/dist/snapshot/parse.js +45 -0
package/dist/snapshot/snapshot.cjs +224 -0
package/dist/snapshot/snapshot.d.cts +6 -0
package/dist/snapshot/snapshot.d.ts +6 -0
package/dist/snapshot/snapshot.js +161 -0
package/dist/transcribe/parse.cjs +2 -2
package/dist/transcribe/parse.js +1 -1
package/dist/transcribe/transcribe.cjs +2 -0
package/dist/transcribe/transcribe.d.cts +2 -1
package/dist/transcribe/transcribe.d.ts +2 -1
package/dist/transcribe/transcribe.js +2 -0
package/package.json +3 -3

package/dist/align/align.cjs CHANGED Viewed

@@ -91,6 +91,7 @@ var import_segmentation = require("../markup/segmentation.cjs");
 var import_getSentenceRanges = require("./getSentenceRanges.cjs");
 var import_search = require("./search.cjs");
 var import_slugify = require("./slugify.cjs");
+var import_textFragments = require("./textFragments.cjs");
 async function align(input, output, transcriptionsDir, audiobookDir, options) {
   var _stack = [];
   try {
@@ -126,6 +127,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
       audiobookFiles,
       transcriptions,
       options.granularity,
+      options.textRef,
       options.primaryLocale,
       options.logger
     );
@@ -149,7 +151,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
   }
 }
 class Aligner {
-  constructor(epub, audiofiles, transcriptions, granularity, languageOverride, logger) {
+  constructor(epub, audiofiles, transcriptions, granularity, textRef, languageOverride, logger) {
     this.epub = epub;
     this.audiofiles = audiofiles;
     this.languageOverride = languageOverride;
@@ -157,12 +159,14 @@ class Aligner {
     this.transcription = concatTranscriptions(transcriptions, audiofiles);
     this.getChapterSentences = (0, import_memoize.default)(this.getChapterSentences.bind(this));
     this.granularity = granularity ?? "sentence";
+    this.textRef = textRef ?? "id-fragment";
   }
   transcription;
   totalDuration = 0;
   alignedChapters = [];
   timing = (0, import_ghost_story.createAggregator)();
   granularity;
+  textRef;
   report = {
     chapters: []
   };
@@ -177,8 +181,59 @@ class Aligner {
     return segmentation.filter((s) => s.text.match(/\S/));
   }
   async writeAlignedChapter(alignedChapter) {
+    const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
     const { chapter, sentenceRanges, wordRanges, xml } = alignedChapter;
+    const sentences = await this.getChapterSentences(chapter.id);
+    const sentenceIdToFragment = new Map(
+      sentenceRanges.map((range) => [
+        range.id,
+        `${range.chapterId}-s${range.id}`
+      ])
+    );
+    const wordIdToFragment = new Map(
+      wordRanges.map((ranges) => [
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        ranges[0].sentenceId,
+        new Map(
+          ranges.map((range) => [
+            range.id,
+            `${range.chapterId}-s${range.sentenceId}-w${range.id}`
+          ])
+        )
+      ])
+    );
     const wordRangeMap = new Map(wordRanges.map((w) => [w[0].sentenceId, w]));
+    if (this.textRef === "text-fragment") {
+      const trie = new import_textFragments.TextFragmentTrie(
+        sentences.map((s) => s.text.replace("\n", " ")),
+        locale
+      );
+      for (const range of sentenceRanges) {
+        const sentence = sentences[range.id];
+        sentenceIdToFragment.set(
+          range.id,
+          trie.findMinimalFragment(
+            // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+            range.id
+          )
+        );
+        if (this.granularity === "word") {
+          const wordRanges2 = wordRangeMap.get(range.id);
+          const toFragment = wordIdToFragment.get(range.id);
+          const words = sentence.words.entries.filter((w) => w.text.match(/\S/));
+          const wordTrie = new import_textFragments.TextFragmentTrie(
+            words.map((w) => w.text.replace("\n", " ")),
+            locale
+          );
+          for (const wordRange of wordRanges2) {
+            toFragment.set(
+              wordRange.id,
+              wordTrie.findMinimalFragment(wordRange.id)
+            );
+          }
+        }
+      }
+    }
     const audiofiles = Array.from(
       new Set(sentenceRanges.map(({ audiofile }) => audiofile))
     );
@@ -215,7 +270,9 @@ class Aligner {
         chapter,
         this.granularity,
         sentenceRanges,
-        wordRangeMap
+        wordRangeMap,
+        sentenceIdToFragment,
+        wordIdToFragment
       ),
       "xml"
     );
@@ -475,7 +532,7 @@ class Aligner {
         alignedChapter.wordRanges[i] = (0, import_getSentenceRanges.expandEmptySentenceRanges)(wordRanges);
       }
       await this.writeAlignedChapter(alignedChapter);
-      collapsedStart += sentences.length - 1;
+      collapsedStart += sentences.length;
     }
     await this.epub.addMetadata({
       type: "meta",
@@ -503,7 +560,7 @@ class Aligner {
     return this.timing;
   }
 }
-function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
+function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
   return [
     import_epub.Epub.createXmlElement(
       "smil",
@@ -530,7 +587,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
                   },
                   [
                     import_epub.Epub.createXmlElement("text", {
-                      src: `../${chapter.href}#${chapter.id}-s${sentenceRange.id}`
+                      src: `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
                     }),
                     import_epub.Epub.createXmlElement("audio", {
                       src: `../Audio/${(0, import_posix.basename)(sentenceRange.audiofile)}`,
@@ -541,12 +598,13 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
                 );
               }
               const words = wordRanges.get(sentenceRange.id);
+              const wordToFragment = wordIdToFragment.get(sentenceRange.id);
               return import_epub.Epub.createXmlElement(
                 "seq",
                 {
                   id: `${chapter.id}-s${sentenceRange.id}`,
                   "epub:type": "text-range-small",
-                  "epub:textref": `../${chapter.href}#${chapter.id}-s${sentenceRange.id}`
+                  "epub:textref": `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
                 },
                 words.map(
                   (word) => import_epub.Epub.createXmlElement(
@@ -556,7 +614,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
                     },
                     [
                       import_epub.Epub.createXmlElement("text", {
-                        src: `../${chapter.href}#${chapter.id}-s${sentenceRange.id}-w${word.id}`
+                        src: `../${chapter.href}#${wordToFragment.get(word.id)}`
                       }),
                       import_epub.Epub.createXmlElement("audio", {
                         src: `../Audio/${(0, import_posix.basename)(word.audiofile)}`,

package/dist/align/align.d.cts CHANGED Viewed

@@ -39,7 +39,8 @@ interface Report {
 }
 interface AlignOptions {
     reportsPath?: string | null | undefined;
-    granularity: "sentence" | "word" | null | undefined;
+    granularity?: "sentence" | "word" | null | undefined;
+    textRef?: "id-fragment" | "text-fragment" | null | undefined;
     primaryLocale?: Intl.Locale | null | undefined;
     logger?: Logger | null | undefined;
     onProgress?: ((progress: number) => void) | null | undefined;
@@ -55,8 +56,9 @@ declare class Aligner {
     private alignedChapters;
     private timing;
     private granularity;
+    private textRef;
     report: Report;
-    constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
+    constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, textRef: "id-fragment" | "text-fragment" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
     private getChapterSentences;
     private writeAlignedChapter;
     private addChapterReport;

package/dist/align/align.d.ts CHANGED Viewed

@@ -39,7 +39,8 @@ interface Report {
 }
 interface AlignOptions {
     reportsPath?: string | null | undefined;
-    granularity: "sentence" | "word" | null | undefined;
+    granularity?: "sentence" | "word" | null | undefined;
+    textRef?: "id-fragment" | "text-fragment" | null | undefined;
     primaryLocale?: Intl.Locale | null | undefined;
     logger?: Logger | null | undefined;
     onProgress?: ((progress: number) => void) | null | undefined;
@@ -55,8 +56,9 @@ declare class Aligner {
     private alignedChapters;
     private timing;
     private granularity;
+    private textRef;
     report: Report;
-    constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
+    constructor(epub: Epub, audiofiles: string[], transcriptions: Pick<RecognitionResult, "transcript" | "timeline">[], granularity: "sentence" | "word" | null | undefined, textRef: "id-fragment" | "text-fragment" | null | undefined, languageOverride?: (Intl.Locale | null) | undefined, logger?: (Logger | null) | undefined);
     private getChapterSentences;
     private writeAlignedChapter;
     private addChapterReport;

package/dist/align/align.js CHANGED Viewed

@@ -27,6 +27,7 @@ import {
 } from "./getSentenceRanges.js";
 import { findBoundaries } from "./search.js";
 import { slugify } from "./slugify.js";
+import { TextFragmentTrie } from "./textFragments.js";
 async function align(input, output, transcriptionsDir, audiobookDir, options) {
   var _stack = [];
   try {
@@ -62,6 +63,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
       audiobookFiles,
       transcriptions,
       options.granularity,
+      options.textRef,
       options.primaryLocale,
       options.logger
     );
@@ -85,7 +87,7 @@ async function align(input, output, transcriptionsDir, audiobookDir, options) {
   }
 }
 class Aligner {
-  constructor(epub, audiofiles, transcriptions, granularity, languageOverride, logger) {
+  constructor(epub, audiofiles, transcriptions, granularity, textRef, languageOverride, logger) {
     this.epub = epub;
     this.audiofiles = audiofiles;
     this.languageOverride = languageOverride;
@@ -93,12 +95,14 @@ class Aligner {
     this.transcription = concatTranscriptions(transcriptions, audiofiles);
     this.getChapterSentences = memoize(this.getChapterSentences.bind(this));
     this.granularity = granularity ?? "sentence";
+    this.textRef = textRef ?? "id-fragment";
   }
   transcription;
   totalDuration = 0;
   alignedChapters = [];
   timing = createAggregator();
   granularity;
+  textRef;
   report = {
     chapters: []
   };
@@ -113,8 +117,59 @@ class Aligner {
     return segmentation.filter((s) => s.text.match(/\S/));
   }
   async writeAlignedChapter(alignedChapter) {
+    const locale = this.languageOverride ?? await this.epub.getLanguage() ?? new Intl.Locale("en-US");
     const { chapter, sentenceRanges, wordRanges, xml } = alignedChapter;
+    const sentences = await this.getChapterSentences(chapter.id);
+    const sentenceIdToFragment = new Map(
+      sentenceRanges.map((range) => [
+        range.id,
+        `${range.chapterId}-s${range.id}`
+      ])
+    );
+    const wordIdToFragment = new Map(
+      wordRanges.map((ranges) => [
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        ranges[0].sentenceId,
+        new Map(
+          ranges.map((range) => [
+            range.id,
+            `${range.chapterId}-s${range.sentenceId}-w${range.id}`
+          ])
+        )
+      ])
+    );
     const wordRangeMap = new Map(wordRanges.map((w) => [w[0].sentenceId, w]));
+    if (this.textRef === "text-fragment") {
+      const trie = new TextFragmentTrie(
+        sentences.map((s) => s.text.replace("\n", " ")),
+        locale
+      );
+      for (const range of sentenceRanges) {
+        const sentence = sentences[range.id];
+        sentenceIdToFragment.set(
+          range.id,
+          trie.findMinimalFragment(
+            // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+            range.id
+          )
+        );
+        if (this.granularity === "word") {
+          const wordRanges2 = wordRangeMap.get(range.id);
+          const toFragment = wordIdToFragment.get(range.id);
+          const words = sentence.words.entries.filter((w) => w.text.match(/\S/));
+          const wordTrie = new TextFragmentTrie(
+            words.map((w) => w.text.replace("\n", " ")),
+            locale
+          );
+          for (const wordRange of wordRanges2) {
+            toFragment.set(
+              wordRange.id,
+              wordTrie.findMinimalFragment(wordRange.id)
+            );
+          }
+        }
+      }
+    }
     const audiofiles = Array.from(
       new Set(sentenceRanges.map(({ audiofile }) => audiofile))
     );
@@ -151,7 +206,9 @@ class Aligner {
         chapter,
         this.granularity,
         sentenceRanges,
-        wordRangeMap
+        wordRangeMap,
+        sentenceIdToFragment,
+        wordIdToFragment
       ),
       "xml"
     );
@@ -411,7 +468,7 @@ class Aligner {
         alignedChapter.wordRanges[i] = expandEmptySentenceRanges(wordRanges);
       }
       await this.writeAlignedChapter(alignedChapter);
-      collapsedStart += sentences.length - 1;
+      collapsedStart += sentences.length;
     }
     await this.epub.addMetadata({
       type: "meta",
@@ -439,7 +496,7 @@ class Aligner {
     return this.timing;
   }
 }
-function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
+function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges, sentenceIdToFragment, wordIdToFragment) {
   return [
     Epub.createXmlElement(
       "smil",
@@ -466,7 +523,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
                   },
                   [
                     Epub.createXmlElement("text", {
-                      src: `../${chapter.href}#${chapter.id}-s${sentenceRange.id}`
+                      src: `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
                     }),
                     Epub.createXmlElement("audio", {
                       src: `../Audio/${basename(sentenceRange.audiofile)}`,
@@ -477,12 +534,13 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
                 );
               }
               const words = wordRanges.get(sentenceRange.id);
+              const wordToFragment = wordIdToFragment.get(sentenceRange.id);
               return Epub.createXmlElement(
                 "seq",
                 {
                   id: `${chapter.id}-s${sentenceRange.id}`,
                   "epub:type": "text-range-small",
-                  "epub:textref": `../${chapter.href}#${chapter.id}-s${sentenceRange.id}`
+                  "epub:textref": `../${chapter.href}#${sentenceIdToFragment.get(sentenceRange.id)}`
                 },
                 words.map(
                   (word) => Epub.createXmlElement(
@@ -492,7 +550,7 @@ function createMediaOverlay(chapter, granularity, sentenceRanges, wordRanges) {
                     },
                     [
                       Epub.createXmlElement("text", {
-                        src: `../${chapter.href}#${chapter.id}-s${sentenceRange.id}-w${word.id}`
+                        src: `../${chapter.href}#${wordToFragment.get(word.id)}`
                       }),
                       Epub.createXmlElement("audio", {
                         src: `../Audio/${basename(word.audiofile)}`,

package/dist/align/getSentenceRanges.cjs CHANGED Viewed

@@ -275,6 +275,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
             if (start2 && end2) {
               perSentenceWordRanges.push({
                 id: k,
+                chapterId,
                 sentenceId: j + chapterSentenceIndex + slice[0],
                 start: end2.audiofile === start2.audiofile ? start2.start : 0,
                 audiofile: end2.audiofile,

package/dist/align/getSentenceRanges.d.cts CHANGED Viewed

@@ -19,6 +19,7 @@ type SentenceRange = {
 };
 type WordRange = {
     id: number;
+    chapterId: string;
     sentenceId: number;
     start: number;
     end: number;

package/dist/align/getSentenceRanges.d.ts CHANGED Viewed

@@ -19,6 +19,7 @@ type SentenceRange = {
 };
 type WordRange = {
     id: number;
+    chapterId: string;
     sentenceId: number;
     start: number;
     end: number;

package/dist/align/getSentenceRanges.js CHANGED Viewed

@@ -247,6 +247,7 @@ async function getSentenceRanges(transcriptionText, mappedTimeline, sentences, c
             if (start2 && end2) {
               perSentenceWordRanges.push({
                 id: k,
+                chapterId,
                 sentenceId: j + chapterSentenceIndex + slice[0],
                 start: end2.audiofile === start2.audiofile ? start2.start : 0,
                 audiofile: end2.audiofile,

package/dist/align/parse.cjs CHANGED Viewed

@@ -34,6 +34,12 @@ const alignParser = (0, import_core.object)("Alignment", {
     "--epub",
     (0, import_valueparser.path)({ mustExist: true, type: "file", extensions: [".epub"] })
   ),
+  textRef: (0, import_core.withDefault)(
+    (0, import_core.option)("--text-ref", (0, import_core.choice)(["id-fragment", "text-fragment"]), {
+      description: import_core.message`Whether to use text fragments rather than element id fragments to identify text ranges in generated media overlays.`
+    }),
+    "id-fragment"
+  ),
   reports: (0, import_core.optional)((0, import_core.option)("--reports", (0, import_valueparser.path)({ type: "directory" })))
 });
 const alignCommand = (0, import_core.command)(

package/dist/align/parse.d.cts CHANGED Viewed

@@ -3,10 +3,12 @@ import * as _optique_core from '@optique/core';
 declare const alignParser: _optique_core.Parser<"sync", {
     readonly audiobook: string;
     readonly epub: string;
+    readonly textRef: "id-fragment" | "text-fragment";
     readonly reports: string | undefined;
 }, {
     readonly audiobook: _optique_core.ValueParserResult<string> | undefined;
     readonly epub: _optique_core.ValueParserResult<string> | undefined;
+    readonly textRef: [_optique_core.ValueParserResult<"id-fragment" | "text-fragment"> | undefined] | undefined;
     readonly reports: [_optique_core.ValueParserResult<string> | undefined] | undefined;
 }>;
 declare const alignCommand: _optique_core.Parser<"sync", {
@@ -16,6 +18,7 @@ declare const alignCommand: _optique_core.Parser<"sync", {
 } & {
     readonly audiobook: string;
     readonly epub: string;
+    readonly textRef: "id-fragment" | "text-fragment";
     readonly reports: string | undefined;
 } & {
     readonly noProgress: boolean;

package/dist/align/parse.d.ts CHANGED Viewed

@@ -3,10 +3,12 @@ import * as _optique_core from '@optique/core';
 declare const alignParser: _optique_core.Parser<"sync", {
     readonly audiobook: string;
     readonly epub: string;
+    readonly textRef: "id-fragment" | "text-fragment";
     readonly reports: string | undefined;
 }, {
     readonly audiobook: _optique_core.ValueParserResult<string> | undefined;
     readonly epub: _optique_core.ValueParserResult<string> | undefined;
+    readonly textRef: [_optique_core.ValueParserResult<"id-fragment" | "text-fragment"> | undefined] | undefined;
     readonly reports: [_optique_core.ValueParserResult<string> | undefined] | undefined;
 }>;
 declare const alignCommand: _optique_core.Parser<"sync", {
@@ -16,6 +18,7 @@ declare const alignCommand: _optique_core.Parser<"sync", {
 } & {
     readonly audiobook: string;
     readonly epub: string;
+    readonly textRef: "id-fragment" | "text-fragment";
     readonly reports: string | undefined;
 } & {
     readonly noProgress: boolean;

package/dist/align/parse.js CHANGED Viewed

@@ -1,12 +1,14 @@
 import "../chunk-BIEQXUOY.js";
 import {
+  choice,
   command,
   constant,
   merge,
   message,
   object,
   option,
-  optional
+  optional,
+  withDefault
 } from "@optique/core";
 import { path } from "@optique/run/valueparser";
 import {
@@ -23,6 +25,12 @@ const alignParser = object("Alignment", {
     "--epub",
     path({ mustExist: true, type: "file", extensions: [".epub"] })
   ),
+  textRef: withDefault(
+    option("--text-ref", choice(["id-fragment", "text-fragment"]), {
+      description: message`Whether to use text fragments rather than element id fragments to identify text ranges in generated media overlays.`
+    }),
+    "id-fragment"
+  ),
   reports: optional(option("--reports", path({ type: "directory" })))
 });
 const alignCommand = command(

package/dist/align/textFragments.cjs ADDED Viewed

@@ -0,0 +1,147 @@
+"use strict";
+var __defProp = Object.defineProperty;
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __export = (target, all) => {
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+var __copyProps = (to, from, except, desc) => {
+  if (from && typeof from === "object" || typeof from === "function") {
+    for (let key of __getOwnPropNames(from))
+      if (!__hasOwnProp.call(to, key) && key !== except)
+        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+  }
+  return to;
+};
+var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+var textFragments_exports = {};
+__export(textFragments_exports, {
+  TextFragmentTrie: () => TextFragmentTrie
+});
+module.exports = __toCommonJS(textFragments_exports);
+var import_itertools = require("itertools");
+var import_runes2 = require("runes2");
+class TextFragmentTrie {
+  root = new Node(null, "");
+  spans;
+  constructor(casedSpans, locale = new Intl.Locale("en-Latn-US")) {
+    this.spans = casedSpans.map((span) => span.toLocaleLowerCase(locale));
+    for (const [i, span] of (0, import_itertools.enumerate)(this.spans)) {
+      const parents = [this.root];
+      for (const [j, char] of (0, import_itertools.enumerate)((0, import_runes2.runes)(span))) {
+        for (const [k, parent] of (0, import_itertools.enumerate)(parents)) {
+          const newNode = new Node(parent, char, { span: i, pos: j });
+          let node = parent.children.find((child) => child.eq(newNode));
+          if (!node) {
+            node = newNode;
+            parent.children.push(node);
+          } else {
+            node.indices.push({ span: i, pos: j });
+          }
+          parents[k] = node;
+        }
+        parents.push(this.root);
+      }
+    }
+  }
+  findMinimalFragment(spanIndex) {
+    let node = this.root;
+    while (node.children.length) {
+      const candidates = node.children.filter(
+        (child2) => child2.indices.some(
+          ({ span: childSpanIndex }) => childSpanIndex === spanIndex
+        )
+      );
+      const child = (0, import_itertools.min)(
+        candidates,
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        (c) => c.indices.find((i) => i.span === spanIndex).pos
+      );
+      if (!child) {
+        return this.nodeToFragment(node, spanIndex, true);
+      }
+      if (child.indices.length === 1) {
+        return this.nodeToFragment(child, spanIndex);
+      }
+      node = child;
+    }
+    return this.nodeToFragment(node, spanIndex, true);
+  }
+  nodeToFragment(node, spanIndex, findPrefix) {
+    const span = this.spans[spanIndex];
+    let fragment = ":~:text=";
+    let prefix = "";
+    if (findPrefix) {
+      const prev = this.spans[spanIndex - 1];
+      if (prev) {
+        const prefixes = node.indices.filter(({ span: s }) => s !== spanIndex).map(({ span: spanIndex2, pos }) => {
+          let startNode2 = node;
+          let startPos = pos;
+          while (startNode2.parent && startNode2.parent !== this.root) {
+            startPos -= startNode2.value.length;
+            startNode2 = startNode2.parent;
+          }
+          const prev2 = this.spans[spanIndex2 - 1];
+          const span2 = this.spans[spanIndex2];
+          return (prev2 ?? "") + span2.slice(0, startPos);
+        });
+        const reversedPrefixes = prefixes.map((p) => (0, import_runes2.runes)(p).toReversed());
+        for (const [i2, char] of (0, import_itertools.enumerate)((0, import_runes2.runes)(prev).toReversed())) {
+          prefix = char + prefix;
+          for (const [j, p] of (0, import_itertools.enumerate)([...reversedPrefixes.toReversed()])) {
+            if (p[i2] !== char) {
+              reversedPrefixes.splice(reversedPrefixes.length - 1 - j, 1);
+            }
+          }
+          if (reversedPrefixes.length === 0) {
+            break;
+          }
+        }
+      }
+    }
+    if (prefix) {
+      fragment += `${encodeTextFragmentPart(prefix)}-,`;
+    }
+    let startNode = node;
+    let start = "";
+    while (startNode) {
+      start = startNode.value + start;
+      startNode = startNode.parent;
+    }
+    fragment += encodeTextFragmentPart(start);
+    const remainingSentence = span.slice(start.length + node.value.length);
+    let end = "";
+    let i = remainingSentence.length - 1;
+    while (remainingSentence.indexOf(end) !== i + 1 && i >= node.value.length) {
+      end = remainingSentence.slice(i);
+      i--;
+    }
+    if (end) {
+      fragment += `,${encodeTextFragmentPart(end)}`;
+    }
+    return fragment;
+  }
+}
+function encodeTextFragmentPart(part) {
+  return encodeURIComponent(part).replaceAll(/-/g, "%2d").replaceAll(/,/g, "%2c");
+}
+class Node {
+  constructor(parent, value, firstIndex) {
+    this.parent = parent;
+    this.value = value;
+    if (firstIndex !== void 0) {
+      this.indices.push(firstIndex);
+    }
+  }
+  children = [];
+  indices = [];
+  eq(other) {
+    return this.value === other.value;
+  }
+}
+// Annotate the CommonJS export names for ESM import in node:
+0 && (module.exports = {
+  TextFragmentTrie
+});

package/dist/align/textFragments.d.cts ADDED Viewed

@@ -0,0 +1,23 @@
+declare class TextFragmentTrie {
+    private root;
+    private spans;
+    constructor(casedSpans: string[], locale?: Intl.Locale);
+    findMinimalFragment(spanIndex: number): string;
+    nodeToFragment(node: Node, spanIndex: number, findPrefix?: boolean): string;
+}
+declare class Node {
+    parent: Node | null;
+    value: string;
+    children: Node[];
+    indices: {
+        span: number;
+        pos: number;
+    }[];
+    constructor(parent: Node | null, value: string, firstIndex?: {
+        span: number;
+        pos: number;
+    });
+    eq(other: Node): boolean;
+}
+export { TextFragmentTrie };

package/dist/align/textFragments.d.ts ADDED Viewed

@@ -0,0 +1,23 @@
+declare class TextFragmentTrie {
+    private root;
+    private spans;
+    constructor(casedSpans: string[], locale?: Intl.Locale);
+    findMinimalFragment(spanIndex: number): string;
+    nodeToFragment(node: Node, spanIndex: number, findPrefix?: boolean): string;
+}
+declare class Node {
+    parent: Node | null;
+    value: string;
+    children: Node[];
+    indices: {
+        span: number;
+        pos: number;
+    }[];
+    constructor(parent: Node | null, value: string, firstIndex?: {
+        span: number;
+        pos: number;
+    });
+    eq(other: Node): boolean;
+}
+export { TextFragmentTrie };