@storyteller-platform/align 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/dist/align/__tests__/align.test.cjs +6 -5
  2. package/dist/align/__tests__/align.test.js +6 -5
  3. package/dist/align/align.cjs +133 -81
  4. package/dist/align/align.d.cts +1 -0
  5. package/dist/align/align.d.ts +1 -0
  6. package/dist/align/align.js +133 -81
  7. package/dist/align/getSentenceRanges.cjs +78 -149
  8. package/dist/align/getSentenceRanges.d.cts +1 -1
  9. package/dist/align/getSentenceRanges.d.ts +1 -1
  10. package/dist/align/getSentenceRanges.js +78 -149
  11. package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
  12. package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
  13. package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
  14. package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
  15. package/dist/errorAlign/__tests__/native.test.cjs +118 -0
  16. package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
  17. package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
  18. package/dist/errorAlign/__tests__/native.test.js +107 -0
  19. package/dist/errorAlign/backtraceGraph.cjs +298 -0
  20. package/dist/errorAlign/backtraceGraph.d.cts +103 -0
  21. package/dist/errorAlign/backtraceGraph.d.ts +103 -0
  22. package/dist/errorAlign/backtraceGraph.js +270 -0
  23. package/dist/errorAlign/beamSearch.cjs +302 -0
  24. package/dist/errorAlign/beamSearch.d.cts +53 -0
  25. package/dist/errorAlign/beamSearch.d.ts +53 -0
  26. package/dist/errorAlign/beamSearch.js +268 -0
  27. package/dist/errorAlign/core.cjs +33 -0
  28. package/dist/errorAlign/core.d.cts +5 -0
  29. package/dist/errorAlign/core.d.ts +5 -0
  30. package/dist/errorAlign/core.js +11 -0
  31. package/dist/errorAlign/editDistance.cjs +115 -0
  32. package/dist/errorAlign/editDistance.d.cts +46 -0
  33. package/dist/errorAlign/editDistance.d.ts +46 -0
  34. package/dist/errorAlign/editDistance.js +90 -0
  35. package/dist/errorAlign/errorAlign.cjs +159 -0
  36. package/dist/errorAlign/errorAlign.d.cts +15 -0
  37. package/dist/errorAlign/errorAlign.d.ts +15 -0
  38. package/dist/errorAlign/errorAlign.js +145 -0
  39. package/dist/errorAlign/graphMetadata.cjs +97 -0
  40. package/dist/errorAlign/graphMetadata.d.cts +44 -0
  41. package/dist/errorAlign/graphMetadata.d.ts +44 -0
  42. package/dist/errorAlign/graphMetadata.js +64 -0
  43. package/dist/errorAlign/hash.cjs +173 -0
  44. package/dist/errorAlign/hash.d.cts +28 -0
  45. package/dist/errorAlign/hash.d.ts +28 -0
  46. package/dist/errorAlign/hash.js +150 -0
  47. package/dist/errorAlign/native.cjs +60 -0
  48. package/dist/errorAlign/native.d.cts +18 -0
  49. package/dist/errorAlign/native.d.ts +18 -0
  50. package/dist/errorAlign/native.js +24 -0
  51. package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
  52. package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
  53. package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
  54. package/dist/errorAlign/node-gyp-build.d.js +0 -0
  55. package/dist/errorAlign/pathToAlignment.cjs +122 -0
  56. package/dist/errorAlign/pathToAlignment.d.cts +11 -0
  57. package/dist/errorAlign/pathToAlignment.d.ts +11 -0
  58. package/dist/errorAlign/pathToAlignment.js +89 -0
  59. package/dist/errorAlign/utils.cjs +301 -0
  60. package/dist/errorAlign/utils.d.cts +107 -0
  61. package/dist/errorAlign/utils.d.ts +107 -0
  62. package/dist/errorAlign/utils.js +248 -0
  63. package/dist/index.d.cts +1 -0
  64. package/dist/index.d.ts +1 -0
  65. package/dist/markup/__tests__/markup.test.cjs +108 -81
  66. package/dist/markup/__tests__/markup.test.js +109 -82
  67. package/dist/markup/__tests__/parseDom.test.cjs +112 -0
  68. package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
  69. package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
  70. package/dist/markup/__tests__/parseDom.test.js +89 -0
  71. package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
  72. package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
  73. package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
  74. package/dist/markup/__tests__/serializeDom.test.js +97 -0
  75. package/dist/markup/__tests__/transform.test.cjs +122 -0
  76. package/dist/markup/__tests__/transform.test.d.cts +2 -0
  77. package/dist/markup/__tests__/transform.test.d.ts +2 -0
  78. package/dist/markup/__tests__/transform.test.js +99 -0
  79. package/dist/markup/map.cjs +261 -0
  80. package/dist/markup/map.d.cts +50 -0
  81. package/dist/markup/map.d.ts +50 -0
  82. package/dist/markup/map.js +236 -0
  83. package/dist/markup/markup.cjs +23 -201
  84. package/dist/markup/markup.d.cts +5 -9
  85. package/dist/markup/markup.d.ts +5 -9
  86. package/dist/markup/markup.js +24 -203
  87. package/dist/markup/model.cjs +172 -0
  88. package/dist/markup/model.d.cts +57 -0
  89. package/dist/markup/model.d.ts +57 -0
  90. package/dist/markup/model.js +145 -0
  91. package/dist/markup/parseDom.cjs +59 -0
  92. package/dist/markup/parseDom.d.cts +7 -0
  93. package/dist/markup/parseDom.d.ts +7 -0
  94. package/dist/markup/parseDom.js +35 -0
  95. package/dist/markup/segmentation.cjs +11 -57
  96. package/dist/markup/segmentation.d.cts +6 -2
  97. package/dist/markup/segmentation.d.ts +6 -2
  98. package/dist/markup/segmentation.js +11 -58
  99. package/dist/markup/serializeDom.cjs +87 -0
  100. package/dist/markup/serializeDom.d.cts +7 -0
  101. package/dist/markup/serializeDom.d.ts +7 -0
  102. package/dist/markup/serializeDom.js +63 -0
  103. package/dist/markup/transform.cjs +92 -0
  104. package/dist/markup/transform.d.cts +11 -0
  105. package/dist/markup/transform.d.ts +11 -0
  106. package/dist/markup/transform.js +71 -0
  107. package/dist/types/node-gyp-build.d.cjs +1 -0
  108. package/dist/types/node-gyp-build.d.d.cts +3 -0
  109. package/dist/types/node-gyp-build.d.d.ts +3 -0
  110. package/dist/types/node-gyp-build.d.js +0 -0
  111. package/package.json +11 -4
@@ -0,0 +1,248 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import assert from "node:assert";
3
+ import { chain, enumerate, every, find, map, range } from "itertools";
4
/** Edit operations recognized by the aligner, in canonical order. */
const OP_TYPES = ["MATCH", "INSERT", "DELETE", "SUBSTITUTE"];
10
/**
 * One alignment operation relating a reference token to a hypothesis token.
 *
 * The constructor validates the ref/hyp combination required by each op type:
 *   MATCH      — both ref and hyp present, no compound markers
 *   INSERT     — hyp present, ref absent
 *   DELETE     — ref present, hyp absent
 *   SUBSTITUTE — both ref and hyp present
 * NOTE(review): an unrecognized opType passes through unvalidated (the switch
 * has no default) and toString() returns undefined for it — confirm intended.
 */
class Alignment {
  /**
   * @param opType - one of OP_TYPES
   * @param refSlice - [start, end) token indices into the reference, or null
   * @param hypSlice - [start, end) token indices into the hypothesis, or null
   * @param ref - reference token text, or null
   * @param hyp - hypothesis token text, or null
   * @param leftCompound - hyp joins leftward into a compound word
   * @param rightCompound - hyp joins rightward into a compound word
   * @throws {TypeError} when ref/hyp do not satisfy the op type's contract
   */
  constructor(opType, refSlice = null, hypSlice = null, ref = null, hyp = null, leftCompound = false, rightCompound = false) {
    this.opType = opType;
    this.refSlice = refSlice;
    this.hypSlice = hypSlice;
    this.ref = ref;
    this.hyp = hyp;
    this.leftCompound = leftCompound;
    this.rightCompound = rightCompound;
    switch (opType) {
      case "MATCH": {
        if (ref === null || hyp === null) {
          // Fixed message: the guard requires BOTH ref and hyp ("or" was wrong).
          throw new TypeError("MATCH operation must have non-empty ref and hyp.");
        }
        if (leftCompound || rightCompound) {
          throw new TypeError("MATCH operation cannot have compound markers.");
        }
        break;
      }
      case "INSERT": {
        if (hyp === null || ref !== null) {
          throw new TypeError(
            "INSERT operation must have non-empty hyp and empty ref."
          );
        }
        break;
      }
      case "DELETE": {
        if (hyp !== null || ref === null) {
          throw new TypeError(
            "DELETE operation must have non-empty ref and empty hyp."
          );
        }
        break;
      }
      case "SUBSTITUTE": {
        if (ref === null || hyp === null) {
          throw new TypeError(
            "SUBSTITUTE operation must have both ref and hyp."
          );
        }
      }
    }
  }
  /** Return the hypothesis with compound markers if applicable. */
  get hypWithCompoundMarkers() {
    if (this.hyp === null) {
      return null;
    }
    return `${this.leftCompound ? "-" : ""}"${this.hyp}"${this.rightCompound ? "-" : ""}`;
  }
  /** Human-readable form, e.g. `Alignment(SUBSTITUTE: "bat" -> "cat")`. */
  toString() {
    switch (this.opType) {
      case "DELETE": {
        return `Alignment(${this.opType}: "${this.ref}")`;
      }
      case "INSERT": {
        return `Alignment(${this.opType}: ${this.hypWithCompoundMarkers})`;
      }
      case "SUBSTITUTE": {
        return `Alignment(${this.opType}: ${this.hypWithCompoundMarkers} -> "${this.ref}")`;
      }
      case "MATCH": {
        return `Alignment(${this.opType}: "${this.hyp}" == "${this.ref}")`;
      }
    }
  }
}
78
/**
 * Yield every non-empty combination of OP_TYPES, ordered by combination size
 * (all 1-element combos, then all 2-element combos, and so on).
 */
function opTypePowerset() {
  const sizes = range(1, OP_TYPES.length + 1);
  return chain(...map(sizes, (size) => combinations(OP_TYPES, size)));
}
85
/**
 * Yield all r-length combinations of the iterable, in lexicographic order of
 * element positions (a port of Python's itertools.combinations).
 * Yields nothing when r exceeds the pool size; yields one empty combination
 * when r is 0.
 */
function* combinations(iterable, r) {
  const pool = Array.from(iterable);
  const n = pool.length;
  if (r > n) {
    return;
  }
  // indices[k] tracks which pool position fills slot k of the combination.
  const indices = [];
  for (let k = 0; k < r; k += 1) {
    indices.push(k);
  }
  yield indices.map((k) => pool[k]);
  while (true) {
    // Find the rightmost slot that can still advance.
    let i = r - 1;
    while (i >= 0 && indices[i] === i + n - r) {
      i -= 1;
    }
    if (i < 0) {
      return;
    }
    indices[i] += 1;
    for (let j = i + 1; j < r; j += 1) {
      indices[j] = indices[j - 1] + 1;
    }
    yield indices.map((k) => pool[k]);
  }
}
110
/** Return the items of an iterable as a new array in reverse order. */
function reversed(iterable) {
  const items = Array.from(iterable);
  items.reverse();
  return items;
}
113
/** Delimiter characters used to mark boundaries inside alignment text. */
const START_DELIMITER = "<";
const END_DELIMITER = ">";
const DELIMITERS = /* @__PURE__ */ new Set([START_DELIMITER, END_DELIMITER]);
/** Identity lookup from op-type name to itself. */
const OP_TYPE_MAP = Object.fromEntries(OP_TYPES.map((opType) => [opType, opType]));
/** Index -> op-type combination, mirroring opTypePowerset() enumeration order. */
const OP_TYPE_COMBO_MAP = Object.fromEntries(enumerate(opTypePowerset()));
121
/**
 * Return the index of `ops` within the opTypePowerset() enumeration.
 * NOTE(review): if `ops` is not exactly one of the powerset combinations,
 * `find` yields undefined and the trailing `[0]` throws a TypeError — confirm
 * callers only pass valid combinations.
 */
function getOpTypeComboIndex(ops) {
  const entry = find(
    enumerate(opTypePowerset()),
    ([, combo]) => combo.length === ops.length && every(range(combo.length), (i) => combo[i] === ops[i])
  );
  return entry[0];
}
127
// A run of digits with optional comma/period-separated digit groups
// (e.g. "1,234.56"), required to end at whitespace or end-of-input.
const NUMERIC_TOKEN = "\\p{N}+([,.]\\p{N}+)*(?=\\s|$)";
// A word of letters/digits, allowing internal apostrophes ("don't") and one
// optional trailing apostrophe.
const STANDARD_TOKEN = "[\\p{L}\\p{N}]+(['][\\p{L}\\p{N}]+)*'?";
129
/**
 * True when `c` is a lowercase English vowel ("y" counts as a vowel).
 * @throws {AssertionError} when `c` is not exactly one character
 */
function isVowel(c) {
  assert(c.length === 1, "Input must be a single character");
  const vowels = "aeiouy";
  return vowels.includes(c);
}
133
/**
 * True when `c` is a lowercase English consonant.
 * NOTE(review): "y" appears in both the vowel and consonant sets; callers such
 * as categorizeChar resolve the ambiguity by checking consonants first.
 * @throws {AssertionError} when `c` is not exactly one character
 */
function isConsonant(c) {
  assert(c.length === 1, "Input must be a single character");
  const consonants = "bcdfghjklmnpqrstvwxyz";
  return consonants.includes(c);
}
137
/**
 * Classify a single character into a coarse category:
 * 0 = delimiter, 1 = consonant, 2 = vowel, 3 = anything else.
 * Consonants are tested before vowels, so "y" classifies as a consonant.
 */
function categorizeChar(c) {
  if (DELIMITERS.has(c)) {
    return 0;
  }
  if (isConsonant(c)) {
    return 1;
  }
  return isVowel(c) ? 2 : 3;
}
143
/**
 * Find every numeric or standard word token in `text`.
 * Returns raw RegExp match objects with capture-group indices enabled
 * ("d" flag), suitable for unpackRegexMatch.
 */
function basicTokenizer(text) {
  const tokenPattern = new RegExp(`(${NUMERIC_TOKEN}|${STANDARD_TOKEN})`, "udg");
  return [...text.matchAll(tokenPattern)];
}
148
/**
 * Default text normalizer: lowercase the input.
 * NOTE(review): toLowerCase can change string length for a few code points
 * (e.g. "İ" -> "i̇"), which would trip ensureLengthPreservation — confirm
 * inputs are restricted to characters where lowercasing is length-stable.
 */
function basicNormalizer(text) {
  const lowered = text.toLowerCase();
  return lowered;
}
151
/**
 * Wrap a normalizer so it is guaranteed length-preserving: the wrapper
 * forwards all arguments and throws a RangeError if the normalized text's
 * length differs from the input's.
 */
function ensureLengthPreservation(normalizer) {
  return function wrapper(text, ...args) {
    const result = normalizer(text, ...args);
    if (result.length === text.length) {
      return result;
    }
    throw new RangeError("Normalizer must preserve length.");
  };
}
160
/**
 * Adapt a regex-based tokenizer so each match is unpacked into a
 * [tokenText, [start, end]] pair taken from capture group 1 (requires the
 * tokenizer's pattern to carry the "d" indices flag).
 */
function unpackRegexMatch(tokenizer) {
  return function wrapper(text, ...args) {
    return tokenizer(text, ...args).map((m) => [m[1], m.indices[1]]);
  };
}
166
/**
 * Translate a [start, end) slice through an index map, skipping unmapped
 * positions (negative entries). Returns the [first, last + 1) mapped range,
 * or null when nothing in the slice maps.
 */
function translateSlice(segmentSlice, indexMap) {
  const [start, end] = segmentSlice;
  const mapped = indexMap.slice(start, end).filter((index) => index >= 0);
  if (mapped.length === 0) {
    return null;
  }
  return [mapped[0], mapped[mapped.length - 1] + 1];
}
173
+ class Counter {
174
+ counts = /* @__PURE__ */ new Map();
175
+ constructor(init = []) {
176
+ if (init instanceof Map) {
177
+ this.counts = init;
178
+ return;
179
+ }
180
+ for (const element of init) {
181
+ this.counts.set(element, (this.counts.get(element) ?? 0) + 1);
182
+ }
183
+ }
184
+ elements() {
185
+ return this.counts.entries().flatMap(([e, c]) => Array.from(range(c)).map(() => e));
186
+ }
187
+ mostCommon(n) {
188
+ const ordered = Array.from(this.counts.entries()).toSorted(
189
+ ([_a, a], [_b, b]) => a - b
190
+ );
191
+ if (n === void 0) return ordered;
192
+ return ordered.slice(0, n);
193
+ }
194
+ total() {
195
+ return this.counts.values().reduce((acc, v) => acc + v);
196
+ }
197
+ subtract(update) {
198
+ if (update instanceof Map) {
199
+ for (const [element, count] of update.entries()) {
200
+ this.counts.set(element, (this.counts.get(element) ?? 0) - count);
201
+ }
202
+ return;
203
+ }
204
+ for (const element of update) {
205
+ this.counts.set(element, (this.counts.get(element) ?? 0) - 1);
206
+ }
207
+ }
208
+ update(update) {
209
+ if (update instanceof Map) {
210
+ for (const [element, count] of update.entries()) {
211
+ this.counts.set(element, (this.counts.get(element) ?? 0) + count);
212
+ }
213
+ return;
214
+ }
215
+ for (const element of update) {
216
+ this.counts.set(element, (this.counts.get(element) ?? 0) + 1);
217
+ }
218
+ }
219
+ get(element) {
220
+ return this.counts.get(element) ?? 0;
221
+ }
222
+ set(element, count) {
223
+ this.counts.set(element, count);
224
+ }
225
+ }
226
// Public API of this utils module; names are kept in alphabetical order
// (constants and classes first by case, then functions).
export {
  Alignment,
  Counter,
  DELIMITERS,
  END_DELIMITER,
  NUMERIC_TOKEN,
  OP_TYPES,
  OP_TYPE_COMBO_MAP,
  OP_TYPE_MAP,
  STANDARD_TOKEN,
  START_DELIMITER,
  basicNormalizer,
  basicTokenizer,
  categorizeChar,
  ensureLengthPreservation,
  getOpTypeComboIndex,
  isConsonant,
  isVowel,
  opTypePowerset,
  reversed,
  translateSlice,
  unpackRegexMatch
};
package/dist/index.d.cts CHANGED
@@ -8,5 +8,6 @@ import 'pino';
8
8
  import './process/AudioEncoding.cjs';
9
9
  import '@echogarden/text-segmentation';
10
10
  import '@storyteller-platform/epub';
11
+ import './markup/map.cjs';
11
12
  import '@storyteller-platform/ghost-story/recognition';
12
13
  import './align/getSentenceRanges.cjs';
package/dist/index.d.ts CHANGED
@@ -8,5 +8,6 @@ import 'pino';
8
8
  import './process/AudioEncoding.js';
9
9
  import '@echogarden/text-segmentation';
10
10
  import '@storyteller-platform/epub';
11
+ import './markup/map.js';
11
12
  import '@storyteller-platform/ghost-story/recognition';
12
13
  import './align/getSentenceRanges.js';
@@ -22,55 +22,70 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
22
22
  mod
23
23
  ));
24
24
  var import_node_assert = __toESM(require("node:assert"), 1);
25
+ var import_promises = require("node:fs/promises");
26
+ var import_node_path = require("node:path");
25
27
  var import_node_test = require("node:test");
26
28
  var import_epub = require("@storyteller-platform/epub");
27
29
  var import_markup = require("../markup.cjs");
28
30
  var import_segmentation = require("../segmentation.cjs");
29
- void (0, import_node_test.describe)("appendTextNode", () => {
30
- void (0, import_node_test.it)("can append text nodes to empty parents", () => {
31
- const input = [];
32
- (0, import_markup.appendTextNode)("chapter_one", input, "test", [], /* @__PURE__ */ new Set());
33
- import_node_assert.default.deepStrictEqual(input, [{ "#text": "test" }]);
34
- });
35
- void (0, import_node_test.it)("can append text nodes with marks", () => {
36
- const input = [];
37
- (0, import_markup.appendTextNode)(
38
- "chapter_one",
39
- input,
40
- "test",
41
- [{ elementName: "a", attributes: { "@_href": "#" } }],
42
- /* @__PURE__ */ new Set()
43
- );
44
- import_node_assert.default.deepStrictEqual(input, [
45
- { a: [{ "#text": "test" }], ":@": { "@_href": "#" } }
46
- ]);
47
- });
48
- void (0, import_node_test.it)("can wrap text nodes with sentence spans", () => {
49
- const input = [];
50
- (0, import_markup.appendTextNode)("chapter_one", input, "test", [], /* @__PURE__ */ new Set(), 0);
51
- import_node_assert.default.deepStrictEqual(input, [
52
- {
53
- span: [{ "#text": "test" }],
54
- ":@": { "@_id": "chapter_one-s0" }
55
- }
56
- ]);
57
- });
58
- void (0, import_node_test.it)("can join text nodes with the same sentence ids", () => {
59
- const input = [
60
- {
61
- span: [{ "#text": "test" }],
62
- ":@": { "@_id": "chapter_one-s0" }
63
- }
64
- ];
65
- (0, import_markup.appendTextNode)("chapter_one", input, "test", [], /* @__PURE__ */ new Set(), 0);
66
- import_node_assert.default.deepStrictEqual(input, [
67
- {
68
- span: [{ "#text": "test" }, { "#text": "test" }],
69
- ":@": { "@_id": "chapter_one-s0" }
31
+ function sanitizeFilename(title) {
32
+ return title.replace(/[/\\:*?"<>|]/g, "-").replace(/\s+/g, " ").trim().replace(/[.]+$/, "");
33
+ }
34
+ function truncate(input, byteLimit, suffix = "") {
35
+ const normalized = input.normalize("NFC");
36
+ const encoder = new TextEncoder();
37
+ let result = "";
38
+ for (const char of normalized) {
39
+ const withSuffix = result + char + suffix;
40
+ const byteLength = encoder.encode(withSuffix).length;
41
+ if (byteLength > byteLimit) break;
42
+ result += char;
43
+ }
44
+ return result + suffix;
45
+ }
46
+ function getSafeFilepathSegment(name, suffix = "") {
47
+ return truncate(sanitizeFilename(name), 150, suffix);
48
+ }
49
+ async function assertMarkupSnapshot(context, output) {
50
+ const snapshotFilename = getSafeFilepathSegment(context.fullName, ".snapshot");
51
+ const snapshotFilepath = (0, import_node_path.join)(
52
+ "src",
53
+ "markup",
54
+ "__snapshots__",
55
+ snapshotFilename
56
+ );
57
+ if (process.env["UPDATE_SNAPSHOTS"]) {
58
+ await (0, import_promises.mkdir)((0, import_node_path.dirname)(snapshotFilepath), { recursive: true });
59
+ await (0, import_promises.writeFile)(snapshotFilepath, output, { encoding: "utf-8" });
60
+ return;
61
+ }
62
+ try {
63
+ const existingSnapshot = await (0, import_promises.readFile)(snapshotFilepath, {
64
+ encoding: "utf-8"
65
+ });
66
+ const existingLines = existingSnapshot.split("\n");
67
+ const newLines = output.split("\n");
68
+ for (let i = 0; i < existingLines.length; i++) {
69
+ const existingLine = existingLines[i];
70
+ const newLine = newLines[i];
71
+ if (existingLine !== newLine) {
72
+ import_node_assert.default.strictEqual(
73
+ newLines.slice(Math.max(0, i - 5), i + 5),
74
+ existingLines.slice(Math.max(0, i - 5), i + 5)
75
+ );
70
76
  }
71
- ]);
72
- });
73
- });
77
+ }
78
+ } catch (e) {
79
+ if (e instanceof import_node_assert.default.AssertionError) {
80
+ throw e;
81
+ }
82
+ throw new import_node_assert.default.AssertionError({
83
+ actual: output,
84
+ expected: "",
85
+ diff: "simple"
86
+ });
87
+ }
88
+ }
74
89
  void (0, import_node_test.describe)("markupChapter", () => {
75
90
  void (0, import_node_test.it)("can tag sentences", async (t) => {
76
91
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -109,16 +124,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
109
124
  </html>
110
125
  `
111
126
  );
112
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
127
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
113
128
  import_epub.Epub.getXhtmlBody(input),
114
129
  {}
115
130
  );
116
131
  const { markedUp: output } = (0, import_markup.markupChapter)(
117
132
  "chapter_one",
118
133
  input,
119
- segmentation
134
+ segmentation,
135
+ mapping
120
136
  );
121
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
137
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
122
138
  });
123
139
  void (0, import_node_test.it)("can tag sentences with formatting marks", async (t) => {
124
140
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -142,16 +158,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
142
158
  </html>
143
159
  `
144
160
  );
145
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
161
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
146
162
  import_epub.Epub.getXhtmlBody(input),
147
163
  {}
148
164
  );
149
165
  const { markedUp: output } = (0, import_markup.markupChapter)(
150
166
  "chapter_one",
151
167
  input,
152
- segmentation
168
+ segmentation,
169
+ mapping
153
170
  );
154
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
171
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
155
172
  });
156
173
  void (0, import_node_test.it)("can tag sentences with formatting marks that overlap sentence boundaries", async (t) => {
157
174
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -175,16 +192,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
175
192
  </html>
176
193
  `
177
194
  );
178
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
195
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
179
196
  import_epub.Epub.getXhtmlBody(input),
180
197
  {}
181
198
  );
182
199
  const { markedUp: output } = (0, import_markup.markupChapter)(
183
200
  "chapter_one",
184
201
  input,
185
- segmentation
202
+ segmentation,
203
+ mapping
186
204
  );
187
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
205
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
188
206
  });
189
207
  void (0, import_node_test.it)("can tag sentences with nested formatting marks", async (t) => {
190
208
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -208,16 +226,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
208
226
  </html>
209
227
  `
210
228
  );
211
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
229
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
212
230
  import_epub.Epub.getXhtmlBody(input),
213
231
  {}
214
232
  );
215
233
  const { markedUp: output } = (0, import_markup.markupChapter)(
216
234
  "chapter_one",
217
235
  input,
218
- segmentation
236
+ segmentation,
237
+ mapping
219
238
  );
220
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
239
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
221
240
  });
222
241
  void (0, import_node_test.it)("can tag sentences with atoms", async (t) => {
223
242
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -241,16 +260,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
241
260
  </html>
242
261
  `
243
262
  );
244
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
263
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
245
264
  import_epub.Epub.getXhtmlBody(input),
246
265
  {}
247
266
  );
248
267
  const { markedUp: output } = (0, import_markup.markupChapter)(
249
268
  "chapter_one",
250
269
  input,
251
- segmentation
270
+ segmentation,
271
+ mapping
252
272
  );
253
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
273
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
254
274
  });
255
275
  void (0, import_node_test.it)("can tag sentences in nested textblocks", async (t) => {
256
276
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -284,16 +304,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
284
304
  </html>
285
305
  `
286
306
  );
287
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
307
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
288
308
  import_epub.Epub.getXhtmlBody(input),
289
309
  {}
290
310
  );
291
311
  const { markedUp: output } = (0, import_markup.markupChapter)(
292
312
  "chapter_one",
293
313
  input,
294
- segmentation
314
+ segmentation,
315
+ mapping
295
316
  );
296
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
317
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
297
318
  });
298
319
  void (0, import_node_test.it)("can tag sentences that cross textblock boundaries", async (t) => {
299
320
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -320,18 +341,19 @@ void (0, import_node_test.describe)("markupChapter", () => {
320
341
  </html>
321
342
  `
322
343
  );
323
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
344
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
324
345
  import_epub.Epub.getXhtmlBody(input),
325
346
  {}
326
347
  );
327
348
  const { markedUp: output } = (0, import_markup.markupChapter)(
328
349
  "chapter_one",
329
350
  input,
330
- segmentation
351
+ segmentation,
352
+ mapping
331
353
  );
332
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
354
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
333
355
  });
334
- void (0, import_node_test.it)("can handle soft page breaks", async (t) => {
356
+ void import_node_test.it.only("can handle soft page breaks", async (t) => {
335
357
  const input = import_epub.Epub.xhtmlParser.parse(
336
358
  /* xml */
337
359
  `
@@ -363,16 +385,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
363
385
  </body>
364
386
  </html>`
365
387
  );
366
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
388
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
367
389
  import_epub.Epub.getXhtmlBody(input),
368
390
  {}
369
391
  );
370
392
  const { markedUp: output } = (0, import_markup.markupChapter)(
371
393
  "chapter_one",
372
394
  input,
373
- segmentation
395
+ segmentation,
396
+ mapping
374
397
  );
375
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
398
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
376
399
  });
377
400
  void (0, import_node_test.it)("can handle boolean-like text values", async (t) => {
378
401
  const input = import_epub.Epub.xhtmlParser.parse(`
@@ -384,16 +407,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
384
407
  </body>
385
408
  </html>
386
409
  `);
387
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
410
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
388
411
  import_epub.Epub.getXhtmlBody(input),
389
412
  {}
390
413
  );
391
414
  const { markedUp: output } = (0, import_markup.markupChapter)(
392
415
  "chapter_one",
393
416
  input,
394
- segmentation
417
+ segmentation,
418
+ mapping
395
419
  );
396
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
420
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
397
421
  });
398
422
  void (0, import_node_test.it)("can handle number-like text values", async (t) => {
399
423
  const input = import_epub.Epub.xhtmlParser.parse(`
@@ -405,16 +429,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
405
429
  </body>
406
430
  </html>
407
431
  `);
408
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
432
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
409
433
  import_epub.Epub.getXhtmlBody(input),
410
434
  {}
411
435
  );
412
436
  const { markedUp: output } = (0, import_markup.markupChapter)(
413
437
  "chapter_one",
414
438
  input,
415
- segmentation
439
+ segmentation,
440
+ mapping
416
441
  );
417
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
442
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
418
443
  });
419
444
  void (0, import_node_test.it)("can handle null-like text values", async (t) => {
420
445
  const input = import_epub.Epub.xhtmlParser.parse(`
@@ -426,16 +451,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
426
451
  </body>
427
452
  </html>
428
453
  `);
429
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
454
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
430
455
  import_epub.Epub.getXhtmlBody(input),
431
456
  {}
432
457
  );
433
458
  const { markedUp: output } = (0, import_markup.markupChapter)(
434
459
  "chapter_one",
435
460
  input,
436
- segmentation
461
+ segmentation,
462
+ mapping
437
463
  );
438
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
464
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
439
465
  });
440
466
  void (0, import_node_test.it)("can preserve nbsp entities", async (t) => {
441
467
  const input = import_epub.Epub.xhtmlParser.parse(`
@@ -450,15 +476,16 @@ void (0, import_node_test.describe)("markupChapter", () => {
450
476
  </body>
451
477
  </html>
452
478
  `);
453
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
479
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
454
480
  import_epub.Epub.getXhtmlBody(input),
455
481
  {}
456
482
  );
457
483
  const { markedUp: output } = (0, import_markup.markupChapter)(
458
484
  "chapter_one",
459
485
  input,
460
- segmentation
486
+ segmentation,
487
+ mapping
461
488
  );
462
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
489
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
463
490
  });
464
491
  });