@storyteller-platform/align 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/dist/align/__tests__/align.test.cjs +6 -5
  2. package/dist/align/__tests__/align.test.js +6 -5
  3. package/dist/align/align.cjs +133 -81
  4. package/dist/align/align.d.cts +1 -0
  5. package/dist/align/align.d.ts +1 -0
  6. package/dist/align/align.js +133 -81
  7. package/dist/align/getSentenceRanges.cjs +78 -149
  8. package/dist/align/getSentenceRanges.d.cts +1 -1
  9. package/dist/align/getSentenceRanges.d.ts +1 -1
  10. package/dist/align/getSentenceRanges.js +78 -149
  11. package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
  12. package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
  13. package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
  14. package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
  15. package/dist/errorAlign/__tests__/native.test.cjs +118 -0
  16. package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
  17. package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
  18. package/dist/errorAlign/__tests__/native.test.js +107 -0
  19. package/dist/errorAlign/backtraceGraph.cjs +298 -0
  20. package/dist/errorAlign/backtraceGraph.d.cts +103 -0
  21. package/dist/errorAlign/backtraceGraph.d.ts +103 -0
  22. package/dist/errorAlign/backtraceGraph.js +270 -0
  23. package/dist/errorAlign/beamSearch.cjs +302 -0
  24. package/dist/errorAlign/beamSearch.d.cts +53 -0
  25. package/dist/errorAlign/beamSearch.d.ts +53 -0
  26. package/dist/errorAlign/beamSearch.js +268 -0
  27. package/dist/errorAlign/core.cjs +33 -0
  28. package/dist/errorAlign/core.d.cts +5 -0
  29. package/dist/errorAlign/core.d.ts +5 -0
  30. package/dist/errorAlign/core.js +11 -0
  31. package/dist/errorAlign/editDistance.cjs +115 -0
  32. package/dist/errorAlign/editDistance.d.cts +46 -0
  33. package/dist/errorAlign/editDistance.d.ts +46 -0
  34. package/dist/errorAlign/editDistance.js +90 -0
  35. package/dist/errorAlign/errorAlign.cjs +159 -0
  36. package/dist/errorAlign/errorAlign.d.cts +15 -0
  37. package/dist/errorAlign/errorAlign.d.ts +15 -0
  38. package/dist/errorAlign/errorAlign.js +145 -0
  39. package/dist/errorAlign/graphMetadata.cjs +97 -0
  40. package/dist/errorAlign/graphMetadata.d.cts +44 -0
  41. package/dist/errorAlign/graphMetadata.d.ts +44 -0
  42. package/dist/errorAlign/graphMetadata.js +64 -0
  43. package/dist/errorAlign/hash.cjs +173 -0
  44. package/dist/errorAlign/hash.d.cts +28 -0
  45. package/dist/errorAlign/hash.d.ts +28 -0
  46. package/dist/errorAlign/hash.js +150 -0
  47. package/dist/errorAlign/native.cjs +60 -0
  48. package/dist/errorAlign/native.d.cts +18 -0
  49. package/dist/errorAlign/native.d.ts +18 -0
  50. package/dist/errorAlign/native.js +24 -0
  51. package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
  52. package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
  53. package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
  54. package/dist/errorAlign/node-gyp-build.d.js +0 -0
  55. package/dist/errorAlign/pathToAlignment.cjs +122 -0
  56. package/dist/errorAlign/pathToAlignment.d.cts +11 -0
  57. package/dist/errorAlign/pathToAlignment.d.ts +11 -0
  58. package/dist/errorAlign/pathToAlignment.js +89 -0
  59. package/dist/errorAlign/utils.cjs +301 -0
  60. package/dist/errorAlign/utils.d.cts +107 -0
  61. package/dist/errorAlign/utils.d.ts +107 -0
  62. package/dist/errorAlign/utils.js +248 -0
  63. package/dist/index.d.cts +1 -0
  64. package/dist/index.d.ts +1 -0
  65. package/dist/markup/__tests__/markup.test.cjs +108 -81
  66. package/dist/markup/__tests__/markup.test.js +109 -82
  67. package/dist/markup/__tests__/parseDom.test.cjs +112 -0
  68. package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
  69. package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
  70. package/dist/markup/__tests__/parseDom.test.js +89 -0
  71. package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
  72. package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
  73. package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
  74. package/dist/markup/__tests__/serializeDom.test.js +97 -0
  75. package/dist/markup/__tests__/transform.test.cjs +122 -0
  76. package/dist/markup/__tests__/transform.test.d.cts +2 -0
  77. package/dist/markup/__tests__/transform.test.d.ts +2 -0
  78. package/dist/markup/__tests__/transform.test.js +99 -0
  79. package/dist/markup/map.cjs +261 -0
  80. package/dist/markup/map.d.cts +50 -0
  81. package/dist/markup/map.d.ts +50 -0
  82. package/dist/markup/map.js +236 -0
  83. package/dist/markup/markup.cjs +23 -201
  84. package/dist/markup/markup.d.cts +5 -9
  85. package/dist/markup/markup.d.ts +5 -9
  86. package/dist/markup/markup.js +24 -203
  87. package/dist/markup/model.cjs +172 -0
  88. package/dist/markup/model.d.cts +57 -0
  89. package/dist/markup/model.d.ts +57 -0
  90. package/dist/markup/model.js +145 -0
  91. package/dist/markup/parseDom.cjs +59 -0
  92. package/dist/markup/parseDom.d.cts +7 -0
  93. package/dist/markup/parseDom.d.ts +7 -0
  94. package/dist/markup/parseDom.js +35 -0
  95. package/dist/markup/segmentation.cjs +11 -57
  96. package/dist/markup/segmentation.d.cts +6 -2
  97. package/dist/markup/segmentation.d.ts +6 -2
  98. package/dist/markup/segmentation.js +11 -58
  99. package/dist/markup/serializeDom.cjs +87 -0
  100. package/dist/markup/serializeDom.d.cts +7 -0
  101. package/dist/markup/serializeDom.d.ts +7 -0
  102. package/dist/markup/serializeDom.js +63 -0
  103. package/dist/markup/transform.cjs +92 -0
  104. package/dist/markup/transform.d.cts +11 -0
  105. package/dist/markup/transform.d.ts +11 -0
  106. package/dist/markup/transform.js +71 -0
  107. package/dist/types/node-gyp-build.d.cjs +1 -0
  108. package/dist/types/node-gyp-build.d.d.cts +3 -0
  109. package/dist/types/node-gyp-build.d.d.ts +3 -0
  110. package/dist/types/node-gyp-build.d.js +0 -0
  111. package/package.json +11 -4
@@ -0,0 +1,248 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import assert from "node:assert";
3
+ import { chain, enumerate, every, find, map, range } from "itertools";
4
/** Edit operations recognized by the aligner, in canonical order. */
const OP_TYPES = ["MATCH", "INSERT", "DELETE", "SUBSTITUTE"];
10
/**
 * One alignment operation relating a reference token to a hypothesis token.
 *
 * The constructor validates the ref/hyp combination required by each op type:
 *   MATCH      — both ref and hyp present, no compound markers
 *   INSERT     — hyp present, ref absent
 *   DELETE     — ref present, hyp absent
 *   SUBSTITUTE — both ref and hyp present
 * NOTE(review): an unrecognized opType passes through unvalidated (the switch
 * has no default) and toString() returns undefined for it — confirm intended.
 */
class Alignment {
  /**
   * @param opType - one of OP_TYPES
   * @param refSlice - [start, end) token indices into the reference, or null
   * @param hypSlice - [start, end) token indices into the hypothesis, or null
   * @param ref - reference token text, or null
   * @param hyp - hypothesis token text, or null
   * @param leftCompound - hyp joins leftward into a compound word
   * @param rightCompound - hyp joins rightward into a compound word
   * @throws {TypeError} when ref/hyp do not satisfy the op type's contract
   */
  constructor(opType, refSlice = null, hypSlice = null, ref = null, hyp = null, leftCompound = false, rightCompound = false) {
    this.opType = opType;
    this.refSlice = refSlice;
    this.hypSlice = hypSlice;
    this.ref = ref;
    this.hyp = hyp;
    this.leftCompound = leftCompound;
    this.rightCompound = rightCompound;
    switch (opType) {
      case "MATCH": {
        if (ref === null || hyp === null) {
          // Fixed message: the guard requires BOTH ref and hyp ("or" was wrong).
          throw new TypeError("MATCH operation must have non-empty ref and hyp.");
        }
        if (leftCompound || rightCompound) {
          throw new TypeError("MATCH operation cannot have compound markers.");
        }
        break;
      }
      case "INSERT": {
        if (hyp === null || ref !== null) {
          throw new TypeError(
            "INSERT operation must have non-empty hyp and empty ref."
          );
        }
        break;
      }
      case "DELETE": {
        if (hyp !== null || ref === null) {
          throw new TypeError(
            "DELETE operation must have non-empty ref and empty hyp."
          );
        }
        break;
      }
      case "SUBSTITUTE": {
        if (ref === null || hyp === null) {
          throw new TypeError(
            "SUBSTITUTE operation must have both ref and hyp."
          );
        }
      }
    }
  }
  /** Return the hypothesis with compound markers if applicable. */
  get hypWithCompoundMarkers() {
    if (this.hyp === null) {
      return null;
    }
    return `${this.leftCompound ? "-" : ""}"${this.hyp}"${this.rightCompound ? "-" : ""}`;
  }
  /** Human-readable form, e.g. `Alignment(SUBSTITUTE: "bat" -> "cat")`. */
  toString() {
    switch (this.opType) {
      case "DELETE": {
        return `Alignment(${this.opType}: "${this.ref}")`;
      }
      case "INSERT": {
        return `Alignment(${this.opType}: ${this.hypWithCompoundMarkers})`;
      }
      case "SUBSTITUTE": {
        return `Alignment(${this.opType}: ${this.hypWithCompoundMarkers} -> "${this.ref}")`;
      }
      case "MATCH": {
        return `Alignment(${this.opType}: "${this.hyp}" == "${this.ref}")`;
      }
    }
  }
}
78
/**
 * Yield every non-empty combination of OP_TYPES, ordered by combination size
 * (all 1-element combos, then all 2-element combos, and so on).
 */
function opTypePowerset() {
  const sizes = range(1, OP_TYPES.length + 1);
  return chain(...map(sizes, (size) => combinations(OP_TYPES, size)));
}
85
/**
 * Yield all r-length combinations of the iterable, in lexicographic order of
 * element positions (a port of Python's itertools.combinations).
 * Yields nothing when r exceeds the pool size; yields one empty combination
 * when r is 0.
 */
function* combinations(iterable, r) {
  const pool = Array.from(iterable);
  const n = pool.length;
  if (r > n) {
    return;
  }
  // indices[k] tracks which pool position fills slot k of the combination.
  const indices = [];
  for (let k = 0; k < r; k += 1) {
    indices.push(k);
  }
  yield indices.map((k) => pool[k]);
  while (true) {
    // Find the rightmost slot that can still advance.
    let i = r - 1;
    while (i >= 0 && indices[i] === i + n - r) {
      i -= 1;
    }
    if (i < 0) {
      return;
    }
    indices[i] += 1;
    for (let j = i + 1; j < r; j += 1) {
      indices[j] = indices[j - 1] + 1;
    }
    yield indices.map((k) => pool[k]);
  }
}
110
/** Return the items of an iterable as a new array in reverse order. */
function reversed(iterable) {
  const items = Array.from(iterable);
  items.reverse();
  return items;
}
113
/** Delimiter characters used to mark boundaries inside alignment text. */
const START_DELIMITER = "<";
const END_DELIMITER = ">";
const DELIMITERS = /* @__PURE__ */ new Set([START_DELIMITER, END_DELIMITER]);
/** Identity lookup from op-type name to itself. */
const OP_TYPE_MAP = Object.fromEntries(OP_TYPES.map((opType) => [opType, opType]));
/** Index -> op-type combination, mirroring opTypePowerset() enumeration order. */
const OP_TYPE_COMBO_MAP = Object.fromEntries(enumerate(opTypePowerset()));
121
/**
 * Return the index of `ops` within the opTypePowerset() enumeration.
 * NOTE(review): if `ops` is not exactly one of the powerset combinations,
 * `find` yields undefined and the trailing `[0]` throws a TypeError — confirm
 * callers only pass valid combinations.
 */
function getOpTypeComboIndex(ops) {
  const entry = find(
    enumerate(opTypePowerset()),
    ([, combo]) => combo.length === ops.length && every(range(combo.length), (i) => combo[i] === ops[i])
  );
  return entry[0];
}
127
// A run of digits with optional comma/period-separated digit groups
// (e.g. "1,234.56"), required to end at whitespace or end-of-input.
const NUMERIC_TOKEN = "\\p{N}+([,.]\\p{N}+)*(?=\\s|$)";
// A word of letters/digits, allowing internal apostrophes ("don't") and one
// optional trailing apostrophe.
const STANDARD_TOKEN = "[\\p{L}\\p{N}]+(['][\\p{L}\\p{N}]+)*'?";
129
/**
 * True when `c` is a lowercase English vowel ("y" counts as a vowel).
 * @throws {AssertionError} when `c` is not exactly one character
 */
function isVowel(c) {
  assert(c.length === 1, "Input must be a single character");
  const vowels = "aeiouy";
  return vowels.includes(c);
}
133
/**
 * True when `c` is a lowercase English consonant.
 * NOTE(review): "y" appears in both the vowel and consonant sets; callers such
 * as categorizeChar resolve the ambiguity by checking consonants first.
 * @throws {AssertionError} when `c` is not exactly one character
 */
function isConsonant(c) {
  assert(c.length === 1, "Input must be a single character");
  const consonants = "bcdfghjklmnpqrstvwxyz";
  return consonants.includes(c);
}
137
/**
 * Classify a single character into a coarse category:
 * 0 = delimiter, 1 = consonant, 2 = vowel, 3 = anything else.
 * Consonants are tested before vowels, so "y" classifies as a consonant.
 */
function categorizeChar(c) {
  if (DELIMITERS.has(c)) {
    return 0;
  }
  if (isConsonant(c)) {
    return 1;
  }
  return isVowel(c) ? 2 : 3;
}
143
/**
 * Find every numeric or standard word token in `text`.
 * Returns raw RegExp match objects with capture-group indices enabled
 * ("d" flag), suitable for unpackRegexMatch.
 */
function basicTokenizer(text) {
  const tokenPattern = new RegExp(`(${NUMERIC_TOKEN}|${STANDARD_TOKEN})`, "udg");
  return [...text.matchAll(tokenPattern)];
}
148
/**
 * Default text normalizer: lowercase the input.
 * NOTE(review): toLowerCase can change string length for a few code points
 * (e.g. "İ" -> "i̇"), which would trip ensureLengthPreservation — confirm
 * inputs are restricted to characters where lowercasing is length-stable.
 */
function basicNormalizer(text) {
  const lowered = text.toLowerCase();
  return lowered;
}
151
/**
 * Wrap a normalizer so it is guaranteed length-preserving: the wrapper
 * forwards all arguments and throws a RangeError if the normalized text's
 * length differs from the input's.
 */
function ensureLengthPreservation(normalizer) {
  return function wrapper(text, ...args) {
    const result = normalizer(text, ...args);
    if (result.length === text.length) {
      return result;
    }
    throw new RangeError("Normalizer must preserve length.");
  };
}
160
/**
 * Adapt a regex-based tokenizer so each match is unpacked into a
 * [tokenText, [start, end]] pair taken from capture group 1 (requires the
 * tokenizer's pattern to carry the "d" indices flag).
 */
function unpackRegexMatch(tokenizer) {
  return function wrapper(text, ...args) {
    return tokenizer(text, ...args).map((m) => [m[1], m.indices[1]]);
  };
}
166
/**
 * Translate a [start, end) slice through an index map, skipping unmapped
 * positions (negative entries). Returns the [first, last + 1) mapped range,
 * or null when nothing in the slice maps.
 */
function translateSlice(segmentSlice, indexMap) {
  const [start, end] = segmentSlice;
  const mapped = indexMap.slice(start, end).filter((index) => index >= 0);
  if (mapped.length === 0) {
    return null;
  }
  return [mapped[0], mapped[mapped.length - 1] + 1];
}
173
+ class Counter {
174
+ counts = /* @__PURE__ */ new Map();
175
+ constructor(init = []) {
176
+ if (init instanceof Map) {
177
+ this.counts = init;
178
+ return;
179
+ }
180
+ for (const element of init) {
181
+ this.counts.set(element, (this.counts.get(element) ?? 0) + 1);
182
+ }
183
+ }
184
+ elements() {
185
+ return this.counts.entries().flatMap(([e, c]) => Array.from(range(c)).map(() => e));
186
+ }
187
+ mostCommon(n) {
188
+ const ordered = Array.from(this.counts.entries()).toSorted(
189
+ ([_a, a], [_b, b]) => a - b
190
+ );
191
+ if (n === void 0) return ordered;
192
+ return ordered.slice(0, n);
193
+ }
194
+ total() {
195
+ return this.counts.values().reduce((acc, v) => acc + v);
196
+ }
197
+ subtract(update) {
198
+ if (update instanceof Map) {
199
+ for (const [element, count] of update.entries()) {
200
+ this.counts.set(element, (this.counts.get(element) ?? 0) - count);
201
+ }
202
+ return;
203
+ }
204
+ for (const element of update) {
205
+ this.counts.set(element, (this.counts.get(element) ?? 0) - 1);
206
+ }
207
+ }
208
+ update(update) {
209
+ if (update instanceof Map) {
210
+ for (const [element, count] of update.entries()) {
211
+ this.counts.set(element, (this.counts.get(element) ?? 0) + count);
212
+ }
213
+ return;
214
+ }
215
+ for (const element of update) {
216
+ this.counts.set(element, (this.counts.get(element) ?? 0) + 1);
217
+ }
218
+ }
219
+ get(element) {
220
+ return this.counts.get(element) ?? 0;
221
+ }
222
+ set(element, count) {
223
+ this.counts.set(element, count);
224
+ }
225
+ }
226
// Public API of this utils module; names are kept in alphabetical order
// (constants and classes first by case, then functions).
export {
  Alignment,
  Counter,
  DELIMITERS,
  END_DELIMITER,
  NUMERIC_TOKEN,
  OP_TYPES,
  OP_TYPE_COMBO_MAP,
  OP_TYPE_MAP,
  STANDARD_TOKEN,
  START_DELIMITER,
  basicNormalizer,
  basicTokenizer,
  categorizeChar,
  ensureLengthPreservation,
  getOpTypeComboIndex,
  isConsonant,
  isVowel,
  opTypePowerset,
  reversed,
  translateSlice,
  unpackRegexMatch
};
package/dist/index.d.cts CHANGED
@@ -8,5 +8,6 @@ import 'pino';
8
8
  import './process/AudioEncoding.cjs';
9
9
  import '@echogarden/text-segmentation';
10
10
  import '@storyteller-platform/epub';
11
+ import './markup/map.cjs';
11
12
  import '@storyteller-platform/ghost-story/recognition';
12
13
  import './align/getSentenceRanges.cjs';
package/dist/index.d.ts CHANGED
@@ -8,5 +8,6 @@ import 'pino';
8
8
  import './process/AudioEncoding.js';
9
9
  import '@echogarden/text-segmentation';
10
10
  import '@storyteller-platform/epub';
11
+ import './markup/map.js';
11
12
  import '@storyteller-platform/ghost-story/recognition';
12
13
  import './align/getSentenceRanges.js';
@@ -22,55 +22,70 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
22
22
  mod
23
23
  ));
24
24
  var import_node_assert = __toESM(require("node:assert"), 1);
25
+ var import_promises = require("node:fs/promises");
26
+ var import_node_path = require("node:path");
25
27
  var import_node_test = require("node:test");
26
28
  var import_epub = require("@storyteller-platform/epub");
27
29
  var import_markup = require("../markup.cjs");
28
30
  var import_segmentation = require("../segmentation.cjs");
29
- void (0, import_node_test.describe)("appendTextNode", () => {
30
- void (0, import_node_test.it)("can append text nodes to empty parents", () => {
31
- const input = [];
32
- (0, import_markup.appendTextNode)("chapter_one", input, "test", [], /* @__PURE__ */ new Set());
33
- import_node_assert.default.deepStrictEqual(input, [{ "#text": "test" }]);
34
- });
35
- void (0, import_node_test.it)("can append text nodes with marks", () => {
36
- const input = [];
37
- (0, import_markup.appendTextNode)(
38
- "chapter_one",
39
- input,
40
- "test",
41
- [{ elementName: "a", attributes: { "@_href": "#" } }],
42
- /* @__PURE__ */ new Set()
43
- );
44
- import_node_assert.default.deepStrictEqual(input, [
45
- { a: [{ "#text": "test" }], ":@": { "@_href": "#" } }
46
- ]);
47
- });
48
- void (0, import_node_test.it)("can wrap text nodes with sentence spans", () => {
49
- const input = [];
50
- (0, import_markup.appendTextNode)("chapter_one", input, "test", [], /* @__PURE__ */ new Set(), 0);
51
- import_node_assert.default.deepStrictEqual(input, [
52
- {
53
- span: [{ "#text": "test" }],
54
- ":@": { "@_id": "chapter_one-s0" }
55
- }
56
- ]);
57
- });
58
- void (0, import_node_test.it)("can join text nodes with the same sentence ids", () => {
59
- const input = [
60
- {
61
- span: [{ "#text": "test" }],
62
- ":@": { "@_id": "chapter_one-s0" }
63
- }
64
- ];
65
- (0, import_markup.appendTextNode)("chapter_one", input, "test", [], /* @__PURE__ */ new Set(), 0);
66
- import_node_assert.default.deepStrictEqual(input, [
67
- {
68
- span: [{ "#text": "test" }, { "#text": "test" }],
69
- ":@": { "@_id": "chapter_one-s0" }
31
+ function sanitizeFilename(title) {
32
+ return title.replace(/[/\\:*?"<>|]/g, "-").replace(/\s+/g, " ").trim().replace(/[.]+$/, "");
33
+ }
34
+ function truncate(input, byteLimit, suffix = "") {
35
+ const normalized = input.normalize("NFC");
36
+ const encoder = new TextEncoder();
37
+ let result = "";
38
+ for (const char of normalized) {
39
+ const withSuffix = result + char + suffix;
40
+ const byteLength = encoder.encode(withSuffix).length;
41
+ if (byteLength > byteLimit) break;
42
+ result += char;
43
+ }
44
+ return result + suffix;
45
+ }
46
+ function getSafeFilepathSegment(name, suffix = "") {
47
+ return truncate(sanitizeFilename(name), 150, suffix);
48
+ }
49
+ async function assertMarkupSnapshot(context, output) {
50
+ const snapshotFilename = getSafeFilepathSegment(context.fullName, ".snapshot");
51
+ const snapshotFilepath = (0, import_node_path.join)(
52
+ "src",
53
+ "markup",
54
+ "__snapshots__",
55
+ snapshotFilename
56
+ );
57
+ if (process.env["UPDATE_SNAPSHOTS"]) {
58
+ await (0, import_promises.mkdir)((0, import_node_path.dirname)(snapshotFilepath), { recursive: true });
59
+ await (0, import_promises.writeFile)(snapshotFilepath, output, { encoding: "utf-8" });
60
+ return;
61
+ }
62
+ try {
63
+ const existingSnapshot = await (0, import_promises.readFile)(snapshotFilepath, {
64
+ encoding: "utf-8"
65
+ });
66
+ const existingLines = existingSnapshot.split("\n");
67
+ const newLines = output.split("\n");
68
+ for (let i = 0; i < existingLines.length; i++) {
69
+ const existingLine = existingLines[i];
70
+ const newLine = newLines[i];
71
+ if (existingLine !== newLine) {
72
+ import_node_assert.default.strictEqual(
73
+ newLines.slice(Math.max(0, i - 5), i + 5),
74
+ existingLines.slice(Math.max(0, i - 5), i + 5)
75
+ );
70
76
  }
71
- ]);
72
- });
73
- });
77
+ }
78
+ } catch (e) {
79
+ if (e instanceof import_node_assert.default.AssertionError) {
80
+ throw e;
81
+ }
82
+ throw new import_node_assert.default.AssertionError({
83
+ actual: output,
84
+ expected: "",
85
+ diff: "simple"
86
+ });
87
+ }
88
+ }
74
89
  void (0, import_node_test.describe)("markupChapter", () => {
75
90
  void (0, import_node_test.it)("can tag sentences", async (t) => {
76
91
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -109,16 +124,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
109
124
  </html>
110
125
  `
111
126
  );
112
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
127
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
113
128
  import_epub.Epub.getXhtmlBody(input),
114
129
  {}
115
130
  );
116
131
  const { markedUp: output } = (0, import_markup.markupChapter)(
117
132
  "chapter_one",
118
133
  input,
119
- segmentation
134
+ segmentation,
135
+ mapping
120
136
  );
121
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
137
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
122
138
  });
123
139
  void (0, import_node_test.it)("can tag sentences with formatting marks", async (t) => {
124
140
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -142,16 +158,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
142
158
  </html>
143
159
  `
144
160
  );
145
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
161
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
146
162
  import_epub.Epub.getXhtmlBody(input),
147
163
  {}
148
164
  );
149
165
  const { markedUp: output } = (0, import_markup.markupChapter)(
150
166
  "chapter_one",
151
167
  input,
152
- segmentation
168
+ segmentation,
169
+ mapping
153
170
  );
154
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
171
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
155
172
  });
156
173
  void (0, import_node_test.it)("can tag sentences with formatting marks that overlap sentence boundaries", async (t) => {
157
174
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -175,16 +192,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
175
192
  </html>
176
193
  `
177
194
  );
178
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
195
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
179
196
  import_epub.Epub.getXhtmlBody(input),
180
197
  {}
181
198
  );
182
199
  const { markedUp: output } = (0, import_markup.markupChapter)(
183
200
  "chapter_one",
184
201
  input,
185
- segmentation
202
+ segmentation,
203
+ mapping
186
204
  );
187
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
205
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
188
206
  });
189
207
  void (0, import_node_test.it)("can tag sentences with nested formatting marks", async (t) => {
190
208
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -208,16 +226,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
208
226
  </html>
209
227
  `
210
228
  );
211
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
229
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
212
230
  import_epub.Epub.getXhtmlBody(input),
213
231
  {}
214
232
  );
215
233
  const { markedUp: output } = (0, import_markup.markupChapter)(
216
234
  "chapter_one",
217
235
  input,
218
- segmentation
236
+ segmentation,
237
+ mapping
219
238
  );
220
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
239
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
221
240
  });
222
241
  void (0, import_node_test.it)("can tag sentences with atoms", async (t) => {
223
242
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -241,16 +260,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
241
260
  </html>
242
261
  `
243
262
  );
244
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
263
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
245
264
  import_epub.Epub.getXhtmlBody(input),
246
265
  {}
247
266
  );
248
267
  const { markedUp: output } = (0, import_markup.markupChapter)(
249
268
  "chapter_one",
250
269
  input,
251
- segmentation
270
+ segmentation,
271
+ mapping
252
272
  );
253
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
273
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
254
274
  });
255
275
  void (0, import_node_test.it)("can tag sentences in nested textblocks", async (t) => {
256
276
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -284,16 +304,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
284
304
  </html>
285
305
  `
286
306
  );
287
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
307
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
288
308
  import_epub.Epub.getXhtmlBody(input),
289
309
  {}
290
310
  );
291
311
  const { markedUp: output } = (0, import_markup.markupChapter)(
292
312
  "chapter_one",
293
313
  input,
294
- segmentation
314
+ segmentation,
315
+ mapping
295
316
  );
296
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
317
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
297
318
  });
298
319
  void (0, import_node_test.it)("can tag sentences that cross textblock boundaries", async (t) => {
299
320
  const input = import_epub.Epub.xhtmlParser.parse(
@@ -320,18 +341,19 @@ void (0, import_node_test.describe)("markupChapter", () => {
320
341
  </html>
321
342
  `
322
343
  );
323
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
344
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
324
345
  import_epub.Epub.getXhtmlBody(input),
325
346
  {}
326
347
  );
327
348
  const { markedUp: output } = (0, import_markup.markupChapter)(
328
349
  "chapter_one",
329
350
  input,
330
- segmentation
351
+ segmentation,
352
+ mapping
331
353
  );
332
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
354
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
333
355
  });
334
- void (0, import_node_test.it)("can handle soft page breaks", async (t) => {
356
+ void import_node_test.it.only("can handle soft page breaks", async (t) => {
335
357
  const input = import_epub.Epub.xhtmlParser.parse(
336
358
  /* xml */
337
359
  `
@@ -363,16 +385,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
363
385
  </body>
364
386
  </html>`
365
387
  );
366
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
388
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
367
389
  import_epub.Epub.getXhtmlBody(input),
368
390
  {}
369
391
  );
370
392
  const { markedUp: output } = (0, import_markup.markupChapter)(
371
393
  "chapter_one",
372
394
  input,
373
- segmentation
395
+ segmentation,
396
+ mapping
374
397
  );
375
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
398
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
376
399
  });
377
400
  void (0, import_node_test.it)("can handle boolean-like text values", async (t) => {
378
401
  const input = import_epub.Epub.xhtmlParser.parse(`
@@ -384,16 +407,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
384
407
  </body>
385
408
  </html>
386
409
  `);
387
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
410
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
388
411
  import_epub.Epub.getXhtmlBody(input),
389
412
  {}
390
413
  );
391
414
  const { markedUp: output } = (0, import_markup.markupChapter)(
392
415
  "chapter_one",
393
416
  input,
394
- segmentation
417
+ segmentation,
418
+ mapping
395
419
  );
396
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
420
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
397
421
  });
398
422
  void (0, import_node_test.it)("can handle number-like text values", async (t) => {
399
423
  const input = import_epub.Epub.xhtmlParser.parse(`
@@ -405,16 +429,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
405
429
  </body>
406
430
  </html>
407
431
  `);
408
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
432
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
409
433
  import_epub.Epub.getXhtmlBody(input),
410
434
  {}
411
435
  );
412
436
  const { markedUp: output } = (0, import_markup.markupChapter)(
413
437
  "chapter_one",
414
438
  input,
415
- segmentation
439
+ segmentation,
440
+ mapping
416
441
  );
417
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
442
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
418
443
  });
419
444
  void (0, import_node_test.it)("can handle null-like text values", async (t) => {
420
445
  const input = import_epub.Epub.xhtmlParser.parse(`
@@ -426,16 +451,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
426
451
  </body>
427
452
  </html>
428
453
  `);
429
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
454
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
430
455
  import_epub.Epub.getXhtmlBody(input),
431
456
  {}
432
457
  );
433
458
  const { markedUp: output } = (0, import_markup.markupChapter)(
434
459
  "chapter_one",
435
460
  input,
436
- segmentation
461
+ segmentation,
462
+ mapping
437
463
  );
438
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
464
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
439
465
  });
440
466
  void (0, import_node_test.it)("can preserve nbsp entities", async (t) => {
441
467
  const input = import_epub.Epub.xhtmlParser.parse(`
@@ -450,15 +476,16 @@ void (0, import_node_test.describe)("markupChapter", () => {
450
476
  </body>
451
477
  </html>
452
478
  `);
453
- const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
479
+ const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
454
480
  import_epub.Epub.getXhtmlBody(input),
455
481
  {}
456
482
  );
457
483
  const { markedUp: output } = (0, import_markup.markupChapter)(
458
484
  "chapter_one",
459
485
  input,
460
- segmentation
486
+ segmentation,
487
+ mapping
461
488
  );
462
- t.assert.snapshot(import_epub.Epub.xhtmlBuilder.build(output).split("\n"));
489
+ await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
463
490
  });
464
491
  });