@storyteller-platform/align 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/__tests__/align.test.cjs +6 -5
- package/dist/align/__tests__/align.test.js +6 -5
- package/dist/align/align.cjs +133 -81
- package/dist/align/align.d.cts +1 -0
- package/dist/align/align.d.ts +1 -0
- package/dist/align/align.js +133 -81
- package/dist/align/getSentenceRanges.cjs +78 -149
- package/dist/align/getSentenceRanges.d.cts +1 -1
- package/dist/align/getSentenceRanges.d.ts +1 -1
- package/dist/align/getSentenceRanges.js +78 -149
- package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
- package/dist/errorAlign/__tests__/native.test.cjs +118 -0
- package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/native.test.js +107 -0
- package/dist/errorAlign/backtraceGraph.cjs +298 -0
- package/dist/errorAlign/backtraceGraph.d.cts +103 -0
- package/dist/errorAlign/backtraceGraph.d.ts +103 -0
- package/dist/errorAlign/backtraceGraph.js +270 -0
- package/dist/errorAlign/beamSearch.cjs +302 -0
- package/dist/errorAlign/beamSearch.d.cts +53 -0
- package/dist/errorAlign/beamSearch.d.ts +53 -0
- package/dist/errorAlign/beamSearch.js +268 -0
- package/dist/errorAlign/core.cjs +33 -0
- package/dist/errorAlign/core.d.cts +5 -0
- package/dist/errorAlign/core.d.ts +5 -0
- package/dist/errorAlign/core.js +11 -0
- package/dist/errorAlign/editDistance.cjs +115 -0
- package/dist/errorAlign/editDistance.d.cts +46 -0
- package/dist/errorAlign/editDistance.d.ts +46 -0
- package/dist/errorAlign/editDistance.js +90 -0
- package/dist/errorAlign/errorAlign.cjs +159 -0
- package/dist/errorAlign/errorAlign.d.cts +15 -0
- package/dist/errorAlign/errorAlign.d.ts +15 -0
- package/dist/errorAlign/errorAlign.js +145 -0
- package/dist/errorAlign/graphMetadata.cjs +97 -0
- package/dist/errorAlign/graphMetadata.d.cts +44 -0
- package/dist/errorAlign/graphMetadata.d.ts +44 -0
- package/dist/errorAlign/graphMetadata.js +64 -0
- package/dist/errorAlign/hash.cjs +173 -0
- package/dist/errorAlign/hash.d.cts +28 -0
- package/dist/errorAlign/hash.d.ts +28 -0
- package/dist/errorAlign/hash.js +150 -0
- package/dist/errorAlign/native.cjs +60 -0
- package/dist/errorAlign/native.d.cts +18 -0
- package/dist/errorAlign/native.d.ts +18 -0
- package/dist/errorAlign/native.js +24 -0
- package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
- package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
- package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
- package/dist/errorAlign/node-gyp-build.d.js +0 -0
- package/dist/errorAlign/pathToAlignment.cjs +122 -0
- package/dist/errorAlign/pathToAlignment.d.cts +11 -0
- package/dist/errorAlign/pathToAlignment.d.ts +11 -0
- package/dist/errorAlign/pathToAlignment.js +89 -0
- package/dist/errorAlign/utils.cjs +301 -0
- package/dist/errorAlign/utils.d.cts +107 -0
- package/dist/errorAlign/utils.d.ts +107 -0
- package/dist/errorAlign/utils.js +248 -0
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/markup/__tests__/markup.test.cjs +108 -81
- package/dist/markup/__tests__/markup.test.js +109 -82
- package/dist/markup/__tests__/parseDom.test.cjs +112 -0
- package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
- package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
- package/dist/markup/__tests__/parseDom.test.js +89 -0
- package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
- package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
- package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
- package/dist/markup/__tests__/serializeDom.test.js +97 -0
- package/dist/markup/__tests__/transform.test.cjs +122 -0
- package/dist/markup/__tests__/transform.test.d.cts +2 -0
- package/dist/markup/__tests__/transform.test.d.ts +2 -0
- package/dist/markup/__tests__/transform.test.js +99 -0
- package/dist/markup/map.cjs +261 -0
- package/dist/markup/map.d.cts +50 -0
- package/dist/markup/map.d.ts +50 -0
- package/dist/markup/map.js +236 -0
- package/dist/markup/markup.cjs +23 -201
- package/dist/markup/markup.d.cts +5 -9
- package/dist/markup/markup.d.ts +5 -9
- package/dist/markup/markup.js +24 -203
- package/dist/markup/model.cjs +172 -0
- package/dist/markup/model.d.cts +57 -0
- package/dist/markup/model.d.ts +57 -0
- package/dist/markup/model.js +145 -0
- package/dist/markup/parseDom.cjs +59 -0
- package/dist/markup/parseDom.d.cts +7 -0
- package/dist/markup/parseDom.d.ts +7 -0
- package/dist/markup/parseDom.js +35 -0
- package/dist/markup/segmentation.cjs +11 -57
- package/dist/markup/segmentation.d.cts +6 -2
- package/dist/markup/segmentation.d.ts +6 -2
- package/dist/markup/segmentation.js +11 -58
- package/dist/markup/serializeDom.cjs +87 -0
- package/dist/markup/serializeDom.d.cts +7 -0
- package/dist/markup/serializeDom.d.ts +7 -0
- package/dist/markup/serializeDom.js +63 -0
- package/dist/markup/transform.cjs +92 -0
- package/dist/markup/transform.d.cts +11 -0
- package/dist/markup/transform.d.ts +11 -0
- package/dist/markup/transform.js +71 -0
- package/dist/types/node-gyp-build.d.cjs +1 -0
- package/dist/types/node-gyp-build.d.d.cts +3 -0
- package/dist/types/node-gyp-build.d.d.ts +3 -0
- package/dist/types/node-gyp-build.d.js +0 -0
- package/package.json +11 -4
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
import "../chunk-BIEQXUOY.js";
|
|
2
|
+
import assert from "node:assert";
|
|
3
|
+
import { chain, enumerate, every, find, map, range } from "itertools";
|
|
4
|
+
// The supported alignment operation types, in canonical enumeration order.
const OP_TYPES = ["MATCH", "INSERT", "DELETE", "SUBSTITUTE"];
|
|
10
|
+
/**
 * One alignment operation relating a reference token span to a hypothesis
 * token span.
 *
 * Invariants (enforced in the constructor):
 * - MATCH: both ref and hyp present, no compound markers.
 * - INSERT: hyp present, ref absent.
 * - DELETE: ref present, hyp absent.
 * - SUBSTITUTE: both ref and hyp present.
 */
class Alignment {
  constructor(opType, refSlice = null, hypSlice = null, ref = null, hyp = null, leftCompound = false, rightCompound = false) {
    this.opType = opType;
    this.refSlice = refSlice;
    this.hypSlice = hypSlice;
    this.ref = ref;
    this.hyp = hyp;
    this.leftCompound = leftCompound;
    this.rightCompound = rightCompound;
    // Validate the per-operation invariants up front so invalid alignments
    // can never be constructed.
    if (opType === "MATCH") {
      if (ref === null || hyp === null) {
        throw new TypeError("MATCH operation must have non-empty ref or hyp.");
      }
      if (leftCompound || rightCompound) {
        throw new TypeError("MATCH operation cannot have compound markers.");
      }
    } else if (opType === "INSERT") {
      if (hyp === null || ref !== null) {
        throw new TypeError(
          "INSERT operation must have non-empty hyp and empty ref."
        );
      }
    } else if (opType === "DELETE") {
      if (hyp !== null || ref === null) {
        throw new TypeError(
          "DELETE operation must have non-empty ref and empty hyp."
        );
      }
    } else if (opType === "SUBSTITUTE") {
      if (ref === null || hyp === null) {
        throw new TypeError(
          "SUBSTITUTE operation must have both ref and hyp."
        );
      }
    }
  }
  /** Return the hypothesis with compound markers if applicable. */
  get hypWithCompoundMarkers() {
    if (this.hyp === null) {
      return null;
    }
    const leftMarker = this.leftCompound ? "-" : "";
    const rightMarker = this.rightCompound ? "-" : "";
    return `${leftMarker}"${this.hyp}"${rightMarker}`;
  }
  /** Human-readable rendering, e.g. `Alignment(MATCH: "a" == "a")`. */
  toString() {
    const { opType } = this;
    if (opType === "DELETE") {
      return `Alignment(${opType}: "${this.ref}")`;
    }
    if (opType === "INSERT") {
      return `Alignment(${opType}: ${this.hypWithCompoundMarkers})`;
    }
    if (opType === "SUBSTITUTE") {
      return `Alignment(${opType}: ${this.hypWithCompoundMarkers} -> "${this.ref}")`;
    }
    if (opType === "MATCH") {
      return `Alignment(${opType}: "${this.hyp}" == "${this.ref}")`;
    }
  }
}
|
|
78
|
+
/**
 * Yield every non-empty combination of OP_TYPES, ordered by combination
 * size (all 1-element combinations first, then 2-element, and so on).
 *
 * Implemented as a plain generator with `yield*` delegation instead of
 * spreading a lazy itertools `map` iterable into `chain`, which is both
 * clearer and avoids eagerly materializing the intermediate iterable.
 * The enumeration order is unchanged.
 */
function* opTypePowerset() {
  for (let size = 1; size <= OP_TYPES.length; size++) {
    yield* combinations(OP_TYPES, size);
  }
}
|
|
85
|
+
/**
 * Yield all r-length combinations of `iterable`, in lexicographic order of
 * element positions — a direct port of Python's itertools.combinations.
 *
 * Rewritten with plain index loops instead of a labeled-block `break` plus
 * third-party `range`/`reversed` helpers: the function is now
 * self-contained, dependency-free, and easier to follow. Behavior is
 * identical, including yielding a single empty array for r === 0 and
 * nothing when r exceeds the pool size.
 *
 * @param iterable - source elements (consumed once into an array)
 * @param r - combination length (non-negative integer)
 */
function* combinations(iterable, r) {
  const pool = Array.from(iterable);
  const n = pool.length;
  if (r > n) {
    return;
  }
  // indices[k] is the pool position contributing the k-th element.
  const indices = Array.from({ length: r }, (_, k) => k);
  yield indices.map((k) => pool[k]);
  while (true) {
    // Find the rightmost index that can still be advanced.
    let i = r - 1;
    while (i >= 0 && indices[i] === i + n - r) {
      i -= 1;
    }
    if (i < 0) {
      return; // All indices are at their maximum — enumeration complete.
    }
    indices[i] += 1;
    // Reset every index to the right of i to its smallest legal value.
    for (let j = i + 1; j < r; j++) {
      indices[j] = indices[j - 1] + 1;
    }
    yield indices.map((k) => pool[k]);
  }
}
|
|
110
|
+
/** Return a new array with the iterable's elements in reverse order. */
function reversed(iterable) {
  const items = [...iterable];
  items.reverse();
  return items;
}
|
|
113
|
+
// Delimiter characters marking segment boundaries in serialized text.
const START_DELIMITER = "<";
const END_DELIMITER = ">";
const DELIMITERS = /* @__PURE__ */ new Set([START_DELIMITER, END_DELIMITER]);
// Identity lookup of op-type name -> op-type name (enum-style access).
// Object.fromEntries replaces the accumulator-spread reduce, which rebuilt
// the whole object on every iteration (accidental O(n^2)).
const OP_TYPE_MAP = Object.fromEntries(OP_TYPES.map((opType) => [opType, opType]));
// Index -> op-type combination, in opTypePowerset() enumeration order;
// the inverse of getOpTypeComboIndex(). enumerate() yields [index, combo]
// pairs, which Object.fromEntries consumes directly.
const OP_TYPE_COMBO_MAP = Object.fromEntries(enumerate(opTypePowerset()));
|
|
121
|
+
/**
 * Return the index of the op-type combination `ops` within the
 * opTypePowerset() enumeration. Throws a TypeError if no combination
 * matches (the lookup result is undefined).
 */
function getOpTypeComboIndex(ops) {
  const located = find(
    enumerate(opTypePowerset()),
    ([, combo]) => combo.length === ops.length && combo.every((op, i) => op === ops[i])
  );
  return located[0];
}
|
|
127
|
+
// Token pattern sources (Unicode-aware; compile with the "u" flag).
// NUMERIC_TOKEN: a digit run optionally continued by "," or "." groups
// (e.g. "1,234.5"), required to end at whitespace or end of input.
const NUMERIC_TOKEN = "\\p{N}+([,.]\\p{N}+)*(?=\\s|$)";
// STANDARD_TOKEN: alphanumeric runs optionally joined by apostrophes
// (e.g. "don't"), allowing a trailing apostrophe (e.g. "dogs'").
const STANDARD_TOKEN = "[\\p{L}\\p{N}]+(['][\\p{L}\\p{N}]+)*'?";
|
|
129
|
+
/** True if the single character `c` is a lowercase ASCII vowel (y counts). */
function isVowel(c) {
  assert(c.length === 1, "Input must be a single character");
  const VOWELS = "aeiouy";
  return VOWELS.includes(c);
}
|
|
133
|
+
/** True if the single character `c` is a lowercase ASCII consonant (y counts). */
function isConsonant(c) {
  assert(c.length === 1, "Input must be a single character");
  const CONSONANTS = "bcdfghjklmnpqrstvwxyz";
  return CONSONANTS.includes(c);
}
|
|
137
|
+
/**
 * Map a character to a coarse category code:
 * 0 = delimiter, 1 = consonant, 2 = vowel, 3 = anything else.
 */
function categorizeChar(c) {
  if (DELIMITERS.has(c)) {
    return 0;
  }
  if (isConsonant(c)) {
    return 1;
  }
  return isVowel(c) ? 2 : 3;
}
|
|
143
|
+
/**
 * Split `text` into regex match objects covering numeric and standard
 * tokens. The "d" flag makes per-group match indices available (consumed
 * by unpackRegexMatch).
 */
function basicTokenizer(text) {
  const tokenPattern = new RegExp(`(${NUMERIC_TOKEN}|${STANDARD_TOKEN})`, "udg");
  return [...text.matchAll(tokenPattern)];
}
|
|
148
|
+
/** Default text normalizer: case-fold to lower case. */
function basicNormalizer(text) {
  return text.toLowerCase();
}
|
|
151
|
+
/**
 * Wrap a normalizer so that it throws a RangeError whenever the normalized
 * text's length differs from the input's — downstream index math depends
 * on normalization being length-preserving.
 */
function ensureLengthPreservation(normalizer) {
  return function wrapper(text, ...rest) {
    const result = normalizer(text, ...rest);
    if (result.length === text.length) {
      return result;
    }
    throw new RangeError("Normalizer must preserve length.");
  };
}
|
|
160
|
+
/**
 * Wrap a tokenizer returning regex match objects (matched with the "d"
 * flag) so it returns [token, [start, end]] pairs taken from capture
 * group 1 of each match.
 */
function unpackRegexMatch(tokenizer) {
  return function wrapper(text, ...rest) {
    const matches = tokenizer(text, ...rest);
    return matches.map((m) => {
      const [, token] = m;
      return [token, m.indices[1]];
    });
  };
}
|
|
166
|
+
/**
 * Translate a [start, end) slice over a segment into a slice over the
 * target index space via `indexMap`. Entries below 0 in the map are
 * treated as unmapped and skipped. Returns null when nothing in the slice
 * maps anywhere.
 */
function translateSlice(segmentSlice, indexMap) {
  const mapped = [];
  for (const target of indexMap.slice(...segmentSlice)) {
    if (target >= 0) {
      mapped.push(target);
    }
  }
  if (mapped.length === 0) {
    return null;
  }
  const first = mapped[0];
  const last = mapped[mapped.length - 1];
  return [first, last + 1];
}
|
|
173
|
+
class Counter {
|
|
174
|
+
counts = /* @__PURE__ */ new Map();
|
|
175
|
+
constructor(init = []) {
|
|
176
|
+
if (init instanceof Map) {
|
|
177
|
+
this.counts = init;
|
|
178
|
+
return;
|
|
179
|
+
}
|
|
180
|
+
for (const element of init) {
|
|
181
|
+
this.counts.set(element, (this.counts.get(element) ?? 0) + 1);
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
elements() {
|
|
185
|
+
return this.counts.entries().flatMap(([e, c]) => Array.from(range(c)).map(() => e));
|
|
186
|
+
}
|
|
187
|
+
mostCommon(n) {
|
|
188
|
+
const ordered = Array.from(this.counts.entries()).toSorted(
|
|
189
|
+
([_a, a], [_b, b]) => a - b
|
|
190
|
+
);
|
|
191
|
+
if (n === void 0) return ordered;
|
|
192
|
+
return ordered.slice(0, n);
|
|
193
|
+
}
|
|
194
|
+
total() {
|
|
195
|
+
return this.counts.values().reduce((acc, v) => acc + v);
|
|
196
|
+
}
|
|
197
|
+
subtract(update) {
|
|
198
|
+
if (update instanceof Map) {
|
|
199
|
+
for (const [element, count] of update.entries()) {
|
|
200
|
+
this.counts.set(element, (this.counts.get(element) ?? 0) - count);
|
|
201
|
+
}
|
|
202
|
+
return;
|
|
203
|
+
}
|
|
204
|
+
for (const element of update) {
|
|
205
|
+
this.counts.set(element, (this.counts.get(element) ?? 0) - 1);
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
update(update) {
|
|
209
|
+
if (update instanceof Map) {
|
|
210
|
+
for (const [element, count] of update.entries()) {
|
|
211
|
+
this.counts.set(element, (this.counts.get(element) ?? 0) + count);
|
|
212
|
+
}
|
|
213
|
+
return;
|
|
214
|
+
}
|
|
215
|
+
for (const element of update) {
|
|
216
|
+
this.counts.set(element, (this.counts.get(element) ?? 0) + 1);
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
get(element) {
|
|
220
|
+
return this.counts.get(element) ?? 0;
|
|
221
|
+
}
|
|
222
|
+
set(element, count) {
|
|
223
|
+
this.counts.set(element, count);
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
export {
|
|
227
|
+
Alignment,
|
|
228
|
+
Counter,
|
|
229
|
+
DELIMITERS,
|
|
230
|
+
END_DELIMITER,
|
|
231
|
+
NUMERIC_TOKEN,
|
|
232
|
+
OP_TYPES,
|
|
233
|
+
OP_TYPE_COMBO_MAP,
|
|
234
|
+
OP_TYPE_MAP,
|
|
235
|
+
STANDARD_TOKEN,
|
|
236
|
+
START_DELIMITER,
|
|
237
|
+
basicNormalizer,
|
|
238
|
+
basicTokenizer,
|
|
239
|
+
categorizeChar,
|
|
240
|
+
ensureLengthPreservation,
|
|
241
|
+
getOpTypeComboIndex,
|
|
242
|
+
isConsonant,
|
|
243
|
+
isVowel,
|
|
244
|
+
opTypePowerset,
|
|
245
|
+
reversed,
|
|
246
|
+
translateSlice,
|
|
247
|
+
unpackRegexMatch
|
|
248
|
+
};
|
package/dist/index.d.cts
CHANGED
package/dist/index.d.ts
CHANGED
|
@@ -22,55 +22,70 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
22
22
|
mod
|
|
23
23
|
));
|
|
24
24
|
var import_node_assert = __toESM(require("node:assert"), 1);
|
|
25
|
+
var import_promises = require("node:fs/promises");
|
|
26
|
+
var import_node_path = require("node:path");
|
|
25
27
|
var import_node_test = require("node:test");
|
|
26
28
|
var import_epub = require("@storyteller-platform/epub");
|
|
27
29
|
var import_markup = require("../markup.cjs");
|
|
28
30
|
var import_segmentation = require("../segmentation.cjs");
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
31
|
+
/**
 * Make a title safe for use as a filename: replace path/shell-unsafe
 * characters with "-", collapse runs of whitespace, trim, and strip any
 * trailing dots.
 */
function sanitizeFilename(title) {
  const dashed = title.replace(/[/\\:*?"<>|]/g, "-");
  const collapsed = dashed.replace(/\s+/g, " ").trim();
  return collapsed.replace(/[.]+$/, "");
}
|
|
34
|
+
function truncate(input, byteLimit, suffix = "") {
|
|
35
|
+
const normalized = input.normalize("NFC");
|
|
36
|
+
const encoder = new TextEncoder();
|
|
37
|
+
let result = "";
|
|
38
|
+
for (const char of normalized) {
|
|
39
|
+
const withSuffix = result + char + suffix;
|
|
40
|
+
const byteLength = encoder.encode(withSuffix).length;
|
|
41
|
+
if (byteLength > byteLimit) break;
|
|
42
|
+
result += char;
|
|
43
|
+
}
|
|
44
|
+
return result + suffix;
|
|
45
|
+
}
|
|
46
|
+
function getSafeFilepathSegment(name, suffix = "") {
|
|
47
|
+
return truncate(sanitizeFilename(name), 150, suffix);
|
|
48
|
+
}
|
|
49
|
+
/**
 * Compare `output` against the stored snapshot for this test, or rewrite
 * the snapshot when UPDATE_SNAPSHOTS is set.
 *
 * Fix: the comparison loop previously iterated only over the existing
 * snapshot's lines, so extra trailing lines in `output` passed silently;
 * it now iterates to the longer of the two line lists.
 *
 * @param context - node:test context (context.fullName names the snapshot)
 * @param output - serialized markup to compare
 * @throws AssertionError on any mismatch, or when the snapshot is missing/unreadable
 */
async function assertMarkupSnapshot(context, output) {
  const snapshotFilename = getSafeFilepathSegment(context.fullName, ".snapshot");
  const snapshotFilepath = (0, import_node_path.join)(
    "src",
    "markup",
    "__snapshots__",
    snapshotFilename
  );
  if (process.env["UPDATE_SNAPSHOTS"]) {
    await (0, import_promises.mkdir)((0, import_node_path.dirname)(snapshotFilepath), { recursive: true });
    await (0, import_promises.writeFile)(snapshotFilepath, output, { encoding: "utf-8" });
    return;
  }
  try {
    const existingSnapshot = await (0, import_promises.readFile)(snapshotFilepath, {
      encoding: "utf-8"
    });
    const existingLines = existingSnapshot.split("\n");
    const newLines = output.split("\n");
    // Walk to the longer of the two so trailing additions/removals are
    // caught, not just changes within the snapshot's length.
    const lineCount = Math.max(existingLines.length, newLines.length);
    for (let i = 0; i < lineCount; i++) {
      const existingLine = existingLines[i];
      const newLine = newLines[i];
      if (existingLine !== newLine) {
        // strictEqual on two distinct arrays always fails; it is used here
        // purely to surface a readable diff of the surrounding 10 lines.
        import_node_assert.default.strictEqual(
          newLines.slice(Math.max(0, i - 5), i + 5),
          existingLines.slice(Math.max(0, i - 5), i + 5)
        );
      }
    }
  } catch (e) {
    if (e instanceof import_node_assert.default.AssertionError) {
      throw e;
    }
    // Any other failure (e.g. missing snapshot file) is reported as a
    // snapshot mismatch against empty content.
    // TODO(review): the original error is discarded here — consider
    // attaching it (e.g. via `message`) to distinguish I/O failures.
    throw new import_node_assert.default.AssertionError({
      actual: output,
      expected: "",
      diff: "simple"
    });
  }
}
|
|
74
89
|
void (0, import_node_test.describe)("markupChapter", () => {
|
|
75
90
|
void (0, import_node_test.it)("can tag sentences", async (t) => {
|
|
76
91
|
const input = import_epub.Epub.xhtmlParser.parse(
|
|
@@ -109,16 +124,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
109
124
|
</html>
|
|
110
125
|
`
|
|
111
126
|
);
|
|
112
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
127
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
113
128
|
import_epub.Epub.getXhtmlBody(input),
|
|
114
129
|
{}
|
|
115
130
|
);
|
|
116
131
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
117
132
|
"chapter_one",
|
|
118
133
|
input,
|
|
119
|
-
segmentation
|
|
134
|
+
segmentation,
|
|
135
|
+
mapping
|
|
120
136
|
);
|
|
121
|
-
t
|
|
137
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
122
138
|
});
|
|
123
139
|
void (0, import_node_test.it)("can tag sentences with formatting marks", async (t) => {
|
|
124
140
|
const input = import_epub.Epub.xhtmlParser.parse(
|
|
@@ -142,16 +158,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
142
158
|
</html>
|
|
143
159
|
`
|
|
144
160
|
);
|
|
145
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
161
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
146
162
|
import_epub.Epub.getXhtmlBody(input),
|
|
147
163
|
{}
|
|
148
164
|
);
|
|
149
165
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
150
166
|
"chapter_one",
|
|
151
167
|
input,
|
|
152
|
-
segmentation
|
|
168
|
+
segmentation,
|
|
169
|
+
mapping
|
|
153
170
|
);
|
|
154
|
-
t
|
|
171
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
155
172
|
});
|
|
156
173
|
void (0, import_node_test.it)("can tag sentences with formatting marks that overlap sentence boundaries", async (t) => {
|
|
157
174
|
const input = import_epub.Epub.xhtmlParser.parse(
|
|
@@ -175,16 +192,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
175
192
|
</html>
|
|
176
193
|
`
|
|
177
194
|
);
|
|
178
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
195
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
179
196
|
import_epub.Epub.getXhtmlBody(input),
|
|
180
197
|
{}
|
|
181
198
|
);
|
|
182
199
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
183
200
|
"chapter_one",
|
|
184
201
|
input,
|
|
185
|
-
segmentation
|
|
202
|
+
segmentation,
|
|
203
|
+
mapping
|
|
186
204
|
);
|
|
187
|
-
t
|
|
205
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
188
206
|
});
|
|
189
207
|
void (0, import_node_test.it)("can tag sentences with nested formatting marks", async (t) => {
|
|
190
208
|
const input = import_epub.Epub.xhtmlParser.parse(
|
|
@@ -208,16 +226,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
208
226
|
</html>
|
|
209
227
|
`
|
|
210
228
|
);
|
|
211
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
229
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
212
230
|
import_epub.Epub.getXhtmlBody(input),
|
|
213
231
|
{}
|
|
214
232
|
);
|
|
215
233
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
216
234
|
"chapter_one",
|
|
217
235
|
input,
|
|
218
|
-
segmentation
|
|
236
|
+
segmentation,
|
|
237
|
+
mapping
|
|
219
238
|
);
|
|
220
|
-
t
|
|
239
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
221
240
|
});
|
|
222
241
|
void (0, import_node_test.it)("can tag sentences with atoms", async (t) => {
|
|
223
242
|
const input = import_epub.Epub.xhtmlParser.parse(
|
|
@@ -241,16 +260,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
241
260
|
</html>
|
|
242
261
|
`
|
|
243
262
|
);
|
|
244
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
263
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
245
264
|
import_epub.Epub.getXhtmlBody(input),
|
|
246
265
|
{}
|
|
247
266
|
);
|
|
248
267
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
249
268
|
"chapter_one",
|
|
250
269
|
input,
|
|
251
|
-
segmentation
|
|
270
|
+
segmentation,
|
|
271
|
+
mapping
|
|
252
272
|
);
|
|
253
|
-
t
|
|
273
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
254
274
|
});
|
|
255
275
|
void (0, import_node_test.it)("can tag sentences in nested textblocks", async (t) => {
|
|
256
276
|
const input = import_epub.Epub.xhtmlParser.parse(
|
|
@@ -284,16 +304,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
284
304
|
</html>
|
|
285
305
|
`
|
|
286
306
|
);
|
|
287
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
307
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
288
308
|
import_epub.Epub.getXhtmlBody(input),
|
|
289
309
|
{}
|
|
290
310
|
);
|
|
291
311
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
292
312
|
"chapter_one",
|
|
293
313
|
input,
|
|
294
|
-
segmentation
|
|
314
|
+
segmentation,
|
|
315
|
+
mapping
|
|
295
316
|
);
|
|
296
|
-
t
|
|
317
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
297
318
|
});
|
|
298
319
|
void (0, import_node_test.it)("can tag sentences that cross textblock boundaries", async (t) => {
|
|
299
320
|
const input = import_epub.Epub.xhtmlParser.parse(
|
|
@@ -320,18 +341,19 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
320
341
|
</html>
|
|
321
342
|
`
|
|
322
343
|
);
|
|
323
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
344
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
324
345
|
import_epub.Epub.getXhtmlBody(input),
|
|
325
346
|
{}
|
|
326
347
|
);
|
|
327
348
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
328
349
|
"chapter_one",
|
|
329
350
|
input,
|
|
330
|
-
segmentation
|
|
351
|
+
segmentation,
|
|
352
|
+
mapping
|
|
331
353
|
);
|
|
332
|
-
t
|
|
354
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
333
355
|
});
|
|
334
|
-
void
|
|
356
|
+
void import_node_test.it.only("can handle soft page breaks", async (t) => {
|
|
335
357
|
const input = import_epub.Epub.xhtmlParser.parse(
|
|
336
358
|
/* xml */
|
|
337
359
|
`
|
|
@@ -363,16 +385,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
363
385
|
</body>
|
|
364
386
|
</html>`
|
|
365
387
|
);
|
|
366
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
388
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
367
389
|
import_epub.Epub.getXhtmlBody(input),
|
|
368
390
|
{}
|
|
369
391
|
);
|
|
370
392
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
371
393
|
"chapter_one",
|
|
372
394
|
input,
|
|
373
|
-
segmentation
|
|
395
|
+
segmentation,
|
|
396
|
+
mapping
|
|
374
397
|
);
|
|
375
|
-
t
|
|
398
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
376
399
|
});
|
|
377
400
|
void (0, import_node_test.it)("can handle boolean-like text values", async (t) => {
|
|
378
401
|
const input = import_epub.Epub.xhtmlParser.parse(`
|
|
@@ -384,16 +407,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
384
407
|
</body>
|
|
385
408
|
</html>
|
|
386
409
|
`);
|
|
387
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
410
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
388
411
|
import_epub.Epub.getXhtmlBody(input),
|
|
389
412
|
{}
|
|
390
413
|
);
|
|
391
414
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
392
415
|
"chapter_one",
|
|
393
416
|
input,
|
|
394
|
-
segmentation
|
|
417
|
+
segmentation,
|
|
418
|
+
mapping
|
|
395
419
|
);
|
|
396
|
-
t
|
|
420
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
397
421
|
});
|
|
398
422
|
void (0, import_node_test.it)("can handle number-like text values", async (t) => {
|
|
399
423
|
const input = import_epub.Epub.xhtmlParser.parse(`
|
|
@@ -405,16 +429,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
405
429
|
</body>
|
|
406
430
|
</html>
|
|
407
431
|
`);
|
|
408
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
432
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
409
433
|
import_epub.Epub.getXhtmlBody(input),
|
|
410
434
|
{}
|
|
411
435
|
);
|
|
412
436
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
413
437
|
"chapter_one",
|
|
414
438
|
input,
|
|
415
|
-
segmentation
|
|
439
|
+
segmentation,
|
|
440
|
+
mapping
|
|
416
441
|
);
|
|
417
|
-
t
|
|
442
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
418
443
|
});
|
|
419
444
|
void (0, import_node_test.it)("can handle null-like text values", async (t) => {
|
|
420
445
|
const input = import_epub.Epub.xhtmlParser.parse(`
|
|
@@ -426,16 +451,17 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
426
451
|
</body>
|
|
427
452
|
</html>
|
|
428
453
|
`);
|
|
429
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
454
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
430
455
|
import_epub.Epub.getXhtmlBody(input),
|
|
431
456
|
{}
|
|
432
457
|
);
|
|
433
458
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
434
459
|
"chapter_one",
|
|
435
460
|
input,
|
|
436
|
-
segmentation
|
|
461
|
+
segmentation,
|
|
462
|
+
mapping
|
|
437
463
|
);
|
|
438
|
-
t
|
|
464
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
439
465
|
});
|
|
440
466
|
void (0, import_node_test.it)("can preserve nbsp entities", async (t) => {
|
|
441
467
|
const input = import_epub.Epub.xhtmlParser.parse(`
|
|
@@ -450,15 +476,16 @@ void (0, import_node_test.describe)("markupChapter", () => {
|
|
|
450
476
|
</body>
|
|
451
477
|
</html>
|
|
452
478
|
`);
|
|
453
|
-
const segmentation = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
479
|
+
const { result: segmentation, mapping } = await (0, import_segmentation.getXhtmlSegmentation)(
|
|
454
480
|
import_epub.Epub.getXhtmlBody(input),
|
|
455
481
|
{}
|
|
456
482
|
);
|
|
457
483
|
const { markedUp: output } = (0, import_markup.markupChapter)(
|
|
458
484
|
"chapter_one",
|
|
459
485
|
input,
|
|
460
|
-
segmentation
|
|
486
|
+
segmentation,
|
|
487
|
+
mapping
|
|
461
488
|
);
|
|
462
|
-
t
|
|
489
|
+
await assertMarkupSnapshot(t, import_epub.Epub.xhtmlBuilder.build(output));
|
|
463
490
|
});
|
|
464
491
|
});
|