@storyteller-platform/align 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/dist/align/__tests__/align.test.cjs +6 -5
  2. package/dist/align/__tests__/align.test.js +6 -5
  3. package/dist/align/align.cjs +133 -81
  4. package/dist/align/align.d.cts +1 -0
  5. package/dist/align/align.d.ts +1 -0
  6. package/dist/align/align.js +133 -81
  7. package/dist/align/getSentenceRanges.cjs +78 -149
  8. package/dist/align/getSentenceRanges.d.cts +1 -1
  9. package/dist/align/getSentenceRanges.d.ts +1 -1
  10. package/dist/align/getSentenceRanges.js +78 -149
  11. package/dist/align/slugify.cjs +16 -8
  12. package/dist/align/slugify.js +16 -8
  13. package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
  14. package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
  15. package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
  16. package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
  17. package/dist/errorAlign/__tests__/native.test.cjs +118 -0
  18. package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
  19. package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
  20. package/dist/errorAlign/__tests__/native.test.js +107 -0
  21. package/dist/errorAlign/backtraceGraph.cjs +298 -0
  22. package/dist/errorAlign/backtraceGraph.d.cts +103 -0
  23. package/dist/errorAlign/backtraceGraph.d.ts +103 -0
  24. package/dist/errorAlign/backtraceGraph.js +270 -0
  25. package/dist/errorAlign/beamSearch.cjs +302 -0
  26. package/dist/errorAlign/beamSearch.d.cts +53 -0
  27. package/dist/errorAlign/beamSearch.d.ts +53 -0
  28. package/dist/errorAlign/beamSearch.js +268 -0
  29. package/dist/errorAlign/core.cjs +33 -0
  30. package/dist/errorAlign/core.d.cts +5 -0
  31. package/dist/errorAlign/core.d.ts +5 -0
  32. package/dist/errorAlign/core.js +11 -0
  33. package/dist/errorAlign/editDistance.cjs +115 -0
  34. package/dist/errorAlign/editDistance.d.cts +46 -0
  35. package/dist/errorAlign/editDistance.d.ts +46 -0
  36. package/dist/errorAlign/editDistance.js +90 -0
  37. package/dist/errorAlign/errorAlign.cjs +159 -0
  38. package/dist/errorAlign/errorAlign.d.cts +15 -0
  39. package/dist/errorAlign/errorAlign.d.ts +15 -0
  40. package/dist/errorAlign/errorAlign.js +145 -0
  41. package/dist/errorAlign/graphMetadata.cjs +97 -0
  42. package/dist/errorAlign/graphMetadata.d.cts +44 -0
  43. package/dist/errorAlign/graphMetadata.d.ts +44 -0
  44. package/dist/errorAlign/graphMetadata.js +64 -0
  45. package/dist/errorAlign/hash.cjs +173 -0
  46. package/dist/errorAlign/hash.d.cts +28 -0
  47. package/dist/errorAlign/hash.d.ts +28 -0
  48. package/dist/errorAlign/hash.js +150 -0
  49. package/dist/errorAlign/native.cjs +60 -0
  50. package/dist/errorAlign/native.d.cts +18 -0
  51. package/dist/errorAlign/native.d.ts +18 -0
  52. package/dist/errorAlign/native.js +24 -0
  53. package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
  54. package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
  55. package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
  56. package/dist/errorAlign/node-gyp-build.d.js +0 -0
  57. package/dist/errorAlign/pathToAlignment.cjs +122 -0
  58. package/dist/errorAlign/pathToAlignment.d.cts +11 -0
  59. package/dist/errorAlign/pathToAlignment.d.ts +11 -0
  60. package/dist/errorAlign/pathToAlignment.js +89 -0
  61. package/dist/errorAlign/utils.cjs +301 -0
  62. package/dist/errorAlign/utils.d.cts +107 -0
  63. package/dist/errorAlign/utils.d.ts +107 -0
  64. package/dist/errorAlign/utils.js +248 -0
  65. package/dist/index.d.cts +1 -0
  66. package/dist/index.d.ts +1 -0
  67. package/dist/markup/__tests__/markup.test.cjs +108 -81
  68. package/dist/markup/__tests__/markup.test.js +109 -82
  69. package/dist/markup/__tests__/parseDom.test.cjs +112 -0
  70. package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
  71. package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
  72. package/dist/markup/__tests__/parseDom.test.js +89 -0
  73. package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
  74. package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
  75. package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
  76. package/dist/markup/__tests__/serializeDom.test.js +97 -0
  77. package/dist/markup/__tests__/transform.test.cjs +122 -0
  78. package/dist/markup/__tests__/transform.test.d.cts +2 -0
  79. package/dist/markup/__tests__/transform.test.d.ts +2 -0
  80. package/dist/markup/__tests__/transform.test.js +99 -0
  81. package/dist/markup/map.cjs +261 -0
  82. package/dist/markup/map.d.cts +50 -0
  83. package/dist/markup/map.d.ts +50 -0
  84. package/dist/markup/map.js +236 -0
  85. package/dist/markup/markup.cjs +23 -201
  86. package/dist/markup/markup.d.cts +5 -9
  87. package/dist/markup/markup.d.ts +5 -9
  88. package/dist/markup/markup.js +24 -203
  89. package/dist/markup/model.cjs +172 -0
  90. package/dist/markup/model.d.cts +57 -0
  91. package/dist/markup/model.d.ts +57 -0
  92. package/dist/markup/model.js +145 -0
  93. package/dist/markup/parseDom.cjs +59 -0
  94. package/dist/markup/parseDom.d.cts +7 -0
  95. package/dist/markup/parseDom.d.ts +7 -0
  96. package/dist/markup/parseDom.js +35 -0
  97. package/dist/markup/segmentation.cjs +11 -57
  98. package/dist/markup/segmentation.d.cts +6 -2
  99. package/dist/markup/segmentation.d.ts +6 -2
  100. package/dist/markup/segmentation.js +11 -58
  101. package/dist/markup/serializeDom.cjs +87 -0
  102. package/dist/markup/serializeDom.d.cts +7 -0
  103. package/dist/markup/serializeDom.d.ts +7 -0
  104. package/dist/markup/serializeDom.js +63 -0
  105. package/dist/markup/transform.cjs +92 -0
  106. package/dist/markup/transform.d.cts +11 -0
  107. package/dist/markup/transform.d.ts +11 -0
  108. package/dist/markup/transform.js +71 -0
  109. package/dist/types/node-gyp-build.d.cjs +1 -0
  110. package/dist/types/node-gyp-build.d.d.cts +3 -0
  111. package/dist/types/node-gyp-build.d.d.ts +3 -0
  112. package/dist/types/node-gyp-build.d.js +0 -0
  113. package/package.json +11 -4
@@ -0,0 +1,89 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import assert from "node:assert";
3
+ import { Alignment, translateSlice } from "./utils.js";
4
/**
 * Build the DELETE alignment for a segment that exists only in the reference.
 *
 * @param startRefIndex Start (inclusive) of the segment within `subgraphMetadata.refIndexMap`.
 * @param endRefIndex End (exclusive) of the segment within `subgraphMetadata.refIndexMap`.
 * @param subgraphMetadata Provides `refIndexMap` and `refRaw`.
 * @returns A "DELETE" `Alignment` holding the translated reference slice and the
 *   corresponding piece of `refRaw`; its hypothesis fields are null.
 * @throws AssertionError if `translateSlice` finds no usable index in the segment.
 */
function getDeleteAlignment(startRefIndex, endRefIndex, subgraphMetadata) {
  const segment = [startRefIndex, endRefIndex];
  const refSlice = translateSlice(segment, subgraphMetadata.refIndexMap);
  assert(!!refSlice);
  const [from, to] = refSlice;
  const refText = subgraphMetadata.refRaw.slice(from, to);
  return new Alignment("DELETE", refSlice, null, refText);
}
17
/**
 * Build the INSERT alignment for a segment that exists only in the hypothesis.
 *
 * @param startHypIndex Start (inclusive) of the segment within `subgraphMetadata.hypIndexMap`.
 * @param endHypIndex End (exclusive) of the segment within `subgraphMetadata.hypIndexMap`.
 * @param subgraphMetadata Provides `hypIndexMap` and `hypRaw`.
 * @returns An "INSERT" `Alignment` holding the translated hypothesis slice, the
 *   corresponding piece of `hypRaw`, and the left/right compound flags.
 * @throws AssertionError if `translateSlice` finds no usable index in the segment.
 */
function getInsertAlignment(startHypIndex, endHypIndex, subgraphMetadata) {
  const { hypIndexMap } = subgraphMetadata;
  const hypSlice = translateSlice([startHypIndex, endHypIndex], hypIndexMap);
  assert(!!hypSlice);
  const [from, to] = hypSlice;
  const hypText = subgraphMetadata.hypRaw.slice(from, to);
  // A compound flag is set when the segment's first / last map entry is non-negative.
  // NOTE(review): presumably negative entries mark positions with no counterpart in
  // the raw text (translateSlice discards them) — confirm against the index-map builder.
  const leftCompound = hypIndexMap[startHypIndex] >= 0;
  const rightCompound = hypIndexMap[endHypIndex - 1] >= 0;
  return new Alignment(
    "INSERT",
    null,
    hypSlice,
    null,
    hypText,
    leftCompound,
    rightCompound
  );
}
33
/**
 * Build the alignment for a segment that covers both hypothesis and reference.
 *
 * The operation is "MATCH" when `score` is exactly 0 and "SUBSTITUTE" otherwise.
 *
 * @param startHypIndex Start (inclusive) of the segment within `hypIndexMap`.
 * @param endHypIndex End (exclusive) of the segment within `hypIndexMap`.
 * @param startRefIndex Start (inclusive) of the segment within `refIndexMap`.
 * @param endRefIndex End (exclusive) of the segment within `refIndexMap`.
 * @param score Segment score; only its equality with 0 is inspected here.
 * @param subgraphMetadata Provides `hypIndexMap`, `refIndexMap`, `hypRaw` and `refRaw`.
 * @returns The resulting `Alignment`, including the left/right compound flags.
 * @throws AssertionError if either segment translates to no usable index.
 */
function getMatchOrSubstitutionAlignment(startHypIndex, endHypIndex, startRefIndex, endRefIndex, score, subgraphMetadata) {
  const { hypIndexMap, refIndexMap } = subgraphMetadata;
  const hypSlice = translateSlice([startHypIndex, endHypIndex], hypIndexMap);
  const refSlice = translateSlice([startRefIndex, endRefIndex], refIndexMap);
  assert(!!hypSlice);
  assert(!!refSlice);

  let opType = "SUBSTITUTE";
  if (score === 0) {
    opType = "MATCH";
  }

  const [refFrom, refTo] = refSlice;
  const [hypFrom, hypTo] = hypSlice;
  const refText = subgraphMetadata.refRaw.slice(refFrom, refTo);
  const hypText = subgraphMetadata.hypRaw.slice(hypFrom, hypTo);
  // Compound flags: set when the first / last hypothesis map entry is non-negative.
  const leftCompound = hypIndexMap[startHypIndex] >= 0;
  const rightCompound = hypIndexMap[endHypIndex - 1] >= 0;

  return new Alignment(
    opType,
    refSlice,
    hypSlice,
    refText,
    hypText,
    leftCompound,
    rightCompound
  );
}
56
/**
 * Convert a path's segmentation into a list of alignments.
 *
 * Each entry of `path.endIndices` is read as `[endHyp, endRef, score]`, where the
 * two indices are the last positions covered by the segment (they are converted to
 * exclusive ends by adding 1). A segment that does not advance the hypothesis
 * becomes a DELETE, one that does not advance the reference becomes an INSERT,
 * and any other segment becomes a MATCH or SUBSTITUTE depending on its score.
 *
 * @param path Object exposing `src` (the subgraph metadata) and `endIndices`.
 * @returns The alignments, in the order the segments appear in `path.endIndices`.
 */
function getAlignments(path) {
  const metadata = path.src;
  const alignments = [];
  let prevHypEnd = 0;
  let prevRefEnd = 0;
  for (const [lastHyp, lastRef, score] of path.endIndices) {
    const hypEnd = lastHyp + 1;
    const refEnd = lastRef + 1;
    let alignment;
    if (prevHypEnd === hypEnd) {
      // Hypothesis did not advance: reference-only segment.
      alignment = getDeleteAlignment(prevRefEnd, refEnd, metadata);
    } else if (prevRefEnd === refEnd) {
      // Reference did not advance: hypothesis-only segment.
      alignment = getInsertAlignment(prevHypEnd, hypEnd, metadata);
    } else {
      alignment = getMatchOrSubstitutionAlignment(
        prevHypEnd,
        hypEnd,
        prevRefEnd,
        refEnd,
        score,
        metadata
      );
    }
    alignments.push(alignment);
    prevHypEnd = hypEnd;
    prevRefEnd = refEnd;
  }
  return alignments;
}
87
+ export {
88
+ getAlignments
89
+ };
@@ -0,0 +1,301 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+ var utils_exports = {};
30
+ __export(utils_exports, {
31
+ Alignment: () => Alignment,
32
+ Counter: () => Counter,
33
+ DELIMITERS: () => DELIMITERS,
34
+ END_DELIMITER: () => END_DELIMITER,
35
+ NUMERIC_TOKEN: () => NUMERIC_TOKEN,
36
+ OP_TYPES: () => OP_TYPES,
37
+ OP_TYPE_COMBO_MAP: () => OP_TYPE_COMBO_MAP,
38
+ OP_TYPE_MAP: () => OP_TYPE_MAP,
39
+ STANDARD_TOKEN: () => STANDARD_TOKEN,
40
+ START_DELIMITER: () => START_DELIMITER,
41
+ basicNormalizer: () => basicNormalizer,
42
+ basicTokenizer: () => basicTokenizer,
43
+ categorizeChar: () => categorizeChar,
44
+ ensureLengthPreservation: () => ensureLengthPreservation,
45
+ getOpTypeComboIndex: () => getOpTypeComboIndex,
46
+ isConsonant: () => isConsonant,
47
+ isVowel: () => isVowel,
48
+ opTypePowerset: () => opTypePowerset,
49
+ reversed: () => reversed,
50
+ translateSlice: () => translateSlice,
51
+ unpackRegexMatch: () => unpackRegexMatch
52
+ });
53
+ module.exports = __toCommonJS(utils_exports);
54
+ var import_node_assert = __toESM(require("node:assert"), 1);
55
+ var import_itertools = require("itertools");
56
/**
 * The four alignment operation types.
 * Order matters: opTypePowerset() enumerates combinations in this order.
 */
const OP_TYPES = ["MATCH", "INSERT", "DELETE", "SUBSTITUTE"];
62
+ class Alignment {
63
+ constructor(opType, refSlice = null, hypSlice = null, ref = null, hyp = null, leftCompound = false, rightCompound = false) {
64
+ this.opType = opType;
65
+ this.refSlice = refSlice;
66
+ this.hypSlice = hypSlice;
67
+ this.ref = ref;
68
+ this.hyp = hyp;
69
+ this.leftCompound = leftCompound;
70
+ this.rightCompound = rightCompound;
71
+ switch (opType) {
72
+ case "MATCH": {
73
+ if (ref === null || hyp === null) {
74
+ throw new TypeError("MATCH operation must have non-empty ref or hyp.");
75
+ }
76
+ if (leftCompound || rightCompound) {
77
+ throw new TypeError("MATCH operation cannot have compound markers.");
78
+ }
79
+ break;
80
+ }
81
+ case "INSERT": {
82
+ if (hyp === null || ref !== null) {
83
+ throw new TypeError(
84
+ "INSERT operation must have non-empty hyp and empty ref."
85
+ );
86
+ }
87
+ break;
88
+ }
89
+ case "DELETE": {
90
+ if (hyp !== null || ref === null) {
91
+ throw new TypeError(
92
+ "DELETE operation must have non-empty ref and empty hyp."
93
+ );
94
+ }
95
+ break;
96
+ }
97
+ case "SUBSTITUTE": {
98
+ if (ref === null || hyp === null) {
99
+ throw new TypeError(
100
+ "SUBSTITUTE operation must have both ref and hyp."
101
+ );
102
+ }
103
+ }
104
+ }
105
+ }
106
+ /** Return the hypothesis with compound markers if applicable. */
107
+ get hypWithCompoundMarkers() {
108
+ if (this.hyp === null) {
109
+ return null;
110
+ }
111
+ return `${this.leftCompound ? "-" : ""}"${this.hyp}"${this.rightCompound ? "-" : ""}`;
112
+ }
113
+ toString() {
114
+ switch (this.opType) {
115
+ case "DELETE": {
116
+ return `Alignment(${this.opType}: "${this.ref}")`;
117
+ }
118
+ case "INSERT": {
119
+ return `Alignment(${this.opType}: ${this.hypWithCompoundMarkers})`;
120
+ }
121
+ case "SUBSTITUTE": {
122
+ return `Alignment(${this.opType}: ${this.hypWithCompoundMarkers} -> "${this.ref}")`;
123
+ }
124
+ case "MATCH": {
125
+ return `Alignment(${this.opType}: "${this.hyp}" == "${this.ref}")`;
126
+ }
127
+ }
128
+ }
129
+ }
130
/**
 * Enumerate every non-empty combination of operation types.
 *
 * Combinations are produced size by size (1 up to OP_TYPES.length) and, within a
 * size, in the order generated by `combinations(OP_TYPES, size)`.
 *
 * @returns The chained combinations of all sizes.
 */
function opTypePowerset() {
  const sizes = (0, import_itertools.range)(1, OP_TYPES.length + 1);
  const combosBySize = (0, import_itertools.map)(
    sizes,
    (size) => combinations(OP_TYPES, size)
  );
  return (0, import_itertools.chain)(...combosBySize);
}
137
/**
 * Lazily yield every length-`r` combination of the items in `iterable`, keeping
 * the items in their original order and advancing the rightmost position first.
 *
 * @param iterable Source items; consumed once into an array.
 * @param r Number of items per combination (expected to be an integer).
 * @returns A generator of arrays of `r` items. Yields nothing when `r` exceeds the
 *   number of items, and a single empty array when `r` is 0.
 */
function* combinations(iterable, r) {
  const pool = Array.from(iterable);
  const n = pool.length;
  if (r > n) {
    return;
  }
  // indices[k] is the pool position currently chosen for slot k.
  const indices = Array.from({ length: r }, (_, k) => k);
  yield indices.map((k) => pool[k]);
  for (;;) {
    // Find the rightmost slot that has not yet reached its maximum position.
    let pivot = r - 1;
    while (pivot >= 0 && indices[pivot] === pivot + n - r) {
      pivot -= 1;
    }
    if (pivot < 0) {
      // Every slot is at its maximum: all combinations have been produced.
      return;
    }
    indices[pivot] += 1;
    // Reset the slots to the right so they directly follow the pivot.
    for (let j = pivot + 1; j < r; j += 1) {
      indices[j] = indices[j - 1] + 1;
    }
    yield indices.map((k) => pool[k]);
  }
}
162
/**
 * Collect an iterable into a new array in reverse order.
 *
 * @param iterable The values to reverse; it is consumed but never mutated.
 * @returns A new array holding the values from last to first.
 */
function reversed(iterable) {
  // Array.from already produces a fresh array, so it can be reversed in place
  // without a second copy and without touching the caller's data.
  return Array.from(iterable).reverse();
}
165
/** Character treated as the opening delimiter. */
const START_DELIMITER = "<";
/** Character treated as the closing delimiter. */
const END_DELIMITER = ">";
/** Both delimiter characters, for membership checks. */
const DELIMITERS = /* @__PURE__ */ new Set([START_DELIMITER, END_DELIMITER]);
/** Maps every operation type name to itself. */
const OP_TYPE_MAP = Object.fromEntries(
  OP_TYPES.map((opType) => [opType, opType])
);
/**
 * Maps the enumeration index of each entry of opTypePowerset() to that
 * combination of operation types. Built in one pass rather than by
 * re-spreading the accumulator object for every entry.
 */
const OP_TYPE_COMBO_MAP = Object.fromEntries(
  (0, import_itertools.enumerate)(opTypePowerset())
);
173
/**
 * Look up the enumeration index of an operation-type combination within
 * opTypePowerset().
 *
 * The comparison is positional, so `ops` must list its operation types in the
 * same order opTypePowerset() generates them.
 *
 * @param ops The combination of operation types to look up.
 * @returns The index of the matching combination.
 * @throws TypeError if `ops` matches no combination (previously this failed with
 *   an opaque "cannot read properties of undefined" TypeError).
 */
function getOpTypeComboIndex(ops) {
  const entry = (0, import_itertools.find)(
    (0, import_itertools.enumerate)(opTypePowerset()),
    ([_index, combo]) => combo.length === ops.length && combo.every((opType, i) => opType === ops[i])
  );
  if (entry === undefined) {
    throw new TypeError(
      `Unknown operation type combination: [${Array.from(ops).join(", ")}]`
    );
  }
  return entry[0];
}
179
/**
 * Regex source for a numeric token: one or more Unicode number characters,
 * optionally followed by groups of "," or "." plus more number characters.
 * The token must be followed by whitespace or the end of the input.
 * Requires the "u" flag.
 */
const NUMERIC_TOKEN = "\\p{N}+([,.]\\p{N}+)*(?=\\s|$)";
/**
 * Regex source for a standard token: a run of Unicode letters/numbers,
 * optionally continued by apostrophe-joined runs, with an optional trailing
 * apostrophe. Requires the "u" flag.
 */
const STANDARD_TOKEN = "[\\p{L}\\p{N}]+(['][\\p{L}\\p{N}]+)*'?";
181
/**
 * Check whether a single character is one of "aeiouy".
 *
 * The comparison is case-sensitive, so callers are expected to pass an
 * already lower-cased character.
 *
 * @param c The character to check; must have length 1.
 * @returns True if the character is a vowel, false otherwise.
 * @throws AssertionError if `c` is not exactly one character long.
 */
function isVowel(c) {
  (0, import_node_assert.default)(c.length === 1, "Input must be a single character");
  const vowels = "aeiouy";
  return vowels.indexOf(c) !== -1;
}
185
/**
 * Check whether a single character is one of "bcdfghjklmnpqrstvwxyz".
 *
 * The comparison is case-sensitive. Note that "y" is listed here as well as in
 * the vowel list used by isVowel.
 *
 * @param c The character to check; must have length 1.
 * @returns True if the character is a consonant, false otherwise.
 * @throws AssertionError if `c` is not exactly one character long.
 */
function isConsonant(c) {
  (0, import_node_assert.default)(c.length === 1, "Input must be a single character");
  const consonants = "bcdfghjklmnpqrstvwxyz";
  return consonants.indexOf(c) !== -1;
}
189
/**
 * Map a character to a numeric category.
 *
 * @param c The character to categorize.
 * @returns 0 for a delimiter, 1 for a consonant, 2 for a vowel, 3 for anything else.
 *   The consonant test runs before the vowel test, so "y" is reported as 1.
 */
function categorizeChar(c) {
  if (DELIMITERS.has(c)) {
    return 0;
  }
  if (isConsonant(c)) {
    return 1;
  }
  return isVowel(c) ? 2 : 3;
}
195
/**
 * Default tokenizer: find every numeric or standard token in the text.
 *
 * The pattern is a single capture group trying NUMERIC_TOKEN first and
 * STANDARD_TOKEN second, compiled with the "u", "d" and "g" flags, so each
 * match also carries `indices`.
 *
 * @param text The input text to tokenize.
 * @returns The regex match arrays, one per token, in order of appearance.
 */
function basicTokenizer(text) {
  const tokenPattern = new RegExp(`(${NUMERIC_TOKEN}|${STANDARD_TOKEN})`, "udg");
  return [...text.matchAll(tokenPattern)];
}
200
/**
 * Default normalizer: lower-case the text and change nothing else.
 *
 * @param text The input text to normalize.
 * @returns The lower-cased text.
 */
function basicNormalizer(text) {
  const lowered = text.toLowerCase();
  return lowered;
}
203
+ function ensureLengthPreservation(normalizer) {
204
+ return function wrapper(text, ...args) {
205
+ const normalized = normalizer(text, ...args);
206
+ if (normalized.length !== text.length) {
207
+ throw new RangeError("Normalizer must preserve length.");
208
+ }
209
+ return normalized;
210
+ };
211
+ }
212
+ function unpackRegexMatch(tokenizer) {
213
+ return function wrapper(text, ...args) {
214
+ const matches = tokenizer(text, ...args);
215
+ return matches.map((match) => [match[1], match.indices[1]]);
216
+ };
217
+ }
218
/**
 * Translate a slice of the alignment sequence back to the original sequence.
 *
 * Entries of `indexMap` inside the slice that are negative are ignored; the
 * result spans from the first remaining entry to one past the last remaining entry.
 *
 * @param segmentSlice `[start, end]` slice (end exclusive) into `indexMap`.
 * @param indexMap Mapping from alignment indices to original sequence indices.
 * @returns The `[start, end]` slice in the original sequence, or null when the
 *   slice contains no non-negative entry.
 */
function translateSlice(segmentSlice, indexMap) {
  const [start, end] = segmentSlice;
  const mapped = indexMap.slice(start, end).filter((index) => index >= 0);
  if (mapped.length === 0) {
    return null;
  }
  const first = mapped[0];
  const last = mapped[mapped.length - 1];
  return [first, last + 1];
}
225
+ class Counter {
226
+ counts = /* @__PURE__ */ new Map();
227
+ constructor(init = []) {
228
+ if (init instanceof Map) {
229
+ this.counts = init;
230
+ return;
231
+ }
232
+ for (const element of init) {
233
+ this.counts.set(element, (this.counts.get(element) ?? 0) + 1);
234
+ }
235
+ }
236
+ elements() {
237
+ return this.counts.entries().flatMap(([e, c]) => Array.from((0, import_itertools.range)(c)).map(() => e));
238
+ }
239
+ mostCommon(n) {
240
+ const ordered = Array.from(this.counts.entries()).toSorted(
241
+ ([_a, a], [_b, b]) => a - b
242
+ );
243
+ if (n === void 0) return ordered;
244
+ return ordered.slice(0, n);
245
+ }
246
+ total() {
247
+ return this.counts.values().reduce((acc, v) => acc + v);
248
+ }
249
+ subtract(update) {
250
+ if (update instanceof Map) {
251
+ for (const [element, count] of update.entries()) {
252
+ this.counts.set(element, (this.counts.get(element) ?? 0) - count);
253
+ }
254
+ return;
255
+ }
256
+ for (const element of update) {
257
+ this.counts.set(element, (this.counts.get(element) ?? 0) - 1);
258
+ }
259
+ }
260
+ update(update) {
261
+ if (update instanceof Map) {
262
+ for (const [element, count] of update.entries()) {
263
+ this.counts.set(element, (this.counts.get(element) ?? 0) + count);
264
+ }
265
+ return;
266
+ }
267
+ for (const element of update) {
268
+ this.counts.set(element, (this.counts.get(element) ?? 0) + 1);
269
+ }
270
+ }
271
+ get(element) {
272
+ return this.counts.get(element) ?? 0;
273
+ }
274
+ set(element, count) {
275
+ this.counts.set(element, count);
276
+ }
277
+ }
278
+ // Annotate the CommonJS export names for ESM import in node:
279
+ 0 && (module.exports = {
280
+ Alignment,
281
+ Counter,
282
+ DELIMITERS,
283
+ END_DELIMITER,
284
+ NUMERIC_TOKEN,
285
+ OP_TYPES,
286
+ OP_TYPE_COMBO_MAP,
287
+ OP_TYPE_MAP,
288
+ STANDARD_TOKEN,
289
+ START_DELIMITER,
290
+ basicNormalizer,
291
+ basicTokenizer,
292
+ categorizeChar,
293
+ ensureLengthPreservation,
294
+ getOpTypeComboIndex,
295
+ isConsonant,
296
+ isVowel,
297
+ opTypePowerset,
298
+ reversed,
299
+ translateSlice,
300
+ unpackRegexMatch
301
+ });
@@ -0,0 +1,107 @@
1
+ type OpType = "MATCH" | "INSERT" | "DELETE" | "SUBSTITUTE";
2
+ declare const OP_TYPES: ["MATCH", "INSERT", "DELETE", "SUBSTITUTE"];
3
+ type Slice = [number, number];
4
+ /** Class representing one alignment operation: its type plus the reference and hypothesis spans it covers. */
5
+ declare class Alignment {
6
+ opType: OpType;
7
+ refSlice: Slice | null;
8
+ hypSlice: Slice | null;
9
+ ref: string | null;
10
+ hyp: string | null;
11
+ leftCompound: boolean;
12
+ rightCompound: boolean;
13
+ constructor(opType: OpType, refSlice?: Slice | null, hypSlice?: Slice | null, ref?: string | null, hyp?: string | null, leftCompound?: boolean, rightCompound?: boolean);
14
+ /** Return the hypothesis with compound markers if applicable. */
15
+ get hypWithCompoundMarkers(): string | null;
16
+ toString(): string;
17
+ }
18
+ /**
19
+ * Generate all possible combinations of operation types, except the empty set.
20
+ *
21
+ * @returns All possible combinations of operation types.
22
+ */
23
+ declare function opTypePowerset(): IterableIterator<NonNullable<"DELETE" | "MATCH" | "INSERT" | "SUBSTITUTE">[]>;
24
+ declare function reversed<T>(iterable: IterableIterator<T>): T[];
25
+ declare const START_DELIMITER = "<";
26
+ declare const END_DELIMITER = ">";
27
+ declare const DELIMITERS: Set<string>;
28
+ declare const OP_TYPE_MAP: {
29
+ DELETE: "DELETE";
30
+ MATCH: "MATCH";
31
+ INSERT: "INSERT";
32
+ SUBSTITUTE: "SUBSTITUTE";
33
+ };
34
+ declare const OP_TYPE_COMBO_MAP: Record<number, OpType[]>;
35
+ declare function getOpTypeComboIndex(ops: OpType[]): number;
36
+ declare const NUMERIC_TOKEN = "\\p{N}+([,.]\\p{N}+)*(?=\\s|$)";
37
+ declare const STANDARD_TOKEN = "[\\p{L}\\p{N}]+(['][\\p{L}\\p{N}]+)*'?";
38
+ /**
39
+ * Check if the normalized character is a vowel.
40
+ *
41
+ * @param c The character to check.
42
+ * @returns True if the character is a vowel, false, otherwise.
43
+ */
44
+ declare function isVowel(c: string): boolean;
45
+ /**
46
+ * Check if the normalized character is a consonant.
47
+ *
48
+ * @param c The character to check.
49
+ * @returns True if the character is a consonant, false, otherwise.
50
+ */
51
+ declare function isConsonant(c: string): boolean;
52
+ /**
53
+  * Categorize a character as a delimiter (0), consonant (1), vowel (2), or anything else (3).
54
+ *
55
+ * @param c The character to check.
56
+ * @returns The category of the character.
57
+ */
58
+ declare function categorizeChar(c: string): number;
59
+ /**
60
+  * Default tokenizer that extracts numeric and word tokens using the NUMERIC_TOKEN and STANDARD_TOKEN patterns.
61
+ *
62
+ * @param text The input text to tokenize.
63
+  * @returns A list of regex match arrays, one per token.
64
+ */
65
+ declare function basicTokenizer(text: string): RegExpMatchArray[];
66
+ /**
67
+ * Default normalizer that only converts text to lowercase.
68
+ *
69
+ * @param text The input text to normalize.
70
+ * @returns The normalized text.
71
+ */
72
+ declare function basicNormalizer(text: string): string;
73
+ /**
74
+ * Decorator to ensure that the normalizer preserves the length of the input text.
75
+ *
76
+ * @param normalizer The normalizer function to wrap.
77
+ * @returns The wrapped normalizer that preserves length.
78
+ */
79
+ declare function ensureLengthPreservation<Args extends unknown[]>(normalizer: (text: string, ...args: Args) => string): (text: string, ...args: Args) => string;
80
+ /**
81
+ * Unpack a regex match array to extract the matched string.
82
+ *
83
+ * @param tokenizer A function to tokenize the sequences. Must be regex-based and return match arrays.
84
+ * @returns A function that unpacks a list of match arrays into tuples (match string, span).
85
+ */
86
+ declare function unpackRegexMatch<Args extends unknown[]>(tokenizer: (text: string, ...args: Args) => RegExpMatchArray[]): (text: string, ...args: Args) => [string, [number, number]][];
87
+ /**
88
+  * Translate a slice from the alignment sequence back to the original sequence.
89
+ *
90
+ * @param segmentSlice The slice in the alignment sequence
91
+ * @param indexMap The mapping from alignment indices to original sequence indices.
92
+  * @returns The translated slice in the original sequence, or null if no valid indices.
93
+ */
94
+ declare function translateSlice(segmentSlice: Slice, indexMap: number[]): Slice | null;
95
+ declare class Counter<T> {
96
+ private counts;
97
+ constructor(init?: Iterable<T> | Map<T, number>);
98
+ elements(): IteratorObject<T, undefined, unknown>;
99
+ mostCommon(n?: number): [T, number][];
100
+ total(): number;
101
+ subtract(update: Iterable<T> | Map<T, number>): void;
102
+ update(update: Iterable<T> | Map<T, number>): void;
103
+ get(element: T): number;
104
+ set(element: T, count: number): void;
105
+ }
106
+
107
+ export { Alignment, Counter, DELIMITERS, END_DELIMITER, NUMERIC_TOKEN, OP_TYPES, OP_TYPE_COMBO_MAP, OP_TYPE_MAP, type OpType, STANDARD_TOKEN, START_DELIMITER, type Slice, basicNormalizer, basicTokenizer, categorizeChar, ensureLengthPreservation, getOpTypeComboIndex, isConsonant, isVowel, opTypePowerset, reversed, translateSlice, unpackRegexMatch };
@@ -0,0 +1,107 @@
1
+ type OpType = "MATCH" | "INSERT" | "DELETE" | "SUBSTITUTE";
2
+ declare const OP_TYPES: ["MATCH", "INSERT", "DELETE", "SUBSTITUTE"];
3
+ type Slice = [number, number];
4
+ /** Class representing one alignment operation: its type plus the reference and hypothesis spans it covers. */
5
+ declare class Alignment {
6
+ opType: OpType;
7
+ refSlice: Slice | null;
8
+ hypSlice: Slice | null;
9
+ ref: string | null;
10
+ hyp: string | null;
11
+ leftCompound: boolean;
12
+ rightCompound: boolean;
13
+ constructor(opType: OpType, refSlice?: Slice | null, hypSlice?: Slice | null, ref?: string | null, hyp?: string | null, leftCompound?: boolean, rightCompound?: boolean);
14
+ /** Return the hypothesis with compound markers if applicable. */
15
+ get hypWithCompoundMarkers(): string | null;
16
+ toString(): string;
17
+ }
18
+ /**
19
+ * Generate all possible combinations of operation types, except the empty set.
20
+ *
21
+ * @returns All possible combinations of operation types.
22
+ */
23
+ declare function opTypePowerset(): IterableIterator<NonNullable<"DELETE" | "MATCH" | "INSERT" | "SUBSTITUTE">[]>;
24
+ declare function reversed<T>(iterable: IterableIterator<T>): T[];
25
+ declare const START_DELIMITER = "<";
26
+ declare const END_DELIMITER = ">";
27
+ declare const DELIMITERS: Set<string>;
28
+ declare const OP_TYPE_MAP: {
29
+ DELETE: "DELETE";
30
+ MATCH: "MATCH";
31
+ INSERT: "INSERT";
32
+ SUBSTITUTE: "SUBSTITUTE";
33
+ };
34
+ declare const OP_TYPE_COMBO_MAP: Record<number, OpType[]>;
35
+ declare function getOpTypeComboIndex(ops: OpType[]): number;
36
+ declare const NUMERIC_TOKEN = "\\p{N}+([,.]\\p{N}+)*(?=\\s|$)";
37
+ declare const STANDARD_TOKEN = "[\\p{L}\\p{N}]+(['][\\p{L}\\p{N}]+)*'?";
38
+ /**
39
+ * Check if the normalized character is a vowel.
40
+ *
41
+ * @param c The character to check.
42
+ * @returns True if the character is a vowel, false, otherwise.
43
+ */
44
+ declare function isVowel(c: string): boolean;
45
+ /**
46
+ * Check if the normalized character is a consonant.
47
+ *
48
+ * @param c The character to check.
49
+ * @returns True if the character is a consonant, false, otherwise.
50
+ */
51
+ declare function isConsonant(c: string): boolean;
52
+ /**
53
+  * Categorize a character as a delimiter (0), consonant (1), vowel (2), or anything else (3).
54
+ *
55
+ * @param c The character to check.
56
+ * @returns The category of the character.
57
+ */
58
+ declare function categorizeChar(c: string): number;
59
+ /**
60
+  * Default tokenizer that extracts numeric and word tokens using the NUMERIC_TOKEN and STANDARD_TOKEN patterns.
61
+ *
62
+ * @param text The input text to tokenize.
63
+  * @returns A list of regex match arrays, one per token.
64
+ */
65
+ declare function basicTokenizer(text: string): RegExpMatchArray[];
66
+ /**
67
+ * Default normalizer that only converts text to lowercase.
68
+ *
69
+ * @param text The input text to normalize.
70
+ * @returns The normalized text.
71
+ */
72
+ declare function basicNormalizer(text: string): string;
73
+ /**
74
+ * Decorator to ensure that the normalizer preserves the length of the input text.
75
+ *
76
+ * @param normalizer The normalizer function to wrap.
77
+ * @returns The wrapped normalizer that preserves length.
78
+ */
79
+ declare function ensureLengthPreservation<Args extends unknown[]>(normalizer: (text: string, ...args: Args) => string): (text: string, ...args: Args) => string;
80
+ /**
81
+ * Unpack a regex match array to extract the matched string.
82
+ *
83
+ * @param tokenizer A function to tokenize the sequences. Must be regex-based and return match arrays.
84
+ * @returns A function that unpacks a list of match arrays into tuples (match string, span).
85
+ */
86
+ declare function unpackRegexMatch<Args extends unknown[]>(tokenizer: (text: string, ...args: Args) => RegExpMatchArray[]): (text: string, ...args: Args) => [string, [number, number]][];
87
+ /**
88
+  * Translate a slice from the alignment sequence back to the original sequence.
89
+ *
90
+ * @param segmentSlice The slice in the alignment sequence
91
+ * @param indexMap The mapping from alignment indices to original sequence indices.
92
+  * @returns The translated slice in the original sequence, or null if no valid indices.
93
+ */
94
+ declare function translateSlice(segmentSlice: Slice, indexMap: number[]): Slice | null;
95
+ declare class Counter<T> {
96
+ private counts;
97
+ constructor(init?: Iterable<T> | Map<T, number>);
98
+ elements(): IteratorObject<T, undefined, unknown>;
99
+ mostCommon(n?: number): [T, number][];
100
+ total(): number;
101
+ subtract(update: Iterable<T> | Map<T, number>): void;
102
+ update(update: Iterable<T> | Map<T, number>): void;
103
+ get(element: T): number;
104
+ set(element: T, count: number): void;
105
+ }
106
+
107
+ export { Alignment, Counter, DELIMITERS, END_DELIMITER, NUMERIC_TOKEN, OP_TYPES, OP_TYPE_COMBO_MAP, OP_TYPE_MAP, type OpType, STANDARD_TOKEN, START_DELIMITER, type Slice, basicNormalizer, basicTokenizer, categorizeChar, ensureLengthPreservation, getOpTypeComboIndex, isConsonant, isVowel, opTypePowerset, reversed, translateSlice, unpackRegexMatch };