@storyteller-platform/align 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/__tests__/align.test.cjs +6 -5
- package/dist/align/__tests__/align.test.js +6 -5
- package/dist/align/align.cjs +133 -81
- package/dist/align/align.d.cts +1 -0
- package/dist/align/align.d.ts +1 -0
- package/dist/align/align.js +133 -81
- package/dist/align/getSentenceRanges.cjs +78 -149
- package/dist/align/getSentenceRanges.d.cts +1 -1
- package/dist/align/getSentenceRanges.d.ts +1 -1
- package/dist/align/getSentenceRanges.js +78 -149
- package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
- package/dist/errorAlign/__tests__/native.test.cjs +118 -0
- package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/native.test.js +107 -0
- package/dist/errorAlign/backtraceGraph.cjs +298 -0
- package/dist/errorAlign/backtraceGraph.d.cts +103 -0
- package/dist/errorAlign/backtraceGraph.d.ts +103 -0
- package/dist/errorAlign/backtraceGraph.js +270 -0
- package/dist/errorAlign/beamSearch.cjs +302 -0
- package/dist/errorAlign/beamSearch.d.cts +53 -0
- package/dist/errorAlign/beamSearch.d.ts +53 -0
- package/dist/errorAlign/beamSearch.js +268 -0
- package/dist/errorAlign/core.cjs +33 -0
- package/dist/errorAlign/core.d.cts +5 -0
- package/dist/errorAlign/core.d.ts +5 -0
- package/dist/errorAlign/core.js +11 -0
- package/dist/errorAlign/editDistance.cjs +115 -0
- package/dist/errorAlign/editDistance.d.cts +46 -0
- package/dist/errorAlign/editDistance.d.ts +46 -0
- package/dist/errorAlign/editDistance.js +90 -0
- package/dist/errorAlign/errorAlign.cjs +159 -0
- package/dist/errorAlign/errorAlign.d.cts +15 -0
- package/dist/errorAlign/errorAlign.d.ts +15 -0
- package/dist/errorAlign/errorAlign.js +145 -0
- package/dist/errorAlign/graphMetadata.cjs +97 -0
- package/dist/errorAlign/graphMetadata.d.cts +44 -0
- package/dist/errorAlign/graphMetadata.d.ts +44 -0
- package/dist/errorAlign/graphMetadata.js +64 -0
- package/dist/errorAlign/hash.cjs +173 -0
- package/dist/errorAlign/hash.d.cts +28 -0
- package/dist/errorAlign/hash.d.ts +28 -0
- package/dist/errorAlign/hash.js +150 -0
- package/dist/errorAlign/native.cjs +60 -0
- package/dist/errorAlign/native.d.cts +18 -0
- package/dist/errorAlign/native.d.ts +18 -0
- package/dist/errorAlign/native.js +24 -0
- package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
- package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
- package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
- package/dist/errorAlign/node-gyp-build.d.js +0 -0
- package/dist/errorAlign/pathToAlignment.cjs +122 -0
- package/dist/errorAlign/pathToAlignment.d.cts +11 -0
- package/dist/errorAlign/pathToAlignment.d.ts +11 -0
- package/dist/errorAlign/pathToAlignment.js +89 -0
- package/dist/errorAlign/utils.cjs +301 -0
- package/dist/errorAlign/utils.d.cts +107 -0
- package/dist/errorAlign/utils.d.ts +107 -0
- package/dist/errorAlign/utils.js +248 -0
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/markup/__tests__/markup.test.cjs +108 -81
- package/dist/markup/__tests__/markup.test.js +109 -82
- package/dist/markup/__tests__/parseDom.test.cjs +112 -0
- package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
- package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
- package/dist/markup/__tests__/parseDom.test.js +89 -0
- package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
- package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
- package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
- package/dist/markup/__tests__/serializeDom.test.js +97 -0
- package/dist/markup/__tests__/transform.test.cjs +122 -0
- package/dist/markup/__tests__/transform.test.d.cts +2 -0
- package/dist/markup/__tests__/transform.test.d.ts +2 -0
- package/dist/markup/__tests__/transform.test.js +99 -0
- package/dist/markup/map.cjs +261 -0
- package/dist/markup/map.d.cts +50 -0
- package/dist/markup/map.d.ts +50 -0
- package/dist/markup/map.js +236 -0
- package/dist/markup/markup.cjs +23 -201
- package/dist/markup/markup.d.cts +5 -9
- package/dist/markup/markup.d.ts +5 -9
- package/dist/markup/markup.js +24 -203
- package/dist/markup/model.cjs +172 -0
- package/dist/markup/model.d.cts +57 -0
- package/dist/markup/model.d.ts +57 -0
- package/dist/markup/model.js +145 -0
- package/dist/markup/parseDom.cjs +59 -0
- package/dist/markup/parseDom.d.cts +7 -0
- package/dist/markup/parseDom.d.ts +7 -0
- package/dist/markup/parseDom.js +35 -0
- package/dist/markup/segmentation.cjs +11 -57
- package/dist/markup/segmentation.d.cts +6 -2
- package/dist/markup/segmentation.d.ts +6 -2
- package/dist/markup/segmentation.js +11 -58
- package/dist/markup/serializeDom.cjs +87 -0
- package/dist/markup/serializeDom.d.cts +7 -0
- package/dist/markup/serializeDom.d.ts +7 -0
- package/dist/markup/serializeDom.js +63 -0
- package/dist/markup/transform.cjs +92 -0
- package/dist/markup/transform.d.cts +11 -0
- package/dist/markup/transform.d.ts +11 -0
- package/dist/markup/transform.js +71 -0
- package/dist/types/node-gyp-build.d.cjs +1 -0
- package/dist/types/node-gyp-build.d.d.cts +3 -0
- package/dist/types/node-gyp-build.d.d.ts +3 -0
- package/dist/types/node-gyp-build.d.js +0 -0
- package/package.json +11 -4
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import "../chunk-BIEQXUOY.js";
|
|
2
|
+
import assert from "node:assert";
|
|
3
|
+
import { hash } from "./hash.js";
|
|
4
|
+
import { END_DELIMITER, START_DELIMITER, translateSlice } from "./utils.js";
|
|
5
|
+
/** Bit mask that keeps only the low 64 bits of a BigInt (2^64 - 1). */
const INT64_MASK = 0xffff_ffff_ffff_ffffn;
/** Multiplier applied to the running sort ID before the next step code is added. */
const SORT_ID_BASE = 146527n;

/**
 * A single candidate path explored by the beam search.
 *
 * Tracks the current (hyp, ref) position, the position at which the last
 * segment was closed, the cost split into "closed" and "open" parts, the
 * list of recorded segment ends, and a 64-bit rolling ID used for ordering.
 */
class Path {
  /**
   * @param src Shared search context. This class reads `hypMaxIndex` and
   *   `refMaxIndex` from it.
   */
  constructor(src) {
    this.src = src;
  }

  // Current position; -1 means no element has been consumed yet.
  refIndex = -1;
  hypIndex = -1;
  // Position at which the most recent segment was closed.
  lastHypIndex = -1;
  lastRefIndex = -1;
  // Cost of segments that are already closed.
  closedCost = 0;
  // Cost accumulated by the segment that is still open.
  openCost = 0;
  atUnambiguousMatchNode = false;
  // Entries have the shape [hypIndex, refIndex, openCost].
  endIndices = [];
  sortId = 0n;

  /**
   * ID used for pruning: the hash of the current and last-closed positions.
   */
  get pruneId() {
    const key = [
      this.hypIndex,
      this.refIndex,
      this.lastHypIndex,
      this.lastRefIndex,
    ];
    return hash(key);
  }

  /**
   * Total cost of the path. The open cost is counted twice while the open
   * segment qualifies as a substitution.
   */
  get cost() {
    const settled = this.closedCost + this.openCost;
    const countedTwice = isSubstitution(
      this.hypIndex,
      this.refIndex,
      this.lastHypIndex,
      this.lastRefIndex,
    );
    return countedTwice ? settled + this.openCost : settled;
  }

  /**
   * Cost divided by `refIndex + hypIndex + 3`; a zero cost stays zero.
   */
  get normCost() {
    const total = this.cost;
    return total === 0 ? 0 : total / (this.refIndex + this.hypIndex + 3);
  }

  /**
   * Current node as a fresh `[hypIndex, refIndex]` pair.
   */
  get index() {
    const { hypIndex, refIndex } = this;
    return [hypIndex, refIndex];
  }

  /**
   * Whether both indices have reached the maxima stored on `src`.
   */
  get atEnd() {
    if (this.hypIndex !== this.src.hypMaxIndex) {
      return false;
    }
    return this.refIndex === this.src.refMaxIndex;
  }

  /**
   * Fold step code `t` (a BigInt) into the sort ID, truncated to 64 bits.
   * The original comment states this mirrors the C++ implementation.
   */
  updateSortId(t) {
    const widened = this.sortId * SORT_ID_BASE + t;
    this.sortId = widened & INT64_MASK;
  }
}
|
|
70
|
+
/**
 * Lazily yield the children of `parent`, trying the delete, insert and
 * substitution-or-match transitions in that order. A transition that
 * returns a falsy value is skipped. Each transition runs only when the
 * consumer asks for the next child.
 */
function* expand(parent) {
  const transitions = [addDelete, addInsert, addSubstitutionOrMatch];
  for (const transition of transitions) {
    const child = transition(parent);
    if (child) {
      yield child;
    }
  }
}
|
|
78
|
+
/**
 * Advance both the ref and the hyp index by one.
 *
 * Returns null when either index is already at its maximum, or when the two
 * characters differ and at least one of them has char type 0 (the variable
 * names in the original call this a delimiter). A mismatch adds 2 to the open
 * cost when both char types agree and 3 otherwise, plus 1 when the parent
 * node is not in `backtraceNodeSet`.
 *
 * @returns The child path, or null (also when `endSegment` returns null).
 */
function addSubstitutionOrMatch(parent) {
  const { src } = parent;
  if (parent.refIndex >= src.refMaxIndex || parent.hypIndex >= src.hypMaxIndex) {
    return null;
  }

  const child = transitionToChildNode(parent, { refStep: 1, hypStep: 1 });
  const refPos = child.refIndex;
  const hypPos = child.hypIndex;
  const mismatch = src.ref[refPos] !== src.hyp[hypPos];

  if (mismatch && (src.refCharTypes[refPos] === 0 || src.hypCharTypes[hypPos] === 0)) {
    return null;
  }

  // Must run before the cost is added: it may reset the child's open cost.
  if (src.ref[refPos] === START_DELIMITER) {
    endInsertionSegment(child, parent.hypIndex, parent.refIndex);
  }

  if (mismatch) {
    const onBacktrace = src.backtraceNodeSet.has(parent.index);
    const sameCharType = src.refCharTypes[refPos] === src.hypCharTypes[hypPos];
    child.openCost += sameCharType ? 2 : 3;
    if (!onBacktrace) {
      child.openCost += 1;
    }
  }

  return child.src.ref[child.refIndex] === END_DELIMITER ? endSegment(child) : child;
}
|
|
106
|
+
/**
 * Advance only the ref index by one.
 *
 * Returns null when the ref index is already at its maximum. The open cost
 * grows by 1 when the new ref character has char type 0, otherwise by 2 if
 * the parent node is in `backtraceNodeSet` and by 3 if it is not.
 *
 * @returns The child path, or null (also when `endSegment` returns null).
 */
function addInsert(parent) {
  const { src } = parent;
  if (parent.refIndex >= src.refMaxIndex) {
    return null;
  }

  const child = transitionToChildNode(parent, { refStep: 1, hypStep: 0 });
  const refPos = child.refIndex;

  // Must run before the cost is added: it may reset the child's open cost.
  if (src.ref[refPos] === START_DELIMITER) {
    endInsertionSegment(child, parent.hypIndex, parent.refIndex);
  }

  const onBacktrace = src.backtraceNodeSet.has(parent.index);
  const typeZero = src.refCharTypes[refPos] === 0;
  if (typeZero) {
    child.openCost += 1;
  } else {
    child.openCost += 2;
    child.openCost += onBacktrace ? 0 : 1;
  }

  if (child.src.ref[child.refIndex] === END_DELIMITER) {
    return endSegment(child);
  }
  return child;
}
|
|
126
|
+
/**
 * Advance only the hyp index by one.
 *
 * Returns null when the hyp index is already at its maximum. The open cost
 * grows by 1 when the new hyp character has char type 0, otherwise by 2 if
 * the parent node is in `backtraceNodeSet` and by 3 if it is not. When the
 * new hyp character is END_DELIMITER, `endInsertionSegment` is invoked at
 * the child's own position.
 */
function addDelete(parent) {
  const { src } = parent;
  if (parent.hypIndex >= src.hypMaxIndex) {
    return null;
  }

  const child = transitionToChildNode(parent, { refStep: 0, hypStep: 1 });
  const onBacktrace = src.backtraceNodeSet.has(parent.index);
  const typeZero = src.hypCharTypes[child.hypIndex] === 0;

  if (typeZero) {
    child.openCost += 1;
  } else {
    child.openCost += 2;
    child.openCost += onBacktrace ? 0 : 1;
  }

  const reachedHypEnd = child.src.hyp[child.hypIndex] === END_DELIMITER;
  if (reachedHypEnd) {
    endInsertionSegment(child, child.hypIndex, child.refIndex);
  }
  return child;
}
|
|
140
|
+
/**
 * Close the open segment of `path` at (`hypIndex`, `refIndex`).
 *
 * The open cost is moved into the closed cost, and added a second time when
 * the segment qualifies as a substitution relative to the previous segment
 * end. The given position then becomes the new last segment end and the
 * open cost restarts at 0. Mutates `path` in place.
 */
function resetSegmentVariables(path, hypIndex, refIndex) {
  const segmentCost = path.openCost;
  const countedTwice = isSubstitution(
    hypIndex,
    refIndex,
    path.lastHypIndex,
    path.lastRefIndex,
  );

  path.closedCost += segmentCost;
  if (countedTwice) {
    path.closedCost += segmentCost;
  }

  path.lastHypIndex = hypIndex;
  path.lastRefIndex = refIndex;
  path.openCost = 0;
}
|
|
153
|
+
/**
 * Close a segment that consumed hyp positions but no ref positions.
 *
 * Nothing happens unless `translateSlice` yields a truthy slice for the hyp
 * range (last segment end + 1 up to `hypIndex` + 1) and `refIndex` equals
 * the path's last ref segment end. Otherwise the path's current
 * `[hypIndex, refIndex, openCost]` is appended to a new `endIndices` array
 * and the segment variables are reset at the given position.
 */
function endInsertionSegment(path, hypIndex, refIndex) {
  const hypRange = [path.lastHypIndex + 1, hypIndex + 1];
  const hypSlice = translateSlice(hypRange, path.src.hypIndexMap);

  if (!hypSlice || refIndex !== path.lastRefIndex) {
    return;
  }

  // Build a new array: the parent path shares the old one.
  path.endIndices = [...path.endIndices, [path.hypIndex, path.refIndex, path.openCost]];
  resetSegmentVariables(path, hypIndex, refIndex);
}
|
|
166
|
+
/**
 * Close the open segment of `path` at its current position.
 *
 * Asserts that the ref range since the last segment end translates to a
 * truthy slice. When the segment consumed hyp positions, the hyp range must
 * translate as well (otherwise null is returned and the path is left
 * unchanged), and `atUnambiguousMatchNode` is set to whether the open cost
 * is 0 and the current node is in `unambiguousMatches`. The current
 * `[hypIndex, refIndex, openCost]` is then appended to a new `endIndices`
 * array and the segment variables are reset.
 *
 * @returns The same path object, or null when the hyp range is rejected.
 */
function endSegment(path) {
  const { src } = path;
  const hypSlice = translateSlice(
    [path.lastHypIndex + 1, path.hypIndex + 1],
    src.hypIndexMap,
  );
  const refSlice = translateSlice(
    [path.lastRefIndex + 1, path.refIndex + 1],
    src.refIndexMap,
  );
  assert(!!refSlice);

  const consumedHyp = path.hypIndex !== path.lastHypIndex;
  if (consumedHyp) {
    if (!hypSlice) {
      return null;
    }
    path.atUnambiguousMatchNode =
      path.openCost === 0 && src.unambiguousMatches.has(path.index);
  }

  // Build a new array: the parent path shares the old one.
  path.endIndices = [...path.endIndices, [path.hypIndex, path.refIndex, path.openCost]];
  resetSegmentVariables(path, path.hypIndex, path.refIndex);
  return path;
}
|
|
194
|
+
/**
 * Create the child of `parent` reached by moving `refStep` positions in the
 * ref and `hypStep` positions in the hyp.
 *
 * The child copies the parent's segment bookkeeping and costs, shares the
 * parent's `endIndices` array by reference, starts with
 * `atUnambiguousMatchNode` false, and extends the parent's sort ID with the
 * step code `2 * refStep + hypStep`.
 */
function transitionToChildNode(parent, { refStep, hypStep }) {
  const child = Object.assign(new Path(parent.src), {
    refIndex: parent.refIndex + refStep,
    hypIndex: parent.hypIndex + hypStep,
    lastHypIndex: parent.lastHypIndex,
    lastRefIndex: parent.lastRefIndex,
    closedCost: parent.closedCost,
    openCost: parent.openCost,
    atUnambiguousMatchNode: false,
    endIndices: parent.endIndices,
    sortId: parent.sortId,
  });
  const stepCode = 2 * refStep + hypStep;
  child.updateSortId(BigInt(stepCode));
  return child;
}
|
|
208
|
+
/**
 * A segment counts as a substitution when both the hyp index and the ref
 * index differ from the last segment end.
 */
function isSubstitution(hypIndex, refIndex, lastHypIndex, lastRefIndex) {
  return refIndex !== lastRefIndex && hypIndex !== lastHypIndex;
}
|
|
211
|
+
function errorAlignBeamSearch(src, beamSize = 100) {
|
|
212
|
+
var _a;
|
|
213
|
+
const startPath = new Path(src);
|
|
214
|
+
let beam = [startPath];
|
|
215
|
+
let pruneMap = {};
|
|
216
|
+
const ended = [];
|
|
217
|
+
while (beam.length > 0) {
|
|
218
|
+
const newBeam = {};
|
|
219
|
+
for (const path of beam) {
|
|
220
|
+
if (path.atEnd) {
|
|
221
|
+
ended.push(path);
|
|
222
|
+
continue;
|
|
223
|
+
}
|
|
224
|
+
for (const newPath of expand(path)) {
|
|
225
|
+
const newPathCost = newPath.cost;
|
|
226
|
+
const newPathPruneId = newPath.pruneId;
|
|
227
|
+
if (newPathPruneId in pruneMap) {
|
|
228
|
+
if (newPathCost > pruneMap[newPathPruneId]) {
|
|
229
|
+
continue;
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
pruneMap[newPathPruneId] = newPathCost;
|
|
233
|
+
if (!(newPathPruneId in newBeam) || newPathCost < newBeam[newPathPruneId].cost) {
|
|
234
|
+
newBeam[newPathPruneId] = newPath;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
const newBeamPaths = Object.values(newBeam).toSorted((a, b) => {
|
|
239
|
+
if (a.normCost === b.normCost) {
|
|
240
|
+
const comp = a.sortId - b.sortId;
|
|
241
|
+
if (comp < 0n) return -1;
|
|
242
|
+
if (comp > 0n) return 1;
|
|
243
|
+
return 0;
|
|
244
|
+
}
|
|
245
|
+
return a.normCost - b.normCost;
|
|
246
|
+
});
|
|
247
|
+
beam = newBeamPaths.slice(0, beamSize);
|
|
248
|
+
if ((_a = beam[0]) == null ? void 0 : _a.atUnambiguousMatchNode) {
|
|
249
|
+
beam = beam.slice(0, 1);
|
|
250
|
+
pruneMap = {};
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
const [result] = ended.toSorted((a, b) => {
|
|
254
|
+
if (a.cost === b.cost) {
|
|
255
|
+
const comp = a.sortId - b.sortId;
|
|
256
|
+
if (comp < 0n) return -1;
|
|
257
|
+
if (comp > 0n) return 1;
|
|
258
|
+
return 0;
|
|
259
|
+
}
|
|
260
|
+
return a.cost - b.cost;
|
|
261
|
+
});
|
|
262
|
+
assert(!!result);
|
|
263
|
+
return result;
|
|
264
|
+
}
|
|
265
|
+
export {
|
|
266
|
+
Path,
|
|
267
|
+
errorAlignBeamSearch
|
|
268
|
+
};
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Generated CommonJS interop helpers (presumably emitted by the bundler).
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

// Define every enumerable key of `all` on `target` as an enumerable getter.
const __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};

// Mirror the own properties of `from` onto `to` as getters, skipping keys
// that `to` already owns and the single key named by `except`.
const __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    // Keep the source property's enumerability (enumerable if no descriptor).
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

// Wrap `mod` in a new object flagged with a non-enumerable `__esModule: true`.
const __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
19
|
+
var core_exports = {};
|
|
20
|
+
__export(core_exports, {
|
|
21
|
+
computeErrorAlignDistanceMatrix: () => import_editDistance.computeErrorAlignDistanceMatrix,
|
|
22
|
+
computeLevenshteinDistanceMatrix: () => import_editDistance.computeLevenshteinDistanceMatrix,
|
|
23
|
+
errorAlignBeamSearch: () => import_beamSearch.errorAlignBeamSearch
|
|
24
|
+
});
|
|
25
|
+
module.exports = __toCommonJS(core_exports);
|
|
26
|
+
var import_editDistance = require("./editDistance.cjs");
|
|
27
|
+
var import_beamSearch = require("./beamSearch.cjs");
|
|
28
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
29
|
+
0 && (module.exports = {
|
|
30
|
+
computeErrorAlignDistanceMatrix,
|
|
31
|
+
computeLevenshteinDistanceMatrix,
|
|
32
|
+
errorAlignBeamSearch
|
|
33
|
+
});
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import "../chunk-BIEQXUOY.js";
|
|
2
|
+
import {
|
|
3
|
+
computeErrorAlignDistanceMatrix,
|
|
4
|
+
computeLevenshteinDistanceMatrix
|
|
5
|
+
} from "./editDistance.js";
|
|
6
|
+
import { errorAlignBeamSearch } from "./beamSearch.js";
|
|
7
|
+
export {
|
|
8
|
+
computeErrorAlignDistanceMatrix,
|
|
9
|
+
computeLevenshteinDistanceMatrix,
|
|
10
|
+
errorAlignBeamSearch
|
|
11
|
+
};
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Generated CommonJS interop helpers (presumably emitted by the bundler).
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

// Define every enumerable key of `all` on `target` as an enumerable getter.
const __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};

// Mirror the own properties of `from` onto `to` as getters, skipping keys
// that `to` already owns and the single key named by `except`.
const __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    // Keep the source property's enumerability (enumerable if no descriptor).
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

// Wrap `mod` in a new object flagged with a non-enumerable `__esModule: true`.
const __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
19
|
+
var editDistance_exports = {};
|
|
20
|
+
__export(editDistance_exports, {
|
|
21
|
+
computeDistanceMatrix: () => computeDistanceMatrix,
|
|
22
|
+
computeErrorAlignDistanceMatrix: () => computeErrorAlignDistanceMatrix,
|
|
23
|
+
computeLevenshteinDistanceMatrix: () => computeLevenshteinDistanceMatrix
|
|
24
|
+
});
|
|
25
|
+
module.exports = __toCommonJS(editDistance_exports);
|
|
26
|
+
var import_itertools = require("itertools");
|
|
27
|
+
var import_utils = require("./utils.cjs");
|
|
28
|
+
/**
 * Levenshtein step costs for one (ref, hyp) token pair.
 *
 * @returns `[1, 1, diagCost]`, where `diagCost` is 0 for identical tokens
 *   and 1 otherwise.
 */
function getLevenshteinValues(refToken, hypToken) {
  const tokensEqual = hypToken === refToken;
  return [1, 1, tokensEqual ? 0 : 1];
}
|
|
37
|
+
/**
 * Error-align step costs for one (ref, hyp) token pair.
 *
 * @returns `[1, 1, diagCost]`, where `diagCost` is 0 for identical tokens,
 *   3 when the tokens differ and either one is in `DELIMITERS`, and 2 for
 *   any other mismatch.
 */
function getErrorAlignValues(refToken, hypToken) {
  if (hypToken === refToken) {
    return [1, 1, 0];
  }
  const { DELIMITERS } = import_utils;
  const touchesDelimiter = DELIMITERS.has(hypToken) || DELIMITERS.has(refToken);
  return [1, 1, touchesDelimiter ? 3 : 2];
}
|
|
48
|
+
/**
 * Fill the edit-distance score matrix between `ref` and `hyp`.
 *
 * Rows follow `hyp` and columns follow `ref`, each with one extra leading
 * row/column. `scoreFunc(refToken, hypToken)` must return
 * `[insCost, delCost, diagCost]`; the insert cost applies to a move down a
 * row, the delete cost to a move along a column.
 *
 * With `backtrace` set, a second matrix of the same shape stores, per cell,
 * the op-type combo index of every operation that reaches the minimum
 * (MATCH, INSERT, DELETE, SUBSTITUTE, collected in that order).
 *
 * @returns The score matrix, or `{ scoreMatrix, backtraceMatrix }` when
 *   `backtrace` is truthy.
 */
function computeDistanceMatrix(ref, hyp, scoreFunc, backtrace = false) {
  const rowCount = hyp.length + 1;
  const colCount = ref.length + 1;
  const zeroGrid = () =>
    Array.from({ length: rowCount }, () => new Array(colCount).fill(0));
  // Looked up lazily so the helper is only touched on the backtrace path.
  const comboIndexOf = (ops) => (0, import_utils.getOpTypeComboIndex)(ops);

  const scoreMatrix = zeroGrid();
  for (let col = 0; col < colCount; col++) {
    scoreMatrix[0][col] = col;
  }
  for (let row = 0; row < rowCount; row++) {
    scoreMatrix[row][0] = row;
  }

  let backtraceMatrix = null;
  if (backtrace) {
    backtraceMatrix = zeroGrid();
    backtraceMatrix[0][0] = comboIndexOf(["MATCH"]);
    for (let col = 1; col < colCount; col++) {
      backtraceMatrix[0][col] = comboIndexOf(["DELETE"]);
    }
    for (let row = 1; row < rowCount; row++) {
      backtraceMatrix[row][0] = comboIndexOf(["INSERT"]);
    }
  }

  // Column-major traversal, matching the order in which scoreFunc is invoked.
  for (let col = 1; col < colCount; col++) {
    for (let row = 1; row < rowCount; row++) {
      const [insCost, delCost, diagCost] = scoreFunc(ref[col - 1], hyp[row - 1]);
      const viaInsert = scoreMatrix[row - 1][col] + insCost;
      const viaDelete = scoreMatrix[row][col - 1] + delCost;
      const viaDiagonal = scoreMatrix[row - 1][col - 1] + diagCost;
      const best = Math.min(viaInsert, viaDelete, viaDiagonal);
      scoreMatrix[row][col] = best;

      if (backtraceMatrix) {
        const diagonalIsBest = viaDiagonal === best;
        const ops = [];
        if (diagonalIsBest && diagCost <= 0) ops.push("MATCH");
        if (viaInsert === best) ops.push("INSERT");
        if (viaDelete === best) ops.push("DELETE");
        if (diagonalIsBest && diagCost > 0) ops.push("SUBSTITUTE");
        backtraceMatrix[row][col] = comboIndexOf(ops);
      }
    }
  }

  return backtraceMatrix ? { scoreMatrix, backtraceMatrix } : scoreMatrix;
}
|
|
104
|
+
/**
 * Run `computeDistanceMatrix` with the Levenshtein cost function.
 *
 * @returns Whatever `computeDistanceMatrix` returns for these arguments.
 */
function computeLevenshteinDistanceMatrix(ref, hyp, backtrace = false) {
  const scoreFunc = getLevenshteinValues;
  const matrices = computeDistanceMatrix(ref, hyp, scoreFunc, backtrace);
  return matrices;
}
|
|
107
|
+
/**
 * Run `computeDistanceMatrix` with the error-align cost function.
 *
 * @returns Whatever `computeDistanceMatrix` returns for these arguments.
 */
function computeErrorAlignDistanceMatrix(ref, hyp, backtrace = false) {
  const scoreFunc = getErrorAlignValues;
  const matrices = computeDistanceMatrix(ref, hyp, scoreFunc, backtrace);
  return matrices;
}
|
|
110
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
111
|
+
0 && (module.exports = {
|
|
112
|
+
computeDistanceMatrix,
|
|
113
|
+
computeErrorAlignDistanceMatrix,
|
|
114
|
+
computeLevenshteinDistanceMatrix
|
|
115
|
+
});
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compute the edit distance score matrix between two sequences x (hyp) and y (ref)
|
|
3
|
+
* using plain nested JavaScript arrays.
|
|
4
|
+
*
|
|
5
|
+
* @param ref The reference sequence/transcript.
|
|
6
|
+
* @param hyp The hypothesis sequence/transcript.
|
|
7
|
+
* @param scoreFunc A function that takes two tokens (refToken, hypToken) and returns
|
|
8
|
+
* a tuple of [insertionCost, deletionCost, diagonalCost], in that order
|
|
9
|
+
* @param backtrace Whether to compute the backtrace matrix.
|
|
10
|
+
* @returns The score matrix and optionally the backtrace matrix
|
|
11
|
+
*/
|
|
12
|
+
declare function computeDistanceMatrix(ref: string | string[], hyp: string | string[], scoreFunc: (refToken: string, hypToken: string) => [number, number, number]): number[][];
|
|
13
|
+
declare function computeDistanceMatrix(ref: string | string[], hyp: string | string[], scoreFunc: (refToken: string, hypToken: string) => [number, number, number], backtrace: boolean): {
|
|
14
|
+
scoreMatrix: number[][];
|
|
15
|
+
backtraceMatrix: number[][];
|
|
16
|
+
};
|
|
17
|
+
/**
|
|
18
|
+
* Compute the Levenshtein distance matrix between two sequences.
|
|
19
|
+
*
|
|
20
|
+
* @param ref The reference sequence/transcript.
|
|
21
|
+
* @param hyp The hypothesis sequence/transcript.
|
|
22
|
+
* @param backtrace Whether to compute the backtrace matrix.
|
|
23
|
+
*
|
|
24
|
+
* @returns The score matrix and optionally the backtrace matrix
|
|
25
|
+
*/
|
|
26
|
+
declare function computeLevenshteinDistanceMatrix(ref: string | string[], hyp: string | string[]): number[][];
|
|
27
|
+
declare function computeLevenshteinDistanceMatrix(ref: string | string[], hyp: string | string[], backtrace: true): {
|
|
28
|
+
scoreMatrix: number[][];
|
|
29
|
+
backtraceMatrix: number[][];
|
|
30
|
+
};
|
|
31
|
+
/**
|
|
32
|
+
* Compute the error alignment distance matrix between two sequences.
|
|
33
|
+
*
|
|
34
|
+
* @param ref The reference sequence/transcript.
|
|
35
|
+
* @param hyp The hypothesis sequence/transcript.
|
|
36
|
+
* @param backtrace Whether to compute the backtrace matrix.
|
|
37
|
+
*
|
|
38
|
+
* @returns The score matrix and optionally the backtrace matrix.
|
|
39
|
+
*/
|
|
40
|
+
declare function computeErrorAlignDistanceMatrix(ref: string | string[], hyp: string | string[]): number[][];
|
|
41
|
+
declare function computeErrorAlignDistanceMatrix(ref: string | string[], hyp: string | string[], backtrace: true): {
|
|
42
|
+
scoreMatrix: number[][];
|
|
43
|
+
backtraceMatrix: number[][];
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
export { computeDistanceMatrix, computeErrorAlignDistanceMatrix, computeLevenshteinDistanceMatrix };
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compute the edit distance score matrix between two sequences x (hyp) and y (ref)
|
|
3
|
+
* using plain nested JavaScript arrays.
|
|
4
|
+
*
|
|
5
|
+
* @param ref The reference sequence/transcript.
|
|
6
|
+
* @param hyp The hypothesis sequence/transcript.
|
|
7
|
+
* @param scoreFunc A function that takes two tokens (refToken, hypToken) and returns
|
|
8
|
+
* a tuple of [insertionCost, deletionCost, diagonalCost], in that order
|
|
9
|
+
* @param backtrace Whether to compute the backtrace matrix.
|
|
10
|
+
* @returns The score matrix and optionally the backtrace matrix
|
|
11
|
+
*/
|
|
12
|
+
declare function computeDistanceMatrix(ref: string | string[], hyp: string | string[], scoreFunc: (refToken: string, hypToken: string) => [number, number, number]): number[][];
|
|
13
|
+
declare function computeDistanceMatrix(ref: string | string[], hyp: string | string[], scoreFunc: (refToken: string, hypToken: string) => [number, number, number], backtrace: boolean): {
|
|
14
|
+
scoreMatrix: number[][];
|
|
15
|
+
backtraceMatrix: number[][];
|
|
16
|
+
};
|
|
17
|
+
/**
|
|
18
|
+
* Compute the Levenshtein distance matrix between two sequences.
|
|
19
|
+
*
|
|
20
|
+
* @param ref The reference sequence/transcript.
|
|
21
|
+
* @param hyp The hypothesis sequence/transcript.
|
|
22
|
+
* @param backtrace Whether to compute the backtrace matrix.
|
|
23
|
+
*
|
|
24
|
+
* @returns The score matrix and optionally the backtrace matrix
|
|
25
|
+
*/
|
|
26
|
+
declare function computeLevenshteinDistanceMatrix(ref: string | string[], hyp: string | string[]): number[][];
|
|
27
|
+
declare function computeLevenshteinDistanceMatrix(ref: string | string[], hyp: string | string[], backtrace: true): {
|
|
28
|
+
scoreMatrix: number[][];
|
|
29
|
+
backtraceMatrix: number[][];
|
|
30
|
+
};
|
|
31
|
+
/**
|
|
32
|
+
* Compute the error alignment distance matrix between two sequences.
|
|
33
|
+
*
|
|
34
|
+
* @param ref The reference sequence/transcript.
|
|
35
|
+
* @param hyp The hypothesis sequence/transcript.
|
|
36
|
+
* @param backtrace Whether to compute the backtrace matrix.
|
|
37
|
+
*
|
|
38
|
+
* @returns The score matrix and optionally the backtrace matrix.
|
|
39
|
+
*/
|
|
40
|
+
declare function computeErrorAlignDistanceMatrix(ref: string | string[], hyp: string | string[]): number[][];
|
|
41
|
+
declare function computeErrorAlignDistanceMatrix(ref: string | string[], hyp: string | string[], backtrace: true): {
|
|
42
|
+
scoreMatrix: number[][];
|
|
43
|
+
backtraceMatrix: number[][];
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
export { computeDistanceMatrix, computeErrorAlignDistanceMatrix, computeLevenshteinDistanceMatrix };
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import "../chunk-BIEQXUOY.js";
|
|
2
|
+
import { range } from "itertools";
|
|
3
|
+
import { DELIMITERS, getOpTypeComboIndex } from "./utils.js";
|
|
4
|
+
/**
 * Levenshtein step costs for one (ref, hyp) token pair.
 *
 * @returns `[1, 1, diagCost]`, where `diagCost` is 0 for identical tokens
 *   and 1 otherwise.
 */
function getLevenshteinValues(refToken, hypToken) {
  const tokensEqual = hypToken === refToken;
  return [1, 1, tokensEqual ? 0 : 1];
}
|
|
13
|
+
/**
 * Error-align step costs for one (ref, hyp) token pair.
 *
 * @returns `[1, 1, diagCost]`, where `diagCost` is 0 for identical tokens,
 *   3 when the tokens differ and either one is in `DELIMITERS`, and 2 for
 *   any other mismatch.
 */
function getErrorAlignValues(refToken, hypToken) {
  if (hypToken === refToken) {
    return [1, 1, 0];
  }
  const touchesDelimiter = DELIMITERS.has(hypToken) || DELIMITERS.has(refToken);
  return [1, 1, touchesDelimiter ? 3 : 2];
}
|
|
24
|
+
/**
 * Fill the edit-distance score matrix between `ref` and `hyp`.
 *
 * Rows follow `hyp` and columns follow `ref`, each with one extra leading
 * row/column. `scoreFunc(refToken, hypToken)` must return
 * `[insCost, delCost, diagCost]`; the insert cost applies to a move down a
 * row, the delete cost to a move along a column.
 *
 * With `backtrace` set, a second matrix of the same shape stores, per cell,
 * the op-type combo index of every operation that reaches the minimum
 * (MATCH, INSERT, DELETE, SUBSTITUTE, collected in that order).
 *
 * @returns The score matrix, or `{ scoreMatrix, backtraceMatrix }` when
 *   `backtrace` is truthy.
 */
function computeDistanceMatrix(ref, hyp, scoreFunc, backtrace = false) {
  const rowCount = hyp.length + 1;
  const colCount = ref.length + 1;
  const zeroGrid = () =>
    Array.from({ length: rowCount }, () => new Array(colCount).fill(0));

  const scoreMatrix = zeroGrid();
  for (let col = 0; col < colCount; col++) {
    scoreMatrix[0][col] = col;
  }
  for (let row = 0; row < rowCount; row++) {
    scoreMatrix[row][0] = row;
  }

  let backtraceMatrix = null;
  if (backtrace) {
    backtraceMatrix = zeroGrid();
    backtraceMatrix[0][0] = getOpTypeComboIndex(["MATCH"]);
    for (let col = 1; col < colCount; col++) {
      backtraceMatrix[0][col] = getOpTypeComboIndex(["DELETE"]);
    }
    for (let row = 1; row < rowCount; row++) {
      backtraceMatrix[row][0] = getOpTypeComboIndex(["INSERT"]);
    }
  }

  // Column-major traversal, matching the order in which scoreFunc is invoked.
  for (let col = 1; col < colCount; col++) {
    for (let row = 1; row < rowCount; row++) {
      const [insCost, delCost, diagCost] = scoreFunc(ref[col - 1], hyp[row - 1]);
      const viaInsert = scoreMatrix[row - 1][col] + insCost;
      const viaDelete = scoreMatrix[row][col - 1] + delCost;
      const viaDiagonal = scoreMatrix[row - 1][col - 1] + diagCost;
      const best = Math.min(viaInsert, viaDelete, viaDiagonal);
      scoreMatrix[row][col] = best;

      if (backtraceMatrix) {
        const diagonalIsBest = viaDiagonal === best;
        const ops = [];
        if (diagonalIsBest && diagCost <= 0) ops.push("MATCH");
        if (viaInsert === best) ops.push("INSERT");
        if (viaDelete === best) ops.push("DELETE");
        if (diagonalIsBest && diagCost > 0) ops.push("SUBSTITUTE");
        backtraceMatrix[row][col] = getOpTypeComboIndex(ops);
      }
    }
  }

  return backtraceMatrix ? { scoreMatrix, backtraceMatrix } : scoreMatrix;
}
|
|
80
|
+
/**
 * Run `computeDistanceMatrix` with the Levenshtein cost function.
 *
 * @returns Whatever `computeDistanceMatrix` returns for these arguments.
 */
function computeLevenshteinDistanceMatrix(ref, hyp, backtrace = false) {
  const scoreFunc = getLevenshteinValues;
  const matrices = computeDistanceMatrix(ref, hyp, scoreFunc, backtrace);
  return matrices;
}
|
|
83
|
+
/**
 * Run `computeDistanceMatrix` with the error-align cost function.
 *
 * @returns Whatever `computeDistanceMatrix` returns for these arguments.
 */
function computeErrorAlignDistanceMatrix(ref, hyp, backtrace = false) {
  const scoreFunc = getErrorAlignValues;
  const matrices = computeDistanceMatrix(ref, hyp, scoreFunc, backtrace);
  return matrices;
}
|
|
86
|
+
export {
|
|
87
|
+
computeDistanceMatrix,
|
|
88
|
+
computeErrorAlignDistanceMatrix,
|
|
89
|
+
computeLevenshteinDistanceMatrix
|
|
90
|
+
};
|