@storyteller-platform/align 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/__tests__/align.test.cjs +6 -5
- package/dist/align/__tests__/align.test.js +6 -5
- package/dist/align/align.cjs +133 -81
- package/dist/align/align.d.cts +1 -0
- package/dist/align/align.d.ts +1 -0
- package/dist/align/align.js +133 -81
- package/dist/align/getSentenceRanges.cjs +78 -149
- package/dist/align/getSentenceRanges.d.cts +1 -1
- package/dist/align/getSentenceRanges.d.ts +1 -1
- package/dist/align/getSentenceRanges.js +78 -149
- package/dist/align/slugify.cjs +16 -8
- package/dist/align/slugify.js +16 -8
- package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
- package/dist/errorAlign/__tests__/native.test.cjs +118 -0
- package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/native.test.js +107 -0
- package/dist/errorAlign/backtraceGraph.cjs +298 -0
- package/dist/errorAlign/backtraceGraph.d.cts +103 -0
- package/dist/errorAlign/backtraceGraph.d.ts +103 -0
- package/dist/errorAlign/backtraceGraph.js +270 -0
- package/dist/errorAlign/beamSearch.cjs +302 -0
- package/dist/errorAlign/beamSearch.d.cts +53 -0
- package/dist/errorAlign/beamSearch.d.ts +53 -0
- package/dist/errorAlign/beamSearch.js +268 -0
- package/dist/errorAlign/core.cjs +33 -0
- package/dist/errorAlign/core.d.cts +5 -0
- package/dist/errorAlign/core.d.ts +5 -0
- package/dist/errorAlign/core.js +11 -0
- package/dist/errorAlign/editDistance.cjs +115 -0
- package/dist/errorAlign/editDistance.d.cts +46 -0
- package/dist/errorAlign/editDistance.d.ts +46 -0
- package/dist/errorAlign/editDistance.js +90 -0
- package/dist/errorAlign/errorAlign.cjs +159 -0
- package/dist/errorAlign/errorAlign.d.cts +15 -0
- package/dist/errorAlign/errorAlign.d.ts +15 -0
- package/dist/errorAlign/errorAlign.js +145 -0
- package/dist/errorAlign/graphMetadata.cjs +97 -0
- package/dist/errorAlign/graphMetadata.d.cts +44 -0
- package/dist/errorAlign/graphMetadata.d.ts +44 -0
- package/dist/errorAlign/graphMetadata.js +64 -0
- package/dist/errorAlign/hash.cjs +173 -0
- package/dist/errorAlign/hash.d.cts +28 -0
- package/dist/errorAlign/hash.d.ts +28 -0
- package/dist/errorAlign/hash.js +150 -0
- package/dist/errorAlign/native.cjs +60 -0
- package/dist/errorAlign/native.d.cts +18 -0
- package/dist/errorAlign/native.d.ts +18 -0
- package/dist/errorAlign/native.js +24 -0
- package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
- package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
- package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
- package/dist/errorAlign/node-gyp-build.d.js +0 -0
- package/dist/errorAlign/pathToAlignment.cjs +122 -0
- package/dist/errorAlign/pathToAlignment.d.cts +11 -0
- package/dist/errorAlign/pathToAlignment.d.ts +11 -0
- package/dist/errorAlign/pathToAlignment.js +89 -0
- package/dist/errorAlign/utils.cjs +301 -0
- package/dist/errorAlign/utils.d.cts +107 -0
- package/dist/errorAlign/utils.d.ts +107 -0
- package/dist/errorAlign/utils.js +248 -0
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/markup/__tests__/markup.test.cjs +108 -81
- package/dist/markup/__tests__/markup.test.js +109 -82
- package/dist/markup/__tests__/parseDom.test.cjs +112 -0
- package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
- package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
- package/dist/markup/__tests__/parseDom.test.js +89 -0
- package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
- package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
- package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
- package/dist/markup/__tests__/serializeDom.test.js +97 -0
- package/dist/markup/__tests__/transform.test.cjs +122 -0
- package/dist/markup/__tests__/transform.test.d.cts +2 -0
- package/dist/markup/__tests__/transform.test.d.ts +2 -0
- package/dist/markup/__tests__/transform.test.js +99 -0
- package/dist/markup/map.cjs +261 -0
- package/dist/markup/map.d.cts +50 -0
- package/dist/markup/map.d.ts +50 -0
- package/dist/markup/map.js +236 -0
- package/dist/markup/markup.cjs +23 -201
- package/dist/markup/markup.d.cts +5 -9
- package/dist/markup/markup.d.ts +5 -9
- package/dist/markup/markup.js +24 -203
- package/dist/markup/model.cjs +172 -0
- package/dist/markup/model.d.cts +57 -0
- package/dist/markup/model.d.ts +57 -0
- package/dist/markup/model.js +145 -0
- package/dist/markup/parseDom.cjs +59 -0
- package/dist/markup/parseDom.d.cts +7 -0
- package/dist/markup/parseDom.d.ts +7 -0
- package/dist/markup/parseDom.js +35 -0
- package/dist/markup/segmentation.cjs +11 -57
- package/dist/markup/segmentation.d.cts +6 -2
- package/dist/markup/segmentation.d.ts +6 -2
- package/dist/markup/segmentation.js +11 -58
- package/dist/markup/serializeDom.cjs +87 -0
- package/dist/markup/serializeDom.d.cts +7 -0
- package/dist/markup/serializeDom.d.ts +7 -0
- package/dist/markup/serializeDom.js +63 -0
- package/dist/markup/transform.cjs +92 -0
- package/dist/markup/transform.d.cts +11 -0
- package/dist/markup/transform.d.ts +11 -0
- package/dist/markup/transform.js +71 -0
- package/dist/types/node-gyp-build.d.cjs +1 -0
- package/dist/types/node-gyp-build.d.d.cts +3 -0
- package/dist/types/node-gyp-build.d.d.ts +3 -0
- package/dist/types/node-gyp-build.d.js +0 -0
- package/package.json +11 -4
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import assert from "node:assert";
|
|
2
|
+
import { describe, test } from "node:test";
|
|
3
|
+
import { errorAlignBeamSearch as tsBeamSearch } from "../beamSearch.js";
|
|
4
|
+
import {
|
|
5
|
+
computeErrorAlignDistanceMatrix as tsErrorAlign,
|
|
6
|
+
computeLevenshteinDistanceMatrix as tsLevenshtein
|
|
7
|
+
} from "../editDistance.js";
|
|
8
|
+
import { SubgraphMetadata } from "../graphMetadata.js";
|
|
9
|
+
import {
|
|
10
|
+
computeErrorAlignDistanceMatrix as nativeErrorAlign,
|
|
11
|
+
computeLevenshteinDistanceMatrix as nativeLevenshtein,
|
|
12
|
+
errorAlignBeamSearch as nativeBeamSearch
|
|
13
|
+
} from "../native.js";
|
|
14
|
+
import { getAlignments } from "../pathToAlignment.js";
|
|
15
|
+
import {
|
|
16
|
+
basicNormalizer,
|
|
17
|
+
basicTokenizer,
|
|
18
|
+
ensureLengthPreservation,
|
|
19
|
+
unpackRegexMatch
|
|
20
|
+
} from "../utils.js";
|
|
21
|
+
void describe("native C++ vs TypeScript implementations", () => {
|
|
22
|
+
void describe("Levenshtein distance matrix", () => {
|
|
23
|
+
void test("string input", () => {
|
|
24
|
+
const ref = "kitten";
|
|
25
|
+
const hyp = "sitting";
|
|
26
|
+
const tsResult = tsLevenshtein(ref, hyp);
|
|
27
|
+
const nativeResult = nativeLevenshtein(ref, hyp);
|
|
28
|
+
assert.deepStrictEqual(nativeResult, tsResult);
|
|
29
|
+
});
|
|
30
|
+
void test("string array input", () => {
|
|
31
|
+
const ref = ["hello", "world"];
|
|
32
|
+
const hyp = ["hello", "there"];
|
|
33
|
+
const tsResult = tsLevenshtein(ref, hyp);
|
|
34
|
+
const nativeResult = nativeLevenshtein(ref, hyp);
|
|
35
|
+
assert.deepStrictEqual(nativeResult, tsResult);
|
|
36
|
+
});
|
|
37
|
+
void test("with backtrace", () => {
|
|
38
|
+
const ref = "kitten";
|
|
39
|
+
const hyp = "sitting";
|
|
40
|
+
const tsResult = tsLevenshtein(ref, hyp, true);
|
|
41
|
+
const nativeResult = nativeLevenshtein(ref, hyp, true);
|
|
42
|
+
assert.deepStrictEqual(nativeResult, tsResult);
|
|
43
|
+
});
|
|
44
|
+
});
|
|
45
|
+
void describe("error align distance matrix", () => {
|
|
46
|
+
void test("string input", () => {
|
|
47
|
+
const ref = "test";
|
|
48
|
+
const hyp = "best";
|
|
49
|
+
const tsResult = tsErrorAlign(ref, hyp);
|
|
50
|
+
const nativeResult = nativeErrorAlign(ref, hyp);
|
|
51
|
+
assert.deepStrictEqual(nativeResult, tsResult);
|
|
52
|
+
});
|
|
53
|
+
void test("with backtrace", () => {
|
|
54
|
+
const ref = "test";
|
|
55
|
+
const hyp = "best";
|
|
56
|
+
const tsResult = tsErrorAlign(ref, hyp, true);
|
|
57
|
+
const nativeResult = nativeErrorAlign(ref, hyp, true);
|
|
58
|
+
assert.deepStrictEqual(nativeResult, tsResult);
|
|
59
|
+
});
|
|
60
|
+
});
|
|
61
|
+
void describe("beam search", () => {
|
|
62
|
+
/**
 * Build the SubgraphMetadata fixture shared by the beam-search tests.
 *
 * Both strings are tokenized with the unpacked basic tokenizer, and the
 * first element of every token match is run through the
 * length-preserving basic normalizer.
 * NOTE(review): presumably each token match is a tuple whose first entry is
 * the token text — confirm against unpackRegexMatch in utils.
 *
 * @param ref Reference text.
 * @param hyp Hypothesis text.
 * @returns A SubgraphMetadata built from the raw texts, their token matches
 *          and the normalized tokens.
 */
function buildSubgraphMetadata(ref, hyp) {
  const tokenize = unpackRegexMatch(basicTokenizer);
  const normalize = ensureLengthPreservation(basicNormalizer);
  const normalizeFirst = (match) => normalize(match[0]);

  const refTokenMatches = tokenize(ref);
  const hypTokenMatches = tokenize(hyp);
  const refNorm = refTokenMatches.map(normalizeFirst);
  const hypNorm = hypTokenMatches.map(normalizeFirst);

  return new SubgraphMetadata(
    ref,
    hyp,
    refTokenMatches,
    hypTokenMatches,
    refNorm,
    hypNorm
  );
}
|
|
80
|
+
void test("simple substitution", () => {
|
|
81
|
+
const src = buildSubgraphMetadata("hello", "jello");
|
|
82
|
+
const tsPath = tsBeamSearch(src);
|
|
83
|
+
const nativePath = nativeBeamSearch(src);
|
|
84
|
+
const tsAlignments = getAlignments(tsPath);
|
|
85
|
+
const nativeAlignments = getAlignments(nativePath);
|
|
86
|
+
assert.deepStrictEqual(nativeAlignments, tsAlignments);
|
|
87
|
+
});
|
|
88
|
+
void test("multi-word alignment with all op types", () => {
|
|
89
|
+
const ref = "This is a substitution test deleted.";
|
|
90
|
+
const hyp = "Inserted this is a contribution test.";
|
|
91
|
+
const src = buildSubgraphMetadata(ref, hyp);
|
|
92
|
+
const tsPath = tsBeamSearch(src);
|
|
93
|
+
const nativePath = nativeBeamSearch(src);
|
|
94
|
+
const tsAlignments = getAlignments(tsPath);
|
|
95
|
+
const nativeAlignments = getAlignments(nativePath);
|
|
96
|
+
assert.deepStrictEqual(nativeAlignments, tsAlignments);
|
|
97
|
+
});
|
|
98
|
+
void test("identical strings", () => {
|
|
99
|
+
const src = buildSubgraphMetadata("test words", "test words");
|
|
100
|
+
const tsPath = tsBeamSearch(src);
|
|
101
|
+
const nativePath = nativeBeamSearch(src);
|
|
102
|
+
const tsAlignments = getAlignments(tsPath);
|
|
103
|
+
const nativeAlignments = getAlignments(nativePath);
|
|
104
|
+
assert.deepStrictEqual(nativeAlignments, tsAlignments);
|
|
105
|
+
});
|
|
106
|
+
});
|
|
107
|
+
});
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __create = Object.create;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __export = (target, all) => {
|
|
9
|
+
for (var name in all)
|
|
10
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
11
|
+
};
|
|
12
|
+
var __copyProps = (to, from, except, desc) => {
|
|
13
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
14
|
+
for (let key of __getOwnPropNames(from))
|
|
15
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
16
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
17
|
+
}
|
|
18
|
+
return to;
|
|
19
|
+
};
|
|
20
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
+
mod
|
|
27
|
+
));
|
|
28
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
|
+
var backtraceGraph_exports = {};
|
|
30
|
+
__export(backtraceGraph_exports, {
|
|
31
|
+
BacktraceGraph: () => BacktraceGraph,
|
|
32
|
+
Node: () => Node
|
|
33
|
+
});
|
|
34
|
+
module.exports = __toCommonJS(backtraceGraph_exports);
|
|
35
|
+
var import_node_assert = __toESM(require("node:assert"), 1);
|
|
36
|
+
var import_itertools = require("itertools");
|
|
37
|
+
var import_utils = require("./utils.cjs");
|
|
38
|
+
/**
 * A single cell (hypIndex, refIndex) of the backtrace matrix, viewed as a
 * graph node with edges to its neighbours.
 */
class Node {
  /** Edges leaving this node, stored in a Map. */
  children = /* @__PURE__ */ new Map();
  /** Edges entering this node, stored in a Map. */
  parents = /* @__PURE__ */ new Map();

  /**
   * @param hypIndex Row index of the node.
   * @param refIndex Column index of the node.
   */
  constructor(hypIndex, refIndex) {
    this.hypIndex = hypIndex;
    this.refIndex = refIndex;
  }

  /**
   * The node position as a fresh `[hypIndex, refIndex]` pair.
   */
  get index() {
    const { hypIndex, refIndex } = this;
    return [hypIndex, refIndex];
  }

  /**
   * The node position shifted by one in both dimensions so that it lines up
   * with the hypothesis and reference strings.
   *
   * The node at (0, 0) therefore reports (-1, -1).
   */
  get offsetIndex() {
    const { hypIndex, refIndex } = this;
    return [hypIndex - 1, refIndex - 1];
  }

  /**
   * True when the node has no children.
   */
  get isTerminal() {
    const childCount = this.children.size;
    return childCount === 0;
  }

  /**
   * True when the node has no parents.
   */
  get isRoot() {
    const parentCount = this.parents.size;
    return parentCount === 0;
  }
}
|
|
69
|
+
/**
 * Map from `[hypIndex, refIndex]` pairs to nodes.
 *
 * Arrays are compared by identity in a plain Map, so each pair is turned
 * into a `"hyp-ref"` string key. The original pair is stored next to the
 * node so it can be handed back when iterating.
 */
class NodeMap {
  /** Backing Map: string key -> `{ index, node }` record. */
  map;

  /**
   * @param entries Optional array of `[index, node]` pairs used to seed the
   *                map; `null`/`undefined` yields an empty map.
   */
  constructor(entries) {
    const toKeyedRecord = ([index, node]) => [
      `${index[0]}-${index[1]}`,
      { index, node }
    ];
    this.map = new Map(entries?.map(toKeyedRecord));
  }

  /**
   * Look up the node stored for an index pair.
   *
   * @returns The node, or `undefined` when the pair is not present.
   */
  get(index) {
    const [hypIndex, refIndex] = index;
    const record = this.map.get(`${hypIndex}-${refIndex}`);
    return record?.node;
  }

  /**
   * Store (or overwrite) the node for an index pair.
   */
  set(index, node) {
    const [hypIndex, refIndex] = index;
    const record = { index: [hypIndex, refIndex], node };
    this.map.set(`${hypIndex}-${refIndex}`, record);
  }

  /**
   * Report whether an index pair has a node stored for it.
   */
  has(index) {
    const [hypIndex, refIndex] = index;
    return this.map.has(`${hypIndex}-${refIndex}`);
  }

  /**
   * Yield `[index, node]` pairs in insertion order of the backing Map.
   */
  *entries() {
    for (const record of this.map.values()) {
      yield [record.index, record.node];
    }
  }

  /**
   * Yield the stored nodes in insertion order of the backing Map.
   */
  *values() {
    for (const record of this.map.values()) {
      yield record.node;
    }
  }
}
|
|
101
|
+
/**
 * Backtrace alignment graph built lazily from a backtrace matrix.
 *
 * Rows of the matrix are hypothesis positions and columns are reference
 * positions. Each cell holds a code that is looked up in OP_TYPE_COMBO_MAP
 * to get the operation types leading into that cell.
 */
class BacktraceGraph {
  /**
   * @param backtrackMatrix Two-dimensional array of op-type combo codes.
   *                        The first row must exist, since its length is
   *                        read to determine the reference dimension.
   */
  constructor(backtrackMatrix) {
    this.backtrackMatrix = backtrackMatrix;
    this.hypDim = backtrackMatrix.length;
    this.refDim = backtrackMatrix[0].length;
    this.hypMaxIndex = this.hypDim - 1;
    this.refMaxIndex = this.refDim - 1;
  }
  hypDim;
  refDim;
  hypMaxIndex;
  refMaxIndex;
  // Cache for the `nodes` getter; stays null until first access.
  _nodes = null;
  /**
   * Get the nodes in the graph.
   *
   * The graph is built on first access and cached. Construction starts at
   * the bottom-right cell and walks every matrix index in reverse order,
   * adding parents only for indices that have already been reached. The
   * result is then re-created sorted by hyp index, then ref index.
   */
  get nodes() {
    if (this._nodes) return this._nodes;
    const terminalNode = new Node(this.hypMaxIndex, this.refMaxIndex);
    this._nodes = new NodeMap([[terminalNode.index, terminalNode]]);
    for (const index of this.iterTopologicalOrder({ reverse: true })) {
      // (0, 0) is the root and has no parents to add.
      if (this._nodes.has(index) && (index[0] !== 0 || index[1] !== 0)) {
        this.addParentsFromBacktrace(index);
      }
    }
    this._nodes = new NodeMap(
      Array.from(this._nodes.entries()).toSorted(([[a1, a2]], [[b1, b2]]) => {
        if (a1 === b1) return a2 - b2;
        return a1 - b1;
      })
    );
    return this._nodes;
  }
  /**
   * Get the node at the given index.
   *
   * @param hypIndex Hyp/row index.
   * @param refIndex Ref/column index.
   * @returns The node, or `undefined` if that cell is not part of the graph.
   */
  getNode(hypIndex, refIndex) {
    return this.nodes.get([hypIndex, refIndex]);
  }
  /**
   * Get the set of all node indices in the graph.
   *
   * The members are the nodes' offset indices (each shifted by -1). Every
   * member is a distinct array object, so `Set.has` with a new array will
   * not find it.
   */
  getNodeSet() {
    const transitions = /* @__PURE__ */ new Set();
    for (const node of this.nodes.values()) {
      transitions.add(node.offsetIndex);
    }
    return transitions;
  }
  /**
   * Get a path through the graph, from the node at (0, 0) to a terminal node.
   *
   * @param sample If true, the next operation at each step is picked by
   *               `choose` from the current node's child op types.
   *               Otherwise the first child op type is always taken.
   * @returns A list of `[opType, node]` pairs, one per step, in walk order.
   */
  getPath({ sample } = { sample: false }) {
    let node = this.getNode(0, 0);
    (0, import_node_assert.default)(node?.isRoot, "The node at (-1, -1) was expected to be a root node.");
    const path = [];
    while (!node.isTerminal) {
      const opType = sample ? choose(Array.from(node.children.keys())) : (
        // A non-terminal node has at least one child, so the first key exists.
        node.children.keys().next().value
      );
      node = node.children.get(opType);
      path.push([opType, node]);
    }
    // Fix: the path was previously built and then discarded.
    return path;
  }
  /**
   * Get nodes that can only be accounted for by a match.
   *
   * A node qualifies when MATCH is its only parent operation and its ref
   * index and hyp index are each counted exactly once among nodes with a
   * ref-consuming / hyp-consuming parent operation.
   *
   * @returns A list of index tuples (offset by -1) representing the
   *          unambiguous node matches, sorted by ref index.
   */
  getUnambiguousNodeMatches() {
    const matchIndices = /* @__PURE__ */ new Set();
    const matchPerToken = {
      ref: new import_utils.Counter(),
      hyp: new import_utils.Counter()
    };
    const refOpTypes = /* @__PURE__ */ new Set(["MATCH", "SUBSTITUTE", "DELETE"]);
    const hypOpTypes = /* @__PURE__ */ new Set(["MATCH", "SUBSTITUTE", "INSERT"]);
    for (const [[hypIndex, refIndex], node] of this.nodes.entries()) {
      if (node.parents.size === 1 && node.parents.has("MATCH")) {
        matchIndices.add([hypIndex, refIndex]);
      }
      // `node.parents` is a Map; it is passed as the set-like argument, so
      // the intersection is taken against its keys (the op types).
      if (refOpTypes.intersection(node.parents).size) {
        matchPerToken.ref.set(refIndex, matchPerToken.ref.get(refIndex) + 1);
      }
      if (hypOpTypes.intersection(node.parents).size) {
        matchPerToken.hyp.set(hypIndex, matchPerToken.hyp.get(hypIndex) + 1);
      }
    }
    const unambiguousMatches = [];
    for (const [hypIndex, refIndex] of matchIndices) {
      if (matchPerToken.ref.get(refIndex) === 1 && matchPerToken.hyp.get(hypIndex) === 1) {
        unambiguousMatches.push([hypIndex - 1, refIndex - 1]);
      }
    }
    return unambiguousMatches.toSorted(([_a, a], [_b, b]) => a - b);
  }
  /**
   * Get word spans (i.e., <...>) that are unambiguously matched.
   *
   * That is, there is only one subpath that can account for the span using MATCH operations.
   * Other subpaths that include INSERT, DELETE, SUBSTITUTE operations are not considered.
   *
   * @param ref Reference string; a one-character pad is prepended so that
   *            string positions line up with matrix column indices.
   * @returns A set of index tuples (offset by -1) representing the end node
   *          of unambiguous span matches.
   */
  getUnambiguousTokenSpanMatches(ref) {
    ref = "_" + ref;
    const monoMatchEndNodes = /* @__PURE__ */ new Set();
    const refIndexes = new import_utils.Counter();
    const hypIndexes = new import_utils.Counter();
    for (const [[hypIndex, refIndex], node] of this.nodes.entries()) {
      if (!node.parents.has("MATCH") || ref[refIndex] !== import_utils.START_DELIMITER)
        continue;
      // Walk diagonally from the start delimiter until the end delimiter.
      let _refIndex = refIndex + 1;
      let _hypIndex = hypIndex + 1;
      while (true) {
        const _index = [_hypIndex, _refIndex];
        if (!this.nodes.has(_index)) {
          break;
        }
        // NOTE(review): `parents` is always a Map, so this check can never
        // break out. Given the doc above, it looks like it was meant to be
        // `!parents.has("MATCH")` — confirm against the reference
        // implementation before changing behaviour.
        if (!this.nodes.get(_index)?.parents) {
          break;
        }
        if (ref[_refIndex] === import_utils.END_DELIMITER) {
          const endIndex = _index;
          monoMatchEndNodes.add(endIndex);
          refIndexes.set(_refIndex, refIndexes.get(_refIndex) + 1);
          hypIndexes.set(_hypIndex, hypIndexes.get(_hypIndex) + 1);
          break;
        }
        _refIndex += 1;
        _hypIndex += 1;
      }
    }
    // Keep only end nodes whose row and column were each reached once.
    return new Set(
      Array.from(monoMatchEndNodes).filter(([h, r]) => hypIndexes.get(h) === 1 && refIndexes.get(r) === 1).map(([h, r]) => [h - 1, r - 1])
    );
  }
  /**
   * Create a parent node based on the index of the current node and the operation type.
   *
   * DELETE keeps the hyp index, INSERT keeps the ref index, and every other
   * operation steps back by one in that dimension. The parent is created in
   * `_nodes` if it does not exist yet.
   *
   * @returns The (possibly newly created) parent node.
   */
  parentNodeFromOpType(index, opType) {
    const hypIndex = opType !== "DELETE" ? index[0] - 1 : index[0];
    const refIndex = opType !== "INSERT" ? index[1] - 1 : index[1];
    const parentIndex = [hypIndex, refIndex];
    const nodes = this._nodes;
    (0, import_node_assert.default)(!!nodes, "Called parentIndexFromOpType before instantiating _nodes");
    if (!nodes.has(parentIndex)) {
      nodes.set(parentIndex, new Node(...parentIndex));
    }
    return nodes.get(parentIndex);
  }
  /**
   * Iterate through the nodes in topological order.
   *
   * Yields every `[i, j]` matrix index row by row; with `reverse` both the
   * row and the column ranges are walked backwards.
   */
  *iterTopologicalOrder({ reverse } = { reverse: false }) {
    for (const i of reverse ? (0, import_utils.reversed)((0, import_itertools.range)(this.hypDim)) : (0, import_itertools.range)(this.hypDim)) {
      for (const j of reverse ? (0, import_utils.reversed)((0, import_itertools.range)(this.refDim)) : (0, import_itertools.range)(this.refDim)) {
        yield [i, j];
      }
    }
  }
  /**
   * Add parents to the node at the given index based on the backtrace matrix.
   *
   * Each op type listed for the cell links the node to its parent in both
   * directions (node.parents and parent.children), keyed by op type.
   */
  addParentsFromBacktrace(index) {
    const node = this._nodes?.get(index);
    (0, import_node_assert.default)(
      !!node,
      `Node at index ${index.toString()} does not exist in the graph.`
    );
    const opTypeComboCode = this.backtrackMatrix[node.hypIndex][node.refIndex];
    const opTypeCombo = import_utils.OP_TYPE_COMBO_MAP[opTypeComboCode];
    for (const opType of opTypeCombo) {
      const parentNode = this.parentNodeFromOpType(node.index, opType);
      node.parents.set(opType, parentNode);
      parentNode.children.set(opType, node);
    }
  }
}
|
|
290
|
+
/**
 * Pick one element of `choices` at random, each with equal probability.
 *
 * `Math.floor` is used instead of `Math.round` because `Math.random()` is in
 * [0, 1): rounding could produce `choices.length` (out of bounds) and gave
 * the first element only half the weight of the others.
 *
 * @param choices Array to pick from.
 * @returns One element of `choices`, or `undefined` when it is empty.
 */
function choose(choices) {
  const index = Math.floor(Math.random() * choices.length);
  return choices[index];
}
|
|
294
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
295
|
+
0 && (module.exports = {
|
|
296
|
+
BacktraceGraph,
|
|
297
|
+
Node
|
|
298
|
+
});
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { OpType } from './utils.cjs';
|
|
2
|
+
|
|
3
|
+
type Index = readonly [number, number];
|
|
4
|
+
/**
|
|
5
|
+
* Node in the backtrace graph corresponding to the index (i, j) in the backtrace matrix.
|
|
6
|
+
*/
|
|
7
|
+
declare class Node {
|
|
8
|
+
hypIndex: number;
|
|
9
|
+
refIndex: number;
|
|
10
|
+
children: Map<OpType, Node>;
|
|
11
|
+
parents: Map<OpType, Node>;
|
|
12
|
+
constructor(hypIndex: number, refIndex: number);
|
|
13
|
+
get index(): Index;
|
|
14
|
+
/**
|
|
15
|
+
* Get the offset index of the node so indices match the hypothesis and reference strings.
|
|
16
|
+
*
|
|
17
|
+
* Root will be at (-1, -1).
|
|
18
|
+
*/
|
|
19
|
+
get offsetIndex(): Index;
|
|
20
|
+
/**
|
|
21
|
+
* Check if the node is a terminal node (i.e., it has no children).
|
|
22
|
+
*/
|
|
23
|
+
get isTerminal(): boolean;
|
|
24
|
+
/**
|
|
25
|
+
* Check if the node is a root node (i.e., it has no parents).
|
|
26
|
+
*/
|
|
27
|
+
get isRoot(): boolean;
|
|
28
|
+
}
|
|
29
|
+
declare class NodeMap {
|
|
30
|
+
private map;
|
|
31
|
+
constructor(entries?: readonly (readonly [Index, Node])[] | null);
|
|
32
|
+
get([hypIndex, refIndex]: Index): Node | undefined;
|
|
33
|
+
set([hypIndex, refIndex]: Index, node: Node): void;
|
|
34
|
+
has([hypIndex, refIndex]: Index): boolean;
|
|
35
|
+
entries(): Generator<readonly [Index, Node], void, unknown>;
|
|
36
|
+
values(): Generator<Node, void, unknown>;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Backtrace alignment graph
|
|
40
|
+
*/
|
|
41
|
+
declare class BacktraceGraph {
|
|
42
|
+
backtrackMatrix: number[][];
|
|
43
|
+
hypDim: number;
|
|
44
|
+
refDim: number;
|
|
45
|
+
hypMaxIndex: number;
|
|
46
|
+
refMaxIndex: number;
|
|
47
|
+
private _nodes;
|
|
48
|
+
constructor(backtrackMatrix: number[][]);
|
|
49
|
+
/**
|
|
50
|
+
* Get the nodes in the graph.
|
|
51
|
+
*/
|
|
52
|
+
get nodes(): NodeMap;
|
|
53
|
+
/**
|
|
54
|
+
* Get the node at the given index.
|
|
55
|
+
*
|
|
56
|
+
* @param hypIndex Hyp/row index.
|
|
57
|
+
* @param refIndex Ref/column index.
|
|
58
|
+
*/
|
|
59
|
+
getNode(hypIndex: number, refIndex: number): Node | undefined;
|
|
60
|
+
/**
|
|
61
|
+
* Get the set of all node indices in the graph.
|
|
62
|
+
*/
|
|
63
|
+
getNodeSet(): Set<Index>;
|
|
64
|
+
/**
|
|
65
|
+
* Get a path through the graph.
|
|
66
|
+
*
|
|
67
|
+
* @param sample If true, sample a path randomly based on transition probabilities.
|
|
68
|
+
* Otherwise, return the first path deterministically.
|
|
69
|
+
* @returns A list of nodes representing the path.
|
|
70
|
+
*/
|
|
71
|
+
getPath({ sample }?: {
|
|
72
|
+
sample: boolean;
|
|
73
|
+
}): void;
|
|
74
|
+
/**
|
|
75
|
+
* Get nodes that can only be accounted for by a match.
|
|
76
|
+
*
|
|
77
|
+
* @returns A list of index tuples representing the unambiguous node matches.
|
|
78
|
+
*/
|
|
79
|
+
getUnambiguousNodeMatches(): Index[];
|
|
80
|
+
/**
|
|
81
|
+
* Get word spans (i.e., <...>) that are unambiguously matched.
|
|
82
|
+
*
|
|
83
|
+
* That is, there is only one subpath that can account for the span using MATCH operations.
|
|
84
|
+
* Other subpaths that include INSERT, DELETE, SUBSTITUTE operations are not considered.
|
|
85
|
+
*
|
|
86
|
+
* @returns A list of index tuples representing the end node of unambiguous span matches.
|
|
87
|
+
*/
|
|
88
|
+
getUnambiguousTokenSpanMatches(ref: string): Set<Index>;
|
|
89
|
+
/**
|
|
90
|
+
* Create a parent node based on the index of the current node and the operation type.
|
|
91
|
+
*/
|
|
92
|
+
private parentNodeFromOpType;
|
|
93
|
+
/**
|
|
94
|
+
* Iterate through the nodes in topological order.
|
|
95
|
+
*/
|
|
96
|
+
private iterTopologicalOrder;
|
|
97
|
+
/**
|
|
98
|
+
* Add parents to the node at the given index based on the backtrace matrix.
|
|
99
|
+
*/
|
|
100
|
+
private addParentsFromBacktrace;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
export { BacktraceGraph, type Index, Node };
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { OpType } from './utils.js';
|
|
2
|
+
|
|
3
|
+
type Index = readonly [number, number];
|
|
4
|
+
/**
|
|
5
|
+
* Node in the backtrace graph corresponding to the index (i, j) in the backtrace matrix.
|
|
6
|
+
*/
|
|
7
|
+
declare class Node {
|
|
8
|
+
hypIndex: number;
|
|
9
|
+
refIndex: number;
|
|
10
|
+
children: Map<OpType, Node>;
|
|
11
|
+
parents: Map<OpType, Node>;
|
|
12
|
+
constructor(hypIndex: number, refIndex: number);
|
|
13
|
+
get index(): Index;
|
|
14
|
+
/**
|
|
15
|
+
* Get the offset index of the node so indices match the hypothesis and reference strings.
|
|
16
|
+
*
|
|
17
|
+
* Root will be at (-1, -1).
|
|
18
|
+
*/
|
|
19
|
+
get offsetIndex(): Index;
|
|
20
|
+
/**
|
|
21
|
+
* Check if the node is a terminal node (i.e., it has no children).
|
|
22
|
+
*/
|
|
23
|
+
get isTerminal(): boolean;
|
|
24
|
+
/**
|
|
25
|
+
* Check if the node is a root node (i.e., it has no parents).
|
|
26
|
+
*/
|
|
27
|
+
get isRoot(): boolean;
|
|
28
|
+
}
|
|
29
|
+
declare class NodeMap {
|
|
30
|
+
private map;
|
|
31
|
+
constructor(entries?: readonly (readonly [Index, Node])[] | null);
|
|
32
|
+
get([hypIndex, refIndex]: Index): Node | undefined;
|
|
33
|
+
set([hypIndex, refIndex]: Index, node: Node): void;
|
|
34
|
+
has([hypIndex, refIndex]: Index): boolean;
|
|
35
|
+
entries(): Generator<readonly [Index, Node], void, unknown>;
|
|
36
|
+
values(): Generator<Node, void, unknown>;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Backtrace alignment graph
|
|
40
|
+
*/
|
|
41
|
+
declare class BacktraceGraph {
|
|
42
|
+
backtrackMatrix: number[][];
|
|
43
|
+
hypDim: number;
|
|
44
|
+
refDim: number;
|
|
45
|
+
hypMaxIndex: number;
|
|
46
|
+
refMaxIndex: number;
|
|
47
|
+
private _nodes;
|
|
48
|
+
constructor(backtrackMatrix: number[][]);
|
|
49
|
+
/**
|
|
50
|
+
* Get the nodes in the graph.
|
|
51
|
+
*/
|
|
52
|
+
get nodes(): NodeMap;
|
|
53
|
+
/**
|
|
54
|
+
* Get the node at the given index.
|
|
55
|
+
*
|
|
56
|
+
* @param hypIndex Hyp/row index.
|
|
57
|
+
* @param refIndex Ref/column index.
|
|
58
|
+
*/
|
|
59
|
+
getNode(hypIndex: number, refIndex: number): Node | undefined;
|
|
60
|
+
/**
|
|
61
|
+
* Get the set of all node indices in the graph.
|
|
62
|
+
*/
|
|
63
|
+
getNodeSet(): Set<Index>;
|
|
64
|
+
/**
|
|
65
|
+
* Get a path through the graph.
|
|
66
|
+
*
|
|
67
|
+
* @param sample If true, sample a path randomly based on transition probabilities.
|
|
68
|
+
* Otherwise, return the first path deterministically.
|
|
69
|
+
* @returns A list of nodes representing the path.
|
|
70
|
+
*/
|
|
71
|
+
getPath({ sample }?: {
|
|
72
|
+
sample: boolean;
|
|
73
|
+
}): void;
|
|
74
|
+
/**
|
|
75
|
+
* Get nodes that can only be accounted for by a match.
|
|
76
|
+
*
|
|
77
|
+
* @returns A list of index tuples representing the unambiguous node matches.
|
|
78
|
+
*/
|
|
79
|
+
getUnambiguousNodeMatches(): Index[];
|
|
80
|
+
/**
|
|
81
|
+
* Get word spans (i.e., <...>) that are unambiguously matched.
|
|
82
|
+
*
|
|
83
|
+
* That is, there is only one subpath that can account for the span using MATCH operations.
|
|
84
|
+
* Other subpaths that include INSERT, DELETE, SUBSTITUTE operations are not considered.
|
|
85
|
+
*
|
|
86
|
+
* @returns A list of index tuples representing the end node of unambiguous span matches.
|
|
87
|
+
*/
|
|
88
|
+
getUnambiguousTokenSpanMatches(ref: string): Set<Index>;
|
|
89
|
+
/**
|
|
90
|
+
* Create a parent node based on the index of the current node and the operation type.
|
|
91
|
+
*/
|
|
92
|
+
private parentNodeFromOpType;
|
|
93
|
+
/**
|
|
94
|
+
* Iterate through the nodes in topological order.
|
|
95
|
+
*/
|
|
96
|
+
private iterTopologicalOrder;
|
|
97
|
+
/**
|
|
98
|
+
* Add parents to the node at the given index based on the backtrace matrix.
|
|
99
|
+
*/
|
|
100
|
+
private addParentsFromBacktrace;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
export { BacktraceGraph, type Index, Node };
|