@storyteller-platform/align 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/dist/align/__tests__/align.test.cjs +6 -5
  2. package/dist/align/__tests__/align.test.js +6 -5
  3. package/dist/align/align.cjs +133 -81
  4. package/dist/align/align.d.cts +1 -0
  5. package/dist/align/align.d.ts +1 -0
  6. package/dist/align/align.js +133 -81
  7. package/dist/align/getSentenceRanges.cjs +78 -149
  8. package/dist/align/getSentenceRanges.d.cts +1 -1
  9. package/dist/align/getSentenceRanges.d.ts +1 -1
  10. package/dist/align/getSentenceRanges.js +78 -149
  11. package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
  12. package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
  13. package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
  14. package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
  15. package/dist/errorAlign/__tests__/native.test.cjs +118 -0
  16. package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
  17. package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
  18. package/dist/errorAlign/__tests__/native.test.js +107 -0
  19. package/dist/errorAlign/backtraceGraph.cjs +298 -0
  20. package/dist/errorAlign/backtraceGraph.d.cts +103 -0
  21. package/dist/errorAlign/backtraceGraph.d.ts +103 -0
  22. package/dist/errorAlign/backtraceGraph.js +270 -0
  23. package/dist/errorAlign/beamSearch.cjs +302 -0
  24. package/dist/errorAlign/beamSearch.d.cts +53 -0
  25. package/dist/errorAlign/beamSearch.d.ts +53 -0
  26. package/dist/errorAlign/beamSearch.js +268 -0
  27. package/dist/errorAlign/core.cjs +33 -0
  28. package/dist/errorAlign/core.d.cts +5 -0
  29. package/dist/errorAlign/core.d.ts +5 -0
  30. package/dist/errorAlign/core.js +11 -0
  31. package/dist/errorAlign/editDistance.cjs +115 -0
  32. package/dist/errorAlign/editDistance.d.cts +46 -0
  33. package/dist/errorAlign/editDistance.d.ts +46 -0
  34. package/dist/errorAlign/editDistance.js +90 -0
  35. package/dist/errorAlign/errorAlign.cjs +159 -0
  36. package/dist/errorAlign/errorAlign.d.cts +15 -0
  37. package/dist/errorAlign/errorAlign.d.ts +15 -0
  38. package/dist/errorAlign/errorAlign.js +145 -0
  39. package/dist/errorAlign/graphMetadata.cjs +97 -0
  40. package/dist/errorAlign/graphMetadata.d.cts +44 -0
  41. package/dist/errorAlign/graphMetadata.d.ts +44 -0
  42. package/dist/errorAlign/graphMetadata.js +64 -0
  43. package/dist/errorAlign/hash.cjs +173 -0
  44. package/dist/errorAlign/hash.d.cts +28 -0
  45. package/dist/errorAlign/hash.d.ts +28 -0
  46. package/dist/errorAlign/hash.js +150 -0
  47. package/dist/errorAlign/native.cjs +60 -0
  48. package/dist/errorAlign/native.d.cts +18 -0
  49. package/dist/errorAlign/native.d.ts +18 -0
  50. package/dist/errorAlign/native.js +24 -0
  51. package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
  52. package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
  53. package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
  54. package/dist/errorAlign/node-gyp-build.d.js +0 -0
  55. package/dist/errorAlign/pathToAlignment.cjs +122 -0
  56. package/dist/errorAlign/pathToAlignment.d.cts +11 -0
  57. package/dist/errorAlign/pathToAlignment.d.ts +11 -0
  58. package/dist/errorAlign/pathToAlignment.js +89 -0
  59. package/dist/errorAlign/utils.cjs +301 -0
  60. package/dist/errorAlign/utils.d.cts +107 -0
  61. package/dist/errorAlign/utils.d.ts +107 -0
  62. package/dist/errorAlign/utils.js +248 -0
  63. package/dist/index.d.cts +1 -0
  64. package/dist/index.d.ts +1 -0
  65. package/dist/markup/__tests__/markup.test.cjs +108 -81
  66. package/dist/markup/__tests__/markup.test.js +109 -82
  67. package/dist/markup/__tests__/parseDom.test.cjs +112 -0
  68. package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
  69. package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
  70. package/dist/markup/__tests__/parseDom.test.js +89 -0
  71. package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
  72. package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
  73. package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
  74. package/dist/markup/__tests__/serializeDom.test.js +97 -0
  75. package/dist/markup/__tests__/transform.test.cjs +122 -0
  76. package/dist/markup/__tests__/transform.test.d.cts +2 -0
  77. package/dist/markup/__tests__/transform.test.d.ts +2 -0
  78. package/dist/markup/__tests__/transform.test.js +99 -0
  79. package/dist/markup/map.cjs +261 -0
  80. package/dist/markup/map.d.cts +50 -0
  81. package/dist/markup/map.d.ts +50 -0
  82. package/dist/markup/map.js +236 -0
  83. package/dist/markup/markup.cjs +23 -201
  84. package/dist/markup/markup.d.cts +5 -9
  85. package/dist/markup/markup.d.ts +5 -9
  86. package/dist/markup/markup.js +24 -203
  87. package/dist/markup/model.cjs +172 -0
  88. package/dist/markup/model.d.cts +57 -0
  89. package/dist/markup/model.d.ts +57 -0
  90. package/dist/markup/model.js +145 -0
  91. package/dist/markup/parseDom.cjs +59 -0
  92. package/dist/markup/parseDom.d.cts +7 -0
  93. package/dist/markup/parseDom.d.ts +7 -0
  94. package/dist/markup/parseDom.js +35 -0
  95. package/dist/markup/segmentation.cjs +11 -57
  96. package/dist/markup/segmentation.d.cts +6 -2
  97. package/dist/markup/segmentation.d.ts +6 -2
  98. package/dist/markup/segmentation.js +11 -58
  99. package/dist/markup/serializeDom.cjs +87 -0
  100. package/dist/markup/serializeDom.d.cts +7 -0
  101. package/dist/markup/serializeDom.d.ts +7 -0
  102. package/dist/markup/serializeDom.js +63 -0
  103. package/dist/markup/transform.cjs +92 -0
  104. package/dist/markup/transform.d.cts +11 -0
  105. package/dist/markup/transform.d.ts +11 -0
  106. package/dist/markup/transform.js +71 -0
  107. package/dist/types/node-gyp-build.d.cjs +1 -0
  108. package/dist/types/node-gyp-build.d.d.cts +3 -0
  109. package/dist/types/node-gyp-build.d.d.ts +3 -0
  110. package/dist/types/node-gyp-build.d.js +0 -0
  111. package/package.json +11 -4
@@ -0,0 +1,268 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import assert from "node:assert";
3
+ import { hash } from "./hash.js";
4
+ import { END_DELIMITER, START_DELIMITER, translateSlice } from "./utils.js";
5
+ const INT64_MASK = (1n << 64n) - 1n;
6
+ const SORT_ID_BASE = 146527n;
7
/**
 * One candidate path tracked by the beam search.
 *
 * A path records its current (hypIndex, refIndex) position, the position at
 * which the last segment was closed, the cost already closed out and the cost
 * still open for the current segment, the list of segment end markers, and a
 * BigInt sort key used to break ties deterministically.
 */
class Path {
  refIndex = -1;
  hypIndex = -1;
  lastHypIndex = -1;
  lastRefIndex = -1;
  closedCost = 0;
  openCost = 0;
  atUnambiguousMatchNode = false;
  endIndices = [];
  sortId = 0n;

  /**
   * @param src Shared search context; this class reads `hypMaxIndex` and
   *   `refMaxIndex` from it.
   */
  constructor(src) {
    this.src = src;
  }

  /**
   * Key used for pruning: the hash of the current and last-closed indices.
   */
  get pruneId() {
    const { hypIndex, refIndex, lastHypIndex, lastRefIndex } = this;
    return hash([hypIndex, refIndex, lastHypIndex, lastRefIndex]);
  }

  /**
   * Total cost of the path. The open cost is counted a second time when the
   * open segment currently qualifies as a substitution.
   */
  get cost() {
    const openIsSubstitution = isSubstitution(
      this.hypIndex,
      this.refIndex,
      this.lastHypIndex,
      this.lastRefIndex
    );
    const surcharge = openIsSubstitution ? this.openCost : 0;
    return this.closedCost + this.openCost + surcharge;
  }

  /**
   * Cost divided by (refIndex + hypIndex + 3); zero cost stays zero.
   */
  get normCost() {
    const total = this.cost;
    return total === 0 ? 0 : total / (this.refIndex + this.hypIndex + 3);
  }

  /**
   * Current node as a fresh `[hypIndex, refIndex]` pair.
   */
  get index() {
    const { hypIndex, refIndex } = this;
    return [hypIndex, refIndex];
  }

  /**
   * True once both indices equal the maxima stored on `src`.
   */
  get atEnd() {
    const { hypMaxIndex, refMaxIndex } = this.src;
    return this.hypIndex === hypMaxIndex && this.refIndex === refMaxIndex;
  }

  /**
   * Fold step value `t` (a BigInt) into the sort key, truncated to 64 bits
   * so the ordering matches the C++ implementation.
   */
  updateSortId(t) {
    const widened = this.sortId * SORT_ID_BASE + t;
    this.sortId = widened & INT64_MASK;
  }
}
70
/**
 * Lazily yield the children of `parent`: the delete step, then the insert
 * step, then the substitution-or-match step, skipping any that return a
 * falsy value.
 */
function* expand(parent) {
  const steps = [addDelete, addInsert, addSubstitutionOrMatch];
  for (const step of steps) {
    const child = step(parent);
    if (child) yield child;
  }
}
78
/**
 * Advance one step along both the reference and the hypothesis.
 *
 * Returns null when either index is already at its maximum, when the two
 * characters differ and either has char type 0 (treated here as a delimiter),
 * or when closing the segment fails. A mismatch adds 2 to the open cost when
 * the char types agree and 3 otherwise, plus 1 when the parent node is not in
 * the backtrace node set.
 */
function addSubstitutionOrMatch(parent) {
  const { src } = parent;
  if (parent.refIndex >= src.refMaxIndex || parent.hypIndex >= src.hypMaxIndex) {
    return null;
  }

  const child = transitionToChildNode(parent, { refStep: 1, hypStep: 1 });
  const refChar = src.ref[child.refIndex];
  const mismatch = refChar !== src.hyp[child.hypIndex];

  if (mismatch) {
    const refIsDelimiter = src.refCharTypes[child.refIndex] === 0;
    const hypIsDelimiter = src.hypCharTypes[child.hypIndex] === 0;
    if (refIsDelimiter || hypIsDelimiter) return null;
  }

  // A reference start delimiter closes any pending insertion-only segment,
  // measured from the parent's position.
  if (refChar === START_DELIMITER) {
    endInsertionSegment(child, parent.hypIndex, parent.refIndex);
  }

  if (mismatch) {
    const onBacktrace = src.backtraceNodeSet.has(parent.index);
    const sameCharType =
      src.refCharTypes[child.refIndex] === src.hypCharTypes[child.hypIndex];
    child.openCost += sameCharType ? 2 : 3;
    child.openCost += onBacktrace ? 0 : 1;
  }

  return refChar === END_DELIMITER ? endSegment(child) : child;
}
106
/**
 * Advance one step along the reference only.
 *
 * Returns null when the reference index is already at its maximum. The step
 * adds 1 to the open cost when the new reference character has char type 0
 * (treated here as a delimiter) and 2 otherwise, plus 1 more when it is not a
 * delimiter and the parent node is not in the backtrace node set.
 */
function addInsert(parent) {
  const { src } = parent;
  if (parent.refIndex >= src.refMaxIndex) {
    return null;
  }

  const child = transitionToChildNode(parent, { refStep: 1, hypStep: 0 });
  const refChar = src.ref[child.refIndex];

  // A reference start delimiter closes any pending insertion-only segment,
  // measured from the parent's position.
  if (refChar === START_DELIMITER) {
    endInsertionSegment(child, parent.hypIndex, parent.refIndex);
  }

  const onBacktrace = src.backtraceNodeSet.has(parent.index);
  const delimiter = src.refCharTypes[child.refIndex] === 0;
  child.openCost += delimiter ? 1 : 2;
  child.openCost += onBacktrace || delimiter ? 0 : 1;

  return refChar === END_DELIMITER ? endSegment(child) : child;
}
126
/**
 * Advance one step along the hypothesis only.
 *
 * Returns null when the hypothesis index is already at its maximum. The step
 * adds 1 to the open cost when the new hypothesis character has char type 0
 * (treated here as a delimiter) and 2 otherwise, plus 1 more when it is not a
 * delimiter and the parent node is not in the backtrace node set.
 */
function addDelete(parent) {
  const { src } = parent;
  if (parent.hypIndex >= src.hypMaxIndex) {
    return null;
  }

  const child = transitionToChildNode(parent, { refStep: 0, hypStep: 1 });
  const onBacktrace = src.backtraceNodeSet.has(parent.index);
  const delimiter = src.hypCharTypes[child.hypIndex] === 0;
  child.openCost += delimiter ? 1 : 2;
  child.openCost += onBacktrace || delimiter ? 0 : 1;

  // A hypothesis end delimiter may close an insertion-only segment at the
  // child's own position.
  const hypChar = child.src.hyp[child.hypIndex];
  if (hypChar === END_DELIMITER) {
    endInsertionSegment(child, child.hypIndex, child.refIndex);
  }
  return child;
}
140
/**
 * Close the open segment of `path` at (hypIndex, refIndex).
 *
 * The open cost is moved into the closed cost, counted twice when the segment
 * being closed is a substitution relative to the previous segment end. The
 * given indices then become the new segment end and the open cost is zeroed.
 */
function resetSegmentVariables(path, hypIndex, refIndex) {
  const { lastHypIndex, lastRefIndex } = path;
  path.closedCost += path.openCost;
  const closesSubstitution = isSubstitution(
    hypIndex,
    refIndex,
    lastHypIndex,
    lastRefIndex
  );
  path.closedCost += closesSubstitution ? path.openCost : 0;
  Object.assign(path, {
    lastHypIndex: hypIndex,
    lastRefIndex: refIndex,
    openCost: 0,
  });
}
153
/**
 * Close a segment that covers hypothesis characters but no reference
 * characters.
 *
 * Does nothing unless the hypothesis range since the last segment end
 * translates to a truthy slice and `refIndex` equals the last closed
 * reference index. Otherwise it appends an end marker (to a new array, so the
 * previous `endIndices` array is left untouched) and closes the segment.
 */
function endInsertionSegment(path, hypIndex, refIndex) {
  const hypRange = [path.lastHypIndex + 1, hypIndex + 1];
  const hypSlice = translateSlice(hypRange, path.src.hypIndexMap);
  const refIsEmpty = refIndex === path.lastRefIndex;
  if (!hypSlice || !refIsEmpty) {
    return;
  }
  const marker = [path.hypIndex, path.refIndex, path.openCost];
  path.endIndices = [...path.endIndices, marker];
  resetSegmentVariables(path, hypIndex, refIndex);
}
166
/**
 * Close the current segment at the path's own position.
 *
 * Asserts that the reference range since the last segment end translates to a
 * truthy slice. When the hypothesis side is non-empty its range must also
 * translate, otherwise null is returned; in that non-empty case the path is
 * flagged as sitting on an unambiguous match when the open cost is zero and
 * the node is in `src.unambiguousMatches`.
 *
 * @returns The same path object after closing the segment, or null.
 */
function endSegment(path) {
  const { src } = path;
  const hypSlice = translateSlice(
    [path.lastHypIndex + 1, path.hypIndex + 1],
    src.hypIndexMap
  );
  const refSlice = translateSlice(
    [path.lastRefIndex + 1, path.refIndex + 1],
    src.refIndexMap
  );
  assert(!!refSlice);

  const hypHasContent = path.hypIndex !== path.lastHypIndex;
  if (hypHasContent) {
    if (!hypSlice) {
      return null;
    }
    const zeroCostSegment = path.openCost === 0;
    path.atUnambiguousMatchNode =
      zeroCostSegment && src.unambiguousMatches.has(path.index);
  }

  // The end marker is appended to a new array so the previous one is not
  // mutated.
  const marker = [path.hypIndex, path.refIndex, path.openCost];
  path.endIndices = [...path.endIndices, marker];
  resetSegmentVariables(path, path.hypIndex, path.refIndex);
  return path;
}
194
/**
 * Create the child of `parent` reached by moving `refStep` along the
 * reference and `hypStep` along the hypothesis.
 *
 * The child copies the parent's segment state and costs, shares the parent's
 * `endIndices` array by reference, starts with `atUnambiguousMatchNode`
 * cleared, and extends the parent's sort key with `2 * refStep + hypStep`.
 */
function transitionToChildNode(parent, { refStep, hypStep }) {
  const child = new Path(parent.src);
  Object.assign(child, {
    refIndex: parent.refIndex + refStep,
    hypIndex: parent.hypIndex + hypStep,
    lastHypIndex: parent.lastHypIndex,
    lastRefIndex: parent.lastRefIndex,
    closedCost: parent.closedCost,
    openCost: parent.openCost,
    atUnambiguousMatchNode: false,
    endIndices: parent.endIndices,
    sortId: parent.sortId,
  });
  const stepCode = refStep + refStep + hypStep;
  child.updateSortId(BigInt(stepCode));
  return child;
}
208
/**
 * True when both the hypothesis index and the reference index differ from
 * their last-closed counterparts.
 */
function isSubstitution(hypIndex, refIndex, lastHypIndex, lastRefIndex) {
  const refAdvanced = refIndex !== lastRefIndex;
  const hypAdvanced = hypIndex !== lastHypIndex;
  return refAdvanced && hypAdvanced;
}
211
+ function errorAlignBeamSearch(src, beamSize = 100) {
212
+ var _a;
213
+ const startPath = new Path(src);
214
+ let beam = [startPath];
215
+ let pruneMap = {};
216
+ const ended = [];
217
+ while (beam.length > 0) {
218
+ const newBeam = {};
219
+ for (const path of beam) {
220
+ if (path.atEnd) {
221
+ ended.push(path);
222
+ continue;
223
+ }
224
+ for (const newPath of expand(path)) {
225
+ const newPathCost = newPath.cost;
226
+ const newPathPruneId = newPath.pruneId;
227
+ if (newPathPruneId in pruneMap) {
228
+ if (newPathCost > pruneMap[newPathPruneId]) {
229
+ continue;
230
+ }
231
+ }
232
+ pruneMap[newPathPruneId] = newPathCost;
233
+ if (!(newPathPruneId in newBeam) || newPathCost < newBeam[newPathPruneId].cost) {
234
+ newBeam[newPathPruneId] = newPath;
235
+ }
236
+ }
237
+ }
238
+ const newBeamPaths = Object.values(newBeam).toSorted((a, b) => {
239
+ if (a.normCost === b.normCost) {
240
+ const comp = a.sortId - b.sortId;
241
+ if (comp < 0n) return -1;
242
+ if (comp > 0n) return 1;
243
+ return 0;
244
+ }
245
+ return a.normCost - b.normCost;
246
+ });
247
+ beam = newBeamPaths.slice(0, beamSize);
248
+ if ((_a = beam[0]) == null ? void 0 : _a.atUnambiguousMatchNode) {
249
+ beam = beam.slice(0, 1);
250
+ pruneMap = {};
251
+ }
252
+ }
253
+ const [result] = ended.toSorted((a, b) => {
254
+ if (a.cost === b.cost) {
255
+ const comp = a.sortId - b.sortId;
256
+ if (comp < 0n) return -1;
257
+ if (comp > 0n) return 1;
258
+ return 0;
259
+ }
260
+ return a.cost - b.cost;
261
+ });
262
+ assert(!!result);
263
+ return result;
264
+ }
265
+ export {
266
+ Path,
267
+ errorAlignBeamSearch
268
+ };
@@ -0,0 +1,33 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var core_exports = {};
20
+ __export(core_exports, {
21
+ computeErrorAlignDistanceMatrix: () => import_editDistance.computeErrorAlignDistanceMatrix,
22
+ computeLevenshteinDistanceMatrix: () => import_editDistance.computeLevenshteinDistanceMatrix,
23
+ errorAlignBeamSearch: () => import_beamSearch.errorAlignBeamSearch
24
+ });
25
+ module.exports = __toCommonJS(core_exports);
26
+ var import_editDistance = require("./editDistance.cjs");
27
+ var import_beamSearch = require("./beamSearch.cjs");
28
+ // Annotate the CommonJS export names for ESM import in node:
29
+ 0 && (module.exports = {
30
+ computeErrorAlignDistanceMatrix,
31
+ computeLevenshteinDistanceMatrix,
32
+ errorAlignBeamSearch
33
+ });
@@ -0,0 +1,5 @@
1
+ export { computeErrorAlignDistanceMatrix, computeLevenshteinDistanceMatrix } from './editDistance.cjs';
2
+ export { errorAlignBeamSearch } from './beamSearch.cjs';
3
+ import './backtraceGraph.cjs';
4
+ import './utils.cjs';
5
+ import './graphMetadata.cjs';
@@ -0,0 +1,5 @@
1
+ export { computeErrorAlignDistanceMatrix, computeLevenshteinDistanceMatrix } from './editDistance.js';
2
+ export { errorAlignBeamSearch } from './beamSearch.js';
3
+ import './backtraceGraph.js';
4
+ import './utils.js';
5
+ import './graphMetadata.js';
@@ -0,0 +1,11 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import {
3
+ computeErrorAlignDistanceMatrix,
4
+ computeLevenshteinDistanceMatrix
5
+ } from "./editDistance.js";
6
+ import { errorAlignBeamSearch } from "./beamSearch.js";
7
+ export {
8
+ computeErrorAlignDistanceMatrix,
9
+ computeLevenshteinDistanceMatrix,
10
+ errorAlignBeamSearch
11
+ };
@@ -0,0 +1,115 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var editDistance_exports = {};
20
+ __export(editDistance_exports, {
21
+ computeDistanceMatrix: () => computeDistanceMatrix,
22
+ computeErrorAlignDistanceMatrix: () => computeErrorAlignDistanceMatrix,
23
+ computeLevenshteinDistanceMatrix: () => computeLevenshteinDistanceMatrix
24
+ });
25
+ module.exports = __toCommonJS(editDistance_exports);
26
+ var import_itertools = require("itertools");
27
+ var import_utils = require("./utils.cjs");
28
/**
 * Unit-cost scoring: the first two entries are always 1 and the diagonal
 * entry is 0 for identical tokens, 1 otherwise.
 */
function getLevenshteinValues(refToken, hypToken) {
  const tokensEqual = hypToken === refToken;
  return [1, 1, tokensEqual ? 0 : 1];
}
37
/**
 * Error-align scoring: the first two entries are always 1. The diagonal entry
 * is 0 for identical tokens, 3 when either token is in DELIMITERS, and 2 for
 * any other mismatch.
 */
function getErrorAlignValues(refToken, hypToken) {
  if (hypToken === refToken) {
    return [1, 1, 0];
  }
  const { DELIMITERS } = import_utils;
  const touchesDelimiter = DELIMITERS.has(hypToken) || DELIMITERS.has(refToken);
  return [1, 1, touchesDelimiter ? 3 : 2];
}
48
/**
 * Fill an edit-distance matrix with one row per hypothesis position and one
 * column per reference position (each dimension is length + 1).
 *
 * Row 0 is initialised to the column index and column 0 to the row index.
 * `scoreFunc(refToken, hypToken)` must return `[insCost, delCost, diagCost]`:
 * the first is applied when coming from the previous row, the second when
 * coming from the previous column, the third on the diagonal.
 *
 * @param backtrace When truthy, also record for every cell the combination of
 *   operations that achieve the minimum, encoded by getOpTypeComboIndex.
 * @returns The score matrix alone, or `{ scoreMatrix, backtraceMatrix }` when
 *   `backtrace` is truthy.
 */
function computeDistanceMatrix(ref, hyp, scoreFunc, backtrace = false) {
  const rowCount = hyp.length + 1;
  const colCount = ref.length + 1;
  const zeroGrid = () =>
    Array.from({ length: rowCount }, () => new Array(colCount).fill(0));
  const comboIndexOf = (ops) => (0, import_utils.getOpTypeComboIndex)(ops);

  const scoreMatrix = zeroGrid();
  for (let col = 0; col < colCount; col += 1) {
    scoreMatrix[0][col] = col;
  }
  for (let row = 0; row < rowCount; row += 1) {
    scoreMatrix[row][0] = row;
  }

  let backtraceMatrix = null;
  if (backtrace) {
    backtraceMatrix = zeroGrid();
    backtraceMatrix[0][0] = comboIndexOf(["MATCH"]);
    for (let col = 1; col < colCount; col += 1) {
      backtraceMatrix[0][col] = comboIndexOf(["DELETE"]);
    }
    for (let row = 1; row < rowCount; row += 1) {
      backtraceMatrix[row][0] = comboIndexOf(["INSERT"]);
    }
  }

  // Columns are the outer loop, matching the original fill order.
  for (let col = 1; col < colCount; col += 1) {
    for (let row = 1; row < rowCount; row += 1) {
      const [insCost, delCost, diagCost] = scoreFunc(ref[col - 1], hyp[row - 1]);
      const viaInsert = scoreMatrix[row - 1][col] + insCost;
      const viaDelete = scoreMatrix[row][col - 1] + delCost;
      const viaDiagonal = scoreMatrix[row - 1][col - 1] + diagCost;
      const best = Math.min(viaInsert, viaDelete, viaDiagonal);
      scoreMatrix[row][col] = best;
      if (!backtraceMatrix) {
        continue;
      }
      const diagonalIsBest = viaDiagonal === best;
      const ops = [];
      if (diagonalIsBest && diagCost <= 0) ops.push("MATCH");
      if (viaInsert === best) ops.push("INSERT");
      if (viaDelete === best) ops.push("DELETE");
      if (diagonalIsBest && diagCost > 0) ops.push("SUBSTITUTE");
      backtraceMatrix[row][col] = comboIndexOf(ops);
    }
  }

  return backtraceMatrix ? { scoreMatrix, backtraceMatrix } : scoreMatrix;
}
104
/**
 * Distance matrix for `ref` and `hyp` using the unit-cost scoring of
 * getLevenshteinValues; `backtrace` is forwarded unchanged.
 */
function computeLevenshteinDistanceMatrix(ref, hyp, backtrace = false) {
  const scoreFunc = getLevenshteinValues;
  return computeDistanceMatrix(ref, hyp, scoreFunc, backtrace);
}
107
/**
 * Distance matrix for `ref` and `hyp` using the delimiter-aware scoring of
 * getErrorAlignValues; `backtrace` is forwarded unchanged.
 */
function computeErrorAlignDistanceMatrix(ref, hyp, backtrace = false) {
  const scoreFunc = getErrorAlignValues;
  return computeDistanceMatrix(ref, hyp, scoreFunc, backtrace);
}
110
+ // Annotate the CommonJS export names for ESM import in node:
111
+ 0 && (module.exports = {
112
+ computeDistanceMatrix,
113
+ computeErrorAlignDistanceMatrix,
114
+ computeLevenshteinDistanceMatrix
115
+ });
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Compute the edit distance score matrix between two sequences x (hyp) and y (ref)
3
+ * using plain nested arrays.
4
+ *
5
+ * @param ref The reference sequence/transcript.
6
+ * @param hyp The hypothesis sequence/transcript.
7
+ * @param scoreFunc A function that takes two tokens (refToken, hypToken) and returns
8
+ * a tuple of (insertionCost, deletionCost, diagonalCost)
9
+ * @param backtrace Whether to compute the backtrace matrix.
10
+ * @returns The score matrix and optionally the backtrace matrix
11
+ */
12
+ declare function computeDistanceMatrix(ref: string | string[], hyp: string | string[], scoreFunc: (refToken: string, hypToken: string) => [number, number, number]): number[][];
13
+ declare function computeDistanceMatrix(ref: string | string[], hyp: string | string[], scoreFunc: (refToken: string, hypToken: string) => [number, number, number], backtrace: boolean): {
14
+ scoreMatrix: number[][];
15
+ backtraceMatrix: number[][];
16
+ };
17
+ /**
18
+ * Compute the Levenshtein distance matrix between two sequences.
19
+ *
20
+ * @param ref The reference sequence/transcript.
21
+ * @param hyp The hypothesis sequence/transcript.
22
+ * @param backtrace Whether to compute the backtrace matrix.
23
+ *
24
+ * @returns The score matrix and optionally the backtrace matrix
25
+ */
26
+ declare function computeLevenshteinDistanceMatrix(ref: string | string[], hyp: string | string[]): number[][];
27
+ declare function computeLevenshteinDistanceMatrix(ref: string | string[], hyp: string | string[], backtrace: true): {
28
+ scoreMatrix: number[][];
29
+ backtraceMatrix: number[][];
30
+ };
31
+ /**
32
+ * Compute the error alignment distance matrix between two sequences.
33
+ *
34
+ * @param ref The reference sequence/transcript.
35
+ * @param hyp The hypothesis sequence/transcript.
36
+ * @param backtrace Whether to compute the backtrace matrix.
37
+ *
38
+ * @returns The score matrix and optionally the backtrace matrix.
39
+ */
40
+ declare function computeErrorAlignDistanceMatrix(ref: string | string[], hyp: string | string[]): number[][];
41
+ declare function computeErrorAlignDistanceMatrix(ref: string | string[], hyp: string | string[], backtrace: true): {
42
+ scoreMatrix: number[][];
43
+ backtraceMatrix: number[][];
44
+ };
45
+
46
+ export { computeDistanceMatrix, computeErrorAlignDistanceMatrix, computeLevenshteinDistanceMatrix };
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Compute the edit distance score matrix between two sequences x (hyp) and y (ref)
3
+ * using plain nested arrays.
4
+ *
5
+ * @param ref The reference sequence/transcript.
6
+ * @param hyp The hypothesis sequence/transcript.
7
+ * @param scoreFunc A function that takes two tokens (refToken, hypToken) and returns
8
+ * a tuple of (insertionCost, deletionCost, diagonalCost)
9
+ * @param backtrace Whether to compute the backtrace matrix.
10
+ * @returns The score matrix and optionally the backtrace matrix
11
+ */
12
+ declare function computeDistanceMatrix(ref: string | string[], hyp: string | string[], scoreFunc: (refToken: string, hypToken: string) => [number, number, number]): number[][];
13
+ declare function computeDistanceMatrix(ref: string | string[], hyp: string | string[], scoreFunc: (refToken: string, hypToken: string) => [number, number, number], backtrace: boolean): {
14
+ scoreMatrix: number[][];
15
+ backtraceMatrix: number[][];
16
+ };
17
+ /**
18
+ * Compute the Levenshtein distance matrix between two sequences.
19
+ *
20
+ * @param ref The reference sequence/transcript.
21
+ * @param hyp The hypothesis sequence/transcript.
22
+ * @param backtrace Whether to compute the backtrace matrix.
23
+ *
24
+ * @returns The score matrix and optionally the backtrace matrix
25
+ */
26
+ declare function computeLevenshteinDistanceMatrix(ref: string | string[], hyp: string | string[]): number[][];
27
+ declare function computeLevenshteinDistanceMatrix(ref: string | string[], hyp: string | string[], backtrace: true): {
28
+ scoreMatrix: number[][];
29
+ backtraceMatrix: number[][];
30
+ };
31
+ /**
32
+ * Compute the error alignment distance matrix between two sequences.
33
+ *
34
+ * @param ref The reference sequence/transcript.
35
+ * @param hyp The hypothesis sequence/transcript.
36
+ * @param backtrace Whether to compute the backtrace matrix.
37
+ *
38
+ * @returns The score matrix and optionally the backtrace matrix.
39
+ */
40
+ declare function computeErrorAlignDistanceMatrix(ref: string | string[], hyp: string | string[]): number[][];
41
+ declare function computeErrorAlignDistanceMatrix(ref: string | string[], hyp: string | string[], backtrace: true): {
42
+ scoreMatrix: number[][];
43
+ backtraceMatrix: number[][];
44
+ };
45
+
46
+ export { computeDistanceMatrix, computeErrorAlignDistanceMatrix, computeLevenshteinDistanceMatrix };
@@ -0,0 +1,90 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import { range } from "itertools";
3
+ import { DELIMITERS, getOpTypeComboIndex } from "./utils.js";
4
/**
 * Unit-cost scoring: the first two entries are always 1 and the diagonal
 * entry is 0 for identical tokens, 1 otherwise.
 */
function getLevenshteinValues(refToken, hypToken) {
  const tokensEqual = hypToken === refToken;
  return [1, 1, tokensEqual ? 0 : 1];
}
13
/**
 * Error-align scoring: the first two entries are always 1. The diagonal entry
 * is 0 for identical tokens, 3 when either token is in DELIMITERS, and 2 for
 * any other mismatch.
 */
function getErrorAlignValues(refToken, hypToken) {
  if (hypToken === refToken) {
    return [1, 1, 0];
  }
  const touchesDelimiter = DELIMITERS.has(hypToken) || DELIMITERS.has(refToken);
  return [1, 1, touchesDelimiter ? 3 : 2];
}
24
/**
 * Fill an edit-distance matrix with one row per hypothesis position and one
 * column per reference position (each dimension is length + 1).
 *
 * Row 0 is initialised to the column index and column 0 to the row index.
 * `scoreFunc(refToken, hypToken)` must return `[insCost, delCost, diagCost]`:
 * the first is applied when coming from the previous row, the second when
 * coming from the previous column, the third on the diagonal.
 *
 * @param backtrace When truthy, also record for every cell the combination of
 *   operations that achieve the minimum, encoded by getOpTypeComboIndex.
 * @returns The score matrix alone, or `{ scoreMatrix, backtraceMatrix }` when
 *   `backtrace` is truthy.
 */
function computeDistanceMatrix(ref, hyp, scoreFunc, backtrace = false) {
  const rowCount = hyp.length + 1;
  const colCount = ref.length + 1;
  const zeroGrid = () =>
    Array.from({ length: rowCount }, () => new Array(colCount).fill(0));

  const scoreMatrix = zeroGrid();
  for (let col = 0; col < colCount; col += 1) {
    scoreMatrix[0][col] = col;
  }
  for (let row = 0; row < rowCount; row += 1) {
    scoreMatrix[row][0] = row;
  }

  let backtraceMatrix = null;
  if (backtrace) {
    backtraceMatrix = zeroGrid();
    backtraceMatrix[0][0] = getOpTypeComboIndex(["MATCH"]);
    for (let col = 1; col < colCount; col += 1) {
      backtraceMatrix[0][col] = getOpTypeComboIndex(["DELETE"]);
    }
    for (let row = 1; row < rowCount; row += 1) {
      backtraceMatrix[row][0] = getOpTypeComboIndex(["INSERT"]);
    }
  }

  // Columns are the outer loop, matching the original fill order.
  for (let col = 1; col < colCount; col += 1) {
    for (let row = 1; row < rowCount; row += 1) {
      const [insCost, delCost, diagCost] = scoreFunc(ref[col - 1], hyp[row - 1]);
      const viaInsert = scoreMatrix[row - 1][col] + insCost;
      const viaDelete = scoreMatrix[row][col - 1] + delCost;
      const viaDiagonal = scoreMatrix[row - 1][col - 1] + diagCost;
      const best = Math.min(viaInsert, viaDelete, viaDiagonal);
      scoreMatrix[row][col] = best;
      if (!backtraceMatrix) {
        continue;
      }
      const diagonalIsBest = viaDiagonal === best;
      const ops = [];
      if (diagonalIsBest && diagCost <= 0) ops.push("MATCH");
      if (viaInsert === best) ops.push("INSERT");
      if (viaDelete === best) ops.push("DELETE");
      if (diagonalIsBest && diagCost > 0) ops.push("SUBSTITUTE");
      backtraceMatrix[row][col] = getOpTypeComboIndex(ops);
    }
  }

  return backtraceMatrix ? { scoreMatrix, backtraceMatrix } : scoreMatrix;
}
80
/**
 * Distance matrix for `ref` and `hyp` using the unit-cost scoring of
 * getLevenshteinValues; `backtrace` is forwarded unchanged.
 */
function computeLevenshteinDistanceMatrix(ref, hyp, backtrace = false) {
  const scoreFunc = getLevenshteinValues;
  return computeDistanceMatrix(ref, hyp, scoreFunc, backtrace);
}
83
/**
 * Distance matrix for `ref` and `hyp` using the delimiter-aware scoring of
 * getErrorAlignValues; `backtrace` is forwarded unchanged.
 */
function computeErrorAlignDistanceMatrix(ref, hyp, backtrace = false) {
  const scoreFunc = getErrorAlignValues;
  return computeDistanceMatrix(ref, hyp, scoreFunc, backtrace);
}
86
+ export {
87
+ computeDistanceMatrix,
88
+ computeErrorAlignDistanceMatrix,
89
+ computeLevenshteinDistanceMatrix
90
+ };