@storyteller-platform/align 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/dist/align/__tests__/align.test.cjs +6 -5
  2. package/dist/align/__tests__/align.test.js +6 -5
  3. package/dist/align/align.cjs +133 -81
  4. package/dist/align/align.d.cts +1 -0
  5. package/dist/align/align.d.ts +1 -0
  6. package/dist/align/align.js +133 -81
  7. package/dist/align/getSentenceRanges.cjs +78 -149
  8. package/dist/align/getSentenceRanges.d.cts +1 -1
  9. package/dist/align/getSentenceRanges.d.ts +1 -1
  10. package/dist/align/getSentenceRanges.js +78 -149
  11. package/dist/align/slugify.cjs +2 -0
  12. package/dist/align/slugify.js +2 -0
  13. package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
  14. package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
  15. package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
  16. package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
  17. package/dist/errorAlign/__tests__/native.test.cjs +118 -0
  18. package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
  19. package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
  20. package/dist/errorAlign/__tests__/native.test.js +107 -0
  21. package/dist/errorAlign/backtraceGraph.cjs +298 -0
  22. package/dist/errorAlign/backtraceGraph.d.cts +103 -0
  23. package/dist/errorAlign/backtraceGraph.d.ts +103 -0
  24. package/dist/errorAlign/backtraceGraph.js +270 -0
  25. package/dist/errorAlign/beamSearch.cjs +302 -0
  26. package/dist/errorAlign/beamSearch.d.cts +53 -0
  27. package/dist/errorAlign/beamSearch.d.ts +53 -0
  28. package/dist/errorAlign/beamSearch.js +268 -0
  29. package/dist/errorAlign/core.cjs +33 -0
  30. package/dist/errorAlign/core.d.cts +5 -0
  31. package/dist/errorAlign/core.d.ts +5 -0
  32. package/dist/errorAlign/core.js +11 -0
  33. package/dist/errorAlign/editDistance.cjs +115 -0
  34. package/dist/errorAlign/editDistance.d.cts +46 -0
  35. package/dist/errorAlign/editDistance.d.ts +46 -0
  36. package/dist/errorAlign/editDistance.js +90 -0
  37. package/dist/errorAlign/errorAlign.cjs +159 -0
  38. package/dist/errorAlign/errorAlign.d.cts +15 -0
  39. package/dist/errorAlign/errorAlign.d.ts +15 -0
  40. package/dist/errorAlign/errorAlign.js +145 -0
  41. package/dist/errorAlign/graphMetadata.cjs +97 -0
  42. package/dist/errorAlign/graphMetadata.d.cts +44 -0
  43. package/dist/errorAlign/graphMetadata.d.ts +44 -0
  44. package/dist/errorAlign/graphMetadata.js +64 -0
  45. package/dist/errorAlign/hash.cjs +173 -0
  46. package/dist/errorAlign/hash.d.cts +28 -0
  47. package/dist/errorAlign/hash.d.ts +28 -0
  48. package/dist/errorAlign/hash.js +150 -0
  49. package/dist/errorAlign/native.cjs +60 -0
  50. package/dist/errorAlign/native.d.cts +18 -0
  51. package/dist/errorAlign/native.d.ts +18 -0
  52. package/dist/errorAlign/native.js +24 -0
  53. package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
  54. package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
  55. package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
  56. package/dist/errorAlign/node-gyp-build.d.js +0 -0
  57. package/dist/errorAlign/pathToAlignment.cjs +122 -0
  58. package/dist/errorAlign/pathToAlignment.d.cts +11 -0
  59. package/dist/errorAlign/pathToAlignment.d.ts +11 -0
  60. package/dist/errorAlign/pathToAlignment.js +89 -0
  61. package/dist/errorAlign/utils.cjs +301 -0
  62. package/dist/errorAlign/utils.d.cts +107 -0
  63. package/dist/errorAlign/utils.d.ts +107 -0
  64. package/dist/errorAlign/utils.js +248 -0
  65. package/dist/index.d.cts +1 -0
  66. package/dist/index.d.ts +1 -0
  67. package/dist/markup/__tests__/markup.test.cjs +108 -81
  68. package/dist/markup/__tests__/markup.test.js +109 -82
  69. package/dist/markup/__tests__/parseDom.test.cjs +112 -0
  70. package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
  71. package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
  72. package/dist/markup/__tests__/parseDom.test.js +89 -0
  73. package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
  74. package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
  75. package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
  76. package/dist/markup/__tests__/serializeDom.test.js +97 -0
  77. package/dist/markup/__tests__/transform.test.cjs +122 -0
  78. package/dist/markup/__tests__/transform.test.d.cts +2 -0
  79. package/dist/markup/__tests__/transform.test.d.ts +2 -0
  80. package/dist/markup/__tests__/transform.test.js +99 -0
  81. package/dist/markup/map.cjs +261 -0
  82. package/dist/markup/map.d.cts +50 -0
  83. package/dist/markup/map.d.ts +50 -0
  84. package/dist/markup/map.js +236 -0
  85. package/dist/markup/markup.cjs +23 -201
  86. package/dist/markup/markup.d.cts +5 -9
  87. package/dist/markup/markup.d.ts +5 -9
  88. package/dist/markup/markup.js +24 -203
  89. package/dist/markup/model.cjs +172 -0
  90. package/dist/markup/model.d.cts +57 -0
  91. package/dist/markup/model.d.ts +57 -0
  92. package/dist/markup/model.js +145 -0
  93. package/dist/markup/parseDom.cjs +59 -0
  94. package/dist/markup/parseDom.d.cts +7 -0
  95. package/dist/markup/parseDom.d.ts +7 -0
  96. package/dist/markup/parseDom.js +35 -0
  97. package/dist/markup/segmentation.cjs +11 -57
  98. package/dist/markup/segmentation.d.cts +6 -2
  99. package/dist/markup/segmentation.d.ts +6 -2
  100. package/dist/markup/segmentation.js +11 -58
  101. package/dist/markup/serializeDom.cjs +87 -0
  102. package/dist/markup/serializeDom.d.cts +7 -0
  103. package/dist/markup/serializeDom.d.ts +7 -0
  104. package/dist/markup/serializeDom.js +63 -0
  105. package/dist/markup/transform.cjs +92 -0
  106. package/dist/markup/transform.d.cts +11 -0
  107. package/dist/markup/transform.d.ts +11 -0
  108. package/dist/markup/transform.js +71 -0
  109. package/dist/types/node-gyp-build.d.cjs +1 -0
  110. package/dist/types/node-gyp-build.d.d.cts +3 -0
  111. package/dist/types/node-gyp-build.d.d.ts +3 -0
  112. package/dist/types/node-gyp-build.d.js +0 -0
  113. package/package.json +11 -4
@@ -0,0 +1,159 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var errorAlign_exports = {};
20
+ __export(errorAlign_exports, {
21
+ errorAlign: () => errorAlign
22
+ });
23
+ module.exports = __toCommonJS(errorAlign_exports);
24
+ var import_itertools = require("itertools");
25
+ var import_backtraceGraph = require("./backtraceGraph.cjs");
26
+ var import_graphMetadata = require("./graphMetadata.cjs");
27
+ var import_native = require("./native.cjs");
28
+ var import_pathToAlignment = require("./pathToAlignment.cjs");
29
+ var import_utils = require("./utils.cjs");
30
/**
 * Run error alignment between reference and hypothesis texts.
 *
 * @param ref The reference sequence/transcript.
 * @param hyp The hypothesis sequence/transcript.
 * @param tokenizer A function to tokenize the sequence. Defaults to basicTokenizer.
 * @param normalizer A function to normalize the tokens. Defaults to basicNormalizer.
 * @param beamSize The beam size for the beam search alignment. Defaults to 100.
 * @param wordLevelPass Whether to perform a word-level alignment pass to identify
 *   unambiguous matches before beam search. Defaults to true.
 * @returns The alignments produced by the selected strategy.
 */
function errorAlign(ref, hyp, tokenizer = import_utils.basicTokenizer, normalizer = import_utils.basicNormalizer, beamSize = 100, wordLevelPass = true) {
  const graphMetadata = prepareGraphMetadata(ref, hyp, tokenizer, normalizer);
  const { refNorm, hypNorm } = graphMetadata;
  // refNorm and hypNorm are two separate arrays, so `refNorm === hypNorm` can
  // never be true. Compare their contents so the identical-input shortcut is
  // actually reachable.
  const hasIdenticalTokens =
    refNorm.length === hypNorm.length &&
    refNorm.every((token, index) => token === hypNorm[index]);
  if (hasIdenticalTokens) {
    return alignIdenticalInputs(graphMetadata);
  }
  if (!wordLevelPass) {
    return alignBeamSearch(graphMetadata, beamSize);
  }
  return alignWithWordLevelPass(graphMetadata, beamSize);
}
40
+ function prepareGraphMetadata(ref, hyp, tokenizer = import_utils.basicTokenizer, normalizer = import_utils.basicNormalizer) {
41
+ const unpackedTokenizer = (0, import_utils.unpackRegexMatch)(tokenizer);
42
+ const refTokenMatches = unpackedTokenizer(ref);
43
+ const hypTokenMatches = unpackedTokenizer(hyp);
44
+ const ensuredNormalizer = (0, import_utils.ensureLengthPreservation)(normalizer);
45
+ const refNorm = refTokenMatches.map(([r]) => ensuredNormalizer(r));
46
+ const hypNorm = hypTokenMatches.map(([h]) => ensuredNormalizer(h));
47
+ return {
48
+ refRaw: ref,
49
+ hypRaw: hyp,
50
+ refTokenMatches,
51
+ hypTokenMatches,
52
+ refNorm,
53
+ hypNorm
54
+ };
55
+ }
56
+ function alignIdenticalInputs(graphMetadata) {
57
+ const alignments = [];
58
+ for (const i of (0, import_itertools.range)(graphMetadata.refTokenMatches.length)) {
59
+ const alignment = getMatchAlignmentFromTokenIndices(graphMetadata, {
60
+ refIndex: i,
61
+ hypIndex: i
62
+ });
63
+ alignments.push(alignment);
64
+ }
65
+ return alignments;
66
+ }
67
+ function alignBeamSearch(graphMetadata, beamSize, refStart, refEnd, hypStart, hypEnd) {
68
+ const src = new import_graphMetadata.SubgraphMetadata(
69
+ graphMetadata.refRaw,
70
+ graphMetadata.hypRaw,
71
+ graphMetadata.refTokenMatches.slice(refStart, refEnd),
72
+ graphMetadata.hypTokenMatches.slice(hypStart, hypEnd),
73
+ graphMetadata.refNorm.slice(refStart, refEnd),
74
+ graphMetadata.hypNorm.slice(hypStart, hypEnd)
75
+ );
76
+ const path = (0, import_native.errorAlignBeamSearch)(src, beamSize);
77
+ return (0, import_pathToAlignment.getAlignments)(path);
78
+ }
79
+ function alignWithWordLevelPass(graphMetadata, beamSize) {
80
+ const { backtraceMatrix } = (0, import_native.computeLevenshteinDistanceMatrix)(
81
+ graphMetadata.refNorm,
82
+ graphMetadata.hypNorm,
83
+ true
84
+ );
85
+ const backtraceGraph = new import_backtraceGraph.BacktraceGraph(backtraceMatrix);
86
+ const matchIndices = backtraceGraph.getUnambiguousNodeMatches();
87
+ matchIndices.push([
88
+ graphMetadata.hypNorm.length,
89
+ graphMetadata.refNorm.length
90
+ ]);
91
+ let hypStart = 0;
92
+ let refStart = 0;
93
+ const alignments = [];
94
+ const endIndex = matchIndices.length - 1;
95
+ for (const [i, [hypEnd, refEnd]] of (0, import_itertools.enumerate)(matchIndices)) {
96
+ const refIsEmpty = refStart === refEnd;
97
+ const hypIsEmpty = hypStart === hypEnd;
98
+ if (!refIsEmpty && !hypIsEmpty) {
99
+ alignments.push(
100
+ ...alignBeamSearch(
101
+ graphMetadata,
102
+ beamSize,
103
+ refStart,
104
+ refEnd,
105
+ hypStart,
106
+ hypEnd
107
+ )
108
+ );
109
+ } else if (refIsEmpty && !hypIsEmpty) {
110
+ for (const tokenIndex of (0, import_itertools.range)(hypStart, hypEnd)) {
111
+ alignments.push(
112
+ getInsertAlignmentFromTokenIndex(graphMetadata, tokenIndex)
113
+ );
114
+ }
115
+ } else if (hypIsEmpty && !refIsEmpty) {
116
+ for (const tokenIndex of (0, import_itertools.range)(refStart, refEnd)) {
117
+ alignments.push(
118
+ getDeleteAlignmentFromTokenIndex(graphMetadata, tokenIndex)
119
+ );
120
+ }
121
+ }
122
+ if (i < endIndex) {
123
+ alignments.push(
124
+ getMatchAlignmentFromTokenIndices(graphMetadata, {
125
+ refIndex: refEnd,
126
+ hypIndex: hypEnd
127
+ })
128
+ );
129
+ }
130
+ refStart = refEnd + 1;
131
+ hypStart = hypEnd + 1;
132
+ }
133
+ return alignments;
134
+ }
135
+ function getMatchAlignmentFromTokenIndices(graphMetadata, { refIndex, hypIndex }) {
136
+ const refSlice = graphMetadata.refTokenMatches[refIndex][1];
137
+ const hypSlice = graphMetadata.hypTokenMatches[hypIndex][1];
138
+ return new import_utils.Alignment(
139
+ "MATCH",
140
+ refSlice,
141
+ hypSlice,
142
+ graphMetadata.refRaw.slice(...refSlice),
143
+ graphMetadata.hypRaw.slice(...hypSlice)
144
+ );
145
+ }
146
+ function getInsertAlignmentFromTokenIndex(graphMetadata, hypIndex) {
147
+ const slice = graphMetadata.hypTokenMatches[hypIndex][1];
148
+ const token = graphMetadata.hypRaw.slice(...slice);
149
+ return new import_utils.Alignment("INSERT", null, slice, null, token);
150
+ }
151
+ function getDeleteAlignmentFromTokenIndex(graphMetadata, refIndex) {
152
+ const slice = graphMetadata.refTokenMatches[refIndex][1];
153
+ const token = graphMetadata.refRaw.slice(...slice);
154
+ return new import_utils.Alignment("DELETE", slice, null, token);
155
+ }
156
+ // Annotate the CommonJS export names for ESM import in node:
157
+ 0 && (module.exports = {
158
+ errorAlign
159
+ });
@@ -0,0 +1,15 @@
1
+ import { basicTokenizer, basicNormalizer, Alignment } from './utils.cjs';
2
+
3
+ /**
4
+ * Run error alignment between reference and hypothesis texts.
5
+ *
6
+ * @param ref The reference sequence/transcript.
7
+ * @param hyp The hypothesis sequence/transcript.
8
+ * @param tokenizer A function to tokenize the sequence. Must be regex-based and return Match objects.
9
+ * @param normalizer A function to normalize the tokens. Defaults to basicNormalizer.
10
+ * @param beamSize The beam size for the beam search alignment.
11
+ * @param wordLevelPass Whether to perform a word-level alignment pass to identify unambiguous matches.
12
+ */
13
+ declare function errorAlign(ref: string, hyp: string, tokenizer?: typeof basicTokenizer, normalizer?: typeof basicNormalizer, beamSize?: number, wordLevelPass?: boolean): Alignment[];
14
+
15
+ export { errorAlign };
@@ -0,0 +1,15 @@
1
+ import { basicTokenizer, basicNormalizer, Alignment } from './utils.js';
2
+
3
+ /**
4
+ * Run error alignment between reference and hypothesis texts.
5
+ *
6
+ * @param ref The reference sequence/transcript.
7
+ * @param hyp The hypothesis sequence/transcript.
8
+ * @param tokenizer A function to tokenize the sequence. Must be regex-based and return Match objects.
9
+ * @param normalizer A function to normalize the tokens. Defaults to basicNormalizer.
10
+ * @param beamSize The beam size for the beam search alignment.
11
+ * @param wordLevelPass Whether to perform a word-level alignment pass to identify unambiguous matches.
12
+ */
13
+ declare function errorAlign(ref: string, hyp: string, tokenizer?: typeof basicTokenizer, normalizer?: typeof basicNormalizer, beamSize?: number, wordLevelPass?: boolean): Alignment[];
14
+
15
+ export { errorAlign };
@@ -0,0 +1,145 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import { enumerate, range } from "itertools";
3
+ import { BacktraceGraph } from "./backtraceGraph.js";
4
+ import { SubgraphMetadata } from "./graphMetadata.js";
5
+ import {
6
+ computeLevenshteinDistanceMatrix,
7
+ errorAlignBeamSearch
8
+ } from "./native.js";
9
+ import { getAlignments } from "./pathToAlignment.js";
10
+ import {
11
+ Alignment,
12
+ basicNormalizer,
13
+ basicTokenizer,
14
+ ensureLengthPreservation,
15
+ unpackRegexMatch
16
+ } from "./utils.js";
17
/**
 * Run error alignment between reference and hypothesis texts.
 *
 * @param ref The reference sequence/transcript.
 * @param hyp The hypothesis sequence/transcript.
 * @param tokenizer A function to tokenize the sequence. Defaults to basicTokenizer.
 * @param normalizer A function to normalize the tokens. Defaults to basicNormalizer.
 * @param beamSize The beam size for the beam search alignment. Defaults to 100.
 * @param wordLevelPass Whether to perform a word-level alignment pass to identify
 *   unambiguous matches before beam search. Defaults to true.
 * @returns The alignments produced by the selected strategy.
 */
function errorAlign(ref, hyp, tokenizer = basicTokenizer, normalizer = basicNormalizer, beamSize = 100, wordLevelPass = true) {
  const graphMetadata = prepareGraphMetadata(ref, hyp, tokenizer, normalizer);
  const { refNorm, hypNorm } = graphMetadata;
  // refNorm and hypNorm are two separate arrays, so `refNorm === hypNorm` can
  // never be true. Compare their contents so the identical-input shortcut is
  // actually reachable.
  const hasIdenticalTokens =
    refNorm.length === hypNorm.length &&
    refNorm.every((token, index) => token === hypNorm[index]);
  if (hasIdenticalTokens) {
    return alignIdenticalInputs(graphMetadata);
  }
  if (!wordLevelPass) {
    return alignBeamSearch(graphMetadata, beamSize);
  }
  return alignWithWordLevelPass(graphMetadata, beamSize);
}
27
+ function prepareGraphMetadata(ref, hyp, tokenizer = basicTokenizer, normalizer = basicNormalizer) {
28
+ const unpackedTokenizer = unpackRegexMatch(tokenizer);
29
+ const refTokenMatches = unpackedTokenizer(ref);
30
+ const hypTokenMatches = unpackedTokenizer(hyp);
31
+ const ensuredNormalizer = ensureLengthPreservation(normalizer);
32
+ const refNorm = refTokenMatches.map(([r]) => ensuredNormalizer(r));
33
+ const hypNorm = hypTokenMatches.map(([h]) => ensuredNormalizer(h));
34
+ return {
35
+ refRaw: ref,
36
+ hypRaw: hyp,
37
+ refTokenMatches,
38
+ hypTokenMatches,
39
+ refNorm,
40
+ hypNorm
41
+ };
42
+ }
43
+ function alignIdenticalInputs(graphMetadata) {
44
+ const alignments = [];
45
+ for (const i of range(graphMetadata.refTokenMatches.length)) {
46
+ const alignment = getMatchAlignmentFromTokenIndices(graphMetadata, {
47
+ refIndex: i,
48
+ hypIndex: i
49
+ });
50
+ alignments.push(alignment);
51
+ }
52
+ return alignments;
53
+ }
54
+ function alignBeamSearch(graphMetadata, beamSize, refStart, refEnd, hypStart, hypEnd) {
55
+ const src = new SubgraphMetadata(
56
+ graphMetadata.refRaw,
57
+ graphMetadata.hypRaw,
58
+ graphMetadata.refTokenMatches.slice(refStart, refEnd),
59
+ graphMetadata.hypTokenMatches.slice(hypStart, hypEnd),
60
+ graphMetadata.refNorm.slice(refStart, refEnd),
61
+ graphMetadata.hypNorm.slice(hypStart, hypEnd)
62
+ );
63
+ const path = errorAlignBeamSearch(src, beamSize);
64
+ return getAlignments(path);
65
+ }
66
+ function alignWithWordLevelPass(graphMetadata, beamSize) {
67
+ const { backtraceMatrix } = computeLevenshteinDistanceMatrix(
68
+ graphMetadata.refNorm,
69
+ graphMetadata.hypNorm,
70
+ true
71
+ );
72
+ const backtraceGraph = new BacktraceGraph(backtraceMatrix);
73
+ const matchIndices = backtraceGraph.getUnambiguousNodeMatches();
74
+ matchIndices.push([
75
+ graphMetadata.hypNorm.length,
76
+ graphMetadata.refNorm.length
77
+ ]);
78
+ let hypStart = 0;
79
+ let refStart = 0;
80
+ const alignments = [];
81
+ const endIndex = matchIndices.length - 1;
82
+ for (const [i, [hypEnd, refEnd]] of enumerate(matchIndices)) {
83
+ const refIsEmpty = refStart === refEnd;
84
+ const hypIsEmpty = hypStart === hypEnd;
85
+ if (!refIsEmpty && !hypIsEmpty) {
86
+ alignments.push(
87
+ ...alignBeamSearch(
88
+ graphMetadata,
89
+ beamSize,
90
+ refStart,
91
+ refEnd,
92
+ hypStart,
93
+ hypEnd
94
+ )
95
+ );
96
+ } else if (refIsEmpty && !hypIsEmpty) {
97
+ for (const tokenIndex of range(hypStart, hypEnd)) {
98
+ alignments.push(
99
+ getInsertAlignmentFromTokenIndex(graphMetadata, tokenIndex)
100
+ );
101
+ }
102
+ } else if (hypIsEmpty && !refIsEmpty) {
103
+ for (const tokenIndex of range(refStart, refEnd)) {
104
+ alignments.push(
105
+ getDeleteAlignmentFromTokenIndex(graphMetadata, tokenIndex)
106
+ );
107
+ }
108
+ }
109
+ if (i < endIndex) {
110
+ alignments.push(
111
+ getMatchAlignmentFromTokenIndices(graphMetadata, {
112
+ refIndex: refEnd,
113
+ hypIndex: hypEnd
114
+ })
115
+ );
116
+ }
117
+ refStart = refEnd + 1;
118
+ hypStart = hypEnd + 1;
119
+ }
120
+ return alignments;
121
+ }
122
+ function getMatchAlignmentFromTokenIndices(graphMetadata, { refIndex, hypIndex }) {
123
+ const refSlice = graphMetadata.refTokenMatches[refIndex][1];
124
+ const hypSlice = graphMetadata.hypTokenMatches[hypIndex][1];
125
+ return new Alignment(
126
+ "MATCH",
127
+ refSlice,
128
+ hypSlice,
129
+ graphMetadata.refRaw.slice(...refSlice),
130
+ graphMetadata.hypRaw.slice(...hypSlice)
131
+ );
132
+ }
133
+ function getInsertAlignmentFromTokenIndex(graphMetadata, hypIndex) {
134
+ const slice = graphMetadata.hypTokenMatches[hypIndex][1];
135
+ const token = graphMetadata.hypRaw.slice(...slice);
136
+ return new Alignment("INSERT", null, slice, null, token);
137
+ }
138
+ function getDeleteAlignmentFromTokenIndex(graphMetadata, refIndex) {
139
+ const slice = graphMetadata.refTokenMatches[refIndex][1];
140
+ const token = graphMetadata.refRaw.slice(...slice);
141
+ return new Alignment("DELETE", slice, null, token);
142
+ }
143
+ export {
144
+ errorAlign
145
+ };
@@ -0,0 +1,97 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+ var graphMetadata_exports = {};
30
+ __export(graphMetadata_exports, {
31
+ SubgraphMetadata: () => SubgraphMetadata
32
+ });
33
+ module.exports = __toCommonJS(graphMetadata_exports);
34
+ var import_itertools = require("itertools");
35
+ var import_memoize = __toESM(require("memoize"), 1);
36
+ var import_backtraceGraph = require("./backtraceGraph.cjs");
37
+ var import_editDistance = require("./editDistance.cjs");
38
+ var import_utils = require("./utils.cjs");
39
+ class SubgraphMetadata {
40
+ constructor(refRaw, hypRaw, refTokenMatches, hypTokenMatches, refNorm, hypNorm) {
41
+ this.refRaw = refRaw;
42
+ this.hypRaw = hypRaw;
43
+ this.refTokenMatches = refTokenMatches;
44
+ this.hypTokenMatches = hypTokenMatches;
45
+ this.refNorm = refNorm;
46
+ this.hypNorm = hypNorm;
47
+ this.ref = embedTokens(refNorm);
48
+ this.hyp = embedTokens(hypNorm);
49
+ this.refMaxIndex = this.ref.length - 1;
50
+ this.hypMaxIndex = this.hyp.length - 1;
51
+ this.refCharTypes = getCharTypes(this.ref);
52
+ this.hypCharTypes = getCharTypes(this.hyp);
53
+ this.refIndexMap = createIndexMap(refTokenMatches);
54
+ this.hypIndexMap = createIndexMap(hypTokenMatches);
55
+ const { backtraceMatrix } = (0, import_editDistance.computeErrorAlignDistanceMatrix)(
56
+ this.ref,
57
+ this.hyp,
58
+ true
59
+ );
60
+ this.backtraceGraph = new import_backtraceGraph.BacktraceGraph(backtraceMatrix);
61
+ this.backtraceNodeSet = this.backtraceGraph.getNodeSet();
62
+ this.unambiguousMatches = this.backtraceGraph.getUnambiguousTokenSpanMatches(this.ref);
63
+ }
64
+ ref;
65
+ hyp;
66
+ refMaxIndex;
67
+ hypMaxIndex;
68
+ refCharTypes;
69
+ hypCharTypes;
70
+ refIndexMap;
71
+ hypIndexMap;
72
+ backtraceGraph;
73
+ backtraceNodeSet;
74
+ unambiguousMatches;
75
+ }
76
+ function embedTokens(textTokens) {
77
+ return textTokens.map((t) => `${import_utils.START_DELIMITER}${t}${import_utils.END_DELIMITER}`).join("");
78
+ }
79
+ const categorizeCharCached = (0, import_memoize.default)(function categorizeCharCached2(c) {
80
+ return (0, import_utils.categorizeChar)(c);
81
+ });
82
+ function getCharTypes(text) {
83
+ return text.split("").map((c) => categorizeCharCached(c));
84
+ }
85
+ function createIndexMap(textTokens) {
86
+ const indexMap = [];
87
+ for (const [_, span] of textTokens) {
88
+ indexMap.push(-1);
89
+ indexMap.push(...(0, import_itertools.range)(...span));
90
+ indexMap.push(-1);
91
+ }
92
+ return indexMap;
93
+ }
94
+ // Annotate the CommonJS export names for ESM import in node:
95
+ 0 && (module.exports = {
96
+ SubgraphMetadata
97
+ });
@@ -0,0 +1,44 @@
1
+ import { BacktraceGraph, Index } from './backtraceGraph.cjs';
2
+ import './utils.cjs';
3
+
4
+ type TokenWithSpan = [string, [number, number]];
5
+ interface GraphMetadata {
6
+ refRaw: string;
7
+ hypRaw: string;
8
+ refTokenMatches: TokenWithSpan[];
9
+ hypTokenMatches: TokenWithSpan[];
10
+ refNorm: string[];
11
+ hypNorm: string[];
12
+ }
13
+ /**
14
+ * Data class to hold information needed for beam search alignment.
15
+ *
16
+ * This data class encapsulates all necessary information about a subgraph
17
+ * derived from the reference and hypothesis texts, including their tokenized
18
+ * and normalized forms, as well as derived attributes used during
19
+ * the alignment process.
20
+ *
21
+ * It works as a reference for the `Path` class during beam search alignment.
22
+ */
23
+ declare class SubgraphMetadata {
24
+ refRaw: string;
25
+ hypRaw: string;
26
+ refTokenMatches: [string, [number, number]][];
27
+ hypTokenMatches: [string, [number, number]][];
28
+ refNorm: string[];
29
+ hypNorm: string[];
30
+ ref: string;
31
+ hyp: string;
32
+ refMaxIndex: number;
33
+ hypMaxIndex: number;
34
+ refCharTypes: number[];
35
+ hypCharTypes: number[];
36
+ refIndexMap: number[];
37
+ hypIndexMap: number[];
38
+ backtraceGraph: BacktraceGraph;
39
+ backtraceNodeSet: Set<Index>;
40
+ unambiguousMatches: Set<Index>;
41
+ constructor(refRaw: string, hypRaw: string, refTokenMatches: [string, [number, number]][], hypTokenMatches: [string, [number, number]][], refNorm: string[], hypNorm: string[]);
42
+ }
43
+
44
+ export { type GraphMetadata, SubgraphMetadata, type TokenWithSpan };
@@ -0,0 +1,44 @@
1
+ import { BacktraceGraph, Index } from './backtraceGraph.js';
2
+ import './utils.js';
3
+
4
+ type TokenWithSpan = [string, [number, number]];
5
+ interface GraphMetadata {
6
+ refRaw: string;
7
+ hypRaw: string;
8
+ refTokenMatches: TokenWithSpan[];
9
+ hypTokenMatches: TokenWithSpan[];
10
+ refNorm: string[];
11
+ hypNorm: string[];
12
+ }
13
+ /**
14
+ * Data class to hold information needed for beam search alignment.
15
+ *
16
+ * This data class encapsulates all necessary information about a subgraph
17
+ * derived from the reference and hypothesis texts, including their tokenized
18
+ * and normalized forms, as well as derived attributes used during
19
+ * the alignment process.
20
+ *
21
+ * It works as a reference for the `Path` class during beam search alignment.
22
+ */
23
+ declare class SubgraphMetadata {
24
+ refRaw: string;
25
+ hypRaw: string;
26
+ refTokenMatches: [string, [number, number]][];
27
+ hypTokenMatches: [string, [number, number]][];
28
+ refNorm: string[];
29
+ hypNorm: string[];
30
+ ref: string;
31
+ hyp: string;
32
+ refMaxIndex: number;
33
+ hypMaxIndex: number;
34
+ refCharTypes: number[];
35
+ hypCharTypes: number[];
36
+ refIndexMap: number[];
37
+ hypIndexMap: number[];
38
+ backtraceGraph: BacktraceGraph;
39
+ backtraceNodeSet: Set<Index>;
40
+ unambiguousMatches: Set<Index>;
41
+ constructor(refRaw: string, hypRaw: string, refTokenMatches: [string, [number, number]][], hypTokenMatches: [string, [number, number]][], refNorm: string[], hypNorm: string[]);
42
+ }
43
+
44
+ export { type GraphMetadata, SubgraphMetadata, type TokenWithSpan };
@@ -0,0 +1,64 @@
1
+ import "../chunk-BIEQXUOY.js";
2
+ import { range } from "itertools";
3
+ import memoize from "memoize";
4
+ import { BacktraceGraph } from "./backtraceGraph.js";
5
+ import { computeErrorAlignDistanceMatrix } from "./editDistance.js";
6
+ import { END_DELIMITER, START_DELIMITER, categorizeChar } from "./utils.js";
7
+ class SubgraphMetadata {
8
+ constructor(refRaw, hypRaw, refTokenMatches, hypTokenMatches, refNorm, hypNorm) {
9
+ this.refRaw = refRaw;
10
+ this.hypRaw = hypRaw;
11
+ this.refTokenMatches = refTokenMatches;
12
+ this.hypTokenMatches = hypTokenMatches;
13
+ this.refNorm = refNorm;
14
+ this.hypNorm = hypNorm;
15
+ this.ref = embedTokens(refNorm);
16
+ this.hyp = embedTokens(hypNorm);
17
+ this.refMaxIndex = this.ref.length - 1;
18
+ this.hypMaxIndex = this.hyp.length - 1;
19
+ this.refCharTypes = getCharTypes(this.ref);
20
+ this.hypCharTypes = getCharTypes(this.hyp);
21
+ this.refIndexMap = createIndexMap(refTokenMatches);
22
+ this.hypIndexMap = createIndexMap(hypTokenMatches);
23
+ const { backtraceMatrix } = computeErrorAlignDistanceMatrix(
24
+ this.ref,
25
+ this.hyp,
26
+ true
27
+ );
28
+ this.backtraceGraph = new BacktraceGraph(backtraceMatrix);
29
+ this.backtraceNodeSet = this.backtraceGraph.getNodeSet();
30
+ this.unambiguousMatches = this.backtraceGraph.getUnambiguousTokenSpanMatches(this.ref);
31
+ }
32
+ ref;
33
+ hyp;
34
+ refMaxIndex;
35
+ hypMaxIndex;
36
+ refCharTypes;
37
+ hypCharTypes;
38
+ refIndexMap;
39
+ hypIndexMap;
40
+ backtraceGraph;
41
+ backtraceNodeSet;
42
+ unambiguousMatches;
43
+ }
44
+ function embedTokens(textTokens) {
45
+ return textTokens.map((t) => `${START_DELIMITER}${t}${END_DELIMITER}`).join("");
46
+ }
47
+ const categorizeCharCached = memoize(function categorizeCharCached2(c) {
48
+ return categorizeChar(c);
49
+ });
50
+ function getCharTypes(text) {
51
+ return text.split("").map((c) => categorizeCharCached(c));
52
+ }
53
+ function createIndexMap(textTokens) {
54
+ const indexMap = [];
55
+ for (const [_, span] of textTokens) {
56
+ indexMap.push(-1);
57
+ indexMap.push(...range(...span));
58
+ indexMap.push(-1);
59
+ }
60
+ return indexMap;
61
+ }
62
+ export {
63
+ SubgraphMetadata
64
+ };