@storyteller-platform/align 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/align/__tests__/align.test.cjs +6 -5
- package/dist/align/__tests__/align.test.js +6 -5
- package/dist/align/align.cjs +133 -81
- package/dist/align/align.d.cts +1 -0
- package/dist/align/align.d.ts +1 -0
- package/dist/align/align.js +133 -81
- package/dist/align/getSentenceRanges.cjs +78 -149
- package/dist/align/getSentenceRanges.d.cts +1 -1
- package/dist/align/getSentenceRanges.d.ts +1 -1
- package/dist/align/getSentenceRanges.js +78 -149
- package/dist/align/slugify.cjs +16 -8
- package/dist/align/slugify.js +16 -8
- package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
- package/dist/errorAlign/__tests__/native.test.cjs +118 -0
- package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
- package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
- package/dist/errorAlign/__tests__/native.test.js +107 -0
- package/dist/errorAlign/backtraceGraph.cjs +298 -0
- package/dist/errorAlign/backtraceGraph.d.cts +103 -0
- package/dist/errorAlign/backtraceGraph.d.ts +103 -0
- package/dist/errorAlign/backtraceGraph.js +270 -0
- package/dist/errorAlign/beamSearch.cjs +302 -0
- package/dist/errorAlign/beamSearch.d.cts +53 -0
- package/dist/errorAlign/beamSearch.d.ts +53 -0
- package/dist/errorAlign/beamSearch.js +268 -0
- package/dist/errorAlign/core.cjs +33 -0
- package/dist/errorAlign/core.d.cts +5 -0
- package/dist/errorAlign/core.d.ts +5 -0
- package/dist/errorAlign/core.js +11 -0
- package/dist/errorAlign/editDistance.cjs +115 -0
- package/dist/errorAlign/editDistance.d.cts +46 -0
- package/dist/errorAlign/editDistance.d.ts +46 -0
- package/dist/errorAlign/editDistance.js +90 -0
- package/dist/errorAlign/errorAlign.cjs +159 -0
- package/dist/errorAlign/errorAlign.d.cts +15 -0
- package/dist/errorAlign/errorAlign.d.ts +15 -0
- package/dist/errorAlign/errorAlign.js +145 -0
- package/dist/errorAlign/graphMetadata.cjs +97 -0
- package/dist/errorAlign/graphMetadata.d.cts +44 -0
- package/dist/errorAlign/graphMetadata.d.ts +44 -0
- package/dist/errorAlign/graphMetadata.js +64 -0
- package/dist/errorAlign/hash.cjs +173 -0
- package/dist/errorAlign/hash.d.cts +28 -0
- package/dist/errorAlign/hash.d.ts +28 -0
- package/dist/errorAlign/hash.js +150 -0
- package/dist/errorAlign/native.cjs +60 -0
- package/dist/errorAlign/native.d.cts +18 -0
- package/dist/errorAlign/native.d.ts +18 -0
- package/dist/errorAlign/native.js +24 -0
- package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
- package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
- package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
- package/dist/errorAlign/node-gyp-build.d.js +0 -0
- package/dist/errorAlign/pathToAlignment.cjs +122 -0
- package/dist/errorAlign/pathToAlignment.d.cts +11 -0
- package/dist/errorAlign/pathToAlignment.d.ts +11 -0
- package/dist/errorAlign/pathToAlignment.js +89 -0
- package/dist/errorAlign/utils.cjs +301 -0
- package/dist/errorAlign/utils.d.cts +107 -0
- package/dist/errorAlign/utils.d.ts +107 -0
- package/dist/errorAlign/utils.js +248 -0
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/markup/__tests__/markup.test.cjs +108 -81
- package/dist/markup/__tests__/markup.test.js +109 -82
- package/dist/markup/__tests__/parseDom.test.cjs +112 -0
- package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
- package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
- package/dist/markup/__tests__/parseDom.test.js +89 -0
- package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
- package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
- package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
- package/dist/markup/__tests__/serializeDom.test.js +97 -0
- package/dist/markup/__tests__/transform.test.cjs +122 -0
- package/dist/markup/__tests__/transform.test.d.cts +2 -0
- package/dist/markup/__tests__/transform.test.d.ts +2 -0
- package/dist/markup/__tests__/transform.test.js +99 -0
- package/dist/markup/map.cjs +261 -0
- package/dist/markup/map.d.cts +50 -0
- package/dist/markup/map.d.ts +50 -0
- package/dist/markup/map.js +236 -0
- package/dist/markup/markup.cjs +23 -201
- package/dist/markup/markup.d.cts +5 -9
- package/dist/markup/markup.d.ts +5 -9
- package/dist/markup/markup.js +24 -203
- package/dist/markup/model.cjs +172 -0
- package/dist/markup/model.d.cts +57 -0
- package/dist/markup/model.d.ts +57 -0
- package/dist/markup/model.js +145 -0
- package/dist/markup/parseDom.cjs +59 -0
- package/dist/markup/parseDom.d.cts +7 -0
- package/dist/markup/parseDom.d.ts +7 -0
- package/dist/markup/parseDom.js +35 -0
- package/dist/markup/segmentation.cjs +11 -57
- package/dist/markup/segmentation.d.cts +6 -2
- package/dist/markup/segmentation.d.ts +6 -2
- package/dist/markup/segmentation.js +11 -58
- package/dist/markup/serializeDom.cjs +87 -0
- package/dist/markup/serializeDom.d.cts +7 -0
- package/dist/markup/serializeDom.d.ts +7 -0
- package/dist/markup/serializeDom.js +63 -0
- package/dist/markup/transform.cjs +92 -0
- package/dist/markup/transform.d.cts +11 -0
- package/dist/markup/transform.d.ts +11 -0
- package/dist/markup/transform.js +71 -0
- package/dist/types/node-gyp-build.d.cjs +1 -0
- package/dist/types/node-gyp-build.d.d.cts +3 -0
- package/dist/types/node-gyp-build.d.d.ts +3 -0
- package/dist/types/node-gyp-build.d.js +0 -0
- package/package.json +11 -4
|
@@ -1,27 +1,8 @@
|
|
|
1
1
|
import "../chunk-BIEQXUOY.js";
|
|
2
|
-
import {
|
|
2
|
+
import { enumerate } from "itertools";
|
|
3
3
|
import { getTrackDuration } from "../common/ffmpeg.js";
|
|
4
|
-
import {
|
|
4
|
+
import { errorAlign } from "../errorAlign/errorAlign.js";
|
|
5
5
|
import { slugify } from "./slugify.js";
|
|
6
|
-
async function getSentencesWithOffsets(text) {
|
|
7
|
-
const sentences = await segmentText(text).then(
|
|
8
|
-
(r) => r.sentences.map((s) => s.text)
|
|
9
|
-
);
|
|
10
|
-
const sentencesWithOffsets = [];
|
|
11
|
-
let lastSentenceEnd = 0;
|
|
12
|
-
for (const sentence of sentences) {
|
|
13
|
-
const sentenceStart = text.indexOf(sentence, lastSentenceEnd);
|
|
14
|
-
if (sentenceStart > lastSentenceEnd) {
|
|
15
|
-
sentencesWithOffsets.push(text.slice(lastSentenceEnd, sentenceStart));
|
|
16
|
-
}
|
|
17
|
-
sentencesWithOffsets.push(sentence);
|
|
18
|
-
lastSentenceEnd = sentenceStart + sentence.length;
|
|
19
|
-
}
|
|
20
|
-
if (text.length > lastSentenceEnd) {
|
|
21
|
-
sentencesWithOffsets.push(text.slice(lastSentenceEnd));
|
|
22
|
-
}
|
|
23
|
-
return sentencesWithOffsets;
|
|
24
|
-
}
|
|
25
6
|
function findStartTimestamp(matchStartIndex, transcription) {
|
|
26
7
|
const entry = transcription.timeline.find(
|
|
27
8
|
(entry2) => (entry2.endOffsetUtf16 ?? 0) > matchStartIndex
|
|
@@ -39,144 +20,92 @@ function findEndTimestamp(matchEndIndex, transcription) {
|
|
|
39
20
|
);
|
|
40
21
|
return (entry == null ? void 0 : entry.endTime) ?? null;
|
|
41
22
|
}
|
|
42
|
-
function
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
23
|
+
function getAlignmentsForSentence(sentence, alignments) {
|
|
24
|
+
const result = [];
|
|
25
|
+
let sentenceIndex = 0;
|
|
26
|
+
for (const alignment of alignments) {
|
|
27
|
+
if (sentenceIndex === sentence.length) break;
|
|
28
|
+
if (alignment.opType !== "INSERT") {
|
|
29
|
+
sentenceIndex += alignment.ref.length + (sentenceIndex === 0 ? 0 : 1);
|
|
30
|
+
}
|
|
31
|
+
result.push(alignment);
|
|
47
32
|
}
|
|
48
|
-
return
|
|
33
|
+
return result;
|
|
49
34
|
}
|
|
50
|
-
function
|
|
51
|
-
return input.replaceAll(/\s+/g, " ");
|
|
52
|
-
}
|
|
53
|
-
async function getSentenceRanges(startSentence, transcription, sentences, chapterOffset, locale, lastSentenceRange) {
|
|
35
|
+
async function getSentenceRanges(startSentence, endSentence, transcription, sentences, chapterOffset, chapterEndOffset, locale) {
|
|
54
36
|
const sentenceRanges = [];
|
|
55
|
-
const
|
|
56
|
-
const
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
let notFound = 0;
|
|
74
|
-
let sentenceIndex = startSentenceEntry;
|
|
75
|
-
let lastMatchEnd = chapterOffset;
|
|
76
|
-
while (sentenceIndex < sentenceEntries.length) {
|
|
77
|
-
const [sentenceId, sentence] = sentenceEntries[sentenceIndex];
|
|
78
|
-
const transcriptionWindowList = transcriptionSentences.slice(
|
|
79
|
-
transcriptionWindowIndex,
|
|
80
|
-
transcriptionWindowIndex + 10
|
|
81
|
-
);
|
|
82
|
-
const { result: transcriptionWindow, mapping } = await slugify(
|
|
83
|
-
transcriptionWindowList.join("-").slice(transcriptionWindowOffset),
|
|
84
|
-
locale
|
|
85
|
-
);
|
|
86
|
-
const inverted = mapping.invert();
|
|
87
|
-
const query = collapseWhitespace(sentence.trim()).toLowerCase();
|
|
88
|
-
const firstMatch = findNearestMatch(
|
|
89
|
-
query,
|
|
90
|
-
transcriptionWindow,
|
|
91
|
-
Math.max(Math.floor(0.25 * query.length), 1)
|
|
92
|
-
);
|
|
93
|
-
if (!firstMatch) {
|
|
94
|
-
sentenceIndex += 1;
|
|
95
|
-
notFound += 1;
|
|
96
|
-
if (notFound === 3 || sentenceIndex === sentenceEntries.length) {
|
|
97
|
-
transcriptionWindowIndex += 1;
|
|
98
|
-
if (transcriptionWindowIndex == lastGoodTranscriptionWindow + 30) {
|
|
99
|
-
transcriptionWindowIndex = lastGoodTranscriptionWindow;
|
|
100
|
-
notFound = 0;
|
|
101
|
-
continue;
|
|
102
|
-
}
|
|
103
|
-
sentenceIndex -= notFound;
|
|
104
|
-
notFound = 0;
|
|
105
|
-
}
|
|
106
|
-
continue;
|
|
37
|
+
const fullTranscript = transcription.transcript;
|
|
38
|
+
const chapterTranscript = fullTranscript.slice(
|
|
39
|
+
chapterOffset,
|
|
40
|
+
chapterEndOffset
|
|
41
|
+
);
|
|
42
|
+
const { result: slugifiedChapterTranscript, mapping: transcriptMapping } = await slugify(chapterTranscript, locale);
|
|
43
|
+
let chapterTranscriptEndIndex = chapterOffset;
|
|
44
|
+
let chapterSentenceIndex = startSentence;
|
|
45
|
+
let slugifiedChapterTranscriptWindowStartIndex = 0;
|
|
46
|
+
while (chapterSentenceIndex < endSentence) {
|
|
47
|
+
const slugifiedChapterSentenceWindowList = [];
|
|
48
|
+
let sentenceWindowLength = 0;
|
|
49
|
+
let i = chapterSentenceIndex;
|
|
50
|
+
while (sentenceWindowLength < 5e3 && i < sentences.length) {
|
|
51
|
+
const { result: sentence } = await slugify(sentences[i], locale);
|
|
52
|
+
slugifiedChapterSentenceWindowList.push(sentence);
|
|
53
|
+
sentenceWindowLength += sentence.length;
|
|
54
|
+
i++;
|
|
107
55
|
}
|
|
108
|
-
const
|
|
109
|
-
const
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
-1
|
|
56
|
+
const slugifiedChapterSentenceWindow = slugifiedChapterSentenceWindowList.join("-");
|
|
57
|
+
const slugifiedChapterTranscriptWindow = slugifiedChapterTranscript.slice(
|
|
58
|
+
slugifiedChapterTranscriptWindowStartIndex,
|
|
59
|
+
slugifiedChapterTranscriptWindowStartIndex + sentenceWindowLength * 1.2
|
|
113
60
|
);
|
|
114
|
-
const
|
|
115
|
-
|
|
116
|
-
|
|
61
|
+
const alignments = errorAlign(
|
|
62
|
+
slugifiedChapterSentenceWindow,
|
|
63
|
+
slugifiedChapterTranscriptWindow
|
|
117
64
|
);
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
const
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
65
|
+
let alignmentIndex = 0;
|
|
66
|
+
let currentTranscriptWindowIndex = 0;
|
|
67
|
+
for (const [i2, slugifiedSentence] of enumerate(
|
|
68
|
+
slugifiedChapterSentenceWindowList
|
|
69
|
+
)) {
|
|
70
|
+
if (!slugifiedSentence) continue;
|
|
71
|
+
const sentenceAlignments = getAlignmentsForSentence(
|
|
72
|
+
slugifiedSentence,
|
|
73
|
+
alignments.slice(alignmentIndex)
|
|
74
|
+
);
|
|
75
|
+
const sentenceLengthInSlugifiedTranscript = sentenceAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
|
|
76
|
+
const start = findStartTimestamp(
|
|
77
|
+
chapterOffset + transcriptMapping.invert().map(
|
|
78
|
+
slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
|
|
79
|
+
1
|
|
80
|
+
),
|
|
81
|
+
transcription
|
|
82
|
+
);
|
|
83
|
+
chapterTranscriptEndIndex = chapterOffset + transcriptMapping.invert().map(
|
|
84
|
+
slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex + sentenceLengthInSlugifiedTranscript,
|
|
85
|
+
-1
|
|
86
|
+
);
|
|
87
|
+
const end = findEndTimestamp(chapterTranscriptEndIndex, transcription);
|
|
88
|
+
if (start && end !== null) {
|
|
89
|
+
sentenceRanges.push({
|
|
90
|
+
id: i2 + chapterSentenceIndex,
|
|
91
|
+
start: start.start,
|
|
92
|
+
audiofile: start.audiofile,
|
|
93
|
+
end
|
|
94
|
+
});
|
|
141
95
|
}
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
}
|
|
147
|
-
} else {
|
|
148
|
-
const lastTrackDuration = await getTrackDuration(
|
|
149
|
-
lastSentenceRange.audiofile
|
|
150
|
-
);
|
|
151
|
-
lastSentenceRange.end = lastTrackDuration;
|
|
152
|
-
if (sentenceId === 0) {
|
|
153
|
-
start = 0;
|
|
154
|
-
}
|
|
96
|
+
alignmentIndex += sentenceAlignments.length;
|
|
97
|
+
currentTranscriptWindowIndex += sentenceLengthInSlugifiedTranscript;
|
|
98
|
+
if (slugifiedChapterTranscriptWindow[currentTranscriptWindowIndex] === "-") {
|
|
99
|
+
currentTranscriptWindowIndex++;
|
|
155
100
|
}
|
|
156
|
-
} else if (sentenceId === 0) {
|
|
157
|
-
start = 0;
|
|
158
101
|
}
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
});
|
|
165
|
-
notFound = 0;
|
|
166
|
-
lastMatchEnd = matchEnd + transcriptionOffset + transcriptionWindowOffset + chapterOffset;
|
|
167
|
-
const windowIndexResult = getWindowIndexFromOffset(
|
|
168
|
-
transcriptionWindowList,
|
|
169
|
-
matchEnd + transcriptionWindowOffset
|
|
170
|
-
);
|
|
171
|
-
transcriptionWindowIndex += windowIndexResult.index;
|
|
172
|
-
transcriptionWindowOffset = windowIndexResult.offset;
|
|
173
|
-
lastGoodTranscriptionWindow = transcriptionWindowIndex;
|
|
174
|
-
sentenceIndex += 1;
|
|
102
|
+
chapterSentenceIndex += slugifiedChapterSentenceWindowList.length;
|
|
103
|
+
slugifiedChapterTranscriptWindowStartIndex += currentTranscriptWindowIndex;
|
|
104
|
+
if (slugifiedChapterTranscript[slugifiedChapterTranscriptWindowStartIndex] === "-") {
|
|
105
|
+
slugifiedChapterTranscriptWindowStartIndex++;
|
|
106
|
+
}
|
|
175
107
|
}
|
|
176
|
-
return {
|
|
177
|
-
sentenceRanges,
|
|
178
|
-
transcriptionOffset: lastMatchEnd
|
|
179
|
-
};
|
|
108
|
+
return { sentenceRanges, transcriptionOffset: chapterTranscriptEndIndex };
|
|
180
109
|
}
|
|
181
110
|
async function getLargestGap(trailing, leading) {
|
|
182
111
|
const leadingGap = leading.start;
|
package/dist/align/slugify.cjs
CHANGED
|
@@ -67,11 +67,15 @@ function createReplacers(locale) {
|
|
|
67
67
|
const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${currencySymbols.group}`, "g"), "").replace(new RegExp(`\\${currencySymbols.decimal}`), ".");
|
|
68
68
|
const number = parseFloat(normalizedNumeral);
|
|
69
69
|
if (Number.isNaN(number)) return match[0];
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
70
|
+
try {
|
|
71
|
+
return (0, import_to_words.toWords)(number, {
|
|
72
|
+
localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`,
|
|
73
|
+
currency: true,
|
|
74
|
+
doNotAddOnly: true
|
|
75
|
+
});
|
|
76
|
+
} catch {
|
|
77
|
+
return match[0];
|
|
78
|
+
}
|
|
75
79
|
}
|
|
76
80
|
const numberFormat = new Intl.NumberFormat(locale);
|
|
77
81
|
const numberParts = numberFormat.formatToParts(demoNumber);
|
|
@@ -103,9 +107,13 @@ function createReplacers(locale) {
|
|
|
103
107
|
const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${numberSymbols.group}`, "g"), "").replace(new RegExp(`\\${numberSymbols.decimal}`), ".");
|
|
104
108
|
const number = parseFloat(normalizedNumeral);
|
|
105
109
|
if (Number.isNaN(number)) return match[0];
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
110
|
+
try {
|
|
111
|
+
return (0, import_to_words.toWords)(number, {
|
|
112
|
+
localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`
|
|
113
|
+
});
|
|
114
|
+
} catch {
|
|
115
|
+
return match[0];
|
|
116
|
+
}
|
|
109
117
|
}
|
|
110
118
|
return [
|
|
111
119
|
[currencyRegex, currencyReplacer],
|
package/dist/align/slugify.js
CHANGED
|
@@ -45,11 +45,15 @@ function createReplacers(locale) {
|
|
|
45
45
|
const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${currencySymbols.group}`, "g"), "").replace(new RegExp(`\\${currencySymbols.decimal}`), ".");
|
|
46
46
|
const number = parseFloat(normalizedNumeral);
|
|
47
47
|
if (Number.isNaN(number)) return match[0];
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
48
|
+
try {
|
|
49
|
+
return toWords(number, {
|
|
50
|
+
localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`,
|
|
51
|
+
currency: true,
|
|
52
|
+
doNotAddOnly: true
|
|
53
|
+
});
|
|
54
|
+
} catch {
|
|
55
|
+
return match[0];
|
|
56
|
+
}
|
|
53
57
|
}
|
|
54
58
|
const numberFormat = new Intl.NumberFormat(locale);
|
|
55
59
|
const numberParts = numberFormat.formatToParts(demoNumber);
|
|
@@ -81,9 +85,13 @@ function createReplacers(locale) {
|
|
|
81
85
|
const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${numberSymbols.group}`, "g"), "").replace(new RegExp(`\\${numberSymbols.decimal}`), ".");
|
|
82
86
|
const number = parseFloat(normalizedNumeral);
|
|
83
87
|
if (Number.isNaN(number)) return match[0];
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
88
|
+
try {
|
|
89
|
+
return toWords(number, {
|
|
90
|
+
localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`
|
|
91
|
+
});
|
|
92
|
+
} catch {
|
|
93
|
+
return match[0];
|
|
94
|
+
}
|
|
87
95
|
}
|
|
88
96
|
return [
|
|
89
97
|
[currencyRegex, currencyReplacer],
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __create = Object.create;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __copyProps = (to, from, except, desc) => {
|
|
9
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
10
|
+
for (let key of __getOwnPropNames(from))
|
|
11
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
12
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
13
|
+
}
|
|
14
|
+
return to;
|
|
15
|
+
};
|
|
16
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
17
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
18
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
19
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
20
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
21
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
22
|
+
mod
|
|
23
|
+
));
|
|
24
|
+
var import_node_assert = __toESM(require("node:assert"), 1);
|
|
25
|
+
var import_node_test = require("node:test");
|
|
26
|
+
var import_editDistance = require("../editDistance.cjs");
|
|
27
|
+
var import_errorAlign = require("../errorAlign.cjs");
|
|
28
|
+
var import_utils = require("../utils.cjs");
|
|
29
|
+
void (0, import_node_test.describe)("errorAlign", () => {
|
|
30
|
+
void (0, import_node_test.test)("error alignment for an example including all substitution types", () => {
|
|
31
|
+
const ref = "This is a substitution test deleted.";
|
|
32
|
+
const hyp = "Inserted this is a contribution test.";
|
|
33
|
+
const alignments = (0, import_errorAlign.errorAlign)(ref, hyp);
|
|
34
|
+
import_node_assert.default.deepStrictEqual(
|
|
35
|
+
alignments.map((a) => a.opType),
|
|
36
|
+
["INSERT", "MATCH", "MATCH", "MATCH", "SUBSTITUTE", "MATCH", "DELETE"]
|
|
37
|
+
);
|
|
38
|
+
});
|
|
39
|
+
void (0, import_node_test.test)("error alignment for full match", () => {
|
|
40
|
+
const ref = "This is a test.";
|
|
41
|
+
const hyp = "This is a test.";
|
|
42
|
+
const alignments = (0, import_errorAlign.errorAlign)(ref, hyp);
|
|
43
|
+
import_node_assert.default.deepStrictEqual(
|
|
44
|
+
alignments.map((a) => a.opType),
|
|
45
|
+
["MATCH", "MATCH", "MATCH", "MATCH"]
|
|
46
|
+
);
|
|
47
|
+
});
|
|
48
|
+
void (0, import_node_test.test)("error alignment for partial substitutions and insertions with compound markers", () => {
|
|
49
|
+
var _a, _b;
|
|
50
|
+
const ref = "test";
|
|
51
|
+
const hyp = "testpartial";
|
|
52
|
+
const alignments = (0, import_errorAlign.errorAlign)(ref, hyp);
|
|
53
|
+
import_node_assert.default.strictEqual(alignments.length, 2);
|
|
54
|
+
import_node_assert.default.strictEqual((_a = alignments[0]) == null ? void 0 : _a.opType, "SUBSTITUTE");
|
|
55
|
+
import_node_assert.default.strictEqual(alignments[0].leftCompound, false);
|
|
56
|
+
import_node_assert.default.strictEqual(alignments[0].rightCompound, true);
|
|
57
|
+
import_node_assert.default.strictEqual((_b = alignments[1]) == null ? void 0 : _b.opType, "INSERT");
|
|
58
|
+
import_node_assert.default.strictEqual(alignments[1].leftCompound, true);
|
|
59
|
+
import_node_assert.default.strictEqual(alignments[1].rightCompound, false);
|
|
60
|
+
});
|
|
61
|
+
});
|
|
62
|
+
void (0, import_node_test.test)("character categorization", () => {
|
|
63
|
+
import_node_assert.default.strictEqual((0, import_utils.categorizeChar)("<"), 0);
|
|
64
|
+
import_node_assert.default.strictEqual((0, import_utils.categorizeChar)("b"), 1);
|
|
65
|
+
import_node_assert.default.strictEqual((0, import_utils.categorizeChar)("a"), 2);
|
|
66
|
+
import_node_assert.default.strictEqual((0, import_utils.categorizeChar)("'"), 3);
|
|
67
|
+
});
|
|
68
|
+
void (0, import_node_test.test)("string representation of alignment objects", () => {
|
|
69
|
+
const deleteAlignment = (0, import_errorAlign.errorAlign)("deleted", "")[0];
|
|
70
|
+
import_node_assert.default.strictEqual(
|
|
71
|
+
deleteAlignment == null ? void 0 : deleteAlignment.toString(),
|
|
72
|
+
'Alignment(DELETE: "deleted")'
|
|
73
|
+
);
|
|
74
|
+
const insertAlignment = (0, import_errorAlign.errorAlign)("", "inserted")[0];
|
|
75
|
+
import_node_assert.default.strictEqual(
|
|
76
|
+
insertAlignment == null ? void 0 : insertAlignment.toString(),
|
|
77
|
+
'Alignment(INSERT: "inserted")'
|
|
78
|
+
);
|
|
79
|
+
const substituteAlignment = (0, import_errorAlign.errorAlign)(
|
|
80
|
+
"substitution",
|
|
81
|
+
"substitutiontesting"
|
|
82
|
+
)[0];
|
|
83
|
+
import_node_assert.default.strictEqual(substituteAlignment == null ? void 0 : substituteAlignment.leftCompound, false);
|
|
84
|
+
import_node_assert.default.strictEqual(substituteAlignment.rightCompound, true);
|
|
85
|
+
import_node_assert.default.strictEqual(
|
|
86
|
+
substituteAlignment.toString(),
|
|
87
|
+
'Alignment(SUBSTITUTE: "substitution"- -> "substitution")'
|
|
88
|
+
);
|
|
89
|
+
const matchAlignment = (0, import_errorAlign.errorAlign)("test", "test")[0];
|
|
90
|
+
import_node_assert.default.strictEqual(
|
|
91
|
+
matchAlignment == null ? void 0 : matchAlignment.toString(),
|
|
92
|
+
'Alignment(MATCH: "test" == "test")'
|
|
93
|
+
);
|
|
94
|
+
});
|
|
95
|
+
void (0, import_node_test.test)("Levenshtein distance matrix computation", () => {
|
|
96
|
+
const ref = "kitten";
|
|
97
|
+
const hyp = "sitting";
|
|
98
|
+
const distanceMatrix = (0, import_editDistance.computeLevenshteinDistanceMatrix)(ref, hyp);
|
|
99
|
+
import_node_assert.default.strictEqual(distanceMatrix.at(-1).at(-1), 3);
|
|
100
|
+
});
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import assert from "node:assert";
|
|
2
|
+
import { describe, test } from "node:test";
|
|
3
|
+
import { computeLevenshteinDistanceMatrix } from "../editDistance.js";
|
|
4
|
+
import { errorAlign } from "../errorAlign.js";
|
|
5
|
+
import { categorizeChar } from "../utils.js";
|
|
6
|
+
void describe("errorAlign", () => {
|
|
7
|
+
void test("error alignment for an example including all substitution types", () => {
|
|
8
|
+
const ref = "This is a substitution test deleted.";
|
|
9
|
+
const hyp = "Inserted this is a contribution test.";
|
|
10
|
+
const alignments = errorAlign(ref, hyp);
|
|
11
|
+
assert.deepStrictEqual(
|
|
12
|
+
alignments.map((a) => a.opType),
|
|
13
|
+
["INSERT", "MATCH", "MATCH", "MATCH", "SUBSTITUTE", "MATCH", "DELETE"]
|
|
14
|
+
);
|
|
15
|
+
});
|
|
16
|
+
void test("error alignment for full match", () => {
|
|
17
|
+
const ref = "This is a test.";
|
|
18
|
+
const hyp = "This is a test.";
|
|
19
|
+
const alignments = errorAlign(ref, hyp);
|
|
20
|
+
assert.deepStrictEqual(
|
|
21
|
+
alignments.map((a) => a.opType),
|
|
22
|
+
["MATCH", "MATCH", "MATCH", "MATCH"]
|
|
23
|
+
);
|
|
24
|
+
});
|
|
25
|
+
void test("error alignment for partial substitutions and insertions with compound markers", () => {
|
|
26
|
+
var _a, _b;
|
|
27
|
+
const ref = "test";
|
|
28
|
+
const hyp = "testpartial";
|
|
29
|
+
const alignments = errorAlign(ref, hyp);
|
|
30
|
+
assert.strictEqual(alignments.length, 2);
|
|
31
|
+
assert.strictEqual((_a = alignments[0]) == null ? void 0 : _a.opType, "SUBSTITUTE");
|
|
32
|
+
assert.strictEqual(alignments[0].leftCompound, false);
|
|
33
|
+
assert.strictEqual(alignments[0].rightCompound, true);
|
|
34
|
+
assert.strictEqual((_b = alignments[1]) == null ? void 0 : _b.opType, "INSERT");
|
|
35
|
+
assert.strictEqual(alignments[1].leftCompound, true);
|
|
36
|
+
assert.strictEqual(alignments[1].rightCompound, false);
|
|
37
|
+
});
|
|
38
|
+
});
|
|
39
|
+
void test("character categorization", () => {
|
|
40
|
+
assert.strictEqual(categorizeChar("<"), 0);
|
|
41
|
+
assert.strictEqual(categorizeChar("b"), 1);
|
|
42
|
+
assert.strictEqual(categorizeChar("a"), 2);
|
|
43
|
+
assert.strictEqual(categorizeChar("'"), 3);
|
|
44
|
+
});
|
|
45
|
+
void test("string representation of alignment objects", () => {
|
|
46
|
+
const deleteAlignment = errorAlign("deleted", "")[0];
|
|
47
|
+
assert.strictEqual(
|
|
48
|
+
deleteAlignment == null ? void 0 : deleteAlignment.toString(),
|
|
49
|
+
'Alignment(DELETE: "deleted")'
|
|
50
|
+
);
|
|
51
|
+
const insertAlignment = errorAlign("", "inserted")[0];
|
|
52
|
+
assert.strictEqual(
|
|
53
|
+
insertAlignment == null ? void 0 : insertAlignment.toString(),
|
|
54
|
+
'Alignment(INSERT: "inserted")'
|
|
55
|
+
);
|
|
56
|
+
const substituteAlignment = errorAlign(
|
|
57
|
+
"substitution",
|
|
58
|
+
"substitutiontesting"
|
|
59
|
+
)[0];
|
|
60
|
+
assert.strictEqual(substituteAlignment == null ? void 0 : substituteAlignment.leftCompound, false);
|
|
61
|
+
assert.strictEqual(substituteAlignment.rightCompound, true);
|
|
62
|
+
assert.strictEqual(
|
|
63
|
+
substituteAlignment.toString(),
|
|
64
|
+
'Alignment(SUBSTITUTE: "substitution"- -> "substitution")'
|
|
65
|
+
);
|
|
66
|
+
const matchAlignment = errorAlign("test", "test")[0];
|
|
67
|
+
assert.strictEqual(
|
|
68
|
+
matchAlignment == null ? void 0 : matchAlignment.toString(),
|
|
69
|
+
'Alignment(MATCH: "test" == "test")'
|
|
70
|
+
);
|
|
71
|
+
});
|
|
72
|
+
void test("Levenshtein distance matrix computation", () => {
|
|
73
|
+
const ref = "kitten";
|
|
74
|
+
const hyp = "sitting";
|
|
75
|
+
const distanceMatrix = computeLevenshteinDistanceMatrix(ref, hyp);
|
|
76
|
+
assert.strictEqual(distanceMatrix.at(-1).at(-1), 3);
|
|
77
|
+
});
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __create = Object.create;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __copyProps = (to, from, except, desc) => {
|
|
9
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
10
|
+
for (let key of __getOwnPropNames(from))
|
|
11
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
12
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
13
|
+
}
|
|
14
|
+
return to;
|
|
15
|
+
};
|
|
16
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
17
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
18
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
19
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
20
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
21
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
22
|
+
mod
|
|
23
|
+
));
|
|
24
|
+
var import_node_assert = __toESM(require("node:assert"), 1);
|
|
25
|
+
var import_node_test = require("node:test");
|
|
26
|
+
var import_beamSearch = require("../beamSearch.cjs");
|
|
27
|
+
var import_editDistance = require("../editDistance.cjs");
|
|
28
|
+
var import_graphMetadata = require("../graphMetadata.cjs");
|
|
29
|
+
var import_native = require("../native.cjs");
|
|
30
|
+
var import_pathToAlignment = require("../pathToAlignment.cjs");
|
|
31
|
+
var import_utils = require("../utils.cjs");
|
|
32
|
+
void (0, import_node_test.describe)("native C++ vs TypeScript implementations", () => {
|
|
33
|
+
void (0, import_node_test.describe)("Levenshtein distance matrix", () => {
|
|
34
|
+
void (0, import_node_test.test)("string input", () => {
|
|
35
|
+
const ref = "kitten";
|
|
36
|
+
const hyp = "sitting";
|
|
37
|
+
const tsResult = (0, import_editDistance.computeLevenshteinDistanceMatrix)(ref, hyp);
|
|
38
|
+
const nativeResult = (0, import_native.computeLevenshteinDistanceMatrix)(ref, hyp);
|
|
39
|
+
import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
|
|
40
|
+
});
|
|
41
|
+
void (0, import_node_test.test)("string array input", () => {
|
|
42
|
+
const ref = ["hello", "world"];
|
|
43
|
+
const hyp = ["hello", "there"];
|
|
44
|
+
const tsResult = (0, import_editDistance.computeLevenshteinDistanceMatrix)(ref, hyp);
|
|
45
|
+
const nativeResult = (0, import_native.computeLevenshteinDistanceMatrix)(ref, hyp);
|
|
46
|
+
import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
|
|
47
|
+
});
|
|
48
|
+
void (0, import_node_test.test)("with backtrace", () => {
|
|
49
|
+
const ref = "kitten";
|
|
50
|
+
const hyp = "sitting";
|
|
51
|
+
const tsResult = (0, import_editDistance.computeLevenshteinDistanceMatrix)(ref, hyp, true);
|
|
52
|
+
const nativeResult = (0, import_native.computeLevenshteinDistanceMatrix)(ref, hyp, true);
|
|
53
|
+
import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
|
|
54
|
+
});
|
|
55
|
+
});
|
|
56
|
+
void (0, import_node_test.describe)("error align distance matrix", () => {
|
|
57
|
+
void (0, import_node_test.test)("string input", () => {
|
|
58
|
+
const ref = "test";
|
|
59
|
+
const hyp = "best";
|
|
60
|
+
const tsResult = (0, import_editDistance.computeErrorAlignDistanceMatrix)(ref, hyp);
|
|
61
|
+
const nativeResult = (0, import_native.computeErrorAlignDistanceMatrix)(ref, hyp);
|
|
62
|
+
import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
|
|
63
|
+
});
|
|
64
|
+
void (0, import_node_test.test)("with backtrace", () => {
|
|
65
|
+
const ref = "test";
|
|
66
|
+
const hyp = "best";
|
|
67
|
+
const tsResult = (0, import_editDistance.computeErrorAlignDistanceMatrix)(ref, hyp, true);
|
|
68
|
+
const nativeResult = (0, import_native.computeErrorAlignDistanceMatrix)(ref, hyp, true);
|
|
69
|
+
import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
|
|
70
|
+
});
|
|
71
|
+
});
|
|
72
|
+
void (0, import_node_test.describe)("beam search", () => {
|
|
73
|
+
function buildSubgraphMetadata(ref, hyp) {
|
|
74
|
+
const tokenizer = import_utils.basicTokenizer;
|
|
75
|
+
const normalizer = import_utils.basicNormalizer;
|
|
76
|
+
const unpackedTokenizer = (0, import_utils.unpackRegexMatch)(tokenizer);
|
|
77
|
+
const ensuredNormalizer = (0, import_utils.ensureLengthPreservation)(normalizer);
|
|
78
|
+
const refTokenMatches = unpackedTokenizer(ref);
|
|
79
|
+
const hypTokenMatches = unpackedTokenizer(hyp);
|
|
80
|
+
const refNorm = refTokenMatches.map(([r]) => ensuredNormalizer(r));
|
|
81
|
+
const hypNorm = hypTokenMatches.map(([h]) => ensuredNormalizer(h));
|
|
82
|
+
return new import_graphMetadata.SubgraphMetadata(
|
|
83
|
+
ref,
|
|
84
|
+
hyp,
|
|
85
|
+
refTokenMatches,
|
|
86
|
+
hypTokenMatches,
|
|
87
|
+
refNorm,
|
|
88
|
+
hypNorm
|
|
89
|
+
);
|
|
90
|
+
}
|
|
91
|
+
void (0, import_node_test.test)("simple substitution", () => {
|
|
92
|
+
const src = buildSubgraphMetadata("hello", "jello");
|
|
93
|
+
const tsPath = (0, import_beamSearch.errorAlignBeamSearch)(src);
|
|
94
|
+
const nativePath = (0, import_native.errorAlignBeamSearch)(src);
|
|
95
|
+
const tsAlignments = (0, import_pathToAlignment.getAlignments)(tsPath);
|
|
96
|
+
const nativeAlignments = (0, import_pathToAlignment.getAlignments)(nativePath);
|
|
97
|
+
import_node_assert.default.deepStrictEqual(nativeAlignments, tsAlignments);
|
|
98
|
+
});
|
|
99
|
+
void (0, import_node_test.test)("multi-word alignment with all op types", () => {
|
|
100
|
+
const ref = "This is a substitution test deleted.";
|
|
101
|
+
const hyp = "Inserted this is a contribution test.";
|
|
102
|
+
const src = buildSubgraphMetadata(ref, hyp);
|
|
103
|
+
const tsPath = (0, import_beamSearch.errorAlignBeamSearch)(src);
|
|
104
|
+
const nativePath = (0, import_native.errorAlignBeamSearch)(src);
|
|
105
|
+
const tsAlignments = (0, import_pathToAlignment.getAlignments)(tsPath);
|
|
106
|
+
const nativeAlignments = (0, import_pathToAlignment.getAlignments)(nativePath);
|
|
107
|
+
import_node_assert.default.deepStrictEqual(nativeAlignments, tsAlignments);
|
|
108
|
+
});
|
|
109
|
+
void (0, import_node_test.test)("identical strings", () => {
|
|
110
|
+
const src = buildSubgraphMetadata("test words", "test words");
|
|
111
|
+
const tsPath = (0, import_beamSearch.errorAlignBeamSearch)(src);
|
|
112
|
+
const nativePath = (0, import_native.errorAlignBeamSearch)(src);
|
|
113
|
+
const tsAlignments = (0, import_pathToAlignment.getAlignments)(tsPath);
|
|
114
|
+
const nativeAlignments = (0, import_pathToAlignment.getAlignments)(nativePath);
|
|
115
|
+
import_node_assert.default.deepStrictEqual(nativeAlignments, tsAlignments);
|
|
116
|
+
});
|
|
117
|
+
});
|
|
118
|
+
});
|