@storyteller-platform/align 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +26 -0
- package/dist/align/align.cjs +6 -7
- package/dist/align/align.js +6 -7
- package/dist/cli/bin.cjs +1 -1
- package/dist/cli/bin.js +2 -2
- package/dist/common/ffmpeg.cjs +12 -12
- package/dist/common/ffmpeg.js +12 -12
- package/dist/errorAlign/backtraceGraph.cjs +5 -8
- package/dist/errorAlign/backtraceGraph.js +5 -8
- package/dist/errorAlign/beamSearch.cjs +1 -2
- package/dist/errorAlign/beamSearch.js +1 -2
- package/dist/markup/markup.cjs +3 -4
- package/dist/markup/markup.js +3 -4
- package/dist/markup/serializeDom.cjs +1 -1
- package/dist/markup/serializeDom.js +1 -1
- package/dist/process/processAudiobook.cjs +8 -12
- package/dist/process/processAudiobook.js +8 -12
- package/dist/process/ranges.cjs +3 -3
- package/dist/process/ranges.js +3 -3
- package/dist/transcribe/transcribe.cjs +9 -14
- package/dist/transcribe/transcribe.js +9 -14
- package/package.json +3 -1
- package/prebuilds/darwin-arm64/@storyteller-platform+align.node +0 -0
- package/prebuilds/linux-arm64/@storyteller-platform+align.node +0 -0
- package/prebuilds/linux-x64/@storyteller-platform+align.node +0 -0
- package/dist/align/__tests__/align.test.cjs +0 -283
- package/dist/align/__tests__/align.test.d.cts +0 -2
- package/dist/align/__tests__/align.test.d.ts +0 -2
- package/dist/align/__tests__/align.test.js +0 -219
- package/dist/align/__tests__/slugify.test.cjs +0 -64
- package/dist/align/__tests__/slugify.test.d.cts +0 -2
- package/dist/align/__tests__/slugify.test.d.ts +0 -2
- package/dist/align/__tests__/slugify.test.js +0 -41
- package/dist/errorAlign/__tests__/errorAlign.test.cjs +0 -100
- package/dist/errorAlign/__tests__/errorAlign.test.d.cts +0 -2
- package/dist/errorAlign/__tests__/errorAlign.test.d.ts +0 -2
- package/dist/errorAlign/__tests__/errorAlign.test.js +0 -77
- package/dist/errorAlign/__tests__/native.test.cjs +0 -118
- package/dist/errorAlign/__tests__/native.test.d.cts +0 -2
- package/dist/errorAlign/__tests__/native.test.d.ts +0 -2
- package/dist/errorAlign/__tests__/native.test.js +0 -107
- package/dist/markup/__tests__/markup.test.cjs +0 -491
- package/dist/markup/__tests__/markup.test.d.cts +0 -2
- package/dist/markup/__tests__/markup.test.d.ts +0 -2
- package/dist/markup/__tests__/markup.test.js +0 -468
- package/dist/markup/__tests__/parseDom.test.cjs +0 -112
- package/dist/markup/__tests__/parseDom.test.d.cts +0 -2
- package/dist/markup/__tests__/parseDom.test.d.ts +0 -2
- package/dist/markup/__tests__/parseDom.test.js +0 -89
- package/dist/markup/__tests__/serializeDom.test.cjs +0 -120
- package/dist/markup/__tests__/serializeDom.test.d.cts +0 -2
- package/dist/markup/__tests__/serializeDom.test.d.ts +0 -2
- package/dist/markup/__tests__/serializeDom.test.js +0 -97
- package/dist/markup/__tests__/transform.test.cjs +0 -122
- package/dist/markup/__tests__/transform.test.d.cts +0 -2
- package/dist/markup/__tests__/transform.test.d.ts +0 -2
- package/dist/markup/__tests__/transform.test.js +0 -99
- package/dist/process/__tests__/processAudiobook.test.cjs +0 -232
- package/dist/process/__tests__/processAudiobook.test.d.cts +0 -2
- package/dist/process/__tests__/processAudiobook.test.d.ts +0 -2
- package/dist/process/__tests__/processAudiobook.test.js +0 -209
|
@@ -1,219 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
__callDispose,
|
|
3
|
-
__using
|
|
4
|
-
} from "../../chunk-BIEQXUOY.js";
|
|
5
|
-
import assert from "node:assert";
|
|
6
|
-
import { mkdir, readFile, readdir, writeFile } from "node:fs/promises";
|
|
7
|
-
import { basename, dirname, extname, join } from "node:path";
|
|
8
|
-
import { basename as posixBasename } from "node:path/posix";
|
|
9
|
-
import { describe, it } from "node:test";
|
|
10
|
-
import { isAudioFile } from "@storyteller-platform/audiobook";
|
|
11
|
-
import { Epub } from "@storyteller-platform/epub";
|
|
12
|
-
import { createLogger } from "../../common/logging.js";
|
|
13
|
-
import { getXhtmlSegmentation } from "../../markup/segmentation.js";
|
|
14
|
-
import { Aligner } from "../align.js";
|
|
15
|
-
/**
 * Creates the logger handed to the aligner in these tests.
 *
 * The level is "silent" when the CI environment variable holds a non-empty
 * value, and "info" otherwise.
 */
function createTestLogger() {
  const level = process.env["CI"] ? "silent" : "info";
  return createLogger(level);
}
|
|
18
|
-
/**
 * Turns an arbitrary title into text that can be used as one file name.
 *
 * - The characters / \ : * ? " < > | are each replaced with "-".
 * - Runs of whitespace collapse to a single space and both ends are trimmed.
 * - Trailing dots are removed together with any space in front of them, so
 *   the result never ends in "." or " ".
 *
 * @param {string} title - Raw title text.
 * @returns {string} The sanitized name; may be empty.
 */
function sanitizeFilename(title) {
  return title
    .replace(/[/\\:*?"<>|]/g, "-")
    .replace(/\s+/g, " ")
    .trim()
    // Strip dots and spaces in one pass: removing only the dots would expose
    // the space that preceded them ("foo ." would become "foo ").
    .replace(/[. ]+$/, "");
}
|
|
21
|
-
/**
 * Shortens `input` so that, with `suffix` appended, its UTF-8 encoding does
 * not exceed `byteLimit` bytes.
 *
 * The input is NFC-normalized first and is only ever cut between code points.
 * The suffix is always appended, even when it alone is longer than the limit.
 *
 * @param {string} input - Text to shorten.
 * @param {number} byteLimit - Maximum UTF-8 byte length of kept text plus suffix.
 * @param {string} [suffix=""] - Text appended after the kept prefix.
 * @returns {string} The kept prefix followed by `suffix`.
 */
function truncate(input, byteLimit, suffix = "") {
  const utf8 = new TextEncoder();
  const fits = (candidate) => utf8.encode(candidate).length <= byteLimit;

  let kept = "";
  for (const codePoint of input.normalize("NFC")) {
    const extended = kept + codePoint;
    if (!fits(extended + suffix)) {
      break;
    }
    kept = extended;
  }
  return kept + suffix;
}
|
|
33
|
-
/**
 * Produces a path segment from `name`: the name is sanitized and then
 * truncated so that, together with `suffix`, it fits in 150 UTF-8 bytes.
 *
 * @param {string} name - Raw name to convert.
 * @param {string} [suffix=""] - Text appended after the truncated name.
 * @returns {string} The sanitized, truncated name followed by `suffix`.
 */
function getSafeFilepathSegment(name, suffix = "") {
  const maxBytes = 150;
  const cleaned = sanitizeFilename(name);
  return truncate(cleaned, maxBytes, suffix);
}
|
|
36
|
-
/**
 * Renders the media overlays of `epub` as text and compares the result with
 * the snapshot stored for the running test.
 *
 * For every manifest item referenced as a media overlay, the SMIL document is
 * parsed and each child of its `seq` element contributes a "Text: ..." line
 * (the chapter sentence its `text` element points at) and an "Audio: ..." line
 * (the transcribed words that fall inside its `audio` clip range).
 *
 * When the UPDATE_SNAPSHOTS environment variable is set, the rendered text is
 * written to src/align/__snapshots__/ and no comparison takes place.
 *
 * @param context - Test context; only `fullName` is read, to name the snapshot file.
 * @param epub - Book to inspect; `getManifest`, `readItemContents` and
 *   `readFileContents` are called on it.
 * @param {string[]} transcriptionFilepaths - Transcription JSON files, matched
 *   to audio clips by file name without extension.
 * @returns {Promise<void>}
 * @throws {assert.AssertionError} When the rendered text and the stored
 *   snapshot differ in any line (including one being longer than the other),
 *   or when reading the stored snapshot fails.
 */
async function assertAlignSnapshot(context, epub, transcriptionFilepaths) {
  const snapshotFilename = getSafeFilepathSegment(context.fullName, ".snapshot");
  const snapshotFilepath = join("src", "align", "__snapshots__", snapshotFilename);
  let newSnapshot = "";

  // Parsed transcriptions keyed by file path, so a file referenced by many
  // clips is read and parsed once instead of once per clip.
  const transcriptionCache = new Map();

  const manifest = await epub.getManifest();
  const mediaOverlayItems = Object.values(manifest)
    .map((item) => item.mediaOverlay)
    .filter((mediaOverlayId) => !!mediaOverlayId)
    .map((id) => manifest[id]);

  for (const item of mediaOverlayItems) {
    const contents = await epub.readItemContents(item.id, "utf-8");
    const parsed = Epub.xmlParser.parse(contents);
    const smil = Epub.findXmlChildByName("smil", parsed);
    if (!smil) continue;
    const body = Epub.findXmlChildByName("body", Epub.getXmlChildren(smil));
    if (!body) continue;
    const seq = Epub.findXmlChildByName("seq", Epub.getXmlChildren(body));
    if (!seq) continue;
    const textref = seq[":@"]?.["@_epub:textref"];
    if (!textref) continue;

    newSnapshot += `// ${posixBasename(textref)}\n\n`;

    const chapterContents = await epub.readFileContents(textref, item.href, "utf-8");
    const chapterXml = Epub.xhtmlParser.parse(chapterContents);
    const { result: segmentation } = await getXhtmlSegmentation(
      Epub.getXhtmlBody(chapterXml),
      { primaryLocale: new Intl.Locale("en-US") }
    );
    // Whitespace-only segments are dropped before sentences are looked up by index.
    const chapterSentences = segmentation
      .map((s) => s.text)
      .filter((s) => s.match(/\S/));

    for (const par of Epub.getXmlChildren(seq)) {
      // The separator is emitted before any of the checks below, so a skipped
      // entry still leaves a blank line in the snapshot.
      newSnapshot += "\n";

      const text = Epub.findXmlChildByName("text", Epub.getXmlChildren(par));
      if (!text) continue;
      const audio = Epub.findXmlChildByName("audio", Epub.getXmlChildren(par));
      if (!audio) continue;
      const textSrc = text[":@"]?.["@_src"];
      if (!textSrc) continue;

      // The trailing digits of the text src are used as the sentence index.
      const sentenceId = textSrc.match(/[0-9]+$/)?.[0];
      if (sentenceId === undefined) continue;
      const textSentence = chapterSentences[Number.parseInt(sentenceId, 10)];
      if (!textSentence) continue;
      newSnapshot += `Text: ${textSentence.replace(/\n/, "")}\n`;

      const audioSrc = audio[":@"]?.["@_src"];
      if (!audioSrc) continue;
      const audioStart = audio[":@"]?.["@_clipBegin"];
      const audioEnd = audio[":@"]?.["@_clipEnd"];
      if (!audioStart || !audioEnd) continue;

      // slice(0, -1) drops the last character of the clip value (presumably a
      // unit suffix such as "s" - confirm against the SMIL writer). The start
      // is moved 2ms earlier before words are compared against it.
      const audioStartTime = parseFloat(audioStart.slice(0, -1)) - 2e-3;
      const audioEndTime = parseFloat(audioEnd.slice(0, -1));

      const audioFilename = posixBasename(audioSrc, extname(audioSrc));
      const transcriptionFilepath = transcriptionFilepaths.find(
        (f) => basename(f, extname(f)) === audioFilename
      );
      if (!transcriptionFilepath) continue;

      let transcription = transcriptionCache.get(transcriptionFilepath);
      if (transcription === undefined) {
        transcription = JSON.parse(
          await readFile(transcriptionFilepath, { encoding: "utf-8" })
        );
        transcriptionCache.set(transcriptionFilepath, transcription);
      }

      // Collect words from the first one starting at or after the clip start
      // up to the first one that ends after the clip end.
      const transcriptionWords = [];
      let started = false;
      for (const word of transcription.timeline) {
        if (!word || !(word.endTime <= audioEndTime)) break;
        if (word.startTime >= audioStartTime) {
          started = true;
        }
        if (started) {
          transcriptionWords.push(word.text);
        }
      }
      newSnapshot += `Audio: ${transcriptionWords.join(" ")}\n`;
    }
    newSnapshot += "\n";
  }

  if (process.env["UPDATE_SNAPSHOTS"]) {
    await mkdir(dirname(snapshotFilepath), { recursive: true });
    await writeFile(snapshotFilepath, newSnapshot, { encoding: "utf-8" });
    return;
  }

  try {
    const existingSnapshot = await readFile(snapshotFilepath, {
      encoding: "utf-8"
    });
    const existingLines = existingSnapshot.split("\n");
    const newLines = newSnapshot.split("\n");
    // Walk the longer of the two so that extra lines in the new output are
    // reported instead of being silently accepted.
    const lineCount = Math.max(existingLines.length, newLines.length);
    for (let i = 0; i < lineCount; i++) {
      if (existingLines[i] !== newLines[i]) {
        // Compare a small window around the first difference; the window
        // contains line i, so this assertion always fails and its message
        // shows the surrounding lines.
        const from = Math.max(0, i - 5);
        assert.deepStrictEqual(
          newLines.slice(from, i + 5),
          existingLines.slice(from, i + 5)
        );
      }
    }
  } catch (e) {
    if (e instanceof assert.AssertionError) {
      throw e;
    }
    // Any other failure (for example the snapshot file not being readable) is
    // reported as a mismatch against an empty snapshot.
    throw new assert.AssertionError({
      actual: newSnapshot,
      expected: "",
      diff: "simple"
    });
  }
}
|
|
159
|
-
// End-to-end alignment test: aligns the "Peter and Wendy" fixture book against
// its stored transcriptions and compares the outcome with the saved snapshot.
void describe("align", () => {
  void it("should align Peter and Wendy", async (context) => {
    // Resources registered with __using are handed to __callDispose in the
    // finally clause (presumably to dispose them and re-raise a captured
    // error - confirm against the helper chunk).
    var _stack = [];
    try {
      const fixtureRoot = join("src", "align", "__fixtures__", "peter-and-wendy");

      const epubPath = join(fixtureRoot, "text", "Peter and Wendy.epub");
      const epub = __using(_stack, await Epub.from(epubPath));

      const audiobookDir = join(fixtureRoot, "audio");
      const audioNames = await readdir(audiobookDir);
      const audiobookFiles = audioNames
        .filter((name) => isAudioFile(name))
        .map((name) => join(audiobookDir, name));

      const transcriptionsDir = join(fixtureRoot, "transcriptions");
      const transcriptionNames = await readdir(transcriptionsDir);
      const transcriptionFilepaths = transcriptionNames
        .filter((name) => name.endsWith(".json"))
        .map((name) => join(transcriptionsDir, name));

      const rawTranscriptions = await Promise.all(
        transcriptionFilepaths.map(
          async (filepath) => readFile(filepath, { encoding: "utf-8" })
        )
      );
      const transcriptions = rawTranscriptions.map((raw) => JSON.parse(raw));

      const logger = createTestLogger();
      const aligner = new Aligner(
        epub,
        audiobookFiles,
        transcriptions,
        "sentence",
        void 0,
        logger
      );
      const timing = await aligner.alignBook();
      // Timing output is only printed outside CI.
      if (!process.env["CI"]) {
        timing.print();
      }
      await assertAlignSnapshot(context, epub, transcriptionFilepaths);
    } catch (caught) {
      // `var` on purpose: both bindings must be visible in the finally clause.
      var _error = caught;
      var _hasError = true;
    } finally {
      __callDispose(_stack, _error, _hasError);
    }
  });
});
|
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// CommonJS/ES-module interop helpers used by the require() calls in this file.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines on `to` a getter for every own property of `from` that `to` does
// not already own, skipping the key named by `except`. Returns `to`.
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

// Wraps a required module in a fresh object that shares its prototype and
// forwards its own properties through getters.
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // "default" is pointed at the module itself when the importer is in node
  // compatibility mode, or when the module does not carry the "__esModule"
  // marker set by Babel-compatible ESM-to-CommonJS transforms.
  if (isNodeMode || !mod || !mod.__esModule) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};
|
|
24
|
-
var import_node_assert = __toESM(require("node:assert"), 1);
|
|
25
|
-
var import_node_test = require("node:test");
|
|
26
|
-
var import_slugify = require("../slugify.cjs");
|
|
27
|
-
// slugify test suite: every case slugifies `input` under `locale` and expects
// exactly `output`.
void (0, import_node_test.describe)("slugify", () => {
  const cases = [
    {
      title: "should slugify numbers",
      locale: "en",
      input: "There were 10 cars.",
      output: "there-were-ten-cars"
    },
    {
      title: "should slugify numbers with decimals",
      locale: "en",
      input: "It was 74.6 degrees out.",
      output: "it-was-seventy-four-point-six-degrees-out"
    },
    {
      title: "should slugify numbers with groupings",
      locale: "en",
      input: "It weighed over 1,000 pounds",
      output: "it-weighed-over-one-thousand-pounds"
    },
    {
      title: "should slugify currency",
      locale: "en",
      input: "It costs $5,500.50.",
      output: "it-costs-five-thousand-five-hundred-dollars-and-fifty-cents"
    },
    {
      title: "should slugify German currency",
      locale: "de",
      input: "Es kostet 5.500,50 \u20AC",
      output: "es-kostet-funf-tausend-funf-hundert-euro-und-funfzig-cent"
    },
    {
      title: "should slugify Chinese currency",
      locale: "zh",
      input: "\u4EF7\u683C\u4E3A \xA55,500.50",
      output: "jie-ge-wei-wu-qian-wu-bai-yuan-wu-shi-fen"
    }
  ];

  for (const { title, locale, input, output } of cases) {
    void (0, import_node_test.it)(title, async () => {
      const { result } = await (0, import_slugify.slugify)(input, new Intl.Locale(locale));
      import_node_assert.default.strictEqual(result, output);
    });
  }
});
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
import assert from "node:assert";
|
|
2
|
-
import { describe, it } from "node:test";
|
|
3
|
-
import { slugify } from "../slugify.js";
|
|
4
|
-
// slugify test suite: every case slugifies `input` under `locale` and expects
// exactly `output`.
void describe("slugify", () => {
  const cases = [
    {
      title: "should slugify numbers",
      locale: "en",
      input: "There were 10 cars.",
      output: "there-were-ten-cars"
    },
    {
      title: "should slugify numbers with decimals",
      locale: "en",
      input: "It was 74.6 degrees out.",
      output: "it-was-seventy-four-point-six-degrees-out"
    },
    {
      title: "should slugify numbers with groupings",
      locale: "en",
      input: "It weighed over 1,000 pounds",
      output: "it-weighed-over-one-thousand-pounds"
    },
    {
      title: "should slugify currency",
      locale: "en",
      input: "It costs $5,500.50.",
      output: "it-costs-five-thousand-five-hundred-dollars-and-fifty-cents"
    },
    {
      title: "should slugify German currency",
      locale: "de",
      input: "Es kostet 5.500,50 \u20AC",
      output: "es-kostet-funf-tausend-funf-hundert-euro-und-funfzig-cent"
    },
    {
      title: "should slugify Chinese currency",
      locale: "zh",
      input: "\u4EF7\u683C\u4E3A \xA55,500.50",
      output: "jie-ge-wei-wu-qian-wu-bai-yuan-wu-shi-fen"
    }
  ];

  for (const { title, locale, input, output } of cases) {
    void it(title, async () => {
      const { result } = await slugify(input, new Intl.Locale(locale));
      assert.strictEqual(result, output);
    });
  }
});
|
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// CommonJS/ES-module interop helpers used by the require() calls in this file.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines on `to` a getter for every own property of `from` that `to` does
// not already own, skipping the key named by `except`. Returns `to`.
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

// Wraps a required module in a fresh object that shares its prototype and
// forwards its own properties through getters.
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // "default" is pointed at the module itself when the importer is in node
  // compatibility mode, or when the module does not carry the "__esModule"
  // marker set by Babel-compatible ESM-to-CommonJS transforms.
  if (isNodeMode || !mod || !mod.__esModule) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};
|
|
24
|
-
var import_node_assert = __toESM(require("node:assert"), 1);
|
|
25
|
-
var import_node_test = require("node:test");
|
|
26
|
-
var import_editDistance = require("../editDistance.cjs");
|
|
27
|
-
var import_errorAlign = require("../errorAlign.cjs");
|
|
28
|
-
var import_utils = require("../utils.cjs");
|
|
29
|
-
// errorAlign suite: checks the operation types produced for a few
// reference/hypothesis pairs, and the compound flags on a partial match.
void (0, import_node_test.describe)("errorAlign", () => {
  // Aligns the two strings and returns only the operation type of each step.
  const opTypesOf = (ref, hyp) =>
    (0, import_errorAlign.errorAlign)(ref, hyp).map((alignment) => alignment.opType);

  void (0, import_node_test.test)("error alignment for an example including all substitution types", () => {
    const actual = opTypesOf(
      "This is a substitution test deleted.",
      "Inserted this is a contribution test."
    );
    const expected = ["INSERT", "MATCH", "MATCH", "MATCH", "SUBSTITUTE", "MATCH", "DELETE"];
    import_node_assert.default.deepStrictEqual(actual, expected);
  });

  void (0, import_node_test.test)("error alignment for full match", () => {
    const actual = opTypesOf("This is a test.", "This is a test.");
    const expected = ["MATCH", "MATCH", "MATCH", "MATCH"];
    import_node_assert.default.deepStrictEqual(actual, expected);
  });

  void (0, import_node_test.test)("error alignment for partial substitutions and insertions with compound markers", () => {
    const alignments = (0, import_errorAlign.errorAlign)("test", "testpartial");
    import_node_assert.default.strictEqual(alignments.length, 2);

    const [substitution, insertion] = alignments;
    import_node_assert.default.strictEqual(substitution?.opType, "SUBSTITUTE");
    import_node_assert.default.strictEqual(substitution.leftCompound, false);
    import_node_assert.default.strictEqual(substitution.rightCompound, true);
    import_node_assert.default.strictEqual(insertion?.opType, "INSERT");
    import_node_assert.default.strictEqual(insertion.leftCompound, true);
    import_node_assert.default.strictEqual(insertion.rightCompound, false);
  });
});
|
|
62
|
-
// categorizeChar must return the listed category number for each sample character.
void (0, import_node_test.test)("character categorization", () => {
  const expectedCategories = [
    ["<", 0],
    ["b", 1],
    ["a", 2],
    ["'", 3]
  ];
  for (const [char, category] of expectedCategories) {
    import_node_assert.default.strictEqual((0, import_utils.categorizeChar)(char), category);
  }
});
|
|
68
|
-
// Checks toString() of the first alignment for each of the four operation types.
void (0, import_node_test.test)("string representation of alignment objects", () => {
  const [deletion] = (0, import_errorAlign.errorAlign)("deleted", "");
  import_node_assert.default.strictEqual(
    deletion?.toString(),
    'Alignment(DELETE: "deleted")'
  );

  const [insertion] = (0, import_errorAlign.errorAlign)("", "inserted");
  import_node_assert.default.strictEqual(
    insertion?.toString(),
    'Alignment(INSERT: "inserted")'
  );

  const [substitution] = (0, import_errorAlign.errorAlign)("substitution", "substitutiontesting");
  import_node_assert.default.strictEqual(substitution?.leftCompound, false);
  import_node_assert.default.strictEqual(substitution.rightCompound, true);
  import_node_assert.default.strictEqual(
    substitution.toString(),
    'Alignment(SUBSTITUTE: "substitution"- -> "substitution")'
  );

  const [match] = (0, import_errorAlign.errorAlign)("test", "test");
  import_node_assert.default.strictEqual(
    match?.toString(),
    'Alignment(MATCH: "test" == "test")'
  );
});
|
|
95
|
-
// The last cell of the last row of the matrix for "kitten"/"sitting" must be 3.
void (0, import_node_test.test)("Levenshtein distance matrix computation", () => {
  const matrix = (0, import_editDistance.computeLevenshteinDistanceMatrix)("kitten", "sitting");
  const lastRow = matrix.at(-1);
  import_node_assert.default.strictEqual(lastRow.at(-1), 3);
});
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
import assert from "node:assert";
|
|
2
|
-
import { describe, test } from "node:test";
|
|
3
|
-
import { computeLevenshteinDistanceMatrix } from "../editDistance.js";
|
|
4
|
-
import { errorAlign } from "../errorAlign.js";
|
|
5
|
-
import { categorizeChar } from "../utils.js";
|
|
6
|
-
// errorAlign suite: checks the operation types produced for a few
// reference/hypothesis pairs, and the compound flags on a partial match.
void describe("errorAlign", () => {
  // Aligns the two strings and returns only the operation type of each step.
  const opTypesOf = (ref, hyp) =>
    errorAlign(ref, hyp).map((alignment) => alignment.opType);

  void test("error alignment for an example including all substitution types", () => {
    const actual = opTypesOf(
      "This is a substitution test deleted.",
      "Inserted this is a contribution test."
    );
    const expected = ["INSERT", "MATCH", "MATCH", "MATCH", "SUBSTITUTE", "MATCH", "DELETE"];
    assert.deepStrictEqual(actual, expected);
  });

  void test("error alignment for full match", () => {
    const actual = opTypesOf("This is a test.", "This is a test.");
    const expected = ["MATCH", "MATCH", "MATCH", "MATCH"];
    assert.deepStrictEqual(actual, expected);
  });

  void test("error alignment for partial substitutions and insertions with compound markers", () => {
    const alignments = errorAlign("test", "testpartial");
    assert.strictEqual(alignments.length, 2);

    const [substitution, insertion] = alignments;
    assert.strictEqual(substitution?.opType, "SUBSTITUTE");
    assert.strictEqual(substitution.leftCompound, false);
    assert.strictEqual(substitution.rightCompound, true);
    assert.strictEqual(insertion?.opType, "INSERT");
    assert.strictEqual(insertion.leftCompound, true);
    assert.strictEqual(insertion.rightCompound, false);
  });
});
|
|
39
|
-
// categorizeChar must return the listed category number for each sample character.
void test("character categorization", () => {
  const expectedCategories = [
    ["<", 0],
    ["b", 1],
    ["a", 2],
    ["'", 3]
  ];
  for (const [char, category] of expectedCategories) {
    assert.strictEqual(categorizeChar(char), category);
  }
});
|
|
45
|
-
// Checks toString() of the first alignment for each of the four operation types.
void test("string representation of alignment objects", () => {
  const [deletion] = errorAlign("deleted", "");
  assert.strictEqual(
    deletion?.toString(),
    'Alignment(DELETE: "deleted")'
  );

  const [insertion] = errorAlign("", "inserted");
  assert.strictEqual(
    insertion?.toString(),
    'Alignment(INSERT: "inserted")'
  );

  const [substitution] = errorAlign("substitution", "substitutiontesting");
  assert.strictEqual(substitution?.leftCompound, false);
  assert.strictEqual(substitution.rightCompound, true);
  assert.strictEqual(
    substitution.toString(),
    'Alignment(SUBSTITUTE: "substitution"- -> "substitution")'
  );

  const [match] = errorAlign("test", "test");
  assert.strictEqual(
    match?.toString(),
    'Alignment(MATCH: "test" == "test")'
  );
});
|
|
72
|
-
// The last cell of the last row of the matrix for "kitten"/"sitting" must be 3.
void test("Levenshtein distance matrix computation", () => {
  const matrix = computeLevenshteinDistanceMatrix("kitten", "sitting");
  const lastRow = matrix.at(-1);
  assert.strictEqual(lastRow.at(-1), 3);
});
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// CommonJS/ES-module interop helpers used by the require() calls in this file.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines on `to` a getter for every own property of `from` that `to` does
// not already own, skipping the key named by `except`. Returns `to`.
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

// Wraps a required module in a fresh object that shares its prototype and
// forwards its own properties through getters.
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // "default" is pointed at the module itself when the importer is in node
  // compatibility mode, or when the module does not carry the "__esModule"
  // marker set by Babel-compatible ESM-to-CommonJS transforms.
  if (isNodeMode || !mod || !mod.__esModule) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};
|
|
24
|
-
var import_node_assert = __toESM(require("node:assert"), 1);
|
|
25
|
-
var import_node_test = require("node:test");
|
|
26
|
-
var import_beamSearch = require("../beamSearch.cjs");
|
|
27
|
-
var import_editDistance = require("../editDistance.cjs");
|
|
28
|
-
var import_graphMetadata = require("../graphMetadata.cjs");
|
|
29
|
-
var import_native = require("../native.cjs");
|
|
30
|
-
var import_pathToAlignment = require("../pathToAlignment.cjs");
|
|
31
|
-
var import_utils = require("../utils.cjs");
|
|
32
|
-
// Parity suite: each case runs the TypeScript implementation and the native
// one on the same input and requires deeply equal results.
void (0, import_node_test.describe)("native C++ vs TypeScript implementations", () => {
  void (0, import_node_test.describe)("Levenshtein distance matrix", () => {
    // [test title, arguments passed to both implementations]
    const cases = [
      ["string input", ["kitten", "sitting"]],
      ["string array input", [["hello", "world"], ["hello", "there"]]],
      ["with backtrace", ["kitten", "sitting", true]]
    ];
    for (const [title, args] of cases) {
      void (0, import_node_test.test)(title, () => {
        const tsResult = (0, import_editDistance.computeLevenshteinDistanceMatrix)(...args);
        const nativeResult = (0, import_native.computeLevenshteinDistanceMatrix)(...args);
        import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
      });
    }
  });

  void (0, import_node_test.describe)("error align distance matrix", () => {
    // [test title, arguments passed to both implementations]
    const cases = [
      ["string input", ["test", "best"]],
      ["with backtrace", ["test", "best", true]]
    ];
    for (const [title, args] of cases) {
      void (0, import_node_test.test)(title, () => {
        const tsResult = (0, import_editDistance.computeErrorAlignDistanceMatrix)(...args);
        const nativeResult = (0, import_native.computeErrorAlignDistanceMatrix)(...args);
        import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
      });
    }
  });

  void (0, import_node_test.describe)("beam search", () => {
    // Tokenizes and normalizes both strings with the basic tokenizer and
    // normalizer, then builds the SubgraphMetadata both searches consume.
    function buildSubgraphMetadata(ref, hyp) {
      const tokenize = (0, import_utils.unpackRegexMatch)(import_utils.basicTokenizer);
      const normalize = (0, import_utils.ensureLengthPreservation)(import_utils.basicNormalizer);
      const refTokenMatches = tokenize(ref);
      const hypTokenMatches = tokenize(hyp);
      // Each token match is destructured for its first element before normalizing.
      const normalizeFirst = ([token]) => normalize(token);
      const refNorm = refTokenMatches.map(normalizeFirst);
      const hypNorm = hypTokenMatches.map(normalizeFirst);
      return new import_graphMetadata.SubgraphMetadata(
        ref,
        hyp,
        refTokenMatches,
        hypTokenMatches,
        refNorm,
        hypNorm
      );
    }

    // [test title, reference text, hypothesis text]
    const cases = [
      ["simple substitution", "hello", "jello"],
      [
        "multi-word alignment with all op types",
        "This is a substitution test deleted.",
        "Inserted this is a contribution test."
      ],
      ["identical strings", "test words", "test words"]
    ];
    for (const [title, ref, hyp] of cases) {
      void (0, import_node_test.test)(title, () => {
        const src = buildSubgraphMetadata(ref, hyp);
        const tsPath = (0, import_beamSearch.errorAlignBeamSearch)(src);
        const nativePath = (0, import_native.errorAlignBeamSearch)(src);
        const tsAlignments = (0, import_pathToAlignment.getAlignments)(tsPath);
        const nativeAlignments = (0, import_pathToAlignment.getAlignments)(nativePath);
        import_node_assert.default.deepStrictEqual(nativeAlignments, tsAlignments);
      });
    }
  });
});
|