@storyteller-platform/align 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/dist/align/__tests__/align.test.cjs +6 -5
  2. package/dist/align/__tests__/align.test.js +6 -5
  3. package/dist/align/align.cjs +133 -81
  4. package/dist/align/align.d.cts +1 -0
  5. package/dist/align/align.d.ts +1 -0
  6. package/dist/align/align.js +133 -81
  7. package/dist/align/getSentenceRanges.cjs +78 -149
  8. package/dist/align/getSentenceRanges.d.cts +1 -1
  9. package/dist/align/getSentenceRanges.d.ts +1 -1
  10. package/dist/align/getSentenceRanges.js +78 -149
  11. package/dist/align/slugify.cjs +16 -8
  12. package/dist/align/slugify.js +16 -8
  13. package/dist/errorAlign/__tests__/errorAlign.test.cjs +100 -0
  14. package/dist/errorAlign/__tests__/errorAlign.test.d.cts +2 -0
  15. package/dist/errorAlign/__tests__/errorAlign.test.d.ts +2 -0
  16. package/dist/errorAlign/__tests__/errorAlign.test.js +77 -0
  17. package/dist/errorAlign/__tests__/native.test.cjs +118 -0
  18. package/dist/errorAlign/__tests__/native.test.d.cts +2 -0
  19. package/dist/errorAlign/__tests__/native.test.d.ts +2 -0
  20. package/dist/errorAlign/__tests__/native.test.js +107 -0
  21. package/dist/errorAlign/backtraceGraph.cjs +298 -0
  22. package/dist/errorAlign/backtraceGraph.d.cts +103 -0
  23. package/dist/errorAlign/backtraceGraph.d.ts +103 -0
  24. package/dist/errorAlign/backtraceGraph.js +270 -0
  25. package/dist/errorAlign/beamSearch.cjs +302 -0
  26. package/dist/errorAlign/beamSearch.d.cts +53 -0
  27. package/dist/errorAlign/beamSearch.d.ts +53 -0
  28. package/dist/errorAlign/beamSearch.js +268 -0
  29. package/dist/errorAlign/core.cjs +33 -0
  30. package/dist/errorAlign/core.d.cts +5 -0
  31. package/dist/errorAlign/core.d.ts +5 -0
  32. package/dist/errorAlign/core.js +11 -0
  33. package/dist/errorAlign/editDistance.cjs +115 -0
  34. package/dist/errorAlign/editDistance.d.cts +46 -0
  35. package/dist/errorAlign/editDistance.d.ts +46 -0
  36. package/dist/errorAlign/editDistance.js +90 -0
  37. package/dist/errorAlign/errorAlign.cjs +159 -0
  38. package/dist/errorAlign/errorAlign.d.cts +15 -0
  39. package/dist/errorAlign/errorAlign.d.ts +15 -0
  40. package/dist/errorAlign/errorAlign.js +145 -0
  41. package/dist/errorAlign/graphMetadata.cjs +97 -0
  42. package/dist/errorAlign/graphMetadata.d.cts +44 -0
  43. package/dist/errorAlign/graphMetadata.d.ts +44 -0
  44. package/dist/errorAlign/graphMetadata.js +64 -0
  45. package/dist/errorAlign/hash.cjs +173 -0
  46. package/dist/errorAlign/hash.d.cts +28 -0
  47. package/dist/errorAlign/hash.d.ts +28 -0
  48. package/dist/errorAlign/hash.js +150 -0
  49. package/dist/errorAlign/native.cjs +60 -0
  50. package/dist/errorAlign/native.d.cts +18 -0
  51. package/dist/errorAlign/native.d.ts +18 -0
  52. package/dist/errorAlign/native.js +24 -0
  53. package/dist/errorAlign/node-gyp-build.d.cjs +1 -0
  54. package/dist/errorAlign/node-gyp-build.d.d.cts +3 -0
  55. package/dist/errorAlign/node-gyp-build.d.d.ts +3 -0
  56. package/dist/errorAlign/node-gyp-build.d.js +0 -0
  57. package/dist/errorAlign/pathToAlignment.cjs +122 -0
  58. package/dist/errorAlign/pathToAlignment.d.cts +11 -0
  59. package/dist/errorAlign/pathToAlignment.d.ts +11 -0
  60. package/dist/errorAlign/pathToAlignment.js +89 -0
  61. package/dist/errorAlign/utils.cjs +301 -0
  62. package/dist/errorAlign/utils.d.cts +107 -0
  63. package/dist/errorAlign/utils.d.ts +107 -0
  64. package/dist/errorAlign/utils.js +248 -0
  65. package/dist/index.d.cts +1 -0
  66. package/dist/index.d.ts +1 -0
  67. package/dist/markup/__tests__/markup.test.cjs +108 -81
  68. package/dist/markup/__tests__/markup.test.js +109 -82
  69. package/dist/markup/__tests__/parseDom.test.cjs +112 -0
  70. package/dist/markup/__tests__/parseDom.test.d.cts +2 -0
  71. package/dist/markup/__tests__/parseDom.test.d.ts +2 -0
  72. package/dist/markup/__tests__/parseDom.test.js +89 -0
  73. package/dist/markup/__tests__/serializeDom.test.cjs +120 -0
  74. package/dist/markup/__tests__/serializeDom.test.d.cts +2 -0
  75. package/dist/markup/__tests__/serializeDom.test.d.ts +2 -0
  76. package/dist/markup/__tests__/serializeDom.test.js +97 -0
  77. package/dist/markup/__tests__/transform.test.cjs +122 -0
  78. package/dist/markup/__tests__/transform.test.d.cts +2 -0
  79. package/dist/markup/__tests__/transform.test.d.ts +2 -0
  80. package/dist/markup/__tests__/transform.test.js +99 -0
  81. package/dist/markup/map.cjs +261 -0
  82. package/dist/markup/map.d.cts +50 -0
  83. package/dist/markup/map.d.ts +50 -0
  84. package/dist/markup/map.js +236 -0
  85. package/dist/markup/markup.cjs +23 -201
  86. package/dist/markup/markup.d.cts +5 -9
  87. package/dist/markup/markup.d.ts +5 -9
  88. package/dist/markup/markup.js +24 -203
  89. package/dist/markup/model.cjs +172 -0
  90. package/dist/markup/model.d.cts +57 -0
  91. package/dist/markup/model.d.ts +57 -0
  92. package/dist/markup/model.js +145 -0
  93. package/dist/markup/parseDom.cjs +59 -0
  94. package/dist/markup/parseDom.d.cts +7 -0
  95. package/dist/markup/parseDom.d.ts +7 -0
  96. package/dist/markup/parseDom.js +35 -0
  97. package/dist/markup/segmentation.cjs +11 -57
  98. package/dist/markup/segmentation.d.cts +6 -2
  99. package/dist/markup/segmentation.d.ts +6 -2
  100. package/dist/markup/segmentation.js +11 -58
  101. package/dist/markup/serializeDom.cjs +87 -0
  102. package/dist/markup/serializeDom.d.cts +7 -0
  103. package/dist/markup/serializeDom.d.ts +7 -0
  104. package/dist/markup/serializeDom.js +63 -0
  105. package/dist/markup/transform.cjs +92 -0
  106. package/dist/markup/transform.d.cts +11 -0
  107. package/dist/markup/transform.d.ts +11 -0
  108. package/dist/markup/transform.js +71 -0
  109. package/dist/types/node-gyp-build.d.cjs +1 -0
  110. package/dist/types/node-gyp-build.d.d.cts +3 -0
  111. package/dist/types/node-gyp-build.d.d.ts +3 -0
  112. package/dist/types/node-gyp-build.d.js +0 -0
  113. package/package.json +11 -4
@@ -1,27 +1,8 @@
1
1
  import "../chunk-BIEQXUOY.js";
2
- import { segmentText } from "@echogarden/text-segmentation";
2
+ import { enumerate } from "itertools";
3
3
  import { getTrackDuration } from "../common/ffmpeg.js";
4
- import { findNearestMatch } from "./fuzzy.js";
4
+ import { errorAlign } from "../errorAlign/errorAlign.js";
5
5
  import { slugify } from "./slugify.js";
6
- async function getSentencesWithOffsets(text) {
7
- const sentences = await segmentText(text).then(
8
- (r) => r.sentences.map((s) => s.text)
9
- );
10
- const sentencesWithOffsets = [];
11
- let lastSentenceEnd = 0;
12
- for (const sentence of sentences) {
13
- const sentenceStart = text.indexOf(sentence, lastSentenceEnd);
14
- if (sentenceStart > lastSentenceEnd) {
15
- sentencesWithOffsets.push(text.slice(lastSentenceEnd, sentenceStart));
16
- }
17
- sentencesWithOffsets.push(sentence);
18
- lastSentenceEnd = sentenceStart + sentence.length;
19
- }
20
- if (text.length > lastSentenceEnd) {
21
- sentencesWithOffsets.push(text.slice(lastSentenceEnd));
22
- }
23
- return sentencesWithOffsets;
24
- }
25
6
  function findStartTimestamp(matchStartIndex, transcription) {
26
7
  const entry = transcription.timeline.find(
27
8
  (entry2) => (entry2.endOffsetUtf16 ?? 0) > matchStartIndex
@@ -39,144 +20,92 @@ function findEndTimestamp(matchEndIndex, transcription) {
39
20
  );
40
21
  return (entry == null ? void 0 : entry.endTime) ?? null;
41
22
  }
42
- function getWindowIndexFromOffset(window, offset) {
43
- let index = 0;
44
- while (index < window.length - 1 && offset >= window[index].length) {
45
- offset -= window[index].length;
46
- index += 1;
23
+ function getAlignmentsForSentence(sentence, alignments) {
24
+ const result = [];
25
+ let sentenceIndex = 0;
26
+ for (const alignment of alignments) {
27
+ if (sentenceIndex === sentence.length) break;
28
+ if (alignment.opType !== "INSERT") {
29
+ sentenceIndex += alignment.ref.length + (sentenceIndex === 0 ? 0 : 1);
30
+ }
31
+ result.push(alignment);
47
32
  }
48
- return { index, offset };
33
+ return result;
49
34
  }
50
- function collapseWhitespace(input) {
51
- return input.replaceAll(/\s+/g, " ");
52
- }
53
- async function getSentenceRanges(startSentence, transcription, sentences, chapterOffset, locale, lastSentenceRange) {
35
+ async function getSentenceRanges(startSentence, endSentence, transcription, sentences, chapterOffset, chapterEndOffset, locale) {
54
36
  const sentenceRanges = [];
55
- const fullTranscriptionText = transcription.transcript;
56
- const transcriptionText = fullTranscriptionText.slice(chapterOffset);
57
- const transcriptionSentences = await getSentencesWithOffsets(
58
- transcriptionText
59
- ).then((s) => s.map((sentence) => sentence.toLowerCase()));
60
- let startSentenceEntry = startSentence;
61
- const sentenceEntries = [];
62
- for (let i = 0; i < sentences.length; i++) {
63
- const sentence = (await slugify(sentences[i], locale)).result;
64
- if (sentence.length <= 3) {
65
- if (i < startSentence) startSentenceEntry--;
66
- continue;
67
- }
68
- sentenceEntries.push([i, sentence]);
69
- }
70
- let transcriptionWindowIndex = 0;
71
- let transcriptionWindowOffset = 0;
72
- let lastGoodTranscriptionWindow = 0;
73
- let notFound = 0;
74
- let sentenceIndex = startSentenceEntry;
75
- let lastMatchEnd = chapterOffset;
76
- while (sentenceIndex < sentenceEntries.length) {
77
- const [sentenceId, sentence] = sentenceEntries[sentenceIndex];
78
- const transcriptionWindowList = transcriptionSentences.slice(
79
- transcriptionWindowIndex,
80
- transcriptionWindowIndex + 10
81
- );
82
- const { result: transcriptionWindow, mapping } = await slugify(
83
- transcriptionWindowList.join("-").slice(transcriptionWindowOffset),
84
- locale
85
- );
86
- const inverted = mapping.invert();
87
- const query = collapseWhitespace(sentence.trim()).toLowerCase();
88
- const firstMatch = findNearestMatch(
89
- query,
90
- transcriptionWindow,
91
- Math.max(Math.floor(0.25 * query.length), 1)
92
- );
93
- if (!firstMatch) {
94
- sentenceIndex += 1;
95
- notFound += 1;
96
- if (notFound === 3 || sentenceIndex === sentenceEntries.length) {
97
- transcriptionWindowIndex += 1;
98
- if (transcriptionWindowIndex == lastGoodTranscriptionWindow + 30) {
99
- transcriptionWindowIndex = lastGoodTranscriptionWindow;
100
- notFound = 0;
101
- continue;
102
- }
103
- sentenceIndex -= notFound;
104
- notFound = 0;
105
- }
106
- continue;
37
+ const fullTranscript = transcription.transcript;
38
+ const chapterTranscript = fullTranscript.slice(
39
+ chapterOffset,
40
+ chapterEndOffset
41
+ );
42
+ const { result: slugifiedChapterTranscript, mapping: transcriptMapping } = await slugify(chapterTranscript, locale);
43
+ let chapterTranscriptEndIndex = chapterOffset;
44
+ let chapterSentenceIndex = startSentence;
45
+ let slugifiedChapterTranscriptWindowStartIndex = 0;
46
+ while (chapterSentenceIndex < endSentence) {
47
+ const slugifiedChapterSentenceWindowList = [];
48
+ let sentenceWindowLength = 0;
49
+ let i = chapterSentenceIndex;
50
+ while (sentenceWindowLength < 5e3 && i < sentences.length) {
51
+ const { result: sentence } = await slugify(sentences[i], locale);
52
+ slugifiedChapterSentenceWindowList.push(sentence);
53
+ sentenceWindowLength += sentence.length;
54
+ i++;
107
55
  }
108
- const transcriptionOffset = transcriptionSentences.slice(0, transcriptionWindowIndex).join("").length;
109
- const matchStart = inverted.map(firstMatch.index, 1);
110
- const matchEnd = inverted.map(
111
- firstMatch.index + firstMatch.match.length,
112
- -1
56
+ const slugifiedChapterSentenceWindow = slugifiedChapterSentenceWindowList.join("-");
57
+ const slugifiedChapterTranscriptWindow = slugifiedChapterTranscript.slice(
58
+ slugifiedChapterTranscriptWindowStartIndex,
59
+ slugifiedChapterTranscriptWindowStartIndex + sentenceWindowLength * 1.2
113
60
  );
114
- const startResult = findStartTimestamp(
115
- matchStart + transcriptionOffset + transcriptionWindowOffset + chapterOffset,
116
- transcription
61
+ const alignments = errorAlign(
62
+ slugifiedChapterSentenceWindow,
63
+ slugifiedChapterTranscriptWindow
117
64
  );
118
- if (!startResult) {
119
- sentenceIndex += 1;
120
- continue;
121
- }
122
- let start = startResult.start;
123
- const audiofile = startResult.audiofile;
124
- const end = findEndTimestamp(
125
- matchEnd + transcriptionOffset + transcriptionWindowOffset + chapterOffset,
126
- transcription
127
- ) ?? startResult.end;
128
- if (sentenceRanges.length > 0) {
129
- const previousSentenceRange = sentenceRanges[sentenceRanges.length - 1];
130
- const previousAudiofile = previousSentenceRange.audiofile;
131
- if (audiofile === previousAudiofile) {
132
- if (previousSentenceRange.id === sentenceId - 1) {
133
- previousSentenceRange.end = start;
134
- }
135
- } else {
136
- if (previousSentenceRange.id === sentenceId - 1) {
137
- const lastTrackDuration = await getTrackDuration(previousAudiofile);
138
- previousSentenceRange.end = lastTrackDuration;
139
- start = 0;
140
- }
65
+ let alignmentIndex = 0;
66
+ let currentTranscriptWindowIndex = 0;
67
+ for (const [i2, slugifiedSentence] of enumerate(
68
+ slugifiedChapterSentenceWindowList
69
+ )) {
70
+ if (!slugifiedSentence) continue;
71
+ const sentenceAlignments = getAlignmentsForSentence(
72
+ slugifiedSentence,
73
+ alignments.slice(alignmentIndex)
74
+ );
75
+ const sentenceLengthInSlugifiedTranscript = sentenceAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
76
+ const start = findStartTimestamp(
77
+ chapterOffset + transcriptMapping.invert().map(
78
+ slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
79
+ 1
80
+ ),
81
+ transcription
82
+ );
83
+ chapterTranscriptEndIndex = chapterOffset + transcriptMapping.invert().map(
84
+ slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex + sentenceLengthInSlugifiedTranscript,
85
+ -1
86
+ );
87
+ const end = findEndTimestamp(chapterTranscriptEndIndex, transcription);
88
+ if (start && end !== null) {
89
+ sentenceRanges.push({
90
+ id: i2 + chapterSentenceIndex,
91
+ start: start.start,
92
+ audiofile: start.audiofile,
93
+ end
94
+ });
141
95
  }
142
- } else if (lastSentenceRange !== null) {
143
- if (audiofile === lastSentenceRange.audiofile) {
144
- if (sentenceId === 0) {
145
- lastSentenceRange.end = start;
146
- }
147
- } else {
148
- const lastTrackDuration = await getTrackDuration(
149
- lastSentenceRange.audiofile
150
- );
151
- lastSentenceRange.end = lastTrackDuration;
152
- if (sentenceId === 0) {
153
- start = 0;
154
- }
96
+ alignmentIndex += sentenceAlignments.length;
97
+ currentTranscriptWindowIndex += sentenceLengthInSlugifiedTranscript;
98
+ if (slugifiedChapterTranscriptWindow[currentTranscriptWindowIndex] === "-") {
99
+ currentTranscriptWindowIndex++;
155
100
  }
156
- } else if (sentenceId === 0) {
157
- start = 0;
158
101
  }
159
- sentenceRanges.push({
160
- id: sentenceId,
161
- start,
162
- end,
163
- audiofile
164
- });
165
- notFound = 0;
166
- lastMatchEnd = matchEnd + transcriptionOffset + transcriptionWindowOffset + chapterOffset;
167
- const windowIndexResult = getWindowIndexFromOffset(
168
- transcriptionWindowList,
169
- matchEnd + transcriptionWindowOffset
170
- );
171
- transcriptionWindowIndex += windowIndexResult.index;
172
- transcriptionWindowOffset = windowIndexResult.offset;
173
- lastGoodTranscriptionWindow = transcriptionWindowIndex;
174
- sentenceIndex += 1;
102
+ chapterSentenceIndex += slugifiedChapterSentenceWindowList.length;
103
+ slugifiedChapterTranscriptWindowStartIndex += currentTranscriptWindowIndex;
104
+ if (slugifiedChapterTranscript[slugifiedChapterTranscriptWindowStartIndex] === "-") {
105
+ slugifiedChapterTranscriptWindowStartIndex++;
106
+ }
175
107
  }
176
- return {
177
- sentenceRanges,
178
- transcriptionOffset: lastMatchEnd
179
- };
108
+ return { sentenceRanges, transcriptionOffset: chapterTranscriptEndIndex };
180
109
  }
181
110
  async function getLargestGap(trailing, leading) {
182
111
  const leadingGap = leading.start;
@@ -67,11 +67,15 @@ function createReplacers(locale) {
67
67
  const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${currencySymbols.group}`, "g"), "").replace(new RegExp(`\\${currencySymbols.decimal}`), ".");
68
68
  const number = parseFloat(normalizedNumeral);
69
69
  if (Number.isNaN(number)) return match[0];
70
- return (0, import_to_words.toWords)(number, {
71
- localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`,
72
- currency: true,
73
- doNotAddOnly: true
74
- });
70
+ try {
71
+ return (0, import_to_words.toWords)(number, {
72
+ localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`,
73
+ currency: true,
74
+ doNotAddOnly: true
75
+ });
76
+ } catch {
77
+ return match[0];
78
+ }
75
79
  }
76
80
  const numberFormat = new Intl.NumberFormat(locale);
77
81
  const numberParts = numberFormat.formatToParts(demoNumber);
@@ -103,9 +107,13 @@ function createReplacers(locale) {
103
107
  const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${numberSymbols.group}`, "g"), "").replace(new RegExp(`\\${numberSymbols.decimal}`), ".");
104
108
  const number = parseFloat(normalizedNumeral);
105
109
  if (Number.isNaN(number)) return match[0];
106
- return (0, import_to_words.toWords)(number, {
107
- localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`
108
- });
110
+ try {
111
+ return (0, import_to_words.toWords)(number, {
112
+ localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`
113
+ });
114
+ } catch {
115
+ return match[0];
116
+ }
109
117
  }
110
118
  return [
111
119
  [currencyRegex, currencyReplacer],
@@ -45,11 +45,15 @@ function createReplacers(locale) {
45
45
  const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${currencySymbols.group}`, "g"), "").replace(new RegExp(`\\${currencySymbols.decimal}`), ".");
46
46
  const number = parseFloat(normalizedNumeral);
47
47
  if (Number.isNaN(number)) return match[0];
48
- return toWords(number, {
49
- localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`,
50
- currency: true,
51
- doNotAddOnly: true
52
- });
48
+ try {
49
+ return toWords(number, {
50
+ localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`,
51
+ currency: true,
52
+ doNotAddOnly: true
53
+ });
54
+ } catch {
55
+ return match[0];
56
+ }
53
57
  }
54
58
  const numberFormat = new Intl.NumberFormat(locale);
55
59
  const numberParts = numberFormat.formatToParts(demoNumber);
@@ -81,9 +85,13 @@ function createReplacers(locale) {
81
85
  const normalizedNumeral = numeralMatch.replaceAll(new RegExp(`\\${numberSymbols.group}`, "g"), "").replace(new RegExp(`\\${numberSymbols.decimal}`), ".");
82
86
  const number = parseFloat(normalizedNumeral);
83
87
  if (Number.isNaN(number)) return match[0];
84
- return toWords(number, {
85
- localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`
86
- });
88
+ try {
89
+ return toWords(number, {
90
+ localeCode: `${maximizedLocale.language}-${maximizedLocale.region}`
91
+ });
92
+ } catch {
93
+ return match[0];
94
+ }
87
95
  }
88
96
  return [
89
97
  [currencyRegex, currencyReplacer],
@@ -0,0 +1,100 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __copyProps = (to, from, except, desc) => {
9
+ if (from && typeof from === "object" || typeof from === "function") {
10
+ for (let key of __getOwnPropNames(from))
11
+ if (!__hasOwnProp.call(to, key) && key !== except)
12
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
13
+ }
14
+ return to;
15
+ };
16
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
17
+ // If the importer is in node compatibility mode or this is not an ESM
18
+ // file that has been converted to a CommonJS file using a Babel-
19
+ // compatible transform (i.e. "__esModule" has not been set), then set
20
+ // "default" to the CommonJS "module.exports" for node compatibility.
21
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
22
+ mod
23
+ ));
24
+ var import_node_assert = __toESM(require("node:assert"), 1);
25
+ var import_node_test = require("node:test");
26
+ var import_editDistance = require("../editDistance.cjs");
27
+ var import_errorAlign = require("../errorAlign.cjs");
28
+ var import_utils = require("../utils.cjs");
29
+ void (0, import_node_test.describe)("errorAlign", () => {
30
+ void (0, import_node_test.test)("error alignment for an example including all substitution types", () => {
31
+ const ref = "This is a substitution test deleted.";
32
+ const hyp = "Inserted this is a contribution test.";
33
+ const alignments = (0, import_errorAlign.errorAlign)(ref, hyp);
34
+ import_node_assert.default.deepStrictEqual(
35
+ alignments.map((a) => a.opType),
36
+ ["INSERT", "MATCH", "MATCH", "MATCH", "SUBSTITUTE", "MATCH", "DELETE"]
37
+ );
38
+ });
39
+ void (0, import_node_test.test)("error alignment for full match", () => {
40
+ const ref = "This is a test.";
41
+ const hyp = "This is a test.";
42
+ const alignments = (0, import_errorAlign.errorAlign)(ref, hyp);
43
+ import_node_assert.default.deepStrictEqual(
44
+ alignments.map((a) => a.opType),
45
+ ["MATCH", "MATCH", "MATCH", "MATCH"]
46
+ );
47
+ });
48
+ void (0, import_node_test.test)("error alignment for partial substitutions and insertions with compound markers", () => {
49
+ var _a, _b;
50
+ const ref = "test";
51
+ const hyp = "testpartial";
52
+ const alignments = (0, import_errorAlign.errorAlign)(ref, hyp);
53
+ import_node_assert.default.strictEqual(alignments.length, 2);
54
+ import_node_assert.default.strictEqual((_a = alignments[0]) == null ? void 0 : _a.opType, "SUBSTITUTE");
55
+ import_node_assert.default.strictEqual(alignments[0].leftCompound, false);
56
+ import_node_assert.default.strictEqual(alignments[0].rightCompound, true);
57
+ import_node_assert.default.strictEqual((_b = alignments[1]) == null ? void 0 : _b.opType, "INSERT");
58
+ import_node_assert.default.strictEqual(alignments[1].leftCompound, true);
59
+ import_node_assert.default.strictEqual(alignments[1].rightCompound, false);
60
+ });
61
+ });
62
+ void (0, import_node_test.test)("character categorization", () => {
63
+ import_node_assert.default.strictEqual((0, import_utils.categorizeChar)("<"), 0);
64
+ import_node_assert.default.strictEqual((0, import_utils.categorizeChar)("b"), 1);
65
+ import_node_assert.default.strictEqual((0, import_utils.categorizeChar)("a"), 2);
66
+ import_node_assert.default.strictEqual((0, import_utils.categorizeChar)("'"), 3);
67
+ });
68
+ void (0, import_node_test.test)("string representation of alignment objects", () => {
69
+ const deleteAlignment = (0, import_errorAlign.errorAlign)("deleted", "")[0];
70
+ import_node_assert.default.strictEqual(
71
+ deleteAlignment == null ? void 0 : deleteAlignment.toString(),
72
+ 'Alignment(DELETE: "deleted")'
73
+ );
74
+ const insertAlignment = (0, import_errorAlign.errorAlign)("", "inserted")[0];
75
+ import_node_assert.default.strictEqual(
76
+ insertAlignment == null ? void 0 : insertAlignment.toString(),
77
+ 'Alignment(INSERT: "inserted")'
78
+ );
79
+ const substituteAlignment = (0, import_errorAlign.errorAlign)(
80
+ "substitution",
81
+ "substitutiontesting"
82
+ )[0];
83
+ import_node_assert.default.strictEqual(substituteAlignment == null ? void 0 : substituteAlignment.leftCompound, false);
84
+ import_node_assert.default.strictEqual(substituteAlignment.rightCompound, true);
85
+ import_node_assert.default.strictEqual(
86
+ substituteAlignment.toString(),
87
+ 'Alignment(SUBSTITUTE: "substitution"- -> "substitution")'
88
+ );
89
+ const matchAlignment = (0, import_errorAlign.errorAlign)("test", "test")[0];
90
+ import_node_assert.default.strictEqual(
91
+ matchAlignment == null ? void 0 : matchAlignment.toString(),
92
+ 'Alignment(MATCH: "test" == "test")'
93
+ );
94
+ });
95
+ void (0, import_node_test.test)("Levenshtein distance matrix computation", () => {
96
+ const ref = "kitten";
97
+ const hyp = "sitting";
98
+ const distanceMatrix = (0, import_editDistance.computeLevenshteinDistanceMatrix)(ref, hyp);
99
+ import_node_assert.default.strictEqual(distanceMatrix.at(-1).at(-1), 3);
100
+ });
@@ -0,0 +1,2 @@
1
+
2
+ export { }
@@ -0,0 +1,2 @@
1
+
2
+ export { }
@@ -0,0 +1,77 @@
1
+ import assert from "node:assert";
2
+ import { describe, test } from "node:test";
3
+ import { computeLevenshteinDistanceMatrix } from "../editDistance.js";
4
+ import { errorAlign } from "../errorAlign.js";
5
+ import { categorizeChar } from "../utils.js";
6
+ void describe("errorAlign", () => {
7
+ void test("error alignment for an example including all substitution types", () => {
8
+ const ref = "This is a substitution test deleted.";
9
+ const hyp = "Inserted this is a contribution test.";
10
+ const alignments = errorAlign(ref, hyp);
11
+ assert.deepStrictEqual(
12
+ alignments.map((a) => a.opType),
13
+ ["INSERT", "MATCH", "MATCH", "MATCH", "SUBSTITUTE", "MATCH", "DELETE"]
14
+ );
15
+ });
16
+ void test("error alignment for full match", () => {
17
+ const ref = "This is a test.";
18
+ const hyp = "This is a test.";
19
+ const alignments = errorAlign(ref, hyp);
20
+ assert.deepStrictEqual(
21
+ alignments.map((a) => a.opType),
22
+ ["MATCH", "MATCH", "MATCH", "MATCH"]
23
+ );
24
+ });
25
+ void test("error alignment for partial substitutions and insertions with compound markers", () => {
26
+ var _a, _b;
27
+ const ref = "test";
28
+ const hyp = "testpartial";
29
+ const alignments = errorAlign(ref, hyp);
30
+ assert.strictEqual(alignments.length, 2);
31
+ assert.strictEqual((_a = alignments[0]) == null ? void 0 : _a.opType, "SUBSTITUTE");
32
+ assert.strictEqual(alignments[0].leftCompound, false);
33
+ assert.strictEqual(alignments[0].rightCompound, true);
34
+ assert.strictEqual((_b = alignments[1]) == null ? void 0 : _b.opType, "INSERT");
35
+ assert.strictEqual(alignments[1].leftCompound, true);
36
+ assert.strictEqual(alignments[1].rightCompound, false);
37
+ });
38
+ });
39
+ void test("character categorization", () => {
40
+ assert.strictEqual(categorizeChar("<"), 0);
41
+ assert.strictEqual(categorizeChar("b"), 1);
42
+ assert.strictEqual(categorizeChar("a"), 2);
43
+ assert.strictEqual(categorizeChar("'"), 3);
44
+ });
45
+ void test("string representation of alignment objects", () => {
46
+ const deleteAlignment = errorAlign("deleted", "")[0];
47
+ assert.strictEqual(
48
+ deleteAlignment == null ? void 0 : deleteAlignment.toString(),
49
+ 'Alignment(DELETE: "deleted")'
50
+ );
51
+ const insertAlignment = errorAlign("", "inserted")[0];
52
+ assert.strictEqual(
53
+ insertAlignment == null ? void 0 : insertAlignment.toString(),
54
+ 'Alignment(INSERT: "inserted")'
55
+ );
56
+ const substituteAlignment = errorAlign(
57
+ "substitution",
58
+ "substitutiontesting"
59
+ )[0];
60
+ assert.strictEqual(substituteAlignment == null ? void 0 : substituteAlignment.leftCompound, false);
61
+ assert.strictEqual(substituteAlignment.rightCompound, true);
62
+ assert.strictEqual(
63
+ substituteAlignment.toString(),
64
+ 'Alignment(SUBSTITUTE: "substitution"- -> "substitution")'
65
+ );
66
+ const matchAlignment = errorAlign("test", "test")[0];
67
+ assert.strictEqual(
68
+ matchAlignment == null ? void 0 : matchAlignment.toString(),
69
+ 'Alignment(MATCH: "test" == "test")'
70
+ );
71
+ });
72
+ void test("Levenshtein distance matrix computation", () => {
73
+ const ref = "kitten";
74
+ const hyp = "sitting";
75
+ const distanceMatrix = computeLevenshteinDistanceMatrix(ref, hyp);
76
+ assert.strictEqual(distanceMatrix.at(-1).at(-1), 3);
77
+ });
@@ -0,0 +1,118 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __copyProps = (to, from, except, desc) => {
9
+ if (from && typeof from === "object" || typeof from === "function") {
10
+ for (let key of __getOwnPropNames(from))
11
+ if (!__hasOwnProp.call(to, key) && key !== except)
12
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
13
+ }
14
+ return to;
15
+ };
16
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
17
+ // If the importer is in node compatibility mode or this is not an ESM
18
+ // file that has been converted to a CommonJS file using a Babel-
19
+ // compatible transform (i.e. "__esModule" has not been set), then set
20
+ // "default" to the CommonJS "module.exports" for node compatibility.
21
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
22
+ mod
23
+ ));
24
+ var import_node_assert = __toESM(require("node:assert"), 1);
25
+ var import_node_test = require("node:test");
26
+ var import_beamSearch = require("../beamSearch.cjs");
27
+ var import_editDistance = require("../editDistance.cjs");
28
+ var import_graphMetadata = require("../graphMetadata.cjs");
29
+ var import_native = require("../native.cjs");
30
+ var import_pathToAlignment = require("../pathToAlignment.cjs");
31
+ var import_utils = require("../utils.cjs");
32
+ void (0, import_node_test.describe)("native C++ vs TypeScript implementations", () => {
33
+ void (0, import_node_test.describe)("Levenshtein distance matrix", () => {
34
+ void (0, import_node_test.test)("string input", () => {
35
+ const ref = "kitten";
36
+ const hyp = "sitting";
37
+ const tsResult = (0, import_editDistance.computeLevenshteinDistanceMatrix)(ref, hyp);
38
+ const nativeResult = (0, import_native.computeLevenshteinDistanceMatrix)(ref, hyp);
39
+ import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
40
+ });
41
+ void (0, import_node_test.test)("string array input", () => {
42
+ const ref = ["hello", "world"];
43
+ const hyp = ["hello", "there"];
44
+ const tsResult = (0, import_editDistance.computeLevenshteinDistanceMatrix)(ref, hyp);
45
+ const nativeResult = (0, import_native.computeLevenshteinDistanceMatrix)(ref, hyp);
46
+ import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
47
+ });
48
+ void (0, import_node_test.test)("with backtrace", () => {
49
+ const ref = "kitten";
50
+ const hyp = "sitting";
51
+ const tsResult = (0, import_editDistance.computeLevenshteinDistanceMatrix)(ref, hyp, true);
52
+ const nativeResult = (0, import_native.computeLevenshteinDistanceMatrix)(ref, hyp, true);
53
+ import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
54
+ });
55
+ });
56
+ void (0, import_node_test.describe)("error align distance matrix", () => {
57
+ void (0, import_node_test.test)("string input", () => {
58
+ const ref = "test";
59
+ const hyp = "best";
60
+ const tsResult = (0, import_editDistance.computeErrorAlignDistanceMatrix)(ref, hyp);
61
+ const nativeResult = (0, import_native.computeErrorAlignDistanceMatrix)(ref, hyp);
62
+ import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
63
+ });
64
+ void (0, import_node_test.test)("with backtrace", () => {
65
+ const ref = "test";
66
+ const hyp = "best";
67
+ const tsResult = (0, import_editDistance.computeErrorAlignDistanceMatrix)(ref, hyp, true);
68
+ const nativeResult = (0, import_native.computeErrorAlignDistanceMatrix)(ref, hyp, true);
69
+ import_node_assert.default.deepStrictEqual(nativeResult, tsResult);
70
+ });
71
+ });
72
+ void (0, import_node_test.describe)("beam search", () => {
73
+ function buildSubgraphMetadata(ref, hyp) {
74
+ const tokenizer = import_utils.basicTokenizer;
75
+ const normalizer = import_utils.basicNormalizer;
76
+ const unpackedTokenizer = (0, import_utils.unpackRegexMatch)(tokenizer);
77
+ const ensuredNormalizer = (0, import_utils.ensureLengthPreservation)(normalizer);
78
+ const refTokenMatches = unpackedTokenizer(ref);
79
+ const hypTokenMatches = unpackedTokenizer(hyp);
80
+ const refNorm = refTokenMatches.map(([r]) => ensuredNormalizer(r));
81
+ const hypNorm = hypTokenMatches.map(([h]) => ensuredNormalizer(h));
82
+ return new import_graphMetadata.SubgraphMetadata(
83
+ ref,
84
+ hyp,
85
+ refTokenMatches,
86
+ hypTokenMatches,
87
+ refNorm,
88
+ hypNorm
89
+ );
90
+ }
91
+ void (0, import_node_test.test)("simple substitution", () => {
92
+ const src = buildSubgraphMetadata("hello", "jello");
93
+ const tsPath = (0, import_beamSearch.errorAlignBeamSearch)(src);
94
+ const nativePath = (0, import_native.errorAlignBeamSearch)(src);
95
+ const tsAlignments = (0, import_pathToAlignment.getAlignments)(tsPath);
96
+ const nativeAlignments = (0, import_pathToAlignment.getAlignments)(nativePath);
97
+ import_node_assert.default.deepStrictEqual(nativeAlignments, tsAlignments);
98
+ });
99
+ void (0, import_node_test.test)("multi-word alignment with all op types", () => {
100
+ const ref = "This is a substitution test deleted.";
101
+ const hyp = "Inserted this is a contribution test.";
102
+ const src = buildSubgraphMetadata(ref, hyp);
103
+ const tsPath = (0, import_beamSearch.errorAlignBeamSearch)(src);
104
+ const nativePath = (0, import_native.errorAlignBeamSearch)(src);
105
+ const tsAlignments = (0, import_pathToAlignment.getAlignments)(tsPath);
106
+ const nativeAlignments = (0, import_pathToAlignment.getAlignments)(nativePath);
107
+ import_node_assert.default.deepStrictEqual(nativeAlignments, tsAlignments);
108
+ });
109
+ void (0, import_node_test.test)("identical strings", () => {
110
+ const src = buildSubgraphMetadata("test words", "test words");
111
+ const tsPath = (0, import_beamSearch.errorAlignBeamSearch)(src);
112
+ const nativePath = (0, import_native.errorAlignBeamSearch)(src);
113
+ const tsAlignments = (0, import_pathToAlignment.getAlignments)(tsPath);
114
+ const nativeAlignments = (0, import_pathToAlignment.getAlignments)(nativePath);
115
+ import_node_assert.default.deepStrictEqual(nativeAlignments, tsAlignments);
116
+ });
117
+ });
118
+ });
@@ -0,0 +1,2 @@
1
+
2
+ export { }
@@ -0,0 +1,2 @@
1
+
2
+ export { }