@storyteller-platform/align 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/LICENSE.txt +21 -0
  2. package/README.md +3 -0
  3. package/dist/align/align.cjs +525 -0
  4. package/dist/align/align.d.cts +58 -0
  5. package/dist/align/align.d.ts +58 -0
  6. package/dist/align/align.js +458 -0
  7. package/dist/align/fuzzy.cjs +164 -0
  8. package/dist/align/fuzzy.d.cts +6 -0
  9. package/dist/align/fuzzy.d.ts +6 -0
  10. package/dist/align/fuzzy.js +141 -0
  11. package/dist/align/getSentenceRanges.cjs +304 -0
  12. package/dist/align/getSentenceRanges.d.cts +31 -0
  13. package/dist/align/getSentenceRanges.d.ts +31 -0
  14. package/dist/align/getSentenceRanges.js +277 -0
  15. package/dist/align/parse.cjs +63 -0
  16. package/dist/align/parse.d.cts +30 -0
  17. package/dist/align/parse.d.ts +30 -0
  18. package/dist/align/parse.js +51 -0
  19. package/dist/chunk-BIEQXUOY.js +50 -0
  20. package/dist/cli/bin.cjs +368 -0
  21. package/dist/cli/bin.d.cts +1 -0
  22. package/dist/cli/bin.d.ts +1 -0
  23. package/dist/cli/bin.js +319 -0
  24. package/dist/common/ffmpeg.cjs +232 -0
  25. package/dist/common/ffmpeg.d.cts +33 -0
  26. package/dist/common/ffmpeg.d.ts +33 -0
  27. package/dist/common/ffmpeg.js +196 -0
  28. package/dist/common/logging.cjs +45 -0
  29. package/dist/common/logging.d.cts +5 -0
  30. package/dist/common/logging.d.ts +5 -0
  31. package/dist/common/logging.js +12 -0
  32. package/dist/common/parse.cjs +73 -0
  33. package/dist/common/parse.d.cts +28 -0
  34. package/dist/common/parse.d.ts +28 -0
  35. package/dist/common/parse.js +56 -0
  36. package/dist/common/shell.cjs +30 -0
  37. package/dist/common/shell.d.cts +3 -0
  38. package/dist/common/shell.d.ts +3 -0
  39. package/dist/common/shell.js +7 -0
  40. package/dist/index.cjs +37 -0
  41. package/dist/index.d.cts +12 -0
  42. package/dist/index.d.ts +12 -0
  43. package/dist/index.js +11 -0
  44. package/dist/markup/__tests__/markup.test.cjs +464 -0
  45. package/dist/markup/__tests__/markup.test.d.cts +2 -0
  46. package/dist/markup/__tests__/markup.test.d.ts +2 -0
  47. package/dist/markup/__tests__/markup.test.js +441 -0
  48. package/dist/markup/markup.cjs +316 -0
  49. package/dist/markup/markup.d.cts +24 -0
  50. package/dist/markup/markup.d.ts +24 -0
  51. package/dist/markup/markup.js +254 -0
  52. package/dist/markup/parse.cjs +55 -0
  53. package/dist/markup/parse.d.cts +17 -0
  54. package/dist/markup/parse.d.ts +17 -0
  55. package/dist/markup/parse.js +43 -0
  56. package/dist/markup/segmentation.cjs +87 -0
  57. package/dist/markup/segmentation.d.cts +8 -0
  58. package/dist/markup/segmentation.d.ts +8 -0
  59. package/dist/markup/segmentation.js +67 -0
  60. package/dist/markup/semantics.cjs +79 -0
  61. package/dist/markup/semantics.d.cts +6 -0
  62. package/dist/markup/semantics.d.ts +6 -0
  63. package/dist/markup/semantics.js +53 -0
  64. package/dist/process/AudioEncoding.cjs +16 -0
  65. package/dist/process/AudioEncoding.d.cts +8 -0
  66. package/dist/process/AudioEncoding.d.ts +8 -0
  67. package/dist/process/AudioEncoding.js +0 -0
  68. package/dist/process/__tests__/processAudiobook.test.cjs +232 -0
  69. package/dist/process/__tests__/processAudiobook.test.d.cts +2 -0
  70. package/dist/process/__tests__/processAudiobook.test.d.ts +2 -0
  71. package/dist/process/__tests__/processAudiobook.test.js +209 -0
  72. package/dist/process/mime.cjs +43 -0
  73. package/dist/process/mime.d.cts +3 -0
  74. package/dist/process/mime.d.ts +3 -0
  75. package/dist/process/mime.js +24 -0
  76. package/dist/process/parse.cjs +84 -0
  77. package/dist/process/parse.d.cts +28 -0
  78. package/dist/process/parse.d.ts +28 -0
  79. package/dist/process/parse.js +73 -0
  80. package/dist/process/processAudiobook.cjs +220 -0
  81. package/dist/process/processAudiobook.d.cts +24 -0
  82. package/dist/process/processAudiobook.d.ts +24 -0
  83. package/dist/process/processAudiobook.js +166 -0
  84. package/dist/process/ranges.cjs +203 -0
  85. package/dist/process/ranges.d.cts +15 -0
  86. package/dist/process/ranges.d.ts +15 -0
  87. package/dist/process/ranges.js +137 -0
  88. package/dist/transcribe/parse.cjs +149 -0
  89. package/dist/transcribe/parse.d.cts +114 -0
  90. package/dist/transcribe/parse.d.ts +114 -0
  91. package/dist/transcribe/parse.js +143 -0
  92. package/dist/transcribe/transcribe.cjs +400 -0
  93. package/dist/transcribe/transcribe.d.cts +41 -0
  94. package/dist/transcribe/transcribe.d.ts +41 -0
  95. package/dist/transcribe/transcribe.js +330 -0
  96. package/package.json +96 -0
@@ -0,0 +1,458 @@
1
+ import {
2
+ __callDispose,
3
+ __using
4
+ } from "../chunk-BIEQXUOY.js";
5
+ import { copyFile, mkdir, readFile, readdir, writeFile } from "node:fs/promises";
6
+ import { dirname as autoDirname, join as autoJoin } from "node:path";
7
+ import { basename, dirname, parse, relative } from "node:path/posix";
8
+ import memoize from "memoize";
9
+ import { isAudioFile, lookupAudioMime } from "@storyteller-platform/audiobook";
10
+ import {
11
+ Epub
12
+ } from "@storyteller-platform/epub";
13
+ import {
14
+ createAggregator,
15
+ createTiming
16
+ } from "@storyteller-platform/ghost-story";
17
+ import { getTrackDuration } from "../common/ffmpeg.js";
18
+ import { getXhtmlSegmentation } from "../markup/segmentation.js";
19
+ import { findNearestMatch } from "./fuzzy.js";
20
+ import {
21
+ expandEmptySentenceRanges,
22
+ getChapterDuration,
23
+ getSentenceRanges,
24
+ interpolateSentenceRanges
25
+ } from "./getSentenceRanges.js";
26
+ const OFFSET_SEARCH_WINDOW_SIZE = 5e3;
27
/**
 * Copies the EPUB at `input` to `output`, aligns the copy against the
 * transcriptions found in `transcriptionsDir`, and returns the timing
 * aggregator produced by `Aligner.alignBook`.
 *
 * @param input Path of the source EPUB; it is copied, not modified.
 * @param output Path the copy is written to and then opened for alignment.
 * @param transcriptionsDir Directory whose `.json` files are parsed as transcriptions.
 * @param audiobookDir Directory whose audio files (per `isAudioFile`) are used.
 * @param options Reads `granularity`, `primaryLocale`, `logger`, `onProgress`
 *   and `reportsPath` (when set, the alignment report is written there as JSON).
 * @returns The value resolved by `aligner.alignBook(...)`.
 */
async function align(input, output, transcriptionsDir, audiobookDir, options) {
  // Disposal stack for the lowered `using` declaration below.
  var _stack = [];
  try {
    await copyFile(input, output);

    // NOTE(review): audio files and transcriptions are later paired purely by
    // array index, so both listings must come back in matching order - confirm.
    const audiobookFiles = [];
    for (const filename of await readdir(audiobookDir)) {
      if (isAudioFile(filename)) {
        audiobookFiles.push(autoJoin(audiobookDir, filename));
      }
    }

    const epub = __using(_stack, await Epub.from(output));

    const transcriptionNames = await readdir(transcriptionsDir);
    const transcriptionPaths = transcriptionNames
      .filter((filename) => filename.endsWith(".json"))
      .map((filename) => autoJoin(transcriptionsDir, filename));
    const transcriptionContents = await Promise.all(
      transcriptionPaths.map((filepath) => readFile(filepath, { encoding: "utf-8" }))
    );
    const transcriptions = transcriptionContents.map((content) => JSON.parse(content));

    const aligner = new Aligner(
      epub,
      audiobookFiles,
      transcriptions,
      options.granularity,
      options.primaryLocale,
      options.logger
    );
    const timing = await aligner.alignBook(options.onProgress);

    if (options.reportsPath) {
      const serializedReport = JSON.stringify(aligner.report, null, 2);
      await mkdir(autoDirname(options.reportsPath), { recursive: true });
      await writeFile(options.reportsPath, serializedReport, { encoding: "utf-8" });
    }
    return timing;
  } catch (_) {
    // Recorded for __callDispose, which presumably rethrows after disposing
    // (helper lives in another chunk - confirm).
    var _error = _, _hasError = true;
  } finally {
    __callDispose(_stack, _error, _hasError);
  }
}
72
+ class Aligner {
73
+ constructor(epub, audiofiles, transcriptions, granularity, languageOverride, logger) {
74
+ this.epub = epub;
75
+ this.languageOverride = languageOverride;
76
+ this.logger = logger;
77
+ this.transcription = concatTranscriptions(transcriptions, audiofiles);
78
+ this.getChapterSentences = memoize(this.getChapterSentences.bind(this));
79
+ this.granularity = granularity ?? "sentence";
80
+ }
81
+ transcription;
82
+ totalDuration = 0;
83
+ alignedChapters = [];
84
+ timing = createAggregator();
85
+ granularity;
86
+ report = {
87
+ chapters: []
88
+ };
89
+ findBestOffset(epubSentences, transcriptionText, lastMatchOffset) {
90
+ let i = 0;
91
+ while (i < transcriptionText.length) {
92
+ let startSentence = 0;
93
+ const proposedStartIndex = (lastMatchOffset + i) % transcriptionText.length;
94
+ const proposedEndIndex = (proposedStartIndex + OFFSET_SEARCH_WINDOW_SIZE) % transcriptionText.length;
95
+ const wrapping = proposedEndIndex < proposedStartIndex;
96
+ let endIndex = wrapping ? transcriptionText.length : proposedEndIndex;
97
+ let startIndex = proposedStartIndex;
98
+ let startSeen = null;
99
+ let endSeen = null;
100
+ for (const aligned of this.alignedChapters) {
101
+ if (startSeen !== null && endSeen === aligned.startOffset) {
102
+ endSeen = aligned.endOffset;
103
+ } else {
104
+ startSeen = aligned.startOffset;
105
+ endSeen = aligned.endOffset;
106
+ }
107
+ if (startIndex >= startSeen && startIndex < endSeen) {
108
+ startIndex = endSeen;
109
+ }
110
+ if (endIndex >= startSeen && endIndex <= endSeen) {
111
+ endIndex = startSeen;
112
+ }
113
+ }
114
+ if (startIndex < endIndex) {
115
+ const transcriptionTextSlice = transcriptionText.slice(
116
+ startIndex,
117
+ endIndex
118
+ );
119
+ while (startSentence < epubSentences.length) {
120
+ const queryString = epubSentences.slice(startSentence, startSentence + 6).join(" ");
121
+ const firstMatch = findNearestMatch(
122
+ queryString.toLowerCase(),
123
+ transcriptionTextSlice.toLowerCase(),
124
+ Math.max(Math.floor(0.1 * queryString.length), 1)
125
+ );
126
+ if (firstMatch) {
127
+ return {
128
+ startSentence,
129
+ transcriptionOffset: (firstMatch.index + startIndex) % transcriptionText.length
130
+ };
131
+ }
132
+ startSentence += 3;
133
+ }
134
+ }
135
+ if (wrapping) {
136
+ i += transcriptionText.length - proposedStartIndex;
137
+ } else {
138
+ i += Math.floor(OFFSET_SEARCH_WINDOW_SIZE / 2);
139
+ }
140
+ }
141
+ return { startSentence: 0, transcriptionOffset: null };
142
+ }
143
+ async getChapterSentences(chapterId) {
144
+ const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
145
+ const segmentation = await getXhtmlSegmentation(
146
+ Epub.getXhtmlBody(chapterXml),
147
+ {
148
+ primaryLocale: this.languageOverride ?? await this.epub.getLanguage()
149
+ }
150
+ );
151
+ return segmentation.sentences.map((s) => s.text);
152
+ }
153
+ async writeAlignedChapter(alignedChapter) {
154
+ const { chapter, sentenceRanges, xml } = alignedChapter;
155
+ const audiofiles = Array.from(
156
+ new Set(sentenceRanges.map(({ audiofile }) => audiofile))
157
+ );
158
+ await Promise.all(
159
+ audiofiles.map(async (audiofile) => {
160
+ const { name, base } = parse(audiofile);
161
+ const id = `audio_${name}`;
162
+ const manifest = await this.epub.getManifest();
163
+ if (id in manifest) return;
164
+ const epubAudioFilename = `Audio/${base}`;
165
+ const duration = await getTrackDuration(audiofile);
166
+ this.totalDuration += duration;
167
+ const audio = await readFile(audiofile);
168
+ const mediaType = lookupAudioMime(base) ?? void 0;
169
+ await this.epub.addManifestItem(
170
+ {
171
+ id,
172
+ href: epubAudioFilename,
173
+ mediaType
174
+ },
175
+ audio
176
+ );
177
+ })
178
+ );
179
+ const { name: chapterStem } = parse(chapter.href);
180
+ const mediaOverlayId = `${chapter.id}_overlay`;
181
+ await this.epub.addManifestItem(
182
+ {
183
+ id: mediaOverlayId,
184
+ href: `MediaOverlays/${chapterStem}.smil`,
185
+ mediaType: "application/smil+xml"
186
+ },
187
+ createMediaOverlay(chapter, sentenceRanges),
188
+ "xml"
189
+ );
190
+ await this.epub.updateManifestItem(chapter.id, {
191
+ ...chapter,
192
+ mediaOverlay: mediaOverlayId
193
+ });
194
+ await this.epub.writeXhtmlItemContents(chapter.id, xml);
195
+ const chapterDuration = getChapterDuration(sentenceRanges);
196
+ await this.epub.addMetadata({
197
+ type: "meta",
198
+ properties: {
199
+ property: "media:duration",
200
+ refines: `#${mediaOverlayId}`
201
+ },
202
+ value: Epub.formatSmilDuration(chapterDuration)
203
+ });
204
+ }
205
+ addChapterReport(chapter, chapterSentences, sentenceRanges, startSentence, transcriptionOffset) {
206
+ this.report.chapters.push({
207
+ href: chapter.href,
208
+ transcriptionOffset,
209
+ transcriptionContext: {
210
+ before: this.transcription.transcript.slice(
211
+ Math.max(0, transcriptionOffset - 30),
212
+ transcriptionOffset
213
+ ),
214
+ after: this.transcription.transcript.slice(
215
+ transcriptionOffset,
216
+ Math.min(
217
+ transcriptionOffset + 30,
218
+ this.transcription.transcript.length - 1
219
+ )
220
+ )
221
+ },
222
+ firstMatchedSentenceId: startSentence,
223
+ firstMatchedSentenceContext: {
224
+ prevSentence: chapterSentences[startSentence - 1] ?? null,
225
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
226
+ matchedSentence: chapterSentences[startSentence],
227
+ nextSentence: chapterSentences[startSentence + 1] ?? null
228
+ },
229
+ audioFiles: sentenceRanges.reduce((acc, range) => {
230
+ const existing = acc.find(
231
+ (context) => context.filepath === range.audiofile
232
+ );
233
+ if (existing) {
234
+ existing.end = range.end;
235
+ return acc;
236
+ }
237
+ acc.push({
238
+ filepath: range.audiofile,
239
+ start: range.start,
240
+ end: range.end
241
+ });
242
+ return acc;
243
+ }, [])
244
+ });
245
+ }
246
+ async alignChapter(startSentence, chapterId, transcriptionOffset, lastSentenceRange) {
247
+ const timing = createTiming();
248
+ timing.start("read contents");
249
+ const manifest = await this.epub.getManifest();
250
+ const chapter = manifest[chapterId];
251
+ if (!chapter)
252
+ throw new Error(
253
+ `Failed to align chapter: could not find chapter with id ${chapterId} in manifest`
254
+ );
255
+ const chapterXml = await this.epub.readXhtmlItemContents(chapterId);
256
+ timing.end("read contents");
257
+ timing.start("split to sentences");
258
+ const chapterSentences = await this.getChapterSentences(chapterId);
259
+ timing.end("split to sentences");
260
+ timing.start("align sentences");
261
+ const { sentenceRanges, transcriptionOffset: endTranscriptionOffset } = await getSentenceRanges(
262
+ startSentence,
263
+ this.transcription,
264
+ chapterSentences,
265
+ transcriptionOffset,
266
+ lastSentenceRange
267
+ );
268
+ timing.end("align sentences");
269
+ timing.start("expand ranges");
270
+ const interpolated = await interpolateSentenceRanges(
271
+ sentenceRanges,
272
+ lastSentenceRange
273
+ );
274
+ const expanded = expandEmptySentenceRanges(interpolated);
275
+ timing.end("expand ranges");
276
+ const storytellerStylesheetUrl = relative(
277
+ dirname(chapter.href),
278
+ "Styles/storyteller-readaloud.css"
279
+ );
280
+ Epub.addLinkToXhtmlHead(chapterXml, {
281
+ rel: "stylesheet",
282
+ href: storytellerStylesheetUrl,
283
+ type: "text/css"
284
+ });
285
+ this.alignedChapters.push({
286
+ chapter,
287
+ xml: chapterXml,
288
+ sentenceRanges: expanded,
289
+ startOffset: transcriptionOffset,
290
+ endOffset: endTranscriptionOffset
291
+ });
292
+ this.addChapterReport(
293
+ chapter,
294
+ chapterSentences,
295
+ expanded,
296
+ startSentence,
297
+ transcriptionOffset
298
+ );
299
+ return {
300
+ lastSentenceRange: expanded[expanded.length - 1] ?? null,
301
+ endTranscriptionOffset,
302
+ timing
303
+ };
304
+ }
305
+ async alignBook(onProgress) {
306
+ var _a, _b, _c, _d, _e, _f;
307
+ this.timing.setMetadata(
308
+ "language",
309
+ ((_a = this.languageOverride ?? await this.epub.getLanguage()) == null ? void 0 : _a.language) ?? "unknown"
310
+ );
311
+ this.timing.setMetadata("granularity", this.granularity);
312
+ const spine = await this.epub.getSpineItems();
313
+ const transcriptionText = this.transcription.transcript;
314
+ let lastTranscriptionOffset = 0;
315
+ let lastSentenceRange = null;
316
+ for (let index = 0; index < spine.length; index++) {
317
+ onProgress == null ? void 0 : onProgress(index / spine.length);
318
+ const spineItem = spine[index];
319
+ (_b = this.logger) == null ? void 0 : _b.info(
320
+ `Aligning epub item #${index} : ${basename(spineItem.href)}`
321
+ );
322
+ const chapterId = spineItem.id;
323
+ const chapterSentences = await this.getChapterSentences(chapterId);
324
+ if (chapterSentences.length === 0) {
325
+ (_c = this.logger) == null ? void 0 : _c.info(`Chapter #${index} has no text; skipping`);
326
+ continue;
327
+ }
328
+ if (chapterSentences.length < 2 && // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
329
+ chapterSentences[0].split(" ").length < 4) {
330
+ (_d = this.logger) == null ? void 0 : _d.info(
331
+ `Chapter #${index} is fewer than four words; skipping`
332
+ );
333
+ continue;
334
+ }
335
+ const { startSentence, transcriptionOffset } = this.findBestOffset(
336
+ chapterSentences,
337
+ transcriptionText,
338
+ lastTranscriptionOffset
339
+ );
340
+ if (transcriptionOffset === null) {
341
+ (_e = this.logger) == null ? void 0 : _e.info(
342
+ `Couldn't find matching transcription for chapter #${index}`
343
+ );
344
+ continue;
345
+ }
346
+ (_f = this.logger) == null ? void 0 : _f.info(
347
+ `Chapter #${index} best matches transcription at offset ${transcriptionOffset}, starting at sentence ${startSentence}`
348
+ );
349
+ const result = await this.alignChapter(
350
+ startSentence,
351
+ chapterId,
352
+ transcriptionOffset,
353
+ lastSentenceRange
354
+ );
355
+ lastSentenceRange = result.lastSentenceRange;
356
+ lastTranscriptionOffset = result.endTranscriptionOffset;
357
+ this.timing.add(result.timing.summary());
358
+ }
359
+ if (lastSentenceRange) {
360
+ lastSentenceRange.end = await getTrackDuration(
361
+ lastSentenceRange.audiofile
362
+ );
363
+ }
364
+ for (const alignedChapter of this.alignedChapters) {
365
+ await this.writeAlignedChapter(alignedChapter);
366
+ }
367
+ await this.epub.addMetadata({
368
+ type: "meta",
369
+ properties: { property: "media:duration" },
370
+ value: Epub.formatSmilDuration(this.totalDuration)
371
+ });
372
+ await this.epub.addMetadata({
373
+ type: "meta",
374
+ properties: { property: "media:active-class" },
375
+ value: "-epub-media-overlay-active"
376
+ });
377
+ await this.epub.addManifestItem(
378
+ {
379
+ id: "storyteller_readaloud_styles",
380
+ href: "Styles/storyteller-readaloud.css",
381
+ mediaType: "text/css"
382
+ },
383
+ `
384
+ .-epub-media-overlay-active {
385
+ background-color: #ffb;
386
+ }
387
+ `,
388
+ "utf-8"
389
+ );
390
+ return this.timing;
391
+ }
392
+ }
393
+ function createMediaOverlay(chapter, sentenceRanges) {
394
+ return [
395
+ Epub.createXmlElement(
396
+ "smil",
397
+ {
398
+ xmlns: "https://www.w3.org/ns/SMIL",
399
+ "xmlns:epub": "http://www.idpf.org/2007/ops",
400
+ version: "3.0"
401
+ },
402
+ [
403
+ Epub.createXmlElement("body", {}, [
404
+ Epub.createXmlElement(
405
+ "seq",
406
+ {
407
+ id: `${chapter.id}_overlay`,
408
+ "epub:textref": `../${chapter.href}`,
409
+ "epub:type": "chapter"
410
+ },
411
+ sentenceRanges.map(
412
+ (sentenceRange) => Epub.createXmlElement(
413
+ "par",
414
+ {
415
+ id: `${chapter.id}-s${sentenceRange.id}`
416
+ },
417
+ [
418
+ Epub.createXmlElement("text", {
419
+ src: `../${chapter.href}#${chapter.id}-s${sentenceRange.id}`
420
+ }),
421
+ Epub.createXmlElement("audio", {
422
+ src: `../Audio/${basename(sentenceRange.audiofile)}`,
423
+ clipBegin: `${sentenceRange.start.toFixed(3)}s`,
424
+ clipEnd: `${sentenceRange.end.toFixed(3)}s`
425
+ })
426
+ ]
427
+ )
428
+ )
429
+ )
430
+ ])
431
+ ]
432
+ )
433
+ ];
434
+ }
435
+ function concatTranscriptions(transcriptions, audiofiles) {
436
+ return transcriptions.reduce(
437
+ (acc, transcription, index) => ({
438
+ ...acc,
439
+ transcript: acc.transcript + " " + transcription.transcript,
440
+ timeline: [
441
+ ...acc.timeline,
442
+ ...transcription.timeline.map((entry) => ({
443
+ ...entry,
444
+ startOffsetUtf16: (entry.startOffsetUtf16 ?? 0) + acc.transcript.length + 1,
445
+ endOffsetUtf16: (entry.endOffsetUtf16 ?? 0) + acc.transcript.length + 1,
446
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
447
+ audiofile: audiofiles[index]
448
+ }))
449
+ ]
450
+ }),
451
+ { transcript: "", timeline: [] }
452
+ );
453
+ }
454
+ export {
455
+ Aligner,
456
+ align,
457
+ concatTranscriptions
458
+ };
@@ -0,0 +1,164 @@
1
+ "use strict";
2
// Bundler-generated CommonJS interop helpers. Short aliases for the
// reflection primitives used below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines each key of `all` on `target` as an enumerable getter, so exported
// bindings are resolved lazily at access time.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies the own properties of `from` onto `to` as getters, skipping keys
// that `to` already has and the `except` key. Enumerability mirrors the
// source descriptor (enumerable when no descriptor is found).
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Wraps a module namespace in a fresh object flagged with `__esModule: true`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// Export table for this module; `findNearestMatch` is referenced through a
// getter because the function is declared further down.
var fuzzy_exports = {};
__export(fuzzy_exports, {
  findNearestMatch: () => findNearestMatch
});
module.exports = __toCommonJS(fuzzy_exports);
24
+ function findNearestMatch(needle, haystack, maxDist) {
25
+ let nearest = null;
26
+ for (const match of levenshteinNgram(needle, haystack, maxDist)) {
27
+ if (!nearest || match.dist < nearest.dist) {
28
+ nearest = match;
29
+ }
30
+ }
31
+ return nearest && {
32
+ match: haystack.slice(nearest.start, nearest.end),
33
+ index: nearest.start
34
+ };
35
+ }
36
+ function reverse(str, from = str.length, to = 0) {
37
+ let reversed = "";
38
+ for (let i = from - 1; i >= to; i--) {
39
+ reversed = reversed + str[i];
40
+ }
41
+ return reversed;
42
+ }
43
+ function* searchExact(subsequence, sequence, startIndex = 0, endIndex = sequence.length) {
44
+ let index = sequence.indexOf(subsequence, startIndex);
45
+ while (index !== -1 && index + subsequence.length < endIndex) {
46
+ yield index;
47
+ index = sequence.indexOf(subsequence, index + 1);
48
+ }
49
+ }
50
+ function expand(subsequence, sequence, maxDist) {
51
+ const subsequenceLength = subsequence.length;
52
+ if (subsequenceLength === 0) {
53
+ return { index: 0, score: 0 };
54
+ }
55
+ const scores = Array.from({ length: subsequenceLength + 1 }).map((_, i) => i);
56
+ let minScore = subsequenceLength;
57
+ let minScoreIndex = -1;
58
+ let maxGoodScore = maxDist;
59
+ let newNeedleIndexRangeStart = 0;
60
+ let newNeedleIndexRangeEnd = subsequenceLength - 1;
61
+ for (let sequenceIndex = 0; sequenceIndex < sequence.length; sequenceIndex++) {
62
+ const char = sequence[sequenceIndex];
63
+ const needleIndexRangeStart = newNeedleIndexRangeStart;
64
+ const needleIndexRangeEnd = Math.min(
65
+ subsequenceLength,
66
+ newNeedleIndexRangeEnd + 1
67
+ );
68
+ let a = sequenceIndex;
69
+ let c = a + 1;
70
+ if (c <= maxGoodScore) {
71
+ newNeedleIndexRangeStart = 0;
72
+ newNeedleIndexRangeEnd = 0;
73
+ } else {
74
+ newNeedleIndexRangeStart = null;
75
+ newNeedleIndexRangeEnd = -1;
76
+ }
77
+ for (let subsequenceIndex = needleIndexRangeStart; subsequenceIndex < needleIndexRangeEnd; subsequenceIndex++) {
78
+ const b = scores[subsequenceIndex];
79
+ c = scores[subsequenceIndex] = Math.min(
80
+ a + (char === subsequence[subsequenceIndex] ? 0 : 1),
81
+ b + 1,
82
+ c + 1
83
+ );
84
+ a = b;
85
+ if (c <= maxGoodScore) {
86
+ if (newNeedleIndexRangeStart === null) {
87
+ newNeedleIndexRangeStart = subsequenceIndex;
88
+ }
89
+ newNeedleIndexRangeEnd = Math.max(
90
+ newNeedleIndexRangeEnd,
91
+ subsequenceIndex + 1 + (maxGoodScore - c)
92
+ );
93
+ }
94
+ }
95
+ if (newNeedleIndexRangeStart === null) {
96
+ break;
97
+ }
98
+ if (needleIndexRangeEnd === subsequenceLength && c <= minScore) {
99
+ minScore = c;
100
+ minScoreIndex = sequenceIndex;
101
+ if (minScore < maxGoodScore) {
102
+ maxGoodScore = minScore;
103
+ }
104
+ }
105
+ }
106
+ return minScore <= maxDist ? { score: minScore, index: minScoreIndex + 1 } : null;
107
+ }
108
/**
 * Yields approximate occurrences of `subsequence` in `sequence`.
 *
 * The subsequence is cut into consecutive n-grams of
 * `round(length / (maxDist + 1))` characters. Every exact occurrence of an
 * n-gram is used as an anchor and grown to the right and then to the left
 * with `expand`; the left side only gets the budget the right side left over.
 *
 * @param subsequence Text to look for.
 * @param sequence Text to search in.
 * @param maxDist Total distance budget for a candidate.
 * @yields `{ start, end, dist }` - bounds of the candidate in `sequence` and
 *   the sum of the left and right distances.
 * @throws Error when the computed n-gram length is 0.
 */
function* levenshteinNgram(subsequence, sequence, maxDist) {
  const needleLength = subsequence.length;
  const haystackLength = sequence.length;
  const ngramLength = Math.round(needleLength / (maxDist + 1));
  if (ngramLength === 0) {
    throw new Error("The subsequence length must be greater than maxDist");
  }

  const lastNgramStart = needleLength - ngramLength;
  let ngramStart = 0;
  while (ngramStart <= lastNgramStart) {
    const ngramEnd = ngramStart + ngramLength;
    const ngram = subsequence.slice(ngramStart, ngramEnd);
    // Text before the n-gram is matched right-to-left, hence reversed.
    const needleBeforeReversed = reverse(subsequence, ngramStart);
    const needleAfter = subsequence.slice(ngramEnd);
    // The n-gram can only sit where the rest of the needle still fits,
    // allowing `maxDist` characters of slack on either side.
    const searchFrom = Math.max(0, ngramStart - maxDist);
    const searchTo = Math.min(
      haystackLength,
      haystackLength - needleLength + ngramEnd + maxDist
    );

    for (const anchor of searchExact(ngram, sequence, searchFrom, searchTo)) {
      const right = expand(
        needleAfter,
        sequence.slice(
          anchor + ngramLength,
          anchor - ngramStart + needleLength + maxDist
        ),
        maxDist
      );
      if (right === null) continue;

      const leftBudget = maxDist - right.score;
      const left = expand(
        needleBeforeReversed,
        reverse(
          sequence,
          anchor,
          Math.max(0, anchor - ngramStart - leftBudget)
        ),
        leftBudget
      );
      if (left === null) continue;

      // The distance is the plain sum; it is not weighted by position.
      yield {
        start: anchor - left.index,
        end: anchor + ngramLength + right.index,
        dist: left.score + right.score
      };
    }
    ngramStart += ngramLength;
  }
}
161
+ // Annotate the CommonJS export names for ESM import in node:
162
+ 0 && (module.exports = {
163
+ findNearestMatch
164
+ });
@@ -0,0 +1,6 @@
1
/**
 * Finds the closest approximate occurrence of `needle` inside `haystack`.
 *
 * @param needle Text to look for.
 * @param haystack Text to search in.
 * @param maxDist Largest edit distance a candidate may have.
 * @returns The matched slice of `haystack` and the index where it starts,
 *   or `null` when no candidate is within `maxDist`.
 */
declare function findNearestMatch(needle: string, haystack: string, maxDist: number): {
    match: string;
    index: number;
} | null;
5
+
6
+ export { findNearestMatch };
@@ -0,0 +1,6 @@
1
/**
 * Finds the closest approximate occurrence of `needle` inside `haystack`.
 *
 * @param needle Text to look for.
 * @param haystack Text to search in.
 * @param maxDist Largest edit distance a candidate may have.
 * @returns The matched slice of `haystack` and the index where it starts,
 *   or `null` when no candidate is within `maxDist`.
 */
declare function findNearestMatch(needle: string, haystack: string, maxDist: number): {
    match: string;
    index: number;
} | null;
5
+
6
+ export { findNearestMatch };