kordoc 2.5.2 → 2.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/README.md +450 -431
  2. package/dist/chunk-4NWDJGAU.js +18955 -0
  3. package/dist/chunk-4NWDJGAU.js.map +1 -0
  4. package/dist/{chunk-NKKLA43G.js → chunk-4SK2PDMQ.js} +14 -3
  5. package/dist/chunk-4SK2PDMQ.js.map +1 -0
  6. package/dist/{chunk-24NKFRB4.js → chunk-LB7E2KDF.js} +14 -3
  7. package/dist/chunk-LB7E2KDF.js.map +1 -0
  8. package/dist/chunk-MEPHGCPQ.js +266 -0
  9. package/dist/chunk-MEPHGCPQ.js.map +1 -0
  10. package/dist/chunk-MOL7MDBG.js +0 -0
  11. package/dist/chunk-MUOQXDZ4.cjs.map +1 -1
  12. package/dist/{chunk-Z65OQP3H.cjs → chunk-Y476BOHI.cjs} +14 -3
  13. package/dist/chunk-Y476BOHI.cjs.map +1 -0
  14. package/dist/cli.js +60 -5
  15. package/dist/cli.js.map +1 -1
  16. package/dist/{detect-I7YIS4Q6.js → detect-RI2MQ33K.js} +6 -2
  17. package/dist/formula-3AQUUIRF.js +1151 -0
  18. package/dist/formula-3AQUUIRF.js.map +1 -0
  19. package/dist/formula-JCNF43NE.js +1153 -0
  20. package/dist/formula-JCNF43NE.js.map +1 -0
  21. package/dist/formula-XGG6ZP42.cjs +1151 -0
  22. package/dist/formula-XGG6ZP42.cjs.map +1 -0
  23. package/dist/index.cjs +14706 -450
  24. package/dist/index.cjs.map +1 -1
  25. package/dist/index.d.cts +73 -2
  26. package/dist/index.d.ts +73 -2
  27. package/dist/index.js +14583 -327
  28. package/dist/index.js.map +1 -1
  29. package/dist/mcp.js +5 -5
  30. package/dist/mcp.js.map +1 -1
  31. package/dist/page-range-3C7UGGEK.cjs.map +1 -1
  32. package/dist/page-range-737B4EZW.js +0 -0
  33. package/dist/{parser-AZYPOKAR.cjs → parser-7OFQ67QL.cjs} +160 -28
  34. package/dist/parser-7OFQ67QL.cjs.map +1 -0
  35. package/dist/{parser-BQKQOIJU.js → parser-DJCMY3OO.js} +136 -4
  36. package/dist/parser-DJCMY3OO.js.map +1 -0
  37. package/dist/{parser-FRROKAB7.js → parser-QMMQ7Y7R.js} +136 -4
  38. package/dist/parser-QMMQ7Y7R.js.map +1 -0
  39. package/dist/{provider-WPIYEALY.js → provider-2SEHU2FM.js} +1 -1
  40. package/dist/provider-2SEHU2FM.js.map +1 -0
  41. package/dist/{provider-7H4CPZYS.js → provider-AKROB7WQ.js} +1 -1
  42. package/dist/provider-AKROB7WQ.js.map +1 -0
  43. package/dist/{provider-YN2SSK4X.cjs → provider-SNONEZNW.cjs} +1 -1
  44. package/dist/provider-SNONEZNW.cjs.map +1 -0
  45. package/dist/setup-57FB3LSP.js +0 -0
  46. package/dist/{watch-ZJAUWUAE.js → watch-FVMVIZ5Q.js} +4 -4
  47. package/dist/watch-FVMVIZ5Q.js.map +1 -0
  48. package/package.json +98 -77
  49. package/dist/chunk-24NKFRB4.js.map +0 -1
  50. package/dist/chunk-2CAJSQK5.js +0 -5052
  51. package/dist/chunk-2CAJSQK5.js.map +0 -1
  52. package/dist/chunk-M3E3C5GS.js +0 -59
  53. package/dist/chunk-M3E3C5GS.js.map +0 -1
  54. package/dist/chunk-NKKLA43G.js.map +0 -1
  55. package/dist/chunk-Z65OQP3H.cjs.map +0 -1
  56. package/dist/parser-AZYPOKAR.cjs.map +0 -1
  57. package/dist/parser-BQKQOIJU.js.map +0 -1
  58. package/dist/parser-FRROKAB7.js.map +0 -1
  59. package/dist/provider-7H4CPZYS.js.map +0 -1
  60. package/dist/provider-WPIYEALY.js.map +0 -1
  61. package/dist/provider-YN2SSK4X.cjs.map +0 -1
  62. package/dist/watch-ZJAUWUAE.js.map +0 -1
  63. package/dist/{detect-I7YIS4Q6.js.map → detect-RI2MQ33K.js.map} +0 -0
@@ -0,0 +1,1153 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/pdf/formula/models.ts
4
+ import { createHash } from "crypto";
5
+ import { createReadStream } from "fs";
6
+ import { mkdir, stat, unlink, rename } from "fs/promises";
7
+ import { createWriteStream } from "fs";
8
+ import { homedir } from "os";
9
+ import { join, dirname } from "path";
10
+ import { pipeline } from "stream/promises";
11
+ import { Readable } from "stream";
12
+ var MFD_MODEL = { // Download spec for the formula-detection ONNX model (loaded as the `mfd` session by FormulaPipeline.create).
13
+ name: "Pix2Text MFD", // Human-readable name; used as the prefix of download error messages.
14
+ filename: "mfd.onnx", // File name inside the directory returned by getFormulaModelsDir().
15
+ url: "https://huggingface.co/breezedeus/pix2text-mfd/resolve/main/mfd-v20240618.onnx",
16
+ sha256: "51a8854743b17ae654729af8db82a630c1ccfa06debf4856c8b28055f87d02c1", // Expected hex digest; a download or cached file is rejected when it differs.
17
+ sizeMb: 42 // NOTE(review): presumably the approximate download size in MB; not read anywhere in the visible code.
18
+ };
19
+ var MFR_ENCODER_MODEL = { // Download spec for the formula-recognition encoder (same fields as MFD_MODEL).
20
+ name: "Pix2Text MFR encoder",
21
+ filename: "encoder_model.onnx",
22
+ url: "https://huggingface.co/breezedeus/pix2text-mfr/resolve/main/encoder_model.onnx",
23
+ sha256: "bd8d5c322792e9ec45793af5569e9748f82a3d728a9e00213dbfc56c1486f37d",
24
+ sizeMb: 87
25
+ };
26
+ var MFR_DECODER_MODEL = { // Download spec for the formula-recognition decoder (same fields as MFD_MODEL).
27
+ name: "Pix2Text MFR decoder",
28
+ filename: "decoder_model.onnx",
29
+ url: "https://huggingface.co/breezedeus/pix2text-mfr/resolve/main/decoder_model.onnx",
30
+ sha256: "fd0f92d7a012f3dae41e1ac79421aea0ea888b5a66cb3f9a004e424f82f3daed",
31
+ sizeMb: 30
32
+ };
33
+ var MFR_TOKENIZER = { // Download spec for the tokenizer JSON that FormulaPipeline.create parses with JSON.parse.
34
+ name: "Pix2Text MFR tokenizer",
35
+ filename: "tokenizer.json",
36
+ url: "https://huggingface.co/breezedeus/pix2text-mfr/resolve/main/tokenizer.json",
37
+ sha256: "3e2ab757277d22639bec28c9d7972e352d3d1dba223051fa674002dc5ab64df3",
38
+ sizeMb: 1
39
+ };
40
+ var ALL_FORMULA_MODELS = [ // Every file the status check and ensureFormulaModels() iterate over, in this order.
41
+ MFD_MODEL,
42
+ MFR_ENCODER_MODEL,
43
+ MFR_DECODER_MODEL,
44
+ MFR_TOKENIZER
45
+ ];
46
/**
 * Resolve the directory that holds the Pix2Text formula model files.
 *
 * When the KORDOC_MODEL_CACHE environment variable is set to a non-blank
 * value the models live in `<KORDOC_MODEL_CACHE>/pix2text`; otherwise they
 * live in `~/.cache/kordoc/models/pix2text`. The directory is not created here.
 *
 * @returns {string} Path of the model directory.
 */
function getFormulaModelsDir() {
  // Trim once and use the trimmed value for both the blank check and the
  // path, so stray whitespace around the variable (e.g. from a .env file)
  // cannot end up inside a directory name.
  const cacheRoot = process.env.KORDOC_MODEL_CACHE?.trim();
  if (cacheRoot) {
    return join(cacheRoot, "pix2text");
  }
  return join(homedir(), ".cache", "kordoc", "models", "pix2text");
}
53
/**
 * Report the on-disk state of every formula model.
 *
 * For each entry of ALL_FORMULA_MODELS the result holds
 * `{ spec, localPath, exists, verified }`, plus `invalidReason` when the file
 * is present but its SHA-256 differs or could not be computed. A missing,
 * non-regular or empty file is reported as `exists: false`.
 *
 * @returns {Promise<object[]>} One status record per model, in list order.
 */
async function getFormulaModelStatus() {
  const modelsDir = getFormulaModelsDir();
  const statuses = [];
  for (const spec of ALL_FORMULA_MODELS) {
    const localPath = join(modelsDir, spec.filename);

    let present;
    try {
      const info = await stat(localPath);
      present = info.isFile() && info.size > 0;
    } catch {
      // stat() failing (typically ENOENT) simply means "not there".
      present = false;
    }
    if (!present) {
      statuses.push({ spec, localPath, exists: false, verified: false });
      continue;
    }

    // The file exists: decide whether it is the expected content.
    let invalidReason;
    try {
      const digest = await sha256OfFile(localPath);
      if (digest !== spec.sha256) {
        invalidReason = `SHA256 mismatch: expected ${spec.sha256}, got ${digest}`;
      }
    } catch (err) {
      invalidReason = `SHA compute failed: ${err.message}`;
    }

    if (invalidReason === undefined) {
      statuses.push({ spec, localPath, exists: true, verified: true });
    } else {
      statuses.push({ spec, localPath, exists: true, verified: false, invalidReason });
    }
  }
  return statuses;
}
94
/**
 * Make sure every formula model file is present and matches its SHA-256,
 * downloading the ones that are missing or invalid.
 *
 * Models are handled one after another in ALL_FORMULA_MODELS order. A model
 * that is already valid only triggers a "skip" progress event.
 *
 * @param {(event: object) => void} [onProgress] Optional progress callback.
 * @returns {Promise<void>}
 */
async function ensureFormulaModels(onProgress) {
  const modelsDir = getFormulaModelsDir();
  await mkdir(modelsDir, { recursive: true });

  for (const spec of ALL_FORMULA_MODELS) {
    const target = join(modelsDir, spec.filename);
    const upToDate = await isExistingValid(target, spec.sha256);

    if (upToDate) {
      onProgress?.({
        spec,
        downloaded: 0,
        total: null,
        phase: "skip",
        message: "\uC774\uBBF8 \uC874\uC7AC + SHA \uC77C\uCE58"
      });
    } else {
      // Best effort: drop a stale or corrupt copy; a missing file is fine.
      try {
        await unlink(target);
      } catch {
      }
      await downloadToFile(spec, target, onProgress);
    }
  }
}
116
/**
 * Make sure one model file is present and valid, downloading it if needed.
 *
 * @param {object} spec Model spec (`filename`, `sha256`, `url`, `name`).
 * @param {(event: object) => void} [onProgress] Optional progress callback;
 *   receives a "skip" event when the existing file is already valid.
 * @returns {Promise<void>}
 */
async function ensureSingleModel(spec, onProgress) {
  const modelsDir = getFormulaModelsDir();
  await mkdir(modelsDir, { recursive: true });
  const target = join(modelsDir, spec.filename);

  const upToDate = await isExistingValid(target, spec.sha256);
  if (!upToDate) {
    // Best effort: remove whatever invalid copy is there before re-downloading.
    try {
      await unlink(target);
    } catch {
    }
    await downloadToFile(spec, target, onProgress);
    return;
  }
  onProgress?.({ spec, downloaded: 0, total: null, phase: "skip" });
}
130
/**
 * Tell whether `localPath` is a non-empty regular file whose SHA-256 equals
 * `sha256Expected`. Any filesystem or hashing error counts as "not valid".
 *
 * @param {string} localPath File to check.
 * @param {string} sha256Expected Expected lowercase hex digest.
 * @returns {Promise<boolean>}
 */
async function isExistingValid(localPath, sha256Expected) {
  try {
    const info = await stat(localPath);
    const usable = info.isFile() && info.size !== 0;
    if (!usable) {
      return false;
    }
    const digest = await sha256OfFile(localPath);
    return digest === sha256Expected;
  } catch {
    // Missing file, unreadable file, hashing failure: all mean "re-download".
    return false;
  }
}
144
/**
 * Download one model to `localPath`, verifying its SHA-256 before it becomes
 * visible under the final name.
 *
 * The payload is streamed to `<localPath>.part`, hashed, and only renamed to
 * `localPath` when the digest equals `spec.sha256`. On any failure the
 * partial file is removed.
 *
 * Progress events carry `{ spec, downloaded, total, phase }` where phase is
 * "download" (per chunk), "verify", then "done". `total` is the parsed
 * Content-Length, or null when the header is absent or not a number.
 *
 * @param {object} spec Model spec (`name`, `url`, `sha256`).
 * @param {string} localPath Final destination path.
 * @param {(event: object) => void} [onProgress] Optional progress callback.
 * @returns {Promise<void>}
 * @throws {Error} On a non-OK HTTP response, a streaming failure, a hashing
 *   failure or a digest mismatch. Wrapped errors keep the original as `cause`.
 */
async function downloadToFile(spec, localPath, onProgress) {
  const partPath = `${localPath}.part`;
  // Best-effort removal of the partial file; it may not exist yet.
  const discardPart = async () => {
    try {
      await unlink(partPath);
    } catch {
    }
  };

  await mkdir(dirname(localPath), { recursive: true });
  const resp = await fetch(spec.url, {
    headers: {
      // The HF CDN occasionally answers 403 when no User-Agent is sent.
      "User-Agent": "kordoc-formula-ocr/1.0 (+https://github.com/chrisryugj/kordoc)"
    }
  });
  if (!resp.ok || !resp.body) {
    // The error body is never read; cancel it so the connection is released.
    try {
      await resp.body?.cancel();
    } catch {
    }
    throw new Error(
      `${spec.name} \uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: HTTP ${resp.status} ${resp.statusText} (${spec.url})`
    );
  }

  const lenHeader = resp.headers.get("content-length");
  const parsedLen = lenHeader ? Number.parseInt(lenHeader, 10) : Number.NaN;
  // A malformed header must not leak NaN into progress events.
  const total = Number.isNaN(parsedLen) ? null : parsedLen;

  let downloaded = 0;
  try {
    const reader = Readable.fromWeb(resp.body);
    reader.on("data", (chunk) => {
      downloaded += chunk.length;
      onProgress?.({
        spec,
        downloaded,
        total,
        phase: "download"
      });
    });
    // The write stream is created inside the pipeline call so that
    // pipeline() owns it and destroys it on every failure path.
    await pipeline(reader, createWriteStream(partPath));
  } catch (e) {
    await discardPart();
    throw new Error(`${spec.name} \uC2A4\uD2B8\uB9AC\uBC0D \uC2E4\uD328: ${e.message}`, { cause: e });
  }

  onProgress?.({
    spec,
    downloaded,
    total,
    phase: "verify"
  });

  let actual;
  try {
    actual = await sha256OfFile(partPath);
  } catch (e) {
    await discardPart();
    throw new Error(`${spec.name} SHA \uACC4\uC0B0 \uC2E4\uD328: ${e.message}`, { cause: e });
  }
  if (actual !== spec.sha256) {
    await discardPart();
    throw new Error(
      `${spec.name} SHA256 mismatch: expected ${spec.sha256}, got ${actual} \u2014 \uBAA8\uB378 URL \uC774 \uC624\uC5FC\uB418\uC5C8\uAC70\uB098 \uC804\uC1A1 \uC911 \uC190\uC0C1\uB418\uC5C8\uC2B5\uB2C8\uB2E4.`
    );
  }

  // Publish under the final name only after the content has been verified.
  await rename(partPath, localPath);
  onProgress?.({
    spec,
    downloaded,
    total,
    phase: "done"
  });
}
214
/**
 * Compute the SHA-256 of a file by streaming it, without loading it into
 * memory at once.
 *
 * @param {string} p Path of the file to hash.
 * @returns {Promise<string>} Lowercase hex digest.
 * @throws Rejects with the read-stream error when the file cannot be read.
 */
async function sha256OfFile(p) {
  const hasher = createHash("sha256");
  // Async iteration over the read stream destroys it when the loop exits.
  for await (const chunk of createReadStream(p)) {
    hasher.update(chunk);
  }
  return hasher.digest("hex");
}
224
+
225
+ // src/pdf/formula/postprocess.ts
226
+ var TRAILING_WHITESPACE_CMDS = [ // LaTeX spacing commands that stripTrailingWhitespace() removes when they end a formula.
227
+ "\\,", // thin space
228
+ "\\:", // medium space
229
+ "\\;", // thick space
230
+ "\\!", // negative thin space
231
+ "\\ ", // control space (backslash followed by a space)
232
+ "\\quad",
233
+ "\\qquad",
234
+ "\\enspace",
235
+ "\\thinspace"
236
+ ];
237
/**
 * Clean up a raw LaTeX string produced by the recognizer.
 *
 * Steps, in order: drop trailing whitespace/spacing commands, collapse
 * whitespace runs, remove empty `{}` groups (repeated until stable, at most
 * 10 passes), repair glued commands, normalize spacing, trim. A result that
 * isTrivialFormula() classifies as noise is replaced by the empty string.
 *
 * @param {string} latex Raw decoder output.
 * @returns {string} Cleaned LaTeX, or "" when the formula is considered trivial.
 */
function postProcessLatex(latex) {
  let text = collapseSpaces(stripTrailingWhitespace(latex));

  // Removing one empty group can expose another, so iterate to a fixpoint.
  let passes = 0;
  while (passes < 10) {
    const reduced = stripEmptyGroups(text);
    if (reduced === text) {
      break;
    }
    text = reduced;
    passes += 1;
  }

  text = normalizeFormulaSpacing(fixLatexSpacing(text)).trim();
  return isTrivialFormula(text) ? "" : text;
}
251
/**
 * Remove trailing whitespace and trailing LaTeX spacing commands (the
 * entries of TRAILING_WHITESPACE_CMDS), repeatedly, until neither is left.
 *
 * Two cases need care:
 *  - a control space ("\" followed by a space) must be recognized before the
 *    plain whitespace is trimmed away, otherwise only its space is removed
 *    and a dangling backslash is left behind;
 *  - a backslash that is the second half of a "\\" line break does not start
 *    a command, so "\\," or "\\ " at the end is left untouched.
 *
 * @param {string} s LaTeX source.
 * @returns {string} `s` without trailing whitespace or spacing commands.
 */
function stripTrailingWhitespace(s) {
  // True when the backslash at `index` opens a command, i.e. it is preceded
  // by an even number of backslashes.
  const startsCommand = (text, index) => {
    let preceding = 0;
    while (index - 1 - preceding >= 0 && text[index - 1 - preceding] === "\\") {
      preceding++;
    }
    return preceding % 2 === 0;
  };

  let rest = s;
  for (;;) {
    const trimmed = rest.replace(/\s+$/, "");
    const last = trimmed.length - 1;

    // Control space: the trimmed text ends in an unescaped backslash and the
    // first character that was trimmed off is a plain space.
    if (rest[trimmed.length] === " " && trimmed[last] === "\\" && startsCommand(trimmed, last)) {
      rest = trimmed.slice(0, last);
      continue;
    }

    const cmd = TRAILING_WHITESPACE_CMDS.find(
      (candidate) =>
        trimmed.endsWith(candidate) &&
        startsCommand(trimmed, trimmed.length - candidate.length)
    );
    if (cmd === undefined) {
      return trimmed;
    }
    rest = trimmed.slice(0, trimmed.length - cmd.length);
  }
}
266
/**
 * Replace every run of whitespace characters with a single space.
 * Leading and trailing runs are collapsed too, not removed.
 *
 * @param {string} s Input text.
 * @returns {string} Text with whitespace runs collapsed.
 */
function collapseSpaces(s) {
  return s.replace(/\s+/g, " ");
}
282
/**
 * Remove empty brace groups (`{}` or braces holding only whitespace) together
 * with whatever they were attached to:
 *  - a preceding `^` or `_` (empty super/subscript), or
 *  - a preceding command such as `\mathrm` (command with an empty argument).
 *
 * Whitespace of any kind between that owner and the group is dropped as well.
 * The original only handled plain spaces there (its loop tested the same
 * condition twice), so a tab before the group kept the owner from being removed.
 *
 * A single call makes one left-to-right pass; callers that need nested empty
 * groups removed call it repeatedly.
 *
 * @param {string} s LaTeX source.
 * @returns {string} Source with empty groups removed.
 */
function stripEmptyGroups(s) {
  let out = "";
  let i = 0;
  while (i < s.length) {
    if (s[i] === "{") {
      // Skip whitespace inside the braces to see whether the group is empty.
      let close = i + 1;
      while (close < s.length && /\s/.test(s[close])) {
        close++;
      }
      if (s[close] === "}") {
        out = out.trimEnd();
        if (out.endsWith("^") || out.endsWith("_")) {
          out = out.slice(0, -1);
        } else {
          // A command (backslash plus letters) directly before the group
          // only existed to take this empty argument.
          const owner = /\\[A-Za-z]+$/.exec(out);
          if (owner) {
            out = out.slice(0, owner.index);
          }
        }
        i = close + 1;
        continue;
      }
    }
    out += s[i];
    i++;
  }
  return out;
}
313
/**
 * LaTeX command names (without the backslash) that fixLatexSpacing() treats
 * as complete commands. A command that is NOT in this set but starts with one
 * that is gets split ("\alphax" -> "\alpha x"), so every valid command that
 * extends a listed one must be listed too. The "extensions" section at the
 * end exists for exactly that reason; without it "\limits" became "\lim its",
 * "\leqslant" became "\leq slant", "\coth" became "\cot h", and so on.
 */
var KNOWN_LATEX_CMDS = /* @__PURE__ */ new Set([
  // operators
  "cdot", "cdots", "ldots", "dots", "vdots", "ddots", "times", "div", "pm",
  "mp", "ast", "star", "circ", "bullet", "oplus", "ominus", "otimes", "odot",
  // relations
  "approx", "equiv", "neq", "ne", "sim", "simeq", "cong", "leq", "geq", "le",
  "ge", "ll", "gg", "prec", "succ", "preceq", "succeq", "propto", "parallel",
  "perp",
  // sets / logic
  "in", "notin", "ni", "subset", "supset", "subseteq", "supseteq", "cap",
  "cup", "bigcap", "bigcup", "emptyset", "varnothing", "forall", "exists",
  "nexists", "neg", "lnot", "land", "lor", "vee", "wedge",
  // Greek lowercase
  "alpha", "beta", "gamma", "delta", "epsilon", "varepsilon", "zeta", "eta",
  "theta", "vartheta", "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron",
  "pi", "varpi", "rho", "varrho", "sigma", "varsigma", "tau", "upsilon",
  "phi", "varphi", "chi", "psi", "omega",
  // Greek uppercase
  "Gamma", "Delta", "Theta", "Lambda", "Xi", "Pi", "Sigma", "Upsilon", "Phi",
  "Psi", "Omega",
  // arrows
  "to", "gets", "mapsto", "rightarrow", "leftarrow", "leftrightarrow",
  "Rightarrow", "Leftarrow", "Leftrightarrow", "uparrow", "downarrow",
  "longrightarrow", "longleftarrow", "longmapsto",
  // big operators
  "sum", "prod", "coprod", "int", "iint", "iiint", "oint", "bigoplus",
  "bigotimes",
  // function names
  "sin", "cos", "tan", "sec", "csc", "cot", "arcsin", "arccos", "arctan",
  "sinh", "cosh", "tanh", "log", "ln", "lg", "exp", "lim", "liminf", "limsup",
  "sup", "inf", "max", "min", "arg", "det", "dim", "gcd", "deg", "hom", "ker",
  "mod",
  // special symbols
  "infty", "partial", "nabla", "prime", "aleph", "ell", "hbar", "Re", "Im",
  "top", "bot", "angle", "vdash", "dashv",
  // delimiters / sizing
  "left", "right", "big", "Big", "bigg", "Bigg",
  // extensions: valid commands whose name starts with one of the above and
  // that would otherwise be split in two
  "limits", "nolimits",
  "leqslant", "geqslant", "leqq", "geqq", "lesssim", "lessapprox", "lessgtr",
  "lneq", "lneqq", "lll", "ggg", "leadsto",
  "nearrow", "newline",
  "coth", "pmod", "pmb", "models", "intercal", "iiiint", "kern", "degree",
  "pitchfork", "multimap", "divideontimes",
  "dotsc", "dotsb", "dotsm", "dotsi", "dotso", "cdotp",
  "veebar", "approxeq",
  "subsetneq", "supsetneq", "subseteqq", "supseteqq", "subsetneqq",
  "supsetneqq",
  "precsim", "succsim", "precapprox", "succapprox", "preccurlyeq",
  "succcurlyeq",
  "bigl", "bigr", "bigm", "Bigl", "Bigr", "Bigm",
  "biggl", "biggr", "biggm", "Biggl", "Biggr", "Biggm",
  "bigvee", "bigwedge", "bigodot", "bigsqcup", "biguplus", "bigcirc",
  "bigstar", "bigtriangleup", "bigtriangledown",
  "rightleftharpoons", "rightharpoonup", "rightharpoondown",
  "rightrightarrows", "rightsquigarrow",
  "leftharpoonup", "leftharpoondown", "leftrightarrows",
  "circledast", "circleddash", "circledcirc"
]);
500
/**
 * Split commands that were glued to the following letters, e.g.
 * "\alphax" -> "\alpha x".
 *
 * A command (backslash plus letters) is left untouched when it is followed by
 * "{", when it is listed in KNOWN_LATEX_CMDS, or when it is shorter than
 * three letters. Otherwise the longest proper prefix of at least two letters
 * that is a known command is split off and a space is inserted after it. If
 * no prefix is known the command is kept as is.
 *
 * @param {string} s LaTeX source.
 * @returns {string} Source with glued commands separated.
 */
function fixLatexSpacing(s) {
  return s.replace(/\\([A-Za-z]+)/g, (whole, name, offset) => {
    const follower = s[offset + whole.length];
    if (follower === "{" || KNOWN_LATEX_CMDS.has(name) || name.length < 3) {
      return whole;
    }
    // Longest known prefix wins, so "\leqq" splits after "leq", not "le".
    for (let cut = name.length - 1; cut >= 2; cut--) {
      const head = name.slice(0, cut);
      if (KNOWN_LATEX_CMDS.has(head)) {
        return `\\${head} ${name.slice(cut)}`;
      }
    }
    return whole;
  });
}
535
/**
 * Heuristically decide whether a recognized LaTeX string is noise rather than
 * a real formula. The function is a disjunction of independent checks; it
 * returns true as soon as any of them fires and false when none does.
 *
 * @param {string} s LaTeX source.
 * @returns {boolean} True when the formula should be discarded.
 */
function isTrivialFormula(s) {
  const t = s.trim();
  if (t.length === 0) return true;

  // At most two characters once whitespace and braces are ignored.
  if (t.replace(/[\s{}]/g, "").length <= 2) return true;
  // A lone command, or a short word wrapped in a font command.
  if (/^\\[A-Za-z]+$/.test(t)) return true;
  if (/^\\(?:mathrm|textrm|text|operatorname|mathit|mathbf|mathcal|mathsf|mathtt)\{[A-Za-z]{1,6}\}$/.test(
    t
  )) {
    return true;
  }

  const tokens = tokenizeLatex(t);
  const occurrences = (re) => (t.match(re) ?? []).length;

  // One token makes up at least half of the formula (and occurs 3+ times).
  if (tokens.length >= 3) {
    const tally = /* @__PURE__ */ new Map();
    let top = 0;
    for (const tok of tokens) {
      const seen = (tally.get(tok) ?? 0) + 1;
      tally.set(tok, seen);
      if (seen > top) top = seen;
    }
    if (top >= 3 && top / tokens.length >= 0.5) return true;
  }

  // Two to four tokens with neither an operator nor a digit among them.
  if (tokens.length >= 2 && tokens.length <= 4) {
    const meaningful = tokens.some(
      (tok) => /^[=+\-/*<>]$/.test(tok) || /^[0-9]$/.test(tok)
    );
    if (!meaningful) return true;
  }

  if (hasHighRepetition(t)) return true;
  if (t.includes("\\square")) return true;
  // A plain number once whitespace, braces and backslashes are removed.
  if (/^[-+]?\d+\.?\d*$/.test(t.replace(/[\s{}\\]/g, ""))) return true;
  // The same parenthesized or braced group twice in a row.
  if (/(\([^()]{2,15}\))\s*\1/.test(t)) return true;
  if (/(\{(?:[^{}]|\{[^{}]*\})+\})\s*\1/.test(t)) return true;

  // name(arg, arg, ...) where one argument fills at least half of the list.
  const call = /^[A-Za-z\\][A-Za-z]*\(([^()]+)\)$/.exec(t);
  if (call) {
    const args = call[1].split(",").map((a) => a.trim());
    if (args.length >= 2) {
      const perArg = /* @__PURE__ */ new Map();
      for (const a of args) {
        if (a) perArg.set(a, (perArg.get(a) ?? 0) + 1);
      }
      const dominated = [...perArg.values()].some(
        (c) => c >= 2 && c / args.length >= 0.5
      );
      if (dominated) return true;
    }
  }

  // \frac{x}{x} and x / x.
  if (/\\frac\{([^{}]+)\}\{\1\}/.test(t)) return true;
  if (/(\\[A-Za-z]+|\b[A-Za-z])\s*\/\s*\1\b/.test(t)) return true;

  // A matrix environment containing two or more \cdots.
  if (/\\begin\{(?:matrix|pmatrix|bmatrix|vmatrix)\}/.test(t) && occurrences(/\\cdots/g) >= 2) {
    return true;
  }

  // Short formula with several \mathrm{...} but no operator-plus-digit content.
  if (tokens.length <= 12 && occurrences(/\\mathrm\{/g) >= 2) {
    const hasRealMath = /[=+\-*/<>^]/.test(t) && /\d/.test(t);
    if (!hasRealMath) return true;
  }

  if (/^[a-zA-Z]{2,3}_\{\\mathrm\{[a-zA-Z]{3,}\}\}$/.test(t)) return true;
  if (/^\\mathrm\{[a-z]{2,}\}[-+][-+]?(?:\\[a-zA-Z]+|[a-zA-Z0-9])$/.test(t)) return true;
  if (/\\(?:mathsf|mathtt|texttt)\{/.test(t)) return true;
  if (/\\begin\{aligned\}/.test(t) && !t.includes("=")) return true;
  if (/\\begin\{matrix\}/.test(t) && occurrences(/\\downarrow/g) >= 2) return true;
  return false;
}
595
/**
 * Detect strings dominated by one repeated fragment.
 *
 * For every window width from 5 to 15 (as long as three windows fit in the
 * string) all substrings of that width containing at least one letter are
 * counted, overlapping positions included. The string is repetitive when some
 * substring occurs 3+ times and `count * width` covers at least 60% of the
 * string length. Strings shorter than 15 characters are never repetitive.
 *
 * @param {string} s Text to inspect.
 * @returns {boolean}
 */
function hasHighRepetition(s) {
  const total = s.length;
  if (total < 15) return false;

  for (let width = 5; width <= 15 && width * 3 <= total; width++) {
    const counts = /* @__PURE__ */ new Map();
    for (let start = 0; start + width <= total; start++) {
      const piece = s.slice(start, start + width);
      if (/[a-zA-Z]/.test(piece)) {
        counts.set(piece, (counts.get(piece) ?? 0) + 1);
      }
    }
    for (const hits of counts.values()) {
      if (hits >= 3 && hits * width / total >= 0.6) return true;
    }
  }
  return false;
}
612
/**
 * Split LaTeX source into tokens, discarding whitespace.
 *
 * Token kinds:
 *  - a command: backslash followed by one or more letters;
 *  - a backslash followed by exactly one other character (e.g. "\{", "\,");
 *    a backslash at the very end of the input is a token on its own;
 *  - any other single non-whitespace character.
 *
 * @param {string} s LaTeX source.
 * @returns {string[]} Tokens in source order.
 */
function tokenizeLatex(s) {
  // Alternatives are ordered so that a letter command wins over the
  // "backslash + one character" form.
  return s.match(/\\[A-Za-z]+|\\[\s\S]?|\S/g) ?? [];
}
632
/**
 * Remove all whitespace from a formula except where it is syntactically
 * required: between a letter command and a directly following letter
 * ("\alpha x" must not become "\alphax"). At most one space is kept there.
 *
 * @param {string} s LaTeX source.
 * @returns {string} Source with redundant whitespace removed.
 */
function normalizeFormulaSpacing(s) {
  // Same tokenization as tokenizeLatex(), except that every whitespace
  // character is kept as a " " placeholder token.
  const pieces = (s.match(/\\[A-Za-z]+|\\[\s\S]?|[\s\S]/g) ?? []).map(
    (piece) => (/^\s$/.test(piece) ? " " : piece)
  );

  const out = [];
  let lastSolid = "";
  for (let k = 0; k < pieces.length; k++) {
    const piece = pieces[k];
    if (piece !== " ") {
      out.push(piece);
      lastSolid = piece;
      continue;
    }

    // Look past the rest of this whitespace run for the next real token.
    let q = k + 1;
    while (q < pieces.length && pieces[q] === " ") q++;
    const upcoming = q < pieces.length ? pieces[q] : "";

    const afterCommand = /^\\[A-Za-z]+$/.test(lastSolid);
    const beforeLetter = /^[A-Za-z]$/.test(upcoming);
    if (afterCommand && beforeLetter && out[out.length - 1] !== " ") {
      out.push(" ");
    }
  }

  // Drop placeholder spaces at either end (only whole " " tokens, never the
  // space that belongs to a "\ " token).
  let head = 0;
  let tail = out.length;
  while (head < tail && out[head] === " ") head++;
  while (tail > head && out[tail - 1] === " ") tail--;
  return out.slice(head, tail).join("");
}
683
+
684
+ // src/pdf/formula/detector.ts
685
+ var MFD_IMG_SIZE = 768; // Side length of the square detector input; frames are letterboxed to this size.
686
+ var MFD_NUM_CLASSES = 2; // Class 1 is reported as "display", any other best class as "inline".
687
+ var MFD_CHANNELS = 4 + MFD_NUM_CLASSES; // Expected size of output dim 1: cx, cy, w, h plus one score per class.
688
+ var MFD_CONF_INLINE = 0.3; // Minimum best-class score to keep an inline candidate.
689
+ var MFD_CONF_DISPLAY = 0.4; // Minimum best-class score to keep a display candidate.
690
+ var MFD_IOU_THRESHOLD = 0.45; // Candidates overlapping a kept box by more than this IoU are suppressed.
691
+ var MFD_MIN_AREA = 80; // Boxes with a smaller area (in frame pixels) are discarded.
692
+ var PAD_VALUE = 114 / 255; // Fill value for the letterbox padding (gray 114 scaled to the 0..1 range).
693
/**
 * Run the formula detector on one frame and return the detected regions in
 * frame coordinates.
 *
 * The frame is letterboxed to MFD_IMG_SIZE and fed to the session as input
 * "images". The first output is read as a [1, MFD_CHANNELS, anchors] float32
 * tensor laid out channel-major: rows 0-3 hold cx, cy, w, h and the remaining
 * rows hold one score per class. Candidates are thresholded per class, mapped
 * back to frame coordinates, clamped, size-filtered, de-duplicated with NMS
 * (separately for "inline" and "display") and sorted top-to-bottom, then
 * left-to-right.
 *
 * @param {object} session Inference session exposing `run(feeds)`.
 * @param {{width: number, height: number, data: ArrayLike<number>}} frame RGBA frame.
 * @param {object} ort Runtime module exposing the `Tensor` constructor.
 * @returns {Promise<Array<{bbox: {x1: number, y1: number, x2: number, y2: number}, kind: string, score: number}>>}
 * @throws {Error} When the output is missing, not float32, or has an unexpected shape.
 */
async function detectFormulaRegions(session, frame, ort) {
  const { scale, padX, padY, tensor } = letterbox(frame, MFD_IMG_SIZE);
  const outputs = await session.run({
    images: new ort.Tensor("float32", tensor, [1, 3, MFD_IMG_SIZE, MFD_IMG_SIZE])
  });

  const out = outputs[Object.keys(outputs)[0]];
  if (!out || out.type !== "float32") {
    throw new Error("MFD \uCD9C\uB825 \uC5C6\uC74C \uB610\uB294 dtype \uBD88\uC77C\uCE58");
  }
  const shape = out.dims;
  if (shape.length !== 3) {
    throw new Error(`MFD \uCD9C\uB825 \uCC28\uC6D0 \uC608\uC0C1 3, \uC2E4\uC81C ${shape.length}: [${shape.join(",")}]`);
  }
  const channels = shape[1];
  const anchors = shape[2];
  if (channels !== MFD_CHANNELS) {
    throw new Error(`MFD \uCC44\uB110 \uC218 \uC608\uC0C1 ${MFD_CHANNELS}, \uC2E4\uC81C ${channels}`);
  }
  if (anchors <= 0) return [];

  const data = out.data;
  // Undo the letterbox (padding, then scale) and clamp into the frame.
  const toFrame = (value, pad, size) => clamp((value - pad) / scale, 0, size - 1);

  const candidates = [];
  for (let a = 0; a < anchors; a++) {
    // Pick the best-scoring class for this anchor.
    let bestCls = 0;
    let bestScore = 0;
    for (let c = 0; c < MFD_NUM_CLASSES; c++) {
      const score = data[(4 + c) * anchors + a];
      if (score > bestScore) {
        bestScore = score;
        bestCls = c;
      }
    }
    const isDisplay = bestCls === 1;
    if (bestScore < (isDisplay ? MFD_CONF_DISPLAY : MFD_CONF_INLINE)) continue;

    const cx = data[a];
    const cy = data[anchors + a];
    const w = data[2 * anchors + a];
    const h = data[3 * anchors + a];
    const x1 = toFrame(cx - w / 2, padX, frame.width);
    const y1 = toFrame(cy - h / 2, padY, frame.height);
    const x2 = toFrame(cx + w / 2, padX, frame.width);
    const y2 = toFrame(cy + h / 2, padY, frame.height);

    // Drop slivers and boxes too small to contain a formula.
    if (x2 - x1 < 2 || y2 - y1 < 2) continue;
    if ((x2 - x1) * (y2 - y1) < MFD_MIN_AREA) continue;

    candidates.push({
      x1,
      y1,
      x2,
      y2,
      kind: isDisplay ? "display" : "inline",
      score: bestScore
    });
  }

  return ["inline", "display"]
    .flatMap((kind) => nms(candidates.filter((c) => c.kind === kind), MFD_IOU_THRESHOLD))
    .sort((a, b) => a.y1 - b.y1 || a.x1 - b.x1)
    .map(({ x1, y1, x2, y2, kind, score }) => ({
      bbox: { x1, y1, x2, y2 },
      kind,
      score
    }));
}
762
/**
 * Resize an RGBA frame into a square `target` x `target` planar RGB tensor,
 * preserving aspect ratio and centering the image on a PAD_VALUE background.
 *
 * Resampling is nearest-neighbor. Pixel values are scaled to 0..1 and stored
 * plane by plane (all R, then all G, then all B). The returned `padX`/`padY`
 * are the exact (possibly fractional) paddings; pixels are placed at their
 * floored values.
 *
 * @param {{width: number, height: number, data: ArrayLike<number>}} frame RGBA frame, 4 values per pixel.
 * @param {number} target Side length of the square output.
 * @returns {{scale: number, padX: number, padY: number, tensor: Float32Array}}
 */
function letterbox(frame, target) {
  const { width: srcW, height: srcH, data: src } = frame;
  const scale = Math.min(target / srcW, target / srcH);
  const newW = Math.max(1, Math.round(srcW * scale));
  const newH = Math.max(1, Math.round(srcH * scale));
  const padX = (target - newW) / 2;
  const padY = (target - newH) / 2;
  const offX = Math.floor(padX);
  const offY = Math.floor(padY);

  const plane = target * target;
  const tensor = new Float32Array(3 * plane).fill(PAD_VALUE);

  // The source column for each destination column is the same on every row.
  const sourceColumn = Array.from(
    { length: newW },
    (_, x) => Math.min(srcW - 1, Math.floor((x + 0.5) / newW * srcW))
  );

  for (let y = 0; y < newH; y++) {
    const sy = Math.min(srcH - 1, Math.floor((y + 0.5) / newH * srcH));
    const rowStart = (y + offY) * target + offX;
    for (let x = 0; x < newW; x++) {
      const from = (sy * srcW + sourceColumn[x]) * 4;
      const to = rowStart + x;
      tensor[to] = src[from] / 255;
      tensor[plane + to] = src[from + 1] / 255;
      tensor[2 * plane + to] = src[from + 2] / 255;
    }
  }
  return { scale, padX, padY, tensor };
}
796
/**
 * Greedy non-maximum suppression.
 *
 * Candidates are visited from highest to lowest score; one is kept unless its
 * IoU with an already kept candidate exceeds `iouThreshold`. The input is not
 * modified.
 *
 * @param {Iterable<{x1: number, y1: number, x2: number, y2: number, score: number}>} cands Candidate boxes.
 * @param {number} iouThreshold Overlap above which a candidate is suppressed.
 * @returns {Array<object>} Kept candidates, in descending score order.
 */
function nms(cands, iouThreshold) {
  const survivors = [];
  const byScoreDesc = Array.from(cands).sort((a, b) => b.score - a.score);
  for (const cand of byScoreDesc) {
    const suppressed = survivors.some((winner) => iou(cand, winner) > iouThreshold);
    if (!suppressed) {
      survivors.push(cand);
    }
  }
  return survivors;
}
811
/**
 * Intersection-over-union of two axis-aligned boxes given as
 * `{ x1, y1, x2, y2 }`. Returns 0 when the union has no area.
 *
 * @param {{x1: number, y1: number, x2: number, y2: number}} a First box.
 * @param {{x1: number, y1: number, x2: number, y2: number}} b Second box.
 * @returns {number} Overlap ratio in the range 0..1.
 */
function iou(a, b) {
  const area = (box) => Math.max(0, box.x2 - box.x1) * Math.max(0, box.y2 - box.y1);
  const overlapW = Math.max(0, Math.min(a.x2, b.x2) - Math.max(a.x1, b.x1));
  const overlapH = Math.max(0, Math.min(a.y2, b.y2) - Math.max(a.y1, b.y1));
  const inter = overlapW * overlapH;
  const union = area(a) + area(b) - inter;
  if (union <= 0) {
    return 0;
  }
  return inter / union;
}
824
/**
 * Limit `v` to the range [lo, hi]. The lower bound is checked first, so it
 * wins if the bounds are ever inverted. NaN is returned unchanged.
 *
 * @param {number} v Value to limit.
 * @param {number} lo Lower bound.
 * @param {number} hi Upper bound.
 * @returns {number}
 */
function clamp(v, lo, hi) {
  return v < lo ? lo : v > hi ? hi : v;
}
829
+
830
+ // src/pdf/formula/recognizer.ts
831
+ var MFR_IMG_SIZE = 384; // Side length of the square recognizer input; crops are resized to this size.
832
+ var MFR_ENC_HIDDEN = 384; // Expected size of the encoder output's last dimension; any other value is rejected.
833
+ var MFR_MAX_NEW_TOKENS = 256; // Upper bound on greedy decoding steps per formula.
834
+ var MFR_EOS_ID = 2; // Token id that ends generation; also used as the first decoder input token.
835
+ var MFR_PAD_ID = 0; // Token id dropped from the generated sequence before it is decoded to text.
836
/**
 * Recognize one cropped formula image and return its cleaned LaTeX.
 *
 * The crop is preprocessed to a [1, 3, MFR_IMG_SIZE, MFR_IMG_SIZE] tensor and
 * encoded once. The decoder is then run greedily: every step feeds the whole
 * token sequence so far (starting with MFR_EOS_ID) plus the encoder states,
 * and appends the arg-max token of the last position. Generation stops at
 * MFR_EOS_ID or after MFR_MAX_NEW_TOKENS steps. The generated ids (without
 * the start token, pad ids and negative ids) are decoded with the tokenizer
 * and passed through postProcessLatex().
 *
 * @param {{ort: object, encoder: object, decoder: object, tokenizer: object}} deps
 *   Runtime module, encoder/decoder sessions and tokenizer.
 * @param {{width: number, height: number, data: ArrayLike<number>}} crop RGBA crop of the formula.
 * @returns {Promise<string>} Cleaned LaTeX ("" when judged trivial).
 * @throws {Error} When an encoder or decoder output is missing, not float32,
 *   or has an unexpected shape.
 */
async function recognizeFormula(deps, crop) {
  const pixels = deitPreprocess(crop, MFR_IMG_SIZE);
  const { ort, encoder, decoder, tokenizer } = deps;

  // --- encoder ---
  const encOut = await encoder.run({
    pixel_values: new ort.Tensor("float32", pixels, [1, 3, MFR_IMG_SIZE, MFR_IMG_SIZE])
  });
  const encNames = Object.keys(encOut);
  const encTensor = encOut[encNames.find((k) => k.includes("hidden")) ?? encNames[0]];
  if (!encTensor || encTensor.type !== "float32") {
    throw new Error("MFR encoder \uCD9C\uB825 \uC5C6\uC74C");
  }
  const encDims = encTensor.dims;
  if (encDims.length !== 3) {
    throw new Error(`MFR encoder \uCC28\uC6D0 \uC608\uC0C1 3, \uC2E4\uC81C ${encDims.length}`);
  }
  const encSeq = encDims[1];
  const encHidden = encDims[2];
  if (encHidden !== MFR_ENC_HIDDEN) {
    throw new Error(`MFR encoder hidden \uC608\uC0C1 ${MFR_ENC_HIDDEN}, \uC2E4\uC81C ${encHidden}`);
  }
  const encData = encTensor.data;

  // --- greedy decoding ---
  const tokens = [MFR_EOS_ID];
  let step = 0;
  while (step < MFR_MAX_NEW_TOKENS) {
    const ids = BigInt64Array.from(tokens.map((t) => BigInt(t)));
    const decOut = await decoder.run({
      input_ids: new ort.Tensor("int64", ids, [1, tokens.length]),
      // A fresh copy of the encoder states is handed to every decoder call.
      encoder_hidden_states: new ort.Tensor("float32", new Float32Array(encData), [1, encSeq, encHidden])
    });
    const decNames = Object.keys(decOut);
    const logitsTensor = decOut[decNames.find((k) => k.includes("logit")) ?? decNames[0]];
    if (!logitsTensor || logitsTensor.type !== "float32") {
      throw new Error("MFR decoder logits \uC5C6\uC74C");
    }
    const dims = logitsTensor.dims;
    if (dims.length !== 3) {
      throw new Error(`MFR decoder \uCC28\uC6D0 \uC608\uC0C1 3, \uC2E4\uC81C ${dims.length}`);
    }

    // Arg-max over the vocabulary at the last sequence position.
    const vocab = dims[2];
    const logits = logitsTensor.data;
    const lastRow = (dims[1] - 1) * vocab;
    let bestId = 0;
    let bestVal = -Infinity;
    for (let v = 0; v < vocab; v++) {
      const val = logits[lastRow + v];
      if (val > bestVal) {
        bestVal = val;
        bestId = v;
      }
    }

    tokens.push(bestId);
    if (bestId === MFR_EOS_ID) break;
    step += 1;
  }

  // Keep what was generated before the first EOS, minus pad/negative ids.
  const generated = tokens.slice(1);
  const eosAt = generated.indexOf(MFR_EOS_ID);
  const body = (eosAt === -1 ? generated : generated.slice(0, eosAt)).filter(
    (id) => id !== MFR_PAD_ID && !(id < 0)
  );

  const raw = tokenizer.decode(body, { skip_special_tokens: true });
  return postProcessLatex(raw);
}
903
/**
 * Convert an RGBA crop into a square `target` x `target` planar RGB tensor.
 *
 * The crop is stretched to the square with nearest-neighbor sampling (aspect
 * ratio is not preserved) and each channel value is mapped from 0..255 to
 * -1..1 via `v / 127.5 - 1`. Planes are stored R, then G, then B.
 *
 * @param {{width: number, height: number, data: ArrayLike<number>}} crop RGBA image, 4 values per pixel.
 * @param {number} target Side length of the square output.
 * @returns {Float32Array} Tensor data of length `3 * target * target`.
 */
function deitPreprocess(crop, target) {
  const plane = target * target;
  const out = new Float32Array(3 * plane);
  const { data: src, width: srcW, height: srcH } = crop;

  // Nearest source index for a destination index along one axis.
  const nearest = (pos, srcLen) =>
    Math.min(srcLen - 1, Math.max(0, Math.floor((pos + 0.5) / target * srcLen)));

  for (let y = 0; y < target; y++) {
    const sy = nearest(y, srcH);
    for (let x = 0; x < target; x++) {
      const from = (sy * srcW + nearest(x, srcW)) * 4;
      const to = y * target + x;
      for (let channel = 0; channel < 3; channel++) {
        out[channel * plane + to] = src[from + channel] / 127.5 - 1;
      }
    }
  }
  return out;
}
923
+
924
+ // src/pdf/formula/pipeline.ts
925
+ import { join as join2 } from "path";
926
+ var RENDER_SCALE = 2; // Default for the pipeline's `scale` option when FormulaPipeline.create() is given none.
927
/**
 * Formula OCR pipeline: renders PDF pages through pdfium, detects formula
 * regions with `detectFormulaRegions`, and recognizes each cropped region
 * with `recognizeFormula`.
 *
 * Instances are built with the async factory `FormulaPipeline.create()`;
 * call `destroy()` when the pipeline is no longer needed.
 */
var FormulaPipeline = class _FormulaPipeline {
  mfd;
  encoder;
  decoder;
  tokenizer;
  ort;
  sharp;
  pdfium;
  opts;
  // Set by destroy() so a repeated call does not release resources twice.
  #destroyed = false;

  /**
   * @param parts Pre-built components: `mfd`, `encoder`, `decoder` (inference
   *   sessions), `tokenizer`, `ort`, `sharp`, `pdfium`, and resolved `opts`
   *   (`scale`, `maxRegionsPerPage`, `pageTimeoutMs`).
   */
  constructor(parts) {
    this.mfd = parts.mfd;
    this.encoder = parts.encoder;
    this.decoder = parts.decoder;
    this.tokenizer = parts.tokenizer;
    this.ort = parts.ort;
    this.sharp = parts.sharp;
    this.pdfium = parts.pdfium;
    this.opts = parts.opts;
  }

  /** Turns any thrown value into a printable message. */
  static #describe(err) {
    return err instanceof Error ? err.message : String(err);
  }

  /**
   * Best-effort release of inference sessions. Sessions without a `release`
   * method are skipped; failures are logged to stderr and never thrown.
   */
  static async #releaseSessions(sessions) {
    const outcomes = await Promise.allSettled(
      sessions.map(async (session) => {
        if (typeof session?.release === "function") {
          await session.release();
        }
      })
    );
    for (const outcome of outcomes) {
      if (outcome.status === "rejected") {
        process.stderr.write(
          `[kordoc-formula] session release failed: ${_FormulaPipeline.#describe(outcome.reason)}\n`
        );
      }
    }
  }

  /**
   * Initializes the formula OCR engine. Fails immediately when the model
   * files are not present locally — the caller must run
   * `ensureFormulaModels()` first.
   *
   * @param options Optional overrides: `scale` (default `RENDER_SCALE`),
   *   `maxRegionsPerPage` (default 50), `pageTimeoutMs` (default 60000).
   * @returns A ready-to-use pipeline instance.
   * @throws When an optional dependency cannot be imported or any model,
   *   tokenizer, or pdfium initialization step fails. Sessions that were
   *   already created are released before the error propagates.
   */
  static async create(options) {
    const opts = {
      scale: options?.scale ?? RENDER_SCALE,
      maxRegionsPerPage: options?.maxRegionsPerPage ?? 50,
      pageTimeoutMs: options?.pageTimeoutMs ?? 6e4
    };
    const [ortMod, sharpModRaw, hfMod, pdfiumMod] = await Promise.all([
      tryImport(
        "onnxruntime-node",
        () => import("onnxruntime-node")
      ),
      tryImport(
        "sharp",
        () => import("sharp")
      ),
      tryImport(
        "@huggingface/transformers",
        () => import("@huggingface/transformers")
      ),
      tryImport(
        "@hyzyla/pdfium",
        () => import("@hyzyla/pdfium")
      )
    ]);
    // The sharp module may be the callable itself or expose it as `default`.
    const sharpMod = typeof sharpModRaw === "function" ? sharpModRaw : sharpModRaw.default ?? sharpModRaw;
    const modelsDir = getFormulaModelsDir();
    const mfdPath = join2(modelsDir, MFD_MODEL.filename);
    const encPath = join2(modelsDir, MFR_ENCODER_MODEL.filename);
    const decPath = join2(modelsDir, MFR_DECODER_MODEL.filename);
    const tokPath = join2(modelsDir, MFR_TOKENIZER.filename);
    const sessionOpts = {
      graphOptimizationLevel: "all",
      executionProviders: ["cpu"]
    };
    // allSettled (not all) so that sessions which did load can be released
    // when a sibling fails instead of being leaked.
    const settled = await Promise.allSettled(
      [mfdPath, encPath, decPath].map(
        (modelPath) => ortMod.InferenceSession.create(modelPath, sessionOpts)
      )
    );
    const sessions = settled.filter((s) => s.status === "fulfilled").map((s) => s.value);
    const failure = settled.find((s) => s.status === "rejected");
    if (failure) {
      await _FormulaPipeline.#releaseSessions(sessions);
      throw failure.reason;
    }
    const [mfd, encoder, decoder] = sessions;
    try {
      const { readFile } = await import("fs/promises");
      const tokenizerJson = JSON.parse(await readFile(tokPath, "utf-8"));
      const PretrainedCtor = hfMod.PreTrainedTokenizer;
      const tokenizer = new PretrainedCtor(tokenizerJson, {});
      const pdfium = await pdfiumMod.PDFiumLibrary.init();
      return new _FormulaPipeline({
        mfd,
        encoder,
        decoder,
        tokenizer,
        ort: ortMod,
        sharp: sharpMod,
        pdfium,
        opts
      });
    } catch (err) {
      // Tokenizer or pdfium setup failed: do not leak the loaded sessions.
      await _FormulaPipeline.#releaseSessions(sessions);
      throw err;
    }
  }

  /**
   * Releases resources — call when the pipeline is no longer used.
   * Releases the three inference sessions and the pdfium library. Best
   * effort: failures are logged to stderr, never thrown. Safe to call more
   * than once; only the first call does any work.
   */
  async destroy() {
    if (this.#destroyed) return;
    this.#destroyed = true;
    await _FormulaPipeline.#releaseSessions([this.mfd, this.encoder, this.decoder]);
    try {
      this.pdfium.destroy();
    } catch (e) {
      process.stderr.write(
        `[kordoc-formula] pdfium destroy failed: ${_FormulaPipeline.#describe(e)}\n`
      );
    }
  }

  /**
   * Opens a PDF buffer and recognizes formula regions page by page.
   * A page that fails or times out is skipped (the error is logged to
   * stderr, not propagated).
   *
   * @param buffer PDF bytes; anything that is not a Uint8Array is wrapped in one.
   * @param pageFilter `null` processes every page. A Set keeps only pages
   *   whose `page.number` is in the set.
   * @param onPageProgress Optional callback invoked as
   *   `(page.number, totalPageCount)` before each processed page.
   * @returns Array of per-page results (pages with a falsy result are omitted).
   */
  async runOnBuffer(buffer, pageFilter = null, onPageProgress) {
    const view = buffer instanceof Uint8Array ? buffer : new Uint8Array(buffer);
    const doc = await this.pdfium.loadDocument(view);
    try {
      const pages = [];
      for (const page of doc.pages()) {
        // NOTE(review): the original doc comment says the filter holds 1-based
        // page numbers, and the original code kept an unused 1-based counter
        // here. Confirm whether `page.number` from the pdfium wrapper is
        // 0-based or 1-based and that callers pass matching values.
        if (pageFilter && !pageFilter.has(page.number)) continue;
        onPageProgress?.(page.number, doc.getPageCount());
        try {
          const result = await withTimeout(
            this.processPage(page.number, page),
            this.opts.pageTimeoutMs,
            `formula page ${page.number} timed out after ${this.opts.pageTimeoutMs}ms`
          );
          if (result) pages.push(result);
        } catch (e) {
          process.stderr.write(
            `[kordoc-formula] page ${page.number} skipped: ${_FormulaPipeline.#describe(e)}\n`
          );
        }
      }
      return pages;
    } finally {
      doc.destroy();
    }
  }

  /**
   * Renders one page, detects formula regions, and recognizes each region.
   *
   * @param pageNumber Page identifier copied into the result and log messages.
   * @param page pdfium page object (uses `getOriginalSize()` and `render()`).
   * @returns `{ pageNumber, renderedWidth, renderedHeight, pdfWidth, pdfHeight, regions }`
   *   where each region is the detected region plus a `latex` string
   *   (empty when recognition failed).
   */
  async processPage(pageNumber, page) {
    const { originalWidth: pdfWidth, originalHeight: pdfHeight } = page.getOriginalSize();
    const sharpCtor = this.sharp;
    const rendered = await page.render({
      scale: this.opts.scale,
      // Hand back the raw bitmap untouched; channel reordering happens below.
      render: async ({ data }) => data
    });
    const { data: bgra, width: rw, height: rh } = rendered;
    const rgba = bgraToRgba(bgra);
    const pageFrame = { width: rw, height: rh, data: rgba };
    const regions0 = await detectFormulaRegions(this.mfd, pageFrame, this.ort);
    if (regions0.length === 0) {
      return { pageNumber, renderedWidth: rw, renderedHeight: rh, pdfWidth, pdfHeight, regions: [] };
    }
    const capped = regions0.slice(0, this.opts.maxRegionsPerPage);
    const regions = [];
    for (const r of capped) {
      // Clamp the box to the rendered frame and snap outward to whole pixels.
      const x1 = Math.floor(Math.max(0, r.bbox.x1));
      const y1 = Math.floor(Math.max(0, r.bbox.y1));
      const x2 = Math.ceil(Math.min(rw, r.bbox.x2));
      const y2 = Math.ceil(Math.min(rh, r.bbox.y2));
      const cw = x2 - x1;
      const ch = y2 - y1;
      // Crops narrower or shorter than 4 px are dropped.
      if (cw < 4 || ch < 4) continue;
      const cropRgba = await sharpCtor(rgba, {
        raw: { width: rw, height: rh, channels: 4 }
      }).extract({ left: x1, top: y1, width: cw, height: ch }).raw().toBuffer();
      const cropFrame = { width: cw, height: ch, data: new Uint8Array(cropRgba) };
      let latex = "";
      try {
        latex = await recognizeFormula(
          {
            encoder: this.encoder,
            decoder: this.decoder,
            tokenizer: this.tokenizer,
            ort: this.ort
          },
          cropFrame
        );
      } catch (e) {
        // A failed region is kept with empty LaTeX rather than failing the page.
        process.stderr.write(
          `[kordoc-formula] recognize failed at page ${pageNumber} ${JSON.stringify(r.bbox)}: ${_FormulaPipeline.#describe(e)}\n`
        );
        latex = "";
      }
      regions.push({ ...r, latex });
    }
    return {
      pageNumber,
      renderedWidth: rw,
      renderedHeight: rh,
      pdfWidth,
      pdfHeight,
      regions
    };
  }
};
1108
/**
 * Runs `loader` (expected to dynamically import an optional dependency) and
 * returns whatever it resolves to.
 *
 * @param name Package name, used only in the error message.
 * @param loader Function returning a promise for the module.
 * @returns The value `loader` resolves to.
 * @throws {Error} When `loader` fails. The message (Korean) says the optional
 *   dependency is required for formula OCR, suggests `npm install <name>`,
 *   and appends the underlying reason. The original failure is preserved as
 *   `error.cause`.
 */
async function tryImport(name, loader) {
  try {
    return await loader();
  } catch (e) {
    // Non-Error rejections have no `.message`; stringify them instead.
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error(
      `\uC218\uC2DD OCR \uC744 \uC0AC\uC6A9\uD558\uB824\uBA74 optional dependency '${name}' \uC774 \uD544\uC694\uD569\uB2C8\uB2E4. \`npm install ${name}\` \uD6C4 \uB2E4\uC2DC \uC2E4\uD589\uD558\uC138\uC694. \uC6D0\uC778: ${reason}`,
      { cause: e }
    );
  }
}
1117
/**
 * Settles with `promise`, or rejects with `new Error(msg)` if `ms`
 * milliseconds elapse first. The timer is always cleared once the race
 * settles. The losing `promise` is not cancelled.
 *
 * @param promise Work to wait for.
 * @param ms Timeout in milliseconds.
 * @param msg Message of the timeout error.
 */
async function withTimeout(promise, ms, msg) {
  let handle;
  const deadline = new Promise((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new Error(msg));
    }, ms);
  });
  return Promise.race([promise, deadline]).finally(() => {
    if (handle) clearTimeout(handle);
  });
}
1130
/**
 * Returns a new Uint8Array of the same length as `bgra` in which, for every
 * 4-byte group, bytes 0 and 2 are swapped and bytes 1 and 3 are kept
 * (B,G,R,A -> R,G,B,A). The input is not modified.
 *
 * @param bgra Indexable byte sequence with a `length`.
 * @returns Freshly allocated Uint8Array with reordered channels.
 */
function bgraToRgba(bgra) {
  const byteCount = bgra.length;
  const rgba = new Uint8Array(byteCount);
  let offset = 0;
  while (offset < byteCount) {
    const blue = bgra[offset];
    const green = bgra[offset + 1];
    const red = bgra[offset + 2];
    const alpha = bgra[offset + 3];
    rgba[offset] = red;
    rgba[offset + 1] = green;
    rgba[offset + 2] = blue;
    rgba[offset + 3] = alpha;
    offset += 4;
  }
  return rgba;
}
1140
+ export {
1141
+ ALL_FORMULA_MODELS,
1142
+ FormulaPipeline,
1143
+ MFD_MODEL,
1144
+ MFR_DECODER_MODEL,
1145
+ MFR_ENCODER_MODEL,
1146
+ MFR_TOKENIZER,
1147
+ ensureFormulaModels,
1148
+ ensureSingleModel,
1149
+ getFormulaModelStatus,
1150
+ getFormulaModelsDir,
1151
+ postProcessLatex
1152
+ };
1153
+ //# sourceMappingURL=formula-JCNF43NE.js.map