@clazic/kordoc 2.7.4 → 2.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -29
- package/dist/{chunk-EJZO6DUI.js → chunk-URSQEMVJ.js} +345 -521
- package/dist/chunk-URSQEMVJ.js.map +1 -0
- package/dist/{chunk-CIR4TB4K.js → chunk-X7UUXEMM.js} +2 -2
- package/dist/cli.js +5 -87
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +447 -631
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -135
- package/dist/index.d.ts +4 -135
- package/dist/index.js +440 -621
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -43
- package/dist/mcp.js.map +1 -1
- package/dist/{utils-LYW4Z2Z6.js → utils-QQVZGOGU.js} +2 -2
- package/dist/{watch-CVSZKJE3.js → watch-RQYUNSSH.js} +3 -3
- package/package.json +1 -2
- package/dist/chunk-EJZO6DUI.js.map +0 -1
- /package/dist/{chunk-CIR4TB4K.js.map → chunk-X7UUXEMM.js.map} +0 -0
- /package/dist/{utils-LYW4Z2Z6.js.map → utils-QQVZGOGU.js.map} +0 -0
- /package/dist/{watch-CVSZKJE3.js.map → watch-RQYUNSSH.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -3062,9 +3062,6 @@ __export(index_exports, {
|
|
|
3062
3062
|
VERSION: () => VERSION,
|
|
3063
3063
|
blocksToMarkdown: () => blocksToMarkdown,
|
|
3064
3064
|
compare: () => compare,
|
|
3065
|
-
convertHwpToPdf: () => convertHwpToPdf,
|
|
3066
|
-
convertHwpxToPdf: () => convertHwpxToPdf,
|
|
3067
|
-
convertToPdf: () => convertToPdf,
|
|
3068
3065
|
detectFormat: () => detectFormat,
|
|
3069
3066
|
detectZipFormat: () => detectZipFormat,
|
|
3070
3067
|
diffBlocks: () => diffBlocks,
|
|
@@ -3084,7 +3081,7 @@ __export(index_exports, {
|
|
|
3084
3081
|
runUnifiedOcrPipeline: () => runUnifiedOcrPipeline
|
|
3085
3082
|
});
|
|
3086
3083
|
module.exports = __toCommonJS(index_exports);
|
|
3087
|
-
var
|
|
3084
|
+
var import_promises3 = require("fs/promises");
|
|
3088
3085
|
|
|
3089
3086
|
// src/detect.ts
|
|
3090
3087
|
var import_jszip = __toESM(require("jszip"), 1);
|
|
@@ -3137,7 +3134,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
3137
3134
|
var import_xmldom = require("@xmldom/xmldom");
|
|
3138
3135
|
|
|
3139
3136
|
// src/utils.ts
|
|
3140
|
-
var VERSION = true ? "2.7.4" : "0.0.0-dev";
|
|
3137
|
+
var VERSION = true ? "2.7.6" : "0.0.0-dev";
|
|
3141
3138
|
function toArrayBuffer(buf) {
|
|
3142
3139
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3143
3140
|
return buf.buffer;
|
|
@@ -3344,13 +3341,21 @@ function sanitizeText(text) {
|
|
|
3344
3341
|
}
|
|
3345
3342
|
return result;
|
|
3346
3343
|
}
|
|
3344
|
+
function escapeGfm(text, inTableCell = false) {
|
|
3345
|
+
if (!text) return text;
|
|
3346
|
+
let result = text.replace(/(?<!\\)~/g, "\\~");
|
|
3347
|
+
if (inTableCell) {
|
|
3348
|
+
result = result.replace(/(?<!\\)\|/g, "\\|");
|
|
3349
|
+
}
|
|
3350
|
+
return result;
|
|
3351
|
+
}
|
|
3347
3352
|
function blocksToMarkdown(blocks) {
|
|
3348
3353
|
const lines = [];
|
|
3349
3354
|
for (let i = 0; i < blocks.length; i++) {
|
|
3350
3355
|
const block = blocks[i];
|
|
3351
3356
|
if (block.type === "heading" && block.text) {
|
|
3352
3357
|
const prefix = "#".repeat(Math.min(block.level || 2, 6));
|
|
3353
|
-
const headingText = sanitizeText(block.text);
|
|
3358
|
+
const headingText = escapeGfm(sanitizeText(block.text), false);
|
|
3354
3359
|
if (headingText) lines.push("", `${prefix} ${headingText}`, "");
|
|
3355
3360
|
continue;
|
|
3356
3361
|
}
|
|
@@ -3363,42 +3368,47 @@ function blocksToMarkdown(blocks) {
|
|
|
3363
3368
|
continue;
|
|
3364
3369
|
}
|
|
3365
3370
|
if (block.type === "list" && block.text) {
|
|
3366
|
-
const listText = sanitizeText(block.text);
|
|
3367
|
-
if (!listText) continue;
|
|
3368
|
-
const alreadyNumbered = block.listType === "ordered" && /^\d+\.\s/.test(listText);
|
|
3371
|
+
const sanitized = sanitizeText(block.text);
|
|
3372
|
+
if (!sanitized) continue;
|
|
3373
|
+
const alreadyNumbered = block.listType === "ordered" && /^\d+\.\s/.test(sanitized);
|
|
3369
3374
|
const prefix = alreadyNumbered ? "" : block.listType === "ordered" ? "1. " : "- ";
|
|
3375
|
+
const listText = escapeGfm(sanitized, false);
|
|
3370
3376
|
lines.push(`${prefix}${listText}`);
|
|
3371
3377
|
if (block.children) {
|
|
3372
3378
|
for (const child of block.children) {
|
|
3373
3379
|
const childPrefix = child.listType === "ordered" ? "1." : "-";
|
|
3374
|
-
|
|
3380
|
+
const childText = child.text ? escapeGfm(sanitizeText(child.text), false) : "";
|
|
3381
|
+
lines.push(` ${childPrefix} ${childText}`);
|
|
3375
3382
|
}
|
|
3376
3383
|
}
|
|
3377
3384
|
continue;
|
|
3378
3385
|
}
|
|
3379
3386
|
if (block.type === "paragraph" && block.text) {
|
|
3380
|
-
|
|
3381
|
-
if (!text) continue;
|
|
3382
|
-
if (/^\[별표\s*\d+/.test(text)) {
|
|
3387
|
+
const sanitized = sanitizeText(block.text);
|
|
3388
|
+
if (!sanitized) continue;
|
|
3389
|
+
if (/^\[별표\s*\d+/.test(sanitized)) {
|
|
3383
3390
|
const nextBlock = blocks[i + 1];
|
|
3391
|
+
const escapedSelf = escapeGfm(sanitized, false);
|
|
3384
3392
|
if (nextBlock?.type === "paragraph" && nextBlock.text && /관련\)?$/.test(nextBlock.text)) {
|
|
3385
|
-
|
|
3393
|
+
const nextEscaped = escapeGfm(sanitizeText(nextBlock.text), false);
|
|
3394
|
+
lines.push("", `## ${escapedSelf} ${nextEscaped}`, "");
|
|
3386
3395
|
i++;
|
|
3387
3396
|
} else {
|
|
3388
|
-
lines.push("", `## ${text}`, "");
|
|
3397
|
+
lines.push("", `## ${escapedSelf}`, "");
|
|
3389
3398
|
}
|
|
3390
3399
|
continue;
|
|
3391
3400
|
}
|
|
3392
|
-
if (/^\([^)]*조[^)]*관련\)$/.test(text)) {
|
|
3393
|
-
lines.push(`*${text}*`, "");
|
|
3401
|
+
if (/^\([^)]*조[^)]*관련\)$/.test(sanitized)) {
|
|
3402
|
+
lines.push(`*${escapeGfm(sanitized, false)}*`, "");
|
|
3394
3403
|
continue;
|
|
3395
3404
|
}
|
|
3405
|
+
let text = escapeGfm(sanitized, false);
|
|
3396
3406
|
if (block.href) {
|
|
3397
3407
|
const href = sanitizeHref(block.href);
|
|
3398
3408
|
if (href) text = `[${text}](${href})`;
|
|
3399
3409
|
}
|
|
3400
3410
|
if (block.footnoteText) {
|
|
3401
|
-
text += ` (\uC8FC: ${block.footnoteText})`;
|
|
3411
|
+
text += ` (\uC8FC: ${escapeGfm(block.footnoteText, false)})`;
|
|
3402
3412
|
}
|
|
3403
3413
|
lines.push(text);
|
|
3404
3414
|
} else if (block.type === "table" && block.table) {
|
|
@@ -3423,13 +3433,13 @@ function tableToMarkdown(table) {
|
|
|
3423
3433
|
return content.split(/\n/).map((line) => {
|
|
3424
3434
|
const trimmed = line.trim();
|
|
3425
3435
|
if (!trimmed) return "";
|
|
3426
|
-
if (/^\d+\.\s/.test(trimmed)) return `**${trimmed}**`;
|
|
3427
|
-
if (/^[가-힣]\.\s/.test(trimmed)) return ` ${trimmed}`;
|
|
3428
|
-
return trimmed;
|
|
3436
|
+
if (/^\d+\.\s/.test(trimmed)) return `**${escapeGfm(trimmed, false)}**`;
|
|
3437
|
+
if (/^[가-힣]\.\s/.test(trimmed)) return ` ${escapeGfm(trimmed, false)}`;
|
|
3438
|
+
return escapeGfm(trimmed, false);
|
|
3429
3439
|
}).filter(Boolean).join("\n");
|
|
3430
3440
|
}
|
|
3431
3441
|
if (numCols === 1 && numRows >= 2) {
|
|
3432
|
-
return cells.map((row) => sanitizeText(row[0].text).replace(/\n/g, " ")).filter(Boolean).join("\n");
|
|
3442
|
+
return cells.map((row) => escapeGfm(sanitizeText(row[0].text).replace(/\n/g, " "), false)).filter(Boolean).join("\n");
|
|
3433
3443
|
}
|
|
3434
3444
|
const display = Array.from({ length: numRows }, () => Array(numCols).fill(""));
|
|
3435
3445
|
const skip = /* @__PURE__ */ new Set();
|
|
@@ -3438,7 +3448,7 @@ function tableToMarkdown(table) {
|
|
|
3438
3448
|
if (skip.has(`${r},${c}`)) continue;
|
|
3439
3449
|
const cell = cells[r]?.[c];
|
|
3440
3450
|
if (!cell) continue;
|
|
3441
|
-
display[r][c] = sanitizeText(cell.text).replace(/\n/g, "<br>");
|
|
3451
|
+
display[r][c] = escapeGfm(sanitizeText(cell.text).replace(/\n/g, "<br>"), true);
|
|
3442
3452
|
for (let dr = 0; dr < cell.rowSpan; dr++) {
|
|
3443
3453
|
for (let dc = 0; dc < cell.colSpan; dc++) {
|
|
3444
3454
|
if (dr === 0 && dc === 0) continue;
|
|
@@ -3485,6 +3495,223 @@ var HEADING_RATIO_H1 = 1.5;
|
|
|
3485
3495
|
var HEADING_RATIO_H2 = 1.3;
|
|
3486
3496
|
var HEADING_RATIO_H3 = 1.15;
|
|
3487
3497
|
|
|
3498
|
+
// src/hwp5/equation.ts
|
|
3499
|
+
var WORD_COMMANDS = /* @__PURE__ */ new Map([
|
|
3500
|
+
["alpha", "\\alpha"],
|
|
3501
|
+
["beta", "\\beta"],
|
|
3502
|
+
["gamma", "\\gamma"],
|
|
3503
|
+
["delta", "\\delta"],
|
|
3504
|
+
["epsilon", "\\epsilon"],
|
|
3505
|
+
["theta", "\\theta"],
|
|
3506
|
+
["lambda", "\\lambda"],
|
|
3507
|
+
["mu", "\\mu"],
|
|
3508
|
+
["pi", "\\pi"],
|
|
3509
|
+
["sigma", "\\sigma"],
|
|
3510
|
+
["tau", "\\tau"],
|
|
3511
|
+
["phi", "\\phi"],
|
|
3512
|
+
["omega", "\\omega"],
|
|
3513
|
+
["sin", "\\sin"],
|
|
3514
|
+
["cos", "\\cos"],
|
|
3515
|
+
["tan", "\\tan"],
|
|
3516
|
+
["sec", "\\sec"],
|
|
3517
|
+
["csc", "\\csc"],
|
|
3518
|
+
["cot", "\\cot"],
|
|
3519
|
+
["log", "\\log"],
|
|
3520
|
+
["ln", "\\ln"],
|
|
3521
|
+
["lim", "\\lim"],
|
|
3522
|
+
["inf", "\\infty"],
|
|
3523
|
+
["sum", "\\sum"],
|
|
3524
|
+
["smallsum", "\\sum"],
|
|
3525
|
+
["prod", "\\prod"],
|
|
3526
|
+
["int", "\\int"],
|
|
3527
|
+
["oint", "\\oint"],
|
|
3528
|
+
["rightarrow", "\\rightarrow"],
|
|
3529
|
+
["leftarrow", "\\leftarrow"],
|
|
3530
|
+
["partial", "\\partial"],
|
|
3531
|
+
["nabla", "\\nabla"],
|
|
3532
|
+
["angle", "\\angle"],
|
|
3533
|
+
["triangle", "\\triangle"],
|
|
3534
|
+
["vec", "\\vec"],
|
|
3535
|
+
["bar", "\\overline"],
|
|
3536
|
+
["dot", "\\dot"],
|
|
3537
|
+
["hat", "\\hat"],
|
|
3538
|
+
["left", "\\left"],
|
|
3539
|
+
["right", "\\right"]
|
|
3540
|
+
]);
|
|
3541
|
+
var SYMBOL_WORDS = /* @__PURE__ */ new Map([
|
|
3542
|
+
["times", "\\times"],
|
|
3543
|
+
["divide", "\\div"],
|
|
3544
|
+
["div", "\\div"],
|
|
3545
|
+
["le", "\\leq"],
|
|
3546
|
+
["ge", "\\geq"],
|
|
3547
|
+
["geq", "\\geq"],
|
|
3548
|
+
["deg", "^\\circ"],
|
|
3549
|
+
["rarrow", "\\rightarrow"],
|
|
3550
|
+
["larrow", "\\leftarrow"],
|
|
3551
|
+
["lrarrow", "\\leftrightarrow"],
|
|
3552
|
+
["in", "\\in"],
|
|
3553
|
+
["notin", "\\notin"],
|
|
3554
|
+
["emptyset", "\\emptyset"],
|
|
3555
|
+
["subset", "\\subset"],
|
|
3556
|
+
["nsubset", "\\nsubseteq"],
|
|
3557
|
+
["cup", "\\cup"],
|
|
3558
|
+
["cap", "\\cap"],
|
|
3559
|
+
["smallinter", "\\cap"],
|
|
3560
|
+
["sim", "\\sim"],
|
|
3561
|
+
["circ", "\\circ"],
|
|
3562
|
+
["bot", "\\perp"],
|
|
3563
|
+
["dyad", "\\overleftrightarrow"],
|
|
3564
|
+
["arch", "\\overset{\\frown}"]
|
|
3565
|
+
]);
|
|
3566
|
+
function hwpEquationToLatex(equation) {
|
|
3567
|
+
return convertEquation(equation.replace(/\0/g, "").trim(), 0);
|
|
3568
|
+
}
|
|
3569
|
+
function convertEquation(equation, depth) {
|
|
3570
|
+
if (!equation || depth > 12) return equation;
|
|
3571
|
+
let result = equation.replace(/\s+/g, " ").replace(/`+/g, "\\,").replace(/~+/g, "\\,").trim();
|
|
3572
|
+
result = convertMatrixLike(result);
|
|
3573
|
+
result = convertRoots(result, depth);
|
|
3574
|
+
result = convertOver(result, depth);
|
|
3575
|
+
result = convertSqrt(result, depth);
|
|
3576
|
+
result = convertScripts(result);
|
|
3577
|
+
result = convertOperators(result);
|
|
3578
|
+
result = removeFontDirectives(result);
|
|
3579
|
+
result = convertWords(result);
|
|
3580
|
+
result = cleanupLatexSpacing(result);
|
|
3581
|
+
return result;
|
|
3582
|
+
}
|
|
3583
|
+
function convertMatrixLike(input) {
|
|
3584
|
+
return input.replace(
|
|
3585
|
+
/\bmatrix\s*\{([^{}]*)\}/gi,
|
|
3586
|
+
(_match, body) => `\\begin{matrix} ${body.split("#").map((part) => part.trim()).join(" & ")} \\end{matrix}`
|
|
3587
|
+
).replace(
|
|
3588
|
+
/\bcases\s*\{([^{}]*)\}/gi,
|
|
3589
|
+
(_match, body) => `\\begin{cases} ${body.split("#").map((part) => part.trim()).join(" \\\\ ")} \\end{cases}`
|
|
3590
|
+
);
|
|
3591
|
+
}
|
|
3592
|
+
function convertRoots(input, depth) {
|
|
3593
|
+
return input.replace(/(?<!\\)\broot\s+({[^{}]*}|\S+)\s+of\s+({[^{}]*}|\S+)/gi, (_match, degree, radicand) => {
|
|
3594
|
+
return `\\sqrt[${convertEquation(unwrapGroup(degree), depth + 1)}]{${convertEquation(unwrapGroup(radicand), depth + 1)}}`;
|
|
3595
|
+
});
|
|
3596
|
+
}
|
|
3597
|
+
function convertSqrt(input, depth) {
|
|
3598
|
+
return input.replace(/(?<!\\)\bsqrt\s*({[^{}]*}|\S+)/gi, (_match, radicand) => {
|
|
3599
|
+
return `\\sqrt{${convertEquation(unwrapGroup(radicand), depth + 1)}}`;
|
|
3600
|
+
});
|
|
3601
|
+
}
|
|
3602
|
+
function convertOver(input, depth) {
|
|
3603
|
+
let result = input;
|
|
3604
|
+
for (let guard = 0; guard < 50; guard++) {
|
|
3605
|
+
const over = findTopLevelWord(result, "over");
|
|
3606
|
+
if (over < 0) break;
|
|
3607
|
+
const left = readLeftAtom(result, over);
|
|
3608
|
+
const right = readRightAtom(result, over + "over".length);
|
|
3609
|
+
if (!left || !right) break;
|
|
3610
|
+
const numerator = convertEquation(unwrapGroup(left.atom), depth + 1);
|
|
3611
|
+
const denominator = convertEquation(unwrapGroup(right.atom), depth + 1);
|
|
3612
|
+
result = result.slice(0, left.start) + `\\frac{${numerator}}{${denominator}}` + result.slice(right.end);
|
|
3613
|
+
}
|
|
3614
|
+
return result;
|
|
3615
|
+
}
|
|
3616
|
+
function convertScripts(input) {
|
|
3617
|
+
return input.replace(/\s*\^\s*/g, "^").replace(/\s*_\s*/g, "_").replace(/\^(?!\{)([^\s{}_^]+)/g, "^{$1}").replace(/_(?!\{)([^\s{}_^]+)/g, "_{$1}");
|
|
3618
|
+
}
|
|
3619
|
+
function convertOperators(input) {
|
|
3620
|
+
return input.replace(/\+-/g, "\\pm").replace(/-\+/g, "\\mp").replace(/\/\//g, "\\parallel").replace(/△/g, "\\triangle ").replace(/□/g, "\\square ").replace(/‧/g, "\\cdot ").replace(/!=/g, "\\neq").replace(/<=/g, "\\leq").replace(/>=/g, "\\geq").replace(/==/g, "\\equiv");
|
|
3621
|
+
}
|
|
3622
|
+
function removeFontDirectives(input) {
|
|
3623
|
+
return input.replace(/(?<!\\)\b(?:rm|it)\b\s*/gi, "");
|
|
3624
|
+
}
|
|
3625
|
+
function convertWords(input) {
|
|
3626
|
+
return input.replace(/(?<![\\A-Za-z0-9])([A-Za-z][A-Za-z0-9]*)(?![A-Za-z0-9])/g, (word) => {
|
|
3627
|
+
const exact = SYMBOL_WORDS.get(word);
|
|
3628
|
+
if (exact) return exact;
|
|
3629
|
+
const lower = word.toLowerCase();
|
|
3630
|
+
return SYMBOL_WORDS.get(lower) ?? WORD_COMMANDS.get(lower) ?? word;
|
|
3631
|
+
});
|
|
3632
|
+
}
|
|
3633
|
+
function cleanupLatexSpacing(input) {
|
|
3634
|
+
return input.replace(/\\left\s*\{/g, "\\left\\{").replace(/\\right\s*\}/g, "\\right\\}").replace(/\\left\s*([\[\]\(\)\|])/g, "\\left$1").replace(/\\right\s*([\[\]\(\)\|])/g, "\\right$1").replace(/\s*\\,\s*/g, "\\,").replace(/\s+/g, " ").replace(/\{\s+/g, "{").replace(/\s+\}/g, "}").trim();
|
|
3635
|
+
}
|
|
3636
|
+
function findTopLevelWord(input, word) {
|
|
3637
|
+
let curly = 0;
|
|
3638
|
+
let paren = 0;
|
|
3639
|
+
for (let i = 0; i <= input.length - word.length; i++) {
|
|
3640
|
+
const ch = input[i];
|
|
3641
|
+
if (ch === "{") curly++;
|
|
3642
|
+
else if (ch === "}") curly = Math.max(0, curly - 1);
|
|
3643
|
+
else if (ch === "(") paren++;
|
|
3644
|
+
else if (ch === ")") paren = Math.max(0, paren - 1);
|
|
3645
|
+
if (curly !== 0 || paren !== 0) continue;
|
|
3646
|
+
if (input.slice(i, i + word.length).toLowerCase() !== word) continue;
|
|
3647
|
+
if (isWordChar(input[i - 1]) || isWordChar(input[i + word.length])) continue;
|
|
3648
|
+
return i;
|
|
3649
|
+
}
|
|
3650
|
+
return -1;
|
|
3651
|
+
}
|
|
3652
|
+
function readLeftAtom(input, end) {
|
|
3653
|
+
let pos = end - 1;
|
|
3654
|
+
while (pos >= 0 && /\s/.test(input[pos])) pos--;
|
|
3655
|
+
if (pos < 0) return null;
|
|
3656
|
+
if (input[pos] === "}") {
|
|
3657
|
+
const start2 = findMatchingLeft(input, pos, "{", "}");
|
|
3658
|
+
if (start2 >= 0) return { start: start2, atom: input.slice(start2, pos + 1) };
|
|
3659
|
+
}
|
|
3660
|
+
if (input[pos] === ")") {
|
|
3661
|
+
const start2 = findMatchingLeft(input, pos, "(", ")");
|
|
3662
|
+
if (start2 >= 0) return { start: start2, atom: input.slice(start2, pos + 1) };
|
|
3663
|
+
}
|
|
3664
|
+
let start = pos;
|
|
3665
|
+
while (start >= 0 && !/\s/.test(input[start]) && !/[+\-=<>]/.test(input[start])) start--;
|
|
3666
|
+
return { start: start + 1, atom: input.slice(start + 1, pos + 1) };
|
|
3667
|
+
}
|
|
3668
|
+
function readRightAtom(input, start) {
|
|
3669
|
+
let pos = start;
|
|
3670
|
+
while (pos < input.length && /\s/.test(input[pos])) pos++;
|
|
3671
|
+
if (pos >= input.length) return null;
|
|
3672
|
+
if (input[pos] === "{") {
|
|
3673
|
+
const end2 = findMatchingRight(input, pos, "{", "}");
|
|
3674
|
+
if (end2 >= 0) return { end: end2 + 1, atom: input.slice(pos, end2 + 1) };
|
|
3675
|
+
}
|
|
3676
|
+
if (input[pos] === "(") {
|
|
3677
|
+
const end2 = findMatchingRight(input, pos, "(", ")");
|
|
3678
|
+
if (end2 >= 0) return { end: end2 + 1, atom: input.slice(pos, end2 + 1) };
|
|
3679
|
+
}
|
|
3680
|
+
let end = pos;
|
|
3681
|
+
while (end < input.length && !/\s/.test(input[end]) && !/[+\-=<>]/.test(input[end])) end++;
|
|
3682
|
+
return { end, atom: input.slice(pos, end) };
|
|
3683
|
+
}
|
|
3684
|
+
function findMatchingLeft(input, closeIndex, open, close) {
|
|
3685
|
+
let depth = 0;
|
|
3686
|
+
for (let i = closeIndex; i >= 0; i--) {
|
|
3687
|
+
if (input[i] === close) depth++;
|
|
3688
|
+
else if (input[i] === open) {
|
|
3689
|
+
depth--;
|
|
3690
|
+
if (depth === 0) return i;
|
|
3691
|
+
}
|
|
3692
|
+
}
|
|
3693
|
+
return -1;
|
|
3694
|
+
}
|
|
3695
|
+
function findMatchingRight(input, openIndex, open, close) {
|
|
3696
|
+
let depth = 0;
|
|
3697
|
+
for (let i = openIndex; i < input.length; i++) {
|
|
3698
|
+
if (input[i] === open) depth++;
|
|
3699
|
+
else if (input[i] === close) {
|
|
3700
|
+
depth--;
|
|
3701
|
+
if (depth === 0) return i;
|
|
3702
|
+
}
|
|
3703
|
+
}
|
|
3704
|
+
return -1;
|
|
3705
|
+
}
|
|
3706
|
+
function unwrapGroup(input) {
|
|
3707
|
+
const trimmed = input.trim();
|
|
3708
|
+
if (trimmed.startsWith("{") && trimmed.endsWith("}")) return trimmed.slice(1, -1);
|
|
3709
|
+
return trimmed;
|
|
3710
|
+
}
|
|
3711
|
+
function isWordChar(ch) {
|
|
3712
|
+
return !!ch && /[A-Za-z0-9_]/.test(ch);
|
|
3713
|
+
}
|
|
3714
|
+
|
|
3488
3715
|
// src/hwpx/parser.ts
|
|
3489
3716
|
init_page_range();
|
|
3490
3717
|
init_logger();
|
|
@@ -4166,6 +4393,17 @@ function findDescendant(node, targetTag, depth = 0) {
|
|
|
4166
4393
|
}
|
|
4167
4394
|
return null;
|
|
4168
4395
|
}
|
|
4396
|
+
function findChildByLocalName(node, targetTag) {
|
|
4397
|
+
const children = node.childNodes;
|
|
4398
|
+
if (!children) return null;
|
|
4399
|
+
for (let i = 0; i < children.length; i++) {
|
|
4400
|
+
const child = children[i];
|
|
4401
|
+
if (child.nodeType !== 1) continue;
|
|
4402
|
+
const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
|
|
4403
|
+
if (tag === targetTag) return child;
|
|
4404
|
+
}
|
|
4405
|
+
return null;
|
|
4406
|
+
}
|
|
4169
4407
|
function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
|
|
4170
4408
|
const children = drawTextNode.childNodes;
|
|
4171
4409
|
if (!children) return;
|
|
@@ -4268,6 +4506,22 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
4268
4506
|
case "shapeComment":
|
|
4269
4507
|
case "drawText":
|
|
4270
4508
|
break;
|
|
4509
|
+
// 수식: <hp:equation> 내부의 <hp:script>에 HML/HULK-style 수식 본문이
|
|
4510
|
+
// 들어있음. hwpEquationToLatex로 LaTeX 변환 후 `$...$`로 래핑하여
|
|
4511
|
+
// 본문 텍스트에 인라인 삽입. 변환 실패/빈 결과는 조용히 드롭
|
|
4512
|
+
// (대체 텍스트 "수식입니다." 누출 방지는 기존 정규식이 처리).
|
|
4513
|
+
case "equation": {
|
|
4514
|
+
const script = findChildByLocalName(child, "script");
|
|
4515
|
+
const raw = script ? extractTextFromNode(script) : "";
|
|
4516
|
+
if (raw.trim()) {
|
|
4517
|
+
try {
|
|
4518
|
+
const latex = hwpEquationToLatex(raw).trim();
|
|
4519
|
+
if (latex) text += " $" + latex.replace(/\$/g, "\\$") + "$ ";
|
|
4520
|
+
} catch {
|
|
4521
|
+
}
|
|
4522
|
+
}
|
|
4523
|
+
break;
|
|
4524
|
+
}
|
|
4271
4525
|
// run 요소에서 charPrIDRef 추출
|
|
4272
4526
|
case "r": {
|
|
4273
4527
|
const runCharPr = child.getAttribute("charPrIDRef");
|
|
@@ -4334,8 +4588,13 @@ var TAG_CHAR_SHAPE = 68;
|
|
|
4334
4588
|
var TAG_CTRL_HEADER = 71;
|
|
4335
4589
|
var TAG_LIST_HEADER = 72;
|
|
4336
4590
|
var TAG_TABLE = 77;
|
|
4337
|
-
var
|
|
4338
|
-
var
|
|
4591
|
+
var TAG_EQEDIT = 88;
|
|
4592
|
+
var HWPTAG_BEGIN = 16;
|
|
4593
|
+
var TAG_ID_MAPPINGS = HWPTAG_BEGIN + 1;
|
|
4594
|
+
var TAG_FACE_NAME = HWPTAG_BEGIN + 3;
|
|
4595
|
+
var TAG_DOC_CHAR_SHAPE = HWPTAG_BEGIN + 5;
|
|
4596
|
+
var TAG_DOC_PARA_SHAPE = HWPTAG_BEGIN + 9;
|
|
4597
|
+
var TAG_DOC_STYLE = HWPTAG_BEGIN + 10;
|
|
4339
4598
|
var CHAR_LINE = 0;
|
|
4340
4599
|
var CHAR_SECTION_BREAK = 10;
|
|
4341
4600
|
var CHAR_PARA = 13;
|
|
@@ -4493,6 +4752,15 @@ function extractText(data) {
|
|
|
4493
4752
|
}
|
|
4494
4753
|
return result;
|
|
4495
4754
|
}
|
|
4755
|
+
function extractEquationText(data) {
|
|
4756
|
+
if (data.length < 6) return null;
|
|
4757
|
+
const scriptLength = data.readUInt16LE(4);
|
|
4758
|
+
const scriptStart = 6;
|
|
4759
|
+
const scriptEnd = scriptStart + scriptLength * 2;
|
|
4760
|
+
if (scriptLength <= 0 || scriptEnd > data.length) return null;
|
|
4761
|
+
const equation = data.subarray(scriptStart, scriptEnd).toString("utf16le").replace(/\0+/g, "").trim();
|
|
4762
|
+
return equation || null;
|
|
4763
|
+
}
|
|
4496
4764
|
|
|
4497
4765
|
// src/hwp5/aes.ts
|
|
4498
4766
|
var S_BOX = new Uint8Array([
|
|
@@ -5652,6 +5920,26 @@ function findViewTextSectionsLenient(lcfb, compressed) {
|
|
|
5652
5920
|
return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
|
|
5653
5921
|
}
|
|
5654
5922
|
var TAG_SHAPE_COMPONENT = 74;
|
|
5923
|
+
var CTRL_ID_EQEDIT = "deqe";
|
|
5924
|
+
function isEquationControlId(ctrlId) {
|
|
5925
|
+
return ctrlId === CTRL_ID_EQEDIT || ctrlId === "eqed";
|
|
5926
|
+
}
|
|
5927
|
+
function formatEquationForMarkdown(equation) {
|
|
5928
|
+
const normalized = hwpEquationToLatex(equation);
|
|
5929
|
+
if (!normalized) return "";
|
|
5930
|
+
return `$${normalized.replace(/\$/g, "\\$")}$`;
|
|
5931
|
+
}
|
|
5932
|
+
function extractEquationFromControl(records, ctrlIdx) {
|
|
5933
|
+
const ctrlLevel = records[ctrlIdx].level;
|
|
5934
|
+
for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 10; j++) {
|
|
5935
|
+
const r = records[j];
|
|
5936
|
+
if (r.level <= ctrlLevel) break;
|
|
5937
|
+
if (r.tagId !== TAG_EQEDIT) continue;
|
|
5938
|
+
const equation = extractEquationText(r.data);
|
|
5939
|
+
return equation ? formatEquationForMarkdown(equation) : null;
|
|
5940
|
+
}
|
|
5941
|
+
return null;
|
|
5942
|
+
}
|
|
5655
5943
|
function extractBinDataId(records, ctrlIdx) {
|
|
5656
5944
|
const ctrlLevel = records[ctrlIdx].level;
|
|
5657
5945
|
for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 50; j++) {
|
|
@@ -5811,6 +6099,16 @@ function parseSection(records, docInfo, warnings, sectionNum) {
|
|
|
5811
6099
|
}
|
|
5812
6100
|
} else if (ctrlId === " elo" || ctrlId === "ole ") {
|
|
5813
6101
|
warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
|
|
6102
|
+
} else if (isEquationControlId(ctrlId)) {
|
|
6103
|
+
const equation = extractEquationFromControl(records, i);
|
|
6104
|
+
if (equation) {
|
|
6105
|
+
const lastBlock = blocks[blocks.length - 1];
|
|
6106
|
+
if (lastBlock && lastBlock.type === "paragraph" && lastBlock.text) {
|
|
6107
|
+
lastBlock.text = lastBlock.text + " " + equation;
|
|
6108
|
+
} else {
|
|
6109
|
+
blocks.push({ type: "paragraph", text: equation, pageNumber: sectionNum });
|
|
6110
|
+
}
|
|
6111
|
+
}
|
|
5814
6112
|
} else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
|
|
5815
6113
|
const noteText = extractNoteText(records, i);
|
|
5816
6114
|
if (noteText && blocks.length > 0) {
|
|
@@ -5843,6 +6141,13 @@ function extractNoteText(records, ctrlIdx) {
|
|
|
5843
6141
|
const t = extractText(r.data).trim();
|
|
5844
6142
|
if (t) texts.push(t);
|
|
5845
6143
|
}
|
|
6144
|
+
if (r.tagId === TAG_CTRL_HEADER && r.data.length >= 4) {
|
|
6145
|
+
const innerCtrlId = r.data.subarray(0, 4).toString("ascii");
|
|
6146
|
+
if (isEquationControlId(innerCtrlId)) {
|
|
6147
|
+
const equation = extractEquationFromControl(records, j);
|
|
6148
|
+
if (equation) texts.push(equation);
|
|
6149
|
+
}
|
|
6150
|
+
}
|
|
5846
6151
|
}
|
|
5847
6152
|
return texts.length > 0 ? texts.join(" ") : null;
|
|
5848
6153
|
}
|
|
@@ -5856,6 +6161,13 @@ function extractTextBoxText(records, ctrlIdx) {
|
|
|
5856
6161
|
const t = extractText(r.data).trim();
|
|
5857
6162
|
if (t) texts.push(t);
|
|
5858
6163
|
}
|
|
6164
|
+
if (r.tagId === TAG_CTRL_HEADER && r.data.length >= 4) {
|
|
6165
|
+
const innerCtrlId = r.data.subarray(0, 4).toString("ascii");
|
|
6166
|
+
if (isEquationControlId(innerCtrlId)) {
|
|
6167
|
+
const equation = extractEquationFromControl(records, j);
|
|
6168
|
+
if (equation) texts.push(equation);
|
|
6169
|
+
}
|
|
6170
|
+
}
|
|
5859
6171
|
}
|
|
5860
6172
|
return texts.length > 0 ? texts.join("\n") : null;
|
|
5861
6173
|
}
|
|
@@ -5924,6 +6236,12 @@ function parseParagraphWithTables(records, startIdx) {
|
|
|
5924
6236
|
i = nextIdx;
|
|
5925
6237
|
continue;
|
|
5926
6238
|
}
|
|
6239
|
+
if (isEquationControlId(ctrlId)) {
|
|
6240
|
+
const equation = extractEquationFromControl(records, i);
|
|
6241
|
+
if (equation) {
|
|
6242
|
+
text = text ? text + " " + equation : equation;
|
|
6243
|
+
}
|
|
6244
|
+
}
|
|
5927
6245
|
}
|
|
5928
6246
|
i++;
|
|
5929
6247
|
}
|
|
@@ -11233,526 +11551,6 @@ async function markdownToXlsx(markdown, options) {
|
|
|
11233
11551
|
return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
|
|
11234
11552
|
}
|
|
11235
11553
|
|
|
11236
|
-
// src/convert/index.ts
|
|
11237
|
-
var import_promises3 = require("fs/promises");
|
|
11238
|
-
|
|
11239
|
-
// src/convert/libreoffice.ts
|
|
11240
|
-
var import_libreoffice_convert = __toESM(require("libreoffice-convert"), 1);
|
|
11241
|
-
|
|
11242
|
-
// src/convert/error.ts
|
|
11243
|
-
var ConvertError = class extends Error {
|
|
11244
|
-
constructor(code, message) {
|
|
11245
|
-
super(message);
|
|
11246
|
-
this.code = code;
|
|
11247
|
-
this.name = "ConvertError";
|
|
11248
|
-
}
|
|
11249
|
-
};
|
|
11250
|
-
|
|
11251
|
-
// src/convert/installer.ts
|
|
11252
|
-
var import_os3 = require("os");
|
|
11253
|
-
var import_path5 = require("path");
|
|
11254
|
-
var import_promises2 = require("fs/promises");
|
|
11255
|
-
var import_fs4 = require("fs");
|
|
11256
|
-
var import_child_process4 = require("child_process");
|
|
11257
|
-
var installInFlight = null;
|
|
11258
|
-
var CACHE_DIR = (0, import_path5.join)((0, import_os3.homedir)(), ".cache", "kordoc", "libreoffice");
|
|
11259
|
-
var VERSION_FILE = (0, import_path5.join)(CACHE_DIR, "version");
|
|
11260
|
-
var PACKAGES = {
|
|
11261
|
-
darwin: {
|
|
11262
|
-
url: "https://ftp.osuosl.org/pub/tdf/libreoffice/stable/26.2.3/mac/x86_64/LibreOffice_26.2.3_MacOS_x86-64.dmg",
|
|
11263
|
-
binPath: "LibreOffice.app/Contents/MacOS/soffice",
|
|
11264
|
-
sizeMb: 300
|
|
11265
|
-
},
|
|
11266
|
-
linux: {
|
|
11267
|
-
url: "https://ftp.osuosl.org/pub/tdf/libreoffice/stable/26.2.3/deb/x86_64/LibreOffice_26.2.3_Linux_x86-64_deb.tar.gz",
|
|
11268
|
-
binPath: "opt/libreoffice26.2/program/soffice",
|
|
11269
|
-
sizeMb: 210
|
|
11270
|
-
},
|
|
11271
|
-
win32: {
|
|
11272
|
-
url: "https://ftp.osuosl.org/pub/tdf/libreoffice/stable/26.2.3/win/x86_64/LibreOffice_26.2.3_Win_x86-64.msi",
|
|
11273
|
-
binPath: "LibreOffice/program/soffice.exe",
|
|
11274
|
-
sizeMb: 360
|
|
11275
|
-
}
|
|
11276
|
-
};
|
|
11277
|
-
async function findInPath() {
|
|
11278
|
-
return new Promise((resolve4) => {
|
|
11279
|
-
const child = (0, import_child_process4.spawn)("soffice", ["--version"], { stdio: "ignore" });
|
|
11280
|
-
child.on("close", (code) => resolve4(code === 0 ? "soffice" : null));
|
|
11281
|
-
child.on("error", () => resolve4(null));
|
|
11282
|
-
});
|
|
11283
|
-
}
|
|
11284
|
-
async function findInCache() {
|
|
11285
|
-
const cachedBin = (0, import_path5.join)(CACHE_DIR, "bin", "soffice");
|
|
11286
|
-
try {
|
|
11287
|
-
await (0, import_promises2.access)(cachedBin);
|
|
11288
|
-
return cachedBin;
|
|
11289
|
-
} catch {
|
|
11290
|
-
return null;
|
|
11291
|
-
}
|
|
11292
|
-
}
|
|
11293
|
-
async function findInDefaultPaths() {
|
|
11294
|
-
const platform = process.platform;
|
|
11295
|
-
const paths = [];
|
|
11296
|
-
if (platform === "darwin") {
|
|
11297
|
-
paths.push(
|
|
11298
|
-
"/Applications/LibreOffice.app/Contents/MacOS/soffice",
|
|
11299
|
-
"/opt/homebrew/bin/soffice",
|
|
11300
|
-
"/usr/local/bin/soffice"
|
|
11301
|
-
);
|
|
11302
|
-
} else if (platform === "linux") {
|
|
11303
|
-
paths.push(
|
|
11304
|
-
"/usr/bin/soffice",
|
|
11305
|
-
"/usr/lib/libreoffice/program/soffice"
|
|
11306
|
-
);
|
|
11307
|
-
} else if (platform === "win32") {
|
|
11308
|
-
const pf = process.env["ProgramFiles"] ?? "C:\\Program Files";
|
|
11309
|
-
const pf86 = process.env["ProgramFiles(x86)"] ?? "C:\\Program Files (x86)";
|
|
11310
|
-
paths.push(
|
|
11311
|
-
(0, import_path5.join)(pf, "LibreOffice", "program", "soffice.exe"),
|
|
11312
|
-
(0, import_path5.join)(pf86, "LibreOffice", "program", "soffice.exe")
|
|
11313
|
-
);
|
|
11314
|
-
}
|
|
11315
|
-
for (const p of paths) {
|
|
11316
|
-
try {
|
|
11317
|
-
await (0, import_promises2.access)(p);
|
|
11318
|
-
return p;
|
|
11319
|
-
} catch {
|
|
11320
|
-
continue;
|
|
11321
|
-
}
|
|
11322
|
-
}
|
|
11323
|
-
return null;
|
|
11324
|
-
}
|
|
11325
|
-
async function downloadWithProgress(url, dest, totalBytes, onProgress) {
|
|
11326
|
-
const response = await fetch(url);
|
|
11327
|
-
if (!response.ok) throw new Error(`\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: HTTP ${response.status} (${url})`);
|
|
11328
|
-
if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");
|
|
11329
|
-
const file = (0, import_fs4.createWriteStream)(dest);
|
|
11330
|
-
const reader = response.body.getReader();
|
|
11331
|
-
let downloaded = 0;
|
|
11332
|
-
try {
|
|
11333
|
-
while (true) {
|
|
11334
|
-
const { done, value } = await reader.read();
|
|
11335
|
-
if (done) break;
|
|
11336
|
-
if (!file.write(value)) {
|
|
11337
|
-
await new Promise((resolve4) => file.once("drain", resolve4));
|
|
11338
|
-
}
|
|
11339
|
-
downloaded += value.length;
|
|
11340
|
-
onProgress?.(downloaded, totalBytes);
|
|
11341
|
-
}
|
|
11342
|
-
} finally {
|
|
11343
|
-
reader.releaseLock();
|
|
11344
|
-
await new Promise((resolve4, reject) => {
|
|
11345
|
-
file.end((err) => err ? reject(err) : resolve4());
|
|
11346
|
-
});
|
|
11347
|
-
}
|
|
11348
|
-
}
|
|
11349
|
-
async function installForPlatform(pkg, onProgress) {
|
|
11350
|
-
const platform = process.platform;
|
|
11351
|
-
await (0, import_promises2.mkdir)(CACHE_DIR, { recursive: true });
|
|
11352
|
-
const downloadPath = (0, import_path5.join)(CACHE_DIR, `download-${Date.now()}`);
|
|
11353
|
-
await downloadWithProgress(pkg.url, downloadPath, pkg.sizeMb * 1024 * 1024, onProgress);
|
|
11354
|
-
try {
|
|
11355
|
-
if (platform === "darwin") {
|
|
11356
|
-
return await installMacOS(pkg, downloadPath);
|
|
11357
|
-
} else if (platform === "linux") {
|
|
11358
|
-
return await installLinux(pkg, downloadPath);
|
|
11359
|
-
} else if (platform === "win32") {
|
|
11360
|
-
return await installWindows(pkg, downloadPath);
|
|
11361
|
-
}
|
|
11362
|
-
} catch (err) {
|
|
11363
|
-
await (0, import_promises2.rm)(downloadPath, { force: true });
|
|
11364
|
-
throw err;
|
|
11365
|
-
}
|
|
11366
|
-
throw new ConvertError("UNSUPPORTED_PLATFORM", `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4`);
|
|
11367
|
-
}
|
|
11368
|
-
async function installMacOS(pkg, downloadPath) {
|
|
11369
|
-
const mountPoint = `/Volumes/LibreOffice_${Date.now()}`;
|
|
11370
|
-
await new Promise((resolve4, reject) => {
|
|
11371
|
-
const stderr = [];
|
|
11372
|
-
const child = (0, import_child_process4.spawn)("hdiutil", ["attach", "-nobrowse", "-noverify", "-mountpoint", mountPoint, downloadPath]);
|
|
11373
|
-
child.stderr?.on("data", (d) => stderr.push(d.toString()));
|
|
11374
|
-
child.on(
|
|
11375
|
-
"close",
|
|
11376
|
-
(code) => code === 0 ? resolve4() : reject(new Error(`dmg \uB9C8\uC6B4\uD2B8 \uC2E4\uD328 (code=${code}): ${stderr.join("").trim()}`))
|
|
11377
|
-
);
|
|
11378
|
-
});
|
|
11379
|
-
try {
|
|
11380
|
-
const appSource = (0, import_path5.join)(mountPoint, "LibreOffice.app");
|
|
11381
|
-
const appDest = (0, import_path5.join)(CACHE_DIR, "LibreOffice.app");
|
|
11382
|
-
await new Promise((resolve4, reject) => {
|
|
11383
|
-
const child = (0, import_child_process4.spawn)("cp", ["-R", appSource, appDest]);
|
|
11384
|
-
child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(".app \uBCF5\uC0AC \uC2E4\uD328")));
|
|
11385
|
-
});
|
|
11386
|
-
} finally {
|
|
11387
|
-
await new Promise((resolve4) => {
|
|
11388
|
-
const child = (0, import_child_process4.spawn)("hdiutil", ["detach", mountPoint]);
|
|
11389
|
-
child.on("close", () => resolve4());
|
|
11390
|
-
});
|
|
11391
|
-
}
|
|
11392
|
-
await (0, import_promises2.rm)(downloadPath, { force: true });
|
|
11393
|
-
return await createSymlink((0, import_path5.join)(CACHE_DIR, pkg.binPath));
|
|
11394
|
-
}
|
|
11395
|
-
async function installLinux(pkg, downloadPath) {
|
|
11396
|
-
const extractDir = (0, import_path5.join)(CACHE_DIR, `extract-${Date.now()}`);
|
|
11397
|
-
await (0, import_promises2.mkdir)(extractDir, { recursive: true });
|
|
11398
|
-
await new Promise((resolve4, reject) => {
|
|
11399
|
-
const child = (0, import_child_process4.spawn)("tar", ["xzf", downloadPath, "-C", extractDir]);
|
|
11400
|
-
child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("\uC555\uCD95 \uD574\uC81C \uC2E4\uD328")));
|
|
11401
|
-
});
|
|
11402
|
-
const debsDir = (0, import_path5.join)(extractDir, "DEBS");
|
|
11403
|
-
try {
|
|
11404
|
-
await (0, import_promises2.access)(debsDir);
|
|
11405
|
-
const entries = await (await import("fs/promises")).readdir(debsDir);
|
|
11406
|
-
for (const entry of entries) {
|
|
11407
|
-
if (entry.endsWith(".deb")) {
|
|
11408
|
-
await new Promise((resolve4, reject) => {
|
|
11409
|
-
const child = (0, import_child_process4.spawn)("dpkg-deb", ["-x", (0, import_path5.join)(debsDir, entry), CACHE_DIR]);
|
|
11410
|
-
child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(`${entry} \uCD94\uCD9C \uC2E4\uD328`)));
|
|
11411
|
-
});
|
|
11412
|
-
}
|
|
11413
|
-
}
|
|
11414
|
-
} catch {
|
|
11415
|
-
}
|
|
11416
|
-
await (0, import_promises2.rm)(downloadPath, { force: true });
|
|
11417
|
-
await (0, import_promises2.rm)(extractDir, { recursive: true, force: true });
|
|
11418
|
-
return await createSymlink((0, import_path5.join)(CACHE_DIR, pkg.binPath));
|
|
11419
|
-
}
|
|
11420
|
-
async function installWindows(pkg, downloadPath) {
|
|
11421
|
-
await new Promise((resolve4, reject) => {
|
|
11422
|
-
const child = (0, import_child_process4.spawn)("msiexec", ["/a", downloadPath, "/qn", `TARGETDIR=${CACHE_DIR}`]);
|
|
11423
|
-
child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("MSI \uC124\uCE58 \uC2E4\uD328")));
|
|
11424
|
-
});
|
|
11425
|
-
await (0, import_promises2.rm)(downloadPath, { force: true });
|
|
11426
|
-
return (0, import_path5.join)(CACHE_DIR, pkg.binPath);
|
|
11427
|
-
}
|
|
11428
|
-
async function createSymlink(actualBin) {
|
|
11429
|
-
const binDir = (0, import_path5.join)(CACHE_DIR, "bin");
|
|
11430
|
-
await (0, import_promises2.mkdir)(binDir, { recursive: true });
|
|
11431
|
-
const linkBin = (0, import_path5.join)(binDir, "soffice");
|
|
11432
|
-
try {
|
|
11433
|
-
await (0, import_promises2.symlink)(actualBin, linkBin);
|
|
11434
|
-
} catch {
|
|
11435
|
-
}
|
|
11436
|
-
process.env.PATH = `${binDir}${import_path5.delimiter}${process.env.PATH}`;
|
|
11437
|
-
return linkBin;
|
|
11438
|
-
}
|
|
11439
|
-
async function installLibreOffice(onProgress) {
|
|
11440
|
-
const platform = process.platform;
|
|
11441
|
-
const pkg = PACKAGES[platform];
|
|
11442
|
-
if (!pkg) {
|
|
11443
|
-
throw new ConvertError(
|
|
11444
|
-
"UNSUPPORTED_PLATFORM",
|
|
11445
|
-
`${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. \uC218\uB3D9\uC73C\uB85C LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.`
|
|
11446
|
-
);
|
|
11447
|
-
}
|
|
11448
|
-
return await installForPlatform(pkg, onProgress);
|
|
11449
|
-
}
|
|
11450
|
-
async function resolveSoffice(emitter, autoInstall = true) {
|
|
11451
|
-
emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
|
|
11452
|
-
const inPath = await findInPath();
|
|
11453
|
-
if (inPath) {
|
|
11454
|
-
emitter.validate("soffice_found", "\uC2DC\uC2A4\uD15C PATH\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inPath });
|
|
11455
|
-
return inPath;
|
|
11456
|
-
}
|
|
11457
|
-
const inCache = await findInCache();
|
|
11458
|
-
if (inCache) {
|
|
11459
|
-
emitter.validate("soffice_found", "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC", { sofficePath: inCache });
|
|
11460
|
-
return inCache;
|
|
11461
|
-
}
|
|
11462
|
-
const inDefault = await findInDefaultPaths();
|
|
11463
|
-
if (inDefault) {
|
|
11464
|
-
emitter.validate("soffice_found", "\uAE30\uBCF8 \uACBD\uB85C\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inDefault });
|
|
11465
|
-
return inDefault;
|
|
11466
|
-
}
|
|
11467
|
-
if (!autoInstall) {
|
|
11468
|
-
emitter.error(
|
|
11469
|
-
"validate",
|
|
11470
|
-
"SOFFICE_NOT_FOUND",
|
|
11471
|
-
"LibreOffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4",
|
|
11472
|
-
"\uC218\uB3D9\uC73C\uB85C \uC124\uCE58\uD558\uAC70\uB098 autoInstallLibreOffice: true \uC635\uC158\uC744 \uC0AC\uC6A9\uD558\uC138\uC694."
|
|
11473
|
-
);
|
|
11474
|
-
throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
|
|
11475
|
-
}
|
|
11476
|
-
if (installInFlight) {
|
|
11477
|
-
return installInFlight;
|
|
11478
|
-
}
|
|
11479
|
-
emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
|
|
11480
|
-
installInFlight = (async () => {
|
|
11481
|
-
try {
|
|
11482
|
-
const installed = await installLibreOffice((downloaded, total) => {
|
|
11483
|
-
const percent = Math.round(downloaded / total * 100);
|
|
11484
|
-
emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
|
|
11485
|
-
percent,
|
|
11486
|
-
downloadedBytes: downloaded,
|
|
11487
|
-
totalBytes: total
|
|
11488
|
-
});
|
|
11489
|
-
});
|
|
11490
|
-
emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
|
|
11491
|
-
return installed;
|
|
11492
|
-
} catch (err) {
|
|
11493
|
-
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
11494
|
-
emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
|
|
11495
|
-
throw err;
|
|
11496
|
-
} finally {
|
|
11497
|
-
installInFlight = null;
|
|
11498
|
-
}
|
|
11499
|
-
})();
|
|
11500
|
-
return installInFlight;
|
|
11501
|
-
}
|
|
11502
|
-
|
|
11503
|
-
// src/convert/libreoffice.ts
|
|
11504
|
-
var libreConvert = import_libreoffice_convert.default.convert;
|
|
11505
|
-
var libreConvertWithOptions = import_libreoffice_convert.default.convertWithOptions;
|
|
11506
|
-
async function convertBuffer(buffer, targetExt, timeoutMs = 6e4, sofficePath) {
|
|
11507
|
-
return new Promise((resolve4, reject) => {
|
|
11508
|
-
const timer = setTimeout(() => {
|
|
11509
|
-
reject(
|
|
11510
|
-
new ConvertError("TIMEOUT", `\uBCC0\uD658 \uD0C0\uC784\uC544\uC6C3 (${timeoutMs}ms \uCD08\uACFC)`)
|
|
11511
|
-
);
|
|
11512
|
-
}, timeoutMs);
|
|
11513
|
-
const cb = (err, done) => {
|
|
11514
|
-
clearTimeout(timer);
|
|
11515
|
-
if (err || !done) {
|
|
11516
|
-
reject(
|
|
11517
|
-
new ConvertError(
|
|
11518
|
-
"CONVERT_FAILED",
|
|
11519
|
-
err?.message ?? "LibreOffice \uBCC0\uD658 \uC2E4\uD328"
|
|
11520
|
-
)
|
|
11521
|
-
);
|
|
11522
|
-
return;
|
|
11523
|
-
}
|
|
11524
|
-
resolve4(done);
|
|
11525
|
-
};
|
|
11526
|
-
if (sofficePath) {
|
|
11527
|
-
libreConvertWithOptions(buffer, targetExt, void 0, { sofficeBinaryPaths: [sofficePath] }, cb);
|
|
11528
|
-
} else {
|
|
11529
|
-
libreConvert(buffer, targetExt, void 0, cb);
|
|
11530
|
-
}
|
|
11531
|
-
});
|
|
11532
|
-
}
|
|
11533
|
-
|
|
11534
|
-
// src/convert/events.ts
|
|
11535
|
-
var ConvertEventEmitter = class {
|
|
11536
|
-
listener = null;
|
|
11537
|
-
/** 이벤트 리스너 등록 */
|
|
11538
|
-
setListener(listener) {
|
|
11539
|
-
this.listener = listener;
|
|
11540
|
-
}
|
|
11541
|
-
/** 이벤트 발송 */
|
|
11542
|
-
emit(event) {
|
|
11543
|
-
try {
|
|
11544
|
-
this.listener?.(event);
|
|
11545
|
-
} catch {
|
|
11546
|
-
}
|
|
11547
|
-
}
|
|
11548
|
-
/** 타입 안전한 헬퍼: detect 이벤트 */
|
|
11549
|
-
detect(stage, message, meta) {
|
|
11550
|
-
this.emit({ type: "detect", stage, message, ...meta });
|
|
11551
|
-
}
|
|
11552
|
-
/** 타입 안전한 헬퍼: validate 이벤트 */
|
|
11553
|
-
validate(stage, message, meta) {
|
|
11554
|
-
this.emit({ type: "validate", stage, message, ...meta });
|
|
11555
|
-
}
|
|
11556
|
-
/** 타입 안전한 헬퍼: install 이벤트 */
|
|
11557
|
-
install(stage, message, meta) {
|
|
11558
|
-
this.emit({ type: "install", stage, message, ...meta });
|
|
11559
|
-
}
|
|
11560
|
-
/** 타입 안전한 헬퍼: convert 진행 이벤트 */
|
|
11561
|
-
progress(percent, message) {
|
|
11562
|
-
this.emit({ type: "convert", stage: "convert_progress", message, percent });
|
|
11563
|
-
}
|
|
11564
|
-
/** 타입 안전한 헬퍼: convert 시작 */
|
|
11565
|
-
convertStart(message) {
|
|
11566
|
-
this.emit({ type: "convert", stage: "convert_start", message, percent: 0 });
|
|
11567
|
-
}
|
|
11568
|
-
/** 타입 안전한 헬퍼: convert 완료 */
|
|
11569
|
-
convertDone(message) {
|
|
11570
|
-
this.emit({ type: "convert", stage: "convert_done", message, percent: 100 });
|
|
11571
|
-
}
|
|
11572
|
-
/** 타입 안전한 헬퍼: 완료 이벤트 */
|
|
11573
|
-
complete(result) {
|
|
11574
|
-
this.emit({ type: "complete", stage: "success", message: "\uBCC0\uD658 \uC644\uB8CC", result });
|
|
11575
|
-
}
|
|
11576
|
-
/** 타입 안전한 헬퍼: 에러 이벤트 */
|
|
11577
|
-
error(stage, code, message, suggestion) {
|
|
11578
|
-
this.emit({ type: "error", stage, code, message, recoverable: true, suggestion });
|
|
11579
|
-
}
|
|
11580
|
-
};
|
|
11581
|
-
|
|
11582
|
-
// src/convert/index.ts
|
|
11583
|
-
var isConverting = false;
|
|
11584
|
-
var queue = [];
|
|
11585
|
-
async function acquireConvertLock() {
|
|
11586
|
-
if (!isConverting) {
|
|
11587
|
-
isConverting = true;
|
|
11588
|
-
return () => {
|
|
11589
|
-
isConverting = false;
|
|
11590
|
-
const next = queue.shift();
|
|
11591
|
-
next?.();
|
|
11592
|
-
};
|
|
11593
|
-
}
|
|
11594
|
-
return new Promise((resolve4) => {
|
|
11595
|
-
queue.push(() => {
|
|
11596
|
-
isConverting = true;
|
|
11597
|
-
resolve4(() => {
|
|
11598
|
-
isConverting = false;
|
|
11599
|
-
const next = queue.shift();
|
|
11600
|
-
next?.();
|
|
11601
|
-
});
|
|
11602
|
-
});
|
|
11603
|
-
});
|
|
11604
|
-
}
|
|
11605
|
-
async function convertToPdf(input, options) {
|
|
11606
|
-
const emitter = new ConvertEventEmitter();
|
|
11607
|
-
if (options?.onEvent) {
|
|
11608
|
-
emitter.setListener(options.onEvent);
|
|
11609
|
-
}
|
|
11610
|
-
if (options?.onProgress) {
|
|
11611
|
-
const legacyProgress = options.onProgress;
|
|
11612
|
-
emitter.setListener((event) => {
|
|
11613
|
-
if (event.type === "convert" && event.stage === "convert_progress") {
|
|
11614
|
-
legacyProgress(event.percent, event.message);
|
|
11615
|
-
}
|
|
11616
|
-
});
|
|
11617
|
-
}
|
|
11618
|
-
try {
|
|
11619
|
-
emitter.detect("reading", "\uC785\uB825 \uD30C\uC77C \uC77D\uB294 \uC911...");
|
|
11620
|
-
let buffer;
|
|
11621
|
-
try {
|
|
11622
|
-
if (typeof input === "string") {
|
|
11623
|
-
buffer = await (0, import_promises3.readFile)(input);
|
|
11624
|
-
} else if (Buffer.isBuffer(input)) {
|
|
11625
|
-
buffer = input;
|
|
11626
|
-
} else {
|
|
11627
|
-
buffer = Buffer.from(input);
|
|
11628
|
-
}
|
|
11629
|
-
} catch (err) {
|
|
11630
|
-
emitter.error(
|
|
11631
|
-
"detect",
|
|
11632
|
-
"PARSE_ERROR",
|
|
11633
|
-
`\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`
|
|
11634
|
-
);
|
|
11635
|
-
return {
|
|
11636
|
-
success: false,
|
|
11637
|
-
code: "PARSE_ERROR",
|
|
11638
|
-
error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
|
|
11639
|
-
stage: "detect"
|
|
11640
|
-
};
|
|
11641
|
-
}
|
|
11642
|
-
const MAX_FILE_SIZE = 500 * 1024 * 1024;
|
|
11643
|
-
if (buffer.length > MAX_FILE_SIZE) {
|
|
11644
|
-
emitter.error(
|
|
11645
|
-
"detect",
|
|
11646
|
-
"FILE_TOO_LARGE",
|
|
11647
|
-
`\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`
|
|
11648
|
-
);
|
|
11649
|
-
return {
|
|
11650
|
-
success: false,
|
|
11651
|
-
code: "FILE_TOO_LARGE",
|
|
11652
|
-
error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
|
|
11653
|
-
stage: "detect"
|
|
11654
|
-
};
|
|
11655
|
-
}
|
|
11656
|
-
const format = detectFormat(toArrayBuffer(buffer));
|
|
11657
|
-
emitter.detect("format_detected", `\uD3EC\uB9F7 \uAC10\uC9C0 \uC644\uB8CC: ${format}`, { format });
|
|
11658
|
-
if (format !== "hwp" && format !== "hwpx") {
|
|
11659
|
-
emitter.error("detect", "UNSUPPORTED_FORMAT", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`);
|
|
11660
|
-
return {
|
|
11661
|
-
success: false,
|
|
11662
|
-
code: "UNSUPPORTED_FORMAT",
|
|
11663
|
-
error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
|
|
11664
|
-
stage: "detect"
|
|
11665
|
-
};
|
|
11666
|
-
}
|
|
11667
|
-
emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
|
|
11668
|
-
let sofficePath;
|
|
11669
|
-
try {
|
|
11670
|
-
sofficePath = await resolveSoffice(emitter, options?.autoInstallLibreOffice ?? true);
|
|
11671
|
-
} catch (err) {
|
|
11672
|
-
if (err instanceof ConvertError) {
|
|
11673
|
-
return {
|
|
11674
|
-
success: false,
|
|
11675
|
-
code: err.code,
|
|
11676
|
-
error: err.message,
|
|
11677
|
-
stage: "validate"
|
|
11678
|
-
};
|
|
11679
|
-
}
|
|
11680
|
-
throw err;
|
|
11681
|
-
}
|
|
11682
|
-
const releaseLock = await acquireConvertLock();
|
|
11683
|
-
try {
|
|
11684
|
-
emitter.convertStart("\uBCC0\uD658 \uC2DC\uC791...");
|
|
11685
|
-
emitter.progress(10, "\uBCC0\uD658 \uC911...");
|
|
11686
|
-
const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs, sofficePath);
|
|
11687
|
-
emitter.progress(100, "\uBCC0\uD658 \uC644\uB8CC");
|
|
11688
|
-
emitter.convertDone("\uBCC0\uD658 \uC644\uB8CC");
|
|
11689
|
-
const result = {
|
|
11690
|
-
success: true,
|
|
11691
|
-
pdf: new Uint8Array(pdf),
|
|
11692
|
-
sourceFormat: format
|
|
11693
|
-
};
|
|
11694
|
-
emitter.complete({
|
|
11695
|
-
sourceFormat: format,
|
|
11696
|
-
pdfSize: pdf.length
|
|
11697
|
-
});
|
|
11698
|
-
return result;
|
|
11699
|
-
} catch (err) {
|
|
11700
|
-
if (err instanceof ConvertError) {
|
|
11701
|
-
emitter.error("convert", err.code, err.message);
|
|
11702
|
-
return {
|
|
11703
|
-
success: false,
|
|
11704
|
-
code: err.code,
|
|
11705
|
-
error: err.message,
|
|
11706
|
-
stage: "convert"
|
|
11707
|
-
};
|
|
11708
|
-
}
|
|
11709
|
-
const errorMsg = err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328";
|
|
11710
|
-
emitter.error("convert", classifyError(err), errorMsg);
|
|
11711
|
-
return {
|
|
11712
|
-
success: false,
|
|
11713
|
-
code: classifyError(err),
|
|
11714
|
-
error: errorMsg,
|
|
11715
|
-
stage: "convert"
|
|
11716
|
-
};
|
|
11717
|
-
} finally {
|
|
11718
|
-
releaseLock();
|
|
11719
|
-
}
|
|
11720
|
-
} catch (unexpectedErr) {
|
|
11721
|
-
const errorMsg = unexpectedErr instanceof Error ? unexpectedErr.message : "\uC608\uC0C1\uCE58 \uBABB\uD55C \uC624\uB958";
|
|
11722
|
-
emitter.error("convert", "PARSE_ERROR", errorMsg);
|
|
11723
|
-
return {
|
|
11724
|
-
success: false,
|
|
11725
|
-
code: "PARSE_ERROR",
|
|
11726
|
-
error: errorMsg,
|
|
11727
|
-
stage: "convert"
|
|
11728
|
-
};
|
|
11729
|
-
}
|
|
11730
|
-
}
|
|
11731
|
-
async function convertHwpToPdf(input, options) {
|
|
11732
|
-
const result = await convertToPdf(input, options);
|
|
11733
|
-
if (result.success && result.sourceFormat !== "hwp") {
|
|
11734
|
-
return {
|
|
11735
|
-
success: false,
|
|
11736
|
-
code: "UNSUPPORTED_FORMAT",
|
|
11737
|
-
error: `HWP 5.x \uD3EC\uB9F7\uC774 \uC544\uB2D9\uB2C8\uB2E4: ${result.sourceFormat}`,
|
|
11738
|
-
stage: "detect"
|
|
11739
|
-
};
|
|
11740
|
-
}
|
|
11741
|
-
return result;
|
|
11742
|
-
}
|
|
11743
|
-
async function convertHwpxToPdf(input, options) {
|
|
11744
|
-
const result = await convertToPdf(input, options);
|
|
11745
|
-
if (result.success && result.sourceFormat !== "hwpx") {
|
|
11746
|
-
return {
|
|
11747
|
-
success: false,
|
|
11748
|
-
code: "UNSUPPORTED_FORMAT",
|
|
11749
|
-
error: `HWPX \uD3EC\uB9F7\uC774 \uC544\uB2D9\uB2C8\uB2E4: ${result.sourceFormat}`,
|
|
11750
|
-
stage: "detect"
|
|
11751
|
-
};
|
|
11752
|
-
}
|
|
11753
|
-
return result;
|
|
11754
|
-
}
|
|
11755
|
-
|
|
11756
11554
|
// src/ocr/api-key-rotation.ts
|
|
11757
11555
|
var AllKeysCoolingDownError = class extends Error {
|
|
11758
11556
|
waitMs;
|
|
@@ -11847,9 +11645,9 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
|
|
|
11847
11645
|
};
|
|
11848
11646
|
|
|
11849
11647
|
// src/pipeline/unified-ocr.ts
|
|
11850
|
-
var
|
|
11851
|
-
var
|
|
11852
|
-
var
|
|
11648
|
+
var import_promises2 = require("fs/promises");
|
|
11649
|
+
var import_path5 = require("path");
|
|
11650
|
+
var import_child_process4 = require("child_process");
|
|
11853
11651
|
var import_node_perf_hooks = require("perf_hooks");
|
|
11854
11652
|
init_logger();
|
|
11855
11653
|
|
|
@@ -11983,15 +11781,15 @@ function elapsedMs(startAt) {
|
|
|
11983
11781
|
return Math.round(import_node_perf_hooks.performance.now() - startAt);
|
|
11984
11782
|
}
|
|
11985
11783
|
async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
11986
|
-
const absInput = (0,
|
|
11987
|
-
const stem = (0,
|
|
11988
|
-
const workspaceDir = (0,
|
|
11989
|
-
const imagesDir = (0,
|
|
11990
|
-
const rawDir = (0,
|
|
11991
|
-
const diffDir = (0,
|
|
11992
|
-
const outputPath = (0,
|
|
11993
|
-
const reportPath = (0,
|
|
11994
|
-
const modelCachePath = (0,
|
|
11784
|
+
const absInput = (0, import_path5.resolve)(inputPath);
|
|
11785
|
+
const stem = (0, import_path5.basename)(absInput, (0, import_path5.extname)(absInput));
|
|
11786
|
+
const workspaceDir = (0, import_path5.resolve)(options.workspaceDir ?? (0, import_path5.join)((0, import_path5.dirname)(absInput), `${stem}_ocr_workspace`));
|
|
11787
|
+
const imagesDir = (0, import_path5.join)(workspaceDir, "images");
|
|
11788
|
+
const rawDir = (0, import_path5.join)(workspaceDir, "ocr", "raw");
|
|
11789
|
+
const diffDir = (0, import_path5.join)(workspaceDir, "ocr", "diff");
|
|
11790
|
+
const outputPath = (0, import_path5.resolve)(options.outputPath ?? (0, import_path5.join)((0, import_path5.dirname)(absInput), `${stem}.md`));
|
|
11791
|
+
const reportPath = (0, import_path5.join)(workspaceDir, "run-report.json");
|
|
11792
|
+
const modelCachePath = (0, import_path5.join)((0, import_path5.dirname)(absInput), ".kordoc-model-cache.json");
|
|
11995
11793
|
const baseUrl = options.baseUrl ?? "https://integrate.api.nvidia.com/v1/chat/completions";
|
|
11996
11794
|
const timeoutMs = options.timeoutMs ?? 6e4;
|
|
11997
11795
|
const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
|
|
@@ -12002,12 +11800,11 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
12002
11800
|
const models = sortModelsByCache(modelsInput, modelCache);
|
|
12003
11801
|
const modelMaxTokens = { ...DEFAULT_MODEL_MAX_TOKENS, ...options.modelMaxTokens ?? {} };
|
|
12004
11802
|
const stageWeights = normalizeWeights({ ...DEFAULT_STAGE_WEIGHTS, ...options.stageWeights ?? {} });
|
|
12005
|
-
const keyPool = ApiKeyRotationPool.fromEnv();
|
|
12006
11803
|
const runId = options.runId ?? generateRunId("ocr");
|
|
12007
11804
|
const logger = (options.logger ?? createLoggerFromEnv()).withRun(runId).child({ component: "pipeline/unified-ocr.ts" });
|
|
12008
|
-
await (0,
|
|
12009
|
-
await (0,
|
|
12010
|
-
await (0,
|
|
11805
|
+
await (0, import_promises2.mkdir)(imagesDir, { recursive: true });
|
|
11806
|
+
await (0, import_promises2.mkdir)(rawDir, { recursive: true });
|
|
11807
|
+
await (0, import_promises2.mkdir)(diffDir, { recursive: true });
|
|
12011
11808
|
const timingsMs = {};
|
|
12012
11809
|
const markStageStart = (stage, message) => emitProgress(options.onEvent, stage, 0, stageWeights, { message, type: "stage_start" });
|
|
12013
11810
|
const markStageProgress = (stage, stagePercent, current, total, message, model) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message, model });
|
|
@@ -12018,51 +11815,57 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
12018
11815
|
};
|
|
12019
11816
|
try {
|
|
12020
11817
|
ensureSupportedInput(absInput);
|
|
12021
|
-
let workingPdfPath = absInput;
|
|
12022
11818
|
const convertStart = import_node_perf_hooks.performance.now();
|
|
12023
11819
|
currentStage = "convert";
|
|
12024
|
-
|
|
12025
|
-
|
|
12026
|
-
|
|
12027
|
-
const
|
|
12028
|
-
|
|
12029
|
-
|
|
12030
|
-
|
|
12031
|
-
|
|
12032
|
-
|
|
12033
|
-
|
|
12034
|
-
|
|
12035
|
-
|
|
12036
|
-
|
|
12037
|
-
|
|
12038
|
-
}
|
|
12039
|
-
|
|
12040
|
-
|
|
12041
|
-
|
|
12042
|
-
|
|
12043
|
-
|
|
12044
|
-
|
|
12045
|
-
|
|
12046
|
-
|
|
12047
|
-
|
|
12048
|
-
|
|
12049
|
-
|
|
12050
|
-
|
|
12051
|
-
|
|
11820
|
+
if ((0, import_path5.extname)(absInput).toLowerCase() !== ".pdf") {
|
|
11821
|
+
markStageStart("convert", "\uC790\uCCB4 \uD30C\uC11C\uB85C Markdown \uBCC0\uD658 \uC911");
|
|
11822
|
+
logStage("info", "convert", "start", "\uC790\uCCB4 \uD30C\uC11C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
|
|
11823
|
+
const inputBuffer = await (0, import_promises2.readFile)(absInput);
|
|
11824
|
+
const parsed = await parseNativeDocument(inputBuffer);
|
|
11825
|
+
timingsMs.convert = elapsedMs(convertStart);
|
|
11826
|
+
markStageDone("convert", "\uC790\uCCB4 \uD30C\uC11C \uBCC0\uD658 \uC644\uB8CC");
|
|
11827
|
+
logStage("info", "convert", "done", "\uC790\uCCB4 \uD30C\uC11C \uBCC0\uD658 \uC644\uB8CC", { format: parsed.fileType, elapsedMs: timingsMs.convert });
|
|
11828
|
+
const mergeStart2 = import_node_perf_hooks.performance.now();
|
|
11829
|
+
currentStage = "merge";
|
|
11830
|
+
markStageStart("merge", "Markdown \uC800\uC7A5 \uC911");
|
|
11831
|
+
await (0, import_promises2.writeFile)(outputPath, parsed.markdown, "utf-8");
|
|
11832
|
+
timingsMs.merge = elapsedMs(mergeStart2);
|
|
11833
|
+
markStageDone("merge", "Markdown \uC800\uC7A5 \uC644\uB8CC");
|
|
11834
|
+
logStage("info", "merge", "done", "Markdown \uC800\uC7A5 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
|
|
11835
|
+
const report2 = {
|
|
11836
|
+
inputPath: absInput,
|
|
11837
|
+
outputPath,
|
|
11838
|
+
workspaceDir,
|
|
11839
|
+
selectedModel: "native-parser",
|
|
11840
|
+
probeImage: "",
|
|
11841
|
+
probeResults: [],
|
|
11842
|
+
pageCount: parsed.pageCount,
|
|
11843
|
+
sourceFormat: parsed.fileType,
|
|
11844
|
+
keyHealth: [],
|
|
11845
|
+
timingsMs,
|
|
11846
|
+
modelCachePath
|
|
11847
|
+
};
|
|
11848
|
+
await (0, import_promises2.writeFile)(reportPath, JSON.stringify(report2, null, 2), "utf-8");
|
|
11849
|
+
logStage("info", "finalize", "done", "native parse run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
|
|
11850
|
+
return { outputPath, reportPath, selectedModel: "native-parser" };
|
|
12052
11851
|
}
|
|
11852
|
+
const workingPdfPath = absInput;
|
|
11853
|
+
markStageStart("convert", "PDF \uC785\uB825 \uD655\uC778 \uC911");
|
|
11854
|
+
logStage("info", "convert", "start", "PDF \uC785\uB825 \uD655\uC778", { input: absInput });
|
|
12053
11855
|
timingsMs.convert = elapsedMs(convertStart);
|
|
12054
|
-
markStageDone("convert", "PDF \
|
|
12055
|
-
logStage("info", "convert", "done", "PDF \
|
|
11856
|
+
markStageDone("convert", "PDF \uC785\uB825 \uD655\uC778 \uC644\uB8CC");
|
|
11857
|
+
logStage("info", "convert", "done", "PDF \uC785\uB825 \uD655\uC778 \uC644\uB8CC", { elapsedMs: timingsMs.convert });
|
|
11858
|
+
const keyPool = ApiKeyRotationPool.fromEnv();
|
|
12056
11859
|
const renderStart = import_node_perf_hooks.performance.now();
|
|
12057
11860
|
currentStage = "render";
|
|
12058
11861
|
const totalPages = await getPdfPageCount(workingPdfPath).catch(() => 0);
|
|
12059
11862
|
if (totalPages === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC218\uB97C \uD655\uC778\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
12060
11863
|
markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
|
|
12061
11864
|
logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi, totalPages });
|
|
12062
|
-
await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, (0,
|
|
12063
|
-
const firstFiles = (await (0,
|
|
11865
|
+
await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, (0, import_path5.join)(imagesDir, "page")]);
|
|
11866
|
+
const firstFiles = (await (0, import_promises2.readdir)(imagesDir)).filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
|
|
12064
11867
|
if (firstFiles.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uCCAB \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328");
|
|
12065
|
-
const probeImage = (0,
|
|
11868
|
+
const probeImage = (0, import_path5.join)(imagesDir, firstFiles[0]);
|
|
12066
11869
|
markStageProgress("render", Math.round(1 / totalPages * 100), 1, totalPages, `\uD398\uC774\uC9C0 1/${totalPages} \uB80C\uB354\uB9C1`);
|
|
12067
11870
|
const probeStart = import_node_perf_hooks.performance.now();
|
|
12068
11871
|
currentStage = "probe";
|
|
@@ -12098,7 +11901,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
12098
11901
|
const keyCount = keyPool.snapshot().length;
|
|
12099
11902
|
const workerCount = Math.max(1, keyCount * concurrencyPerKey);
|
|
12100
11903
|
const queueCapacity = workerCount * 2;
|
|
12101
|
-
const
|
|
11904
|
+
const queue = new BoundedQueue(queueCapacity);
|
|
12102
11905
|
const ocrStart = import_node_perf_hooks.performance.now();
|
|
12103
11906
|
currentStage = "ocr";
|
|
12104
11907
|
markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (\uC6CC\uCEE4 ${workerCount}\uAC1C)`);
|
|
@@ -12106,17 +11909,17 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
12106
11909
|
let renderDone = 1;
|
|
12107
11910
|
const renderProducer = (async () => {
|
|
12108
11911
|
try {
|
|
12109
|
-
await
|
|
11912
|
+
await queue.enqueue({ pageNumber: 1, imagePath: probeImage });
|
|
12110
11913
|
if (totalPages > 1) {
|
|
12111
|
-
for await (const item of renderPdfToPngStream(workingPdfPath, (0,
|
|
12112
|
-
await
|
|
11914
|
+
for await (const item of renderPdfToPngStream(workingPdfPath, (0, import_path5.join)(imagesDir, "page"), dpi, totalPages, 2)) {
|
|
11915
|
+
await queue.enqueue(item);
|
|
12113
11916
|
renderDone++;
|
|
12114
11917
|
markStageProgress("render", Math.round(renderDone / totalPages * 100), renderDone, totalPages, `\uD398\uC774\uC9C0 ${renderDone}/${totalPages} \uB80C\uB354\uB9C1`);
|
|
12115
11918
|
logStage("debug", "render", "progress", "\uD398\uC774\uC9C0 \uB80C\uB354 \uC644\uB8CC", { page: item.pageNumber });
|
|
12116
11919
|
}
|
|
12117
11920
|
}
|
|
12118
11921
|
} finally {
|
|
12119
|
-
|
|
11922
|
+
queue.close();
|
|
12120
11923
|
timingsMs.render = elapsedMs(renderStart);
|
|
12121
11924
|
markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
|
|
12122
11925
|
logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", { pages: renderDone, elapsedMs: timingsMs.render });
|
|
@@ -12125,7 +11928,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
12125
11928
|
const [, pageResultsMap] = await Promise.all([
|
|
12126
11929
|
renderProducer,
|
|
12127
11930
|
ocrWorkerPool({
|
|
12128
|
-
queue
|
|
11931
|
+
queue,
|
|
12129
11932
|
workerCount,
|
|
12130
11933
|
totalPages,
|
|
12131
11934
|
ocrInput: {
|
|
@@ -12158,8 +11961,8 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
12158
11961
|
const sortedEntries = Array.from(pageResultsMap.entries()).sort((a, b) => a[0] - b[0]);
|
|
12159
11962
|
const rawPagePaths = [];
|
|
12160
11963
|
for (const [pageNum, markdown] of sortedEntries) {
|
|
12161
|
-
const pagePath = (0,
|
|
12162
|
-
await (0,
|
|
11964
|
+
const pagePath = (0, import_path5.join)(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
|
|
11965
|
+
await (0, import_promises2.writeFile)(pagePath, markdown, "utf-8");
|
|
12163
11966
|
rawPagePaths.push(pagePath);
|
|
12164
11967
|
}
|
|
12165
11968
|
const mergeStart = import_node_perf_hooks.performance.now();
|
|
@@ -12167,7 +11970,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
12167
11970
|
markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
|
|
12168
11971
|
logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: rawPagePaths.length });
|
|
12169
11972
|
const merged = await mergeMarkdownPages(rawPagePaths);
|
|
12170
|
-
await (0,
|
|
11973
|
+
await (0, import_promises2.writeFile)(outputPath, merged, "utf-8");
|
|
12171
11974
|
timingsMs.merge = elapsedMs(mergeStart);
|
|
12172
11975
|
markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
|
|
12173
11976
|
logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
|
|
@@ -12183,7 +11986,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
12183
11986
|
timingsMs,
|
|
12184
11987
|
modelCachePath
|
|
12185
11988
|
};
|
|
12186
|
-
await (0,
|
|
11989
|
+
await (0, import_promises2.writeFile)(reportPath, JSON.stringify(report, null, 2), "utf-8");
|
|
12187
11990
|
logStage("info", "finalize", "done", "run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
|
|
12188
11991
|
return { outputPath, reportPath, selectedModel };
|
|
12189
11992
|
} catch (err) {
|
|
@@ -12258,7 +12061,7 @@ async function getPdfPageCount(pdfPath) {
|
|
|
12258
12061
|
return n;
|
|
12259
12062
|
}
|
|
12260
12063
|
async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, startPage = 1) {
|
|
12261
|
-
const imagesDir = (0,
|
|
12064
|
+
const imagesDir = (0, import_path5.dirname)(prefixPath);
|
|
12262
12065
|
for (let page = startPage; page <= totalPages; page++) {
|
|
12263
12066
|
try {
|
|
12264
12067
|
await runCommand("pdftoppm", [
|
|
@@ -12272,9 +12075,9 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
|
|
|
12272
12075
|
pdfPath,
|
|
12273
12076
|
prefixPath
|
|
12274
12077
|
]);
|
|
12275
|
-
const files = await (0,
|
|
12078
|
+
const files = await (0, import_promises2.readdir)(imagesDir);
|
|
12276
12079
|
const pageFiles = files.filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
|
|
12277
|
-
const imagePath = (0,
|
|
12080
|
+
const imagePath = (0, import_path5.join)(imagesDir, pageFiles[pageFiles.length - 1]);
|
|
12278
12081
|
yield { pageNumber: page, imagePath };
|
|
12279
12082
|
} catch (err) {
|
|
12280
12083
|
yield {
|
|
@@ -12287,7 +12090,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
|
|
|
12287
12090
|
}
|
|
12288
12091
|
async function runCommand(cmd, args) {
|
|
12289
12092
|
await new Promise((resolvePromise, reject) => {
|
|
12290
|
-
const child = (0,
|
|
12093
|
+
const child = (0, import_child_process4.spawn)(cmd, args, { stdio: "pipe" });
|
|
12291
12094
|
let stderr = "";
|
|
12292
12095
|
child.stderr.on("data", (d) => {
|
|
12293
12096
|
stderr += String(d);
|
|
@@ -12301,7 +12104,7 @@ async function runCommand(cmd, args) {
|
|
|
12301
12104
|
}
|
|
12302
12105
|
async function runCommandWithStdout(cmd, args) {
|
|
12303
12106
|
return await new Promise((resolvePromise, reject) => {
|
|
12304
|
-
const child = (0,
|
|
12107
|
+
const child = (0, import_child_process4.spawn)(cmd, args, { stdio: "pipe" });
|
|
12305
12108
|
let stdout = "";
|
|
12306
12109
|
let stderr = "";
|
|
12307
12110
|
child.stdout.on("data", (d) => {
|
|
@@ -12317,6 +12120,32 @@ async function runCommandWithStdout(cmd, args) {
|
|
|
12317
12120
|
});
|
|
12318
12121
|
});
|
|
12319
12122
|
}
|
|
12123
|
+
async function parseNativeDocument(buffer) {
|
|
12124
|
+
const arrayBuffer = toArrayBuffer(buffer);
|
|
12125
|
+
const format = detectFormat(arrayBuffer);
|
|
12126
|
+
let result;
|
|
12127
|
+
let fileType;
|
|
12128
|
+
if (format === "hwp") {
|
|
12129
|
+
result = parseHwp5Document(buffer);
|
|
12130
|
+
fileType = "hwp";
|
|
12131
|
+
} else if (format === "hwpx") {
|
|
12132
|
+
const { format: zipFormat, zip } = await detectZipFormat(arrayBuffer);
|
|
12133
|
+
if (zipFormat === "xlsx") {
|
|
12134
|
+
result = await parseXlsxDocument(arrayBuffer, void 0, zip ?? void 0);
|
|
12135
|
+
fileType = "xlsx";
|
|
12136
|
+
} else if (zipFormat === "docx") {
|
|
12137
|
+
result = await parseDocxDocument(arrayBuffer, void 0, zip ?? void 0);
|
|
12138
|
+
fileType = "docx";
|
|
12139
|
+
} else {
|
|
12140
|
+
result = await parseHwpxDocument(arrayBuffer, void 0, zip ?? void 0);
|
|
12141
|
+
fileType = "hwpx";
|
|
12142
|
+
}
|
|
12143
|
+
} else {
|
|
12144
|
+
throw new UnifiedOcrError("UNSUPPORTED_INPUT", "convert", `\uC790\uCCB4 \uD30C\uC11C\uB85C \uCC98\uB9AC\uD560 \uC218 \uC5C6\uB294 \uC785\uB825 \uD3EC\uB9F7: ${format}`);
|
|
12145
|
+
}
|
|
12146
|
+
const pageCount = result.metadata?.pageCount ?? Math.max(1, ...result.blocks.map((block) => block.pageNumber ?? 1));
|
|
12147
|
+
return { markdown: result.markdown, fileType, pageCount };
|
|
12148
|
+
}
|
|
12320
12149
|
function naturalPageSort(a, b) {
|
|
12321
12150
|
const na = Number((a.match(/\d+/g) || ["0"]).at(-1) || 0);
|
|
12322
12151
|
const nb = Number((b.match(/\d+/g) || ["0"]).at(-1) || 0);
|
|
@@ -12390,7 +12219,7 @@ function startParallelProbeRuns(input) {
|
|
|
12390
12219
|
}
|
|
12391
12220
|
async function loadModelCache(path) {
|
|
12392
12221
|
try {
|
|
12393
|
-
const raw = await (0,
|
|
12222
|
+
const raw = await (0, import_promises2.readFile)(path, "utf-8");
|
|
12394
12223
|
return JSON.parse(raw);
|
|
12395
12224
|
} catch {
|
|
12396
12225
|
return null;
|
|
@@ -12421,15 +12250,15 @@ async function updateModelCache(path, probes) {
|
|
|
12421
12250
|
}
|
|
12422
12251
|
}
|
|
12423
12252
|
current.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
12424
|
-
await (0,
|
|
12253
|
+
await (0, import_promises2.writeFile)(path, JSON.stringify(current, null, 2), "utf-8");
|
|
12425
12254
|
}
|
|
12426
12255
|
async function ocrWorkerPool(input) {
|
|
12427
|
-
const { queue
|
|
12256
|
+
const { queue, workerCount, ocrInput, onPageDone } = input;
|
|
12428
12257
|
const results = /* @__PURE__ */ new Map();
|
|
12429
12258
|
let completedCount = 0;
|
|
12430
12259
|
async function worker() {
|
|
12431
12260
|
while (true) {
|
|
12432
|
-
const item = await
|
|
12261
|
+
const item = await queue.dequeue();
|
|
12433
12262
|
if (item === QUEUE_DONE) break;
|
|
12434
12263
|
const { pageNumber, imagePath, error } = item;
|
|
12435
12264
|
if (imagePath === null) {
|
|
@@ -12481,7 +12310,7 @@ async function ocrImageWithFallback(input) {
|
|
|
12481
12310
|
async function mergeMarkdownPages(paths) {
|
|
12482
12311
|
const out = [];
|
|
12483
12312
|
for (let i = 0; i < paths.length; i++) {
|
|
12484
|
-
const txt = (await (0,
|
|
12313
|
+
const txt = (await (0, import_promises2.readFile)(paths[i], "utf-8")).trim();
|
|
12485
12314
|
if (!txt) continue;
|
|
12486
12315
|
out.push(txt);
|
|
12487
12316
|
}
|
|
@@ -12597,7 +12426,7 @@ async function ocrImageViaNim(input) {
|
|
|
12597
12426
|
throw new UnifiedOcrError("OCR_FAILED", "ocr", `OCR \uC7AC\uC2DC\uB3C4 \uCD08\uACFC: ${lastErr}`);
|
|
12598
12427
|
}
|
|
12599
12428
|
async function encodeBase64(path) {
|
|
12600
|
-
const b = await (0,
|
|
12429
|
+
const b = await (0, import_promises2.readFile)(path);
|
|
12601
12430
|
return b.toString("base64");
|
|
12602
12431
|
}
|
|
12603
12432
|
function stripCodeFence3(text) {
|
|
@@ -12609,7 +12438,7 @@ async function delay(ms) {
|
|
|
12609
12438
|
await new Promise((resolvePromise) => setTimeout(resolvePromise, ms));
|
|
12610
12439
|
}
|
|
12611
12440
|
function ensureSupportedInput(path) {
|
|
12612
|
-
const ext = (0,
|
|
12441
|
+
const ext = (0, import_path5.extname)(path).toLowerCase();
|
|
12613
12442
|
const allowed = /* @__PURE__ */ new Set([".pdf", ".hwp", ".hwpx", ".docx", ".xlsx"]);
|
|
12614
12443
|
if (!allowed.has(ext)) {
|
|
12615
12444
|
throw new UnifiedOcrError("UNSUPPORTED_INPUT", "convert", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uC785\uB825 \uD3EC\uB9F7: ${ext}`);
|
|
@@ -12617,16 +12446,6 @@ function ensureSupportedInput(path) {
|
|
|
12617
12446
|
}
|
|
12618
12447
|
function normalizePipelineError(err, stage) {
|
|
12619
12448
|
if (err instanceof UnifiedOcrError) return err;
|
|
12620
|
-
if (err instanceof ConvertError) {
|
|
12621
|
-
const codeMap = {
|
|
12622
|
-
SOFFICE_NOT_FOUND: "SOFFICE_NOT_FOUND",
|
|
12623
|
-
CONVERT_FAILED: "CONVERT_FAILED",
|
|
12624
|
-
TIMEOUT: "CONVERT_FAILED",
|
|
12625
|
-
UNSUPPORTED_PLATFORM: "CONVERT_FAILED",
|
|
12626
|
-
UNSUPPORTED_FORMAT: "UNSUPPORTED_INPUT"
|
|
12627
|
-
};
|
|
12628
|
-
return new UnifiedOcrError(codeMap[err.code] ?? "CONVERT_FAILED", stage, err.message);
|
|
12629
|
-
}
|
|
12630
12449
|
const message = err instanceof Error ? err.message : String(err);
|
|
12631
12450
|
const codeByStage = {
|
|
12632
12451
|
convert: "CONVERT_FAILED",
|
|
@@ -12646,7 +12465,7 @@ async function parse2(input, options) {
|
|
|
12646
12465
|
let buffer;
|
|
12647
12466
|
if (typeof input === "string") {
|
|
12648
12467
|
try {
|
|
12649
|
-
const buf = await (0,
|
|
12468
|
+
const buf = await (0, import_promises3.readFile)(input);
|
|
12650
12469
|
buffer = toArrayBuffer(buf);
|
|
12651
12470
|
} catch (err) {
|
|
12652
12471
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
@@ -12806,9 +12625,6 @@ async function parseDocx(buffer, options, zip) {
|
|
|
12806
12625
|
VERSION,
|
|
12807
12626
|
blocksToMarkdown,
|
|
12808
12627
|
compare,
|
|
12809
|
-
convertHwpToPdf,
|
|
12810
|
-
convertHwpxToPdf,
|
|
12811
|
-
convertToPdf,
|
|
12812
12628
|
detectFormat,
|
|
12813
12629
|
detectZipFormat,
|
|
12814
12630
|
diffBlocks,
|