@clazic/kordoc 2.7.4 → 2.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3059,7 +3059,7 @@ var init_provider = __esm({
3059
3059
  });
3060
3060
 
3061
3061
  // src/index.ts
3062
- import { readFile as readFile3 } from "fs/promises";
3062
+ import { readFile as readFile2 } from "fs/promises";
3063
3063
 
3064
3064
  // src/detect.ts
3065
3065
  import JSZip from "jszip";
@@ -3112,7 +3112,7 @@ import JSZip2 from "jszip";
3112
3112
  import { DOMParser } from "@xmldom/xmldom";
3113
3113
 
3114
3114
  // src/utils.ts
3115
- var VERSION = true ? "2.7.3" : "0.0.0-dev";
3115
+ var VERSION = true ? "2.7.6" : "0.0.0-dev";
3116
3116
  function toArrayBuffer(buf) {
3117
3117
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3118
3118
  return buf.buffer;
@@ -3319,13 +3319,21 @@ function sanitizeText(text) {
3319
3319
  }
3320
3320
  return result;
3321
3321
  }
3322
/**
 * Backslash-escapes characters that GitHub-Flavored Markdown would otherwise
 * interpret as syntax: `~` always (strikethrough), and `|` when the text is
 * destined for a table cell (column delimiter).
 *
 * A delimiter counts as "already escaped" only when it is preceded by an ODD
 * number of backslashes. With an even count (e.g. `\\~`) the backslashes
 * escape each other and the delimiter is still live, so it gets escaped too.
 * The function is idempotent: running it on its own output changes nothing.
 *
 * @param {string} text - Text to escape; falsy values are returned unchanged.
 * @param {boolean} [inTableCell=false] - Also escape `|` when true.
 * @returns {string} The escaped text.
 */
function escapeGfm(text, inTableCell = false) {
  if (!text) return text;
  // `match` is the run of backslashes plus the delimiter; `slashes` is the run alone.
  const escapeIfLive = (match, slashes) => {
    if (slashes.length % 2 === 1) return match;
    return `${slashes}\\${match.slice(-1)}`;
  };
  let result = text.replace(/(\\*)~/g, escapeIfLive);
  if (inTableCell) {
    result = result.replace(/(\\*)\|/g, escapeIfLive);
  }
  return result;
}
3322
3330
  function blocksToMarkdown(blocks) {
3323
3331
  const lines = [];
3324
3332
  for (let i = 0; i < blocks.length; i++) {
3325
3333
  const block = blocks[i];
3326
3334
  if (block.type === "heading" && block.text) {
3327
3335
  const prefix = "#".repeat(Math.min(block.level || 2, 6));
3328
- const headingText = sanitizeText(block.text);
3336
+ const headingText = escapeGfm(sanitizeText(block.text), false);
3329
3337
  if (headingText) lines.push("", `${prefix} ${headingText}`, "");
3330
3338
  continue;
3331
3339
  }
@@ -3338,42 +3346,47 @@ function blocksToMarkdown(blocks) {
3338
3346
  continue;
3339
3347
  }
3340
3348
  if (block.type === "list" && block.text) {
3341
- const listText = sanitizeText(block.text);
3342
- if (!listText) continue;
3343
- const alreadyNumbered = block.listType === "ordered" && /^\d+\.\s/.test(listText);
3349
+ const sanitized = sanitizeText(block.text);
3350
+ if (!sanitized) continue;
3351
+ const alreadyNumbered = block.listType === "ordered" && /^\d+\.\s/.test(sanitized);
3344
3352
  const prefix = alreadyNumbered ? "" : block.listType === "ordered" ? "1. " : "- ";
3353
+ const listText = escapeGfm(sanitized, false);
3345
3354
  lines.push(`${prefix}${listText}`);
3346
3355
  if (block.children) {
3347
3356
  for (const child of block.children) {
3348
3357
  const childPrefix = child.listType === "ordered" ? "1." : "-";
3349
- lines.push(` ${childPrefix} ${child.text || ""}`);
3358
+ const childText = child.text ? escapeGfm(sanitizeText(child.text), false) : "";
3359
+ lines.push(` ${childPrefix} ${childText}`);
3350
3360
  }
3351
3361
  }
3352
3362
  continue;
3353
3363
  }
3354
3364
  if (block.type === "paragraph" && block.text) {
3355
- let text = sanitizeText(block.text);
3356
- if (!text) continue;
3357
- if (/^\[별표\s*\d+/.test(text)) {
3365
+ const sanitized = sanitizeText(block.text);
3366
+ if (!sanitized) continue;
3367
+ if (/^\[별표\s*\d+/.test(sanitized)) {
3358
3368
  const nextBlock = blocks[i + 1];
3369
+ const escapedSelf = escapeGfm(sanitized, false);
3359
3370
  if (nextBlock?.type === "paragraph" && nextBlock.text && /관련\)?$/.test(nextBlock.text)) {
3360
- lines.push("", `## ${text} ${nextBlock.text}`, "");
3371
+ const nextEscaped = escapeGfm(sanitizeText(nextBlock.text), false);
3372
+ lines.push("", `## ${escapedSelf} ${nextEscaped}`, "");
3361
3373
  i++;
3362
3374
  } else {
3363
- lines.push("", `## ${text}`, "");
3375
+ lines.push("", `## ${escapedSelf}`, "");
3364
3376
  }
3365
3377
  continue;
3366
3378
  }
3367
- if (/^\([^)]*조[^)]*관련\)$/.test(text)) {
3368
- lines.push(`*${text}*`, "");
3379
+ if (/^\([^)]*조[^)]*관련\)$/.test(sanitized)) {
3380
+ lines.push(`*${escapeGfm(sanitized, false)}*`, "");
3369
3381
  continue;
3370
3382
  }
3383
+ let text = escapeGfm(sanitized, false);
3371
3384
  if (block.href) {
3372
3385
  const href = sanitizeHref(block.href);
3373
3386
  if (href) text = `[${text}](${href})`;
3374
3387
  }
3375
3388
  if (block.footnoteText) {
3376
- text += ` (\uC8FC: ${block.footnoteText})`;
3389
+ text += ` (\uC8FC: ${escapeGfm(block.footnoteText, false)})`;
3377
3390
  }
3378
3391
  lines.push(text);
3379
3392
  } else if (block.type === "table" && block.table) {
@@ -3398,13 +3411,13 @@ function tableToMarkdown(table) {
3398
3411
  return content.split(/\n/).map((line) => {
3399
3412
  const trimmed = line.trim();
3400
3413
  if (!trimmed) return "";
3401
- if (/^\d+\.\s/.test(trimmed)) return `**${trimmed}**`;
3402
- if (/^[가-힣]\.\s/.test(trimmed)) return ` ${trimmed}`;
3403
- return trimmed;
3414
+ if (/^\d+\.\s/.test(trimmed)) return `**${escapeGfm(trimmed, false)}**`;
3415
+ if (/^[가-힣]\.\s/.test(trimmed)) return ` ${escapeGfm(trimmed, false)}`;
3416
+ return escapeGfm(trimmed, false);
3404
3417
  }).filter(Boolean).join("\n");
3405
3418
  }
3406
3419
  if (numCols === 1 && numRows >= 2) {
3407
- return cells.map((row) => sanitizeText(row[0].text).replace(/\n/g, " ")).filter(Boolean).join("\n");
3420
+ return cells.map((row) => escapeGfm(sanitizeText(row[0].text).replace(/\n/g, " "), false)).filter(Boolean).join("\n");
3408
3421
  }
3409
3422
  const display = Array.from({ length: numRows }, () => Array(numCols).fill(""));
3410
3423
  const skip = /* @__PURE__ */ new Set();
@@ -3413,7 +3426,7 @@ function tableToMarkdown(table) {
3413
3426
  if (skip.has(`${r},${c}`)) continue;
3414
3427
  const cell = cells[r]?.[c];
3415
3428
  if (!cell) continue;
3416
- display[r][c] = sanitizeText(cell.text).replace(/\n/g, "<br>");
3429
+ display[r][c] = escapeGfm(sanitizeText(cell.text).replace(/\n/g, "<br>"), true);
3417
3430
  for (let dr = 0; dr < cell.rowSpan; dr++) {
3418
3431
  for (let dc = 0; dc < cell.colSpan; dc++) {
3419
3432
  if (dr === 0 && dc === 0) continue;
@@ -3460,6 +3473,223 @@ var HEADING_RATIO_H1 = 1.5;
3460
3473
  var HEADING_RATIO_H2 = 1.3;
3461
3474
  var HEADING_RATIO_H3 = 1.15;
3462
3475
 
3476
+ // src/hwp5/equation.ts
3477
// Lower-case equation-script keyword -> LaTeX command.
// Looked up by convertWords after the word has been lower-cased.
var WORD_COMMANDS = /* @__PURE__ */ new Map(Object.entries({
  // Greek letters
  alpha: "\\alpha",
  beta: "\\beta",
  gamma: "\\gamma",
  delta: "\\delta",
  epsilon: "\\epsilon",
  theta: "\\theta",
  lambda: "\\lambda",
  mu: "\\mu",
  pi: "\\pi",
  sigma: "\\sigma",
  tau: "\\tau",
  phi: "\\phi",
  omega: "\\omega",
  // Named functions
  sin: "\\sin",
  cos: "\\cos",
  tan: "\\tan",
  sec: "\\sec",
  csc: "\\csc",
  cot: "\\cot",
  log: "\\log",
  ln: "\\ln",
  lim: "\\lim",
  // Large operators and symbols
  inf: "\\infty",
  sum: "\\sum",
  smallsum: "\\sum",
  prod: "\\prod",
  int: "\\int",
  oint: "\\oint",
  rightarrow: "\\rightarrow",
  leftarrow: "\\leftarrow",
  partial: "\\partial",
  nabla: "\\nabla",
  angle: "\\angle",
  triangle: "\\triangle",
  // Accents and delimiters
  vec: "\\vec",
  bar: "\\overline",
  dot: "\\dot",
  hat: "\\hat",
  left: "\\left",
  right: "\\right"
}));
3519
// Equation-script symbol keyword -> LaTeX replacement.
// convertWords consults this table before WORD_COMMANDS, first with the word
// as written and then lower-cased.
var SYMBOL_WORDS = /* @__PURE__ */ new Map([
  ["times", "\\times"],
  ["divide", "\\div"],
  ["div", "\\div"],
  ["le", "\\leq"],
  // `leq` mirrors the existing `geq` entry; without it the keyword was left
  // as the bare letters "leq" in the output.
  ["leq", "\\leq"],
  ["ge", "\\geq"],
  ["geq", "\\geq"],
  // Expands to a superscript circle, so "90 deg" yields a degree mark.
  ["deg", "^\\circ"],
  ["rarrow", "\\rightarrow"],
  ["larrow", "\\leftarrow"],
  ["lrarrow", "\\leftrightarrow"],
  ["in", "\\in"],
  ["notin", "\\notin"],
  ["emptyset", "\\emptyset"],
  ["subset", "\\subset"],
  ["nsubset", "\\nsubseteq"],
  ["cup", "\\cup"],
  ["cap", "\\cap"],
  ["smallinter", "\\cap"],
  ["sim", "\\sim"],
  ["circ", "\\circ"],
  ["bot", "\\perp"],
  ["dyad", "\\overleftrightarrow"],
  ["arch", "\\overset{\\frown}"]
]);
3544
/**
 * Entry point of the equation converter: strips NUL characters, trims the
 * script, and runs the conversion pipeline at recursion depth 0.
 *
 * @param {string} equation - Raw equation script.
 * @returns {string} The converted LaTeX string.
 */
function hwpEquationToLatex(equation) {
  const withoutNuls = equation.replace(/\0/g, "");
  const script = withoutNuls.trim();
  return convertEquation(script, 0);
}
3547
/**
 * Runs the full rewrite pipeline over one equation fragment.
 *
 * Whitespace runs are collapsed, and runs of backticks or tildes become the
 * LaTeX thin space `\,`. The fragment then goes through each conversion pass
 * in a fixed order. Passes that recurse into sub-expressions receive `depth`
 * so nesting stays bounded.
 *
 * @param {string} equation - Fragment to convert.
 * @param {number} depth - Current recursion depth; beyond 12 the input is returned untouched.
 * @returns {string} The converted fragment (or the input when empty or too deep).
 */
function convertEquation(equation, depth) {
  if (!equation) return equation;
  if (depth > 12) return equation;
  const normalized = equation
    .replace(/\s+/g, " ")
    .replace(/`+/g, "\\,")
    .replace(/~+/g, "\\,")
    .trim();
  // Order matters: structural rewrites run first, word substitution and
  // spacing cleanup run last.
  const passes = [
    convertMatrixLike,
    (text) => convertRoots(text, depth),
    (text) => convertOver(text, depth),
    (text) => convertSqrt(text, depth),
    convertScripts,
    convertOperators,
    removeFontDirectives,
    convertWords,
    cleanupLatexSpacing
  ];
  return passes.reduce((text, pass) => pass(text), normalized);
}
3561
/**
 * Rewrites `matrix{...}` and `cases{...}` groups (without nested braces) into
 * the corresponding LaTeX environments.
 *
 * Inside the body, `#` separates rows and is emitted as the LaTeX row break
 * `\\`. Column separators (`&`) already present in a row are passed through
 * untouched. Previously `matrix` joined rows with ` & `, which flattened every
 * row into a single row of extra columns; it now uses the same row break that
 * `cases` uses.
 *
 * @param {string} input - Equation fragment.
 * @returns {string} The fragment with matrix/cases groups replaced.
 */
function convertMatrixLike(input) {
  const toEnvironment = (environment) => (_match, body) => {
    const rows = body.split("#").map((row) => row.trim()).join(" \\\\ ");
    return `\\begin{${environment}} ${rows} \\end{${environment}}`;
  };
  return input
    .replace(/\bmatrix\s*\{([^{}]*)\}/gi, toEnvironment("matrix"))
    .replace(/\bcases\s*\{([^{}]*)\}/gi, toEnvironment("cases"));
}
3570
/**
 * Rewrites `root <degree> of <radicand>` into `\sqrt[degree]{radicand}`.
 * Each operand is either a brace group without nested braces or a run of
 * non-whitespace characters; both are converted recursively one level deeper.
 *
 * @param {string} input - Equation fragment.
 * @param {number} depth - Current recursion depth.
 * @returns {string} The fragment with n-th roots replaced.
 */
function convertRoots(input, depth) {
  const rootPattern = /(?<!\\)\broot\s+({[^{}]*}|\S+)\s+of\s+({[^{}]*}|\S+)/gi;
  const nestedDepth = depth + 1;
  return input.replace(rootPattern, (_whole, degree, radicand) => {
    const index = convertEquation(unwrapGroup(degree), nestedDepth);
    const body = convertEquation(unwrapGroup(radicand), nestedDepth);
    return "\\sqrt[" + index + "]{" + body + "}";
  });
}
3575
/**
 * Rewrites `sqrt <radicand>` into `\sqrt{radicand}`. Occurrences already
 * preceded by a backslash are left alone. The radicand (brace group without
 * nested braces, or a run of non-whitespace) is converted recursively.
 *
 * @param {string} input - Equation fragment.
 * @param {number} depth - Current recursion depth.
 * @returns {string} The fragment with square roots replaced.
 */
function convertSqrt(input, depth) {
  const sqrtPattern = /(?<!\\)\bsqrt\s*({[^{}]*}|\S+)/gi;
  return input.replace(sqrtPattern, (_whole, radicand) => {
    const body = convertEquation(unwrapGroup(radicand), depth + 1);
    return "\\sqrt{" + body + "}";
  });
}
3580
/**
 * Rewrites infix `a over b` into `\frac{a}{b}`.
 *
 * Only occurrences of `over` outside braces and parentheses are considered.
 * The operand on each side is read as one atom and converted recursively.
 * Processing stops when no occurrence remains, when an operand is missing,
 * or after 50 rewrites.
 *
 * @param {string} input - Equation fragment.
 * @param {number} depth - Current recursion depth.
 * @returns {string} The fragment with fractions replaced.
 */
function convertOver(input, depth) {
  const keyword = "over";
  let output = input;
  let remaining = 50;
  while (remaining > 0) {
    remaining -= 1;
    const at = findTopLevelWord(output, keyword);
    if (at < 0) break;
    const lhs = readLeftAtom(output, at);
    const rhs = readRightAtom(output, at + keyword.length);
    if (!lhs || !rhs) break;
    const numerator = convertEquation(unwrapGroup(lhs.atom), depth + 1);
    const denominator = convertEquation(unwrapGroup(rhs.atom), depth + 1);
    const before = output.slice(0, lhs.start);
    const after = output.slice(rhs.end);
    output = before + `\\frac{${numerator}}{${denominator}}` + after;
  }
  return output;
}
3594
/**
 * Normalizes superscripts and subscripts: whitespace around `^` and `_` is
 * removed, then any script operand not already in braces is wrapped in `{}`.
 * An unbraced operand extends up to the next whitespace, brace, `_` or `^`.
 *
 * @param {string} input - Equation fragment.
 * @returns {string} The fragment with braced script operands.
 */
function convertScripts(input) {
  const tightened = input.replace(/\s*\^\s*/g, "^").replace(/\s*_\s*/g, "_");
  const bareSuperscript = /\^(?!\{)([^\s{}_^]+)/g;
  const bareSubscript = /_(?!\{)([^\s{}_^]+)/g;
  const withSuperscripts = tightened.replace(bareSuperscript, "^{$1}");
  return withSuperscripts.replace(bareSubscript, "_{$1}");
}
3597
/**
 * Rewrites operator shorthands (`+-`, `-+`, `//`, `!=`, `<=`, `>=`, `==`) and
 * a few Unicode symbols into LaTeX commands.
 *
 * Every command is emitted with a trailing space. Without it a following
 * letter fuses with the command name (`a+-b` became `a\pmb`, `x<=y` became
 * `x\leqy`), producing a different or undefined LaTeX command. Redundant
 * spaces are collapsed later by the spacing cleanup pass.
 *
 * @param {string} input - Equation fragment.
 * @returns {string} The fragment with operators replaced.
 */
function convertOperators(input) {
  // Applied in order; the two-character comparisons must run before `==`.
  const rewrites = [
    [/\+-/g, "\\pm "],
    [/-\+/g, "\\mp "],
    [/\/\//g, "\\parallel "],
    [/△/g, "\\triangle "],
    [/□/g, "\\square "],
    [/‧/g, "\\cdot "],
    [/!=/g, "\\neq "],
    [/<=/g, "\\leq "],
    [/>=/g, "\\geq "],
    [/==/g, "\\equiv "]
  ];
  return rewrites.reduce((text, [pattern, command]) => text.replace(pattern, command), input);
}
3600
/**
 * Drops the standalone font directives `rm` and `it` (any letter case) along
 * with the whitespace that follows them. Directives preceded by a backslash
 * are kept.
 *
 * @param {string} input - Equation fragment.
 * @returns {string} The fragment without font directives.
 */
function removeFontDirectives(input) {
  const fontDirective = /(?<!\\)\b(?:rm|it)\b\s*/gi;
  return input.replace(fontDirective, "");
}
3603
/**
 * Replaces known keywords with their LaTeX equivalents.
 *
 * A word is a letter followed by letters or digits, not adjacent to another
 * alphanumeric character and not preceded by a backslash. Lookup order:
 * SYMBOL_WORDS as written, SYMBOL_WORDS lower-cased, WORD_COMMANDS
 * lower-cased. Unknown words are left unchanged.
 *
 * @param {string} input - Equation fragment.
 * @returns {string} The fragment with keywords replaced.
 */
function convertWords(input) {
  const wordPattern = /(?<![\\A-Za-z0-9])([A-Za-z][A-Za-z0-9]*)(?![A-Za-z0-9])/g;
  const translate = (word) => {
    const asWritten = SYMBOL_WORDS.get(word);
    if (asWritten) return asWritten;
    const folded = word.toLowerCase();
    return SYMBOL_WORDS.get(folded) ?? WORD_COMMANDS.get(folded) ?? word;
  };
  return input.replace(wordPattern, (word) => translate(word));
}
3611
/**
 * Final whitespace pass over generated LaTeX: attaches `\left`/`\right` to
 * their delimiter (escaping braces), removes spaces around `\,`, collapses
 * whitespace runs, strips spaces just inside braces, and trims the ends.
 *
 * @param {string} input - LaTeX fragment.
 * @returns {string} The tidied fragment.
 */
function cleanupLatexSpacing(input) {
  // Applied strictly in this order.
  const steps = [
    [/\\left\s*\{/g, "\\left\\{"],
    [/\\right\s*\}/g, "\\right\\}"],
    [/\\left\s*([\[\]\(\)\|])/g, "\\left$1"],
    [/\\right\s*([\[\]\(\)\|])/g, "\\right$1"],
    [/\s*\\,\s*/g, "\\,"],
    [/\s+/g, " "],
    [/\{\s+/g, "{"],
    [/\s+\}/g, "}"]
  ];
  let tidy = input;
  for (const [pattern, replacement] of steps) {
    tidy = tidy.replace(pattern, replacement);
  }
  return tidy.trim();
}
3614
/**
 * Finds the first occurrence of `word` (compared case-insensitively against
 * the given lower-case `word`) that sits outside all braces and parentheses
 * and is not adjacent to another word character.
 *
 * @param {string} input - Text to scan.
 * @param {string} word - Keyword to find.
 * @returns {number} Index of the match, or -1 when there is none.
 */
function findTopLevelWord(input, word) {
  const span = word.length;
  const lastStart = input.length - span;
  let braceDepth = 0;
  let parenDepth = 0;
  for (let idx = 0; idx <= lastStart; idx += 1) {
    switch (input[idx]) {
      case "{":
        braceDepth += 1;
        break;
      case "}":
        // Clamp at zero so a stray closer cannot push the depth negative.
        braceDepth = Math.max(0, braceDepth - 1);
        break;
      case "(":
        parenDepth += 1;
        break;
      case ")":
        parenDepth = Math.max(0, parenDepth - 1);
        break;
      default:
        break;
    }
    const atTopLevel = braceDepth === 0 && parenDepth === 0;
    if (!atTopLevel) continue;
    if (input.slice(idx, idx + span).toLowerCase() !== word) continue;
    const boundedLeft = !isWordChar(input[idx - 1]);
    const boundedRight = !isWordChar(input[idx + span]);
    if (boundedLeft && boundedRight) return idx;
  }
  return -1;
}
3630
/**
 * Reads the operand that ends just before index `end`, skipping whitespace.
 *
 * If the operand ends with `}` or `)` and a matching opener exists, the whole
 * balanced group is the atom. Otherwise the atom extends leftwards until
 * whitespace or one of `+ - = < >`.
 *
 * @param {string} input - Text to scan.
 * @param {number} end - Index just past the operand region.
 * @returns {{start: number, atom: string} | null} The atom and its start
 *   index, or null when only whitespace precedes `end`.
 */
function readLeftAtom(input, end) {
  let last = end - 1;
  while (last >= 0 && /\s/.test(input[last])) last -= 1;
  if (last < 0) return null;
  const pairs = [["{", "}"], ["(", ")"]];
  for (const [opener, closer] of pairs) {
    if (input[last] !== closer) continue;
    const groupStart = findMatchingLeft(input, last, opener, closer);
    if (groupStart >= 0) {
      return { start: groupStart, atom: input.slice(groupStart, last + 1) };
    }
  }
  // Unbalanced or plain token: walk left until a separator.
  let cursor = last;
  while (cursor >= 0 && !/[\s+\-=<>]/.test(input[cursor])) cursor -= 1;
  const tokenStart = cursor + 1;
  return { start: tokenStart, atom: input.slice(tokenStart, last + 1) };
}
3646
/**
 * Reads the operand that begins at or after index `start`, skipping whitespace.
 *
 * If the operand starts with `{` or `(` and a matching closer exists, the
 * whole balanced group is the atom. Otherwise the atom extends rightwards
 * until whitespace or one of `+ - = < >`.
 *
 * @param {string} input - Text to scan.
 * @param {number} start - Index where the operand region begins.
 * @returns {{end: number, atom: string} | null} The atom and the index just
 *   past it, or null when only whitespace follows `start`.
 */
function readRightAtom(input, start) {
  let first = start;
  while (first < input.length && /\s/.test(input[first])) first += 1;
  if (first >= input.length) return null;
  const pairs = [["{", "}"], ["(", ")"]];
  for (const [opener, closer] of pairs) {
    if (input[first] !== opener) continue;
    const groupEnd = findMatchingRight(input, first, opener, closer);
    if (groupEnd >= 0) {
      return { end: groupEnd + 1, atom: input.slice(first, groupEnd + 1) };
    }
  }
  // Unbalanced or plain token: walk right until a separator.
  let cursor = first;
  while (cursor < input.length && !/[\s+\-=<>]/.test(input[cursor])) cursor += 1;
  return { end: cursor, atom: input.slice(first, cursor) };
}
3662
/**
 * Scans leftwards from `closeIndex` for the opener that balances the closer
 * found there, honoring nesting.
 *
 * @param {string} input - Text to scan.
 * @param {number} closeIndex - Index of the closing delimiter.
 * @param {string} open - Opening delimiter character.
 * @param {string} close - Closing delimiter character.
 * @returns {number} Index of the matching opener, or -1 when unbalanced.
 */
function findMatchingLeft(input, closeIndex, open, close) {
  let pending = 0;
  let idx = closeIndex;
  while (idx >= 0) {
    const ch = input[idx];
    if (ch === close) {
      pending += 1;
    } else if (ch === open) {
      pending -= 1;
      if (pending === 0) return idx;
    }
    idx -= 1;
  }
  return -1;
}
3673
/**
 * Scans rightwards from `openIndex` for the closer that balances the opener
 * found there, honoring nesting.
 *
 * @param {string} input - Text to scan.
 * @param {number} openIndex - Index of the opening delimiter.
 * @param {string} open - Opening delimiter character.
 * @param {string} close - Closing delimiter character.
 * @returns {number} Index of the matching closer, or -1 when unbalanced.
 */
function findMatchingRight(input, openIndex, open, close) {
  let pending = 0;
  let idx = openIndex;
  while (idx < input.length) {
    const ch = input[idx];
    if (ch === open) {
      pending += 1;
    } else if (ch === close) {
      pending -= 1;
      if (pending === 0) return idx;
    }
    idx += 1;
  }
  return -1;
}
3684
/**
 * Trims the input and, when it both starts with `{` and ends with `}`,
 * removes that first and last character.
 *
 * @param {string} input - Atom text, possibly brace-wrapped.
 * @returns {string} The trimmed text without its outer brace characters.
 */
function unwrapGroup(input) {
  const body = input.trim();
  const wrapped = body.startsWith("{") && body.endsWith("}");
  return wrapped ? body.slice(1, -1) : body;
}
3689
/**
 * Reports whether `ch` is an ASCII letter, digit, or underscore.
 *
 * @param {string | undefined} ch - Character to test; empty or undefined yields false.
 * @returns {boolean} True for a word character.
 */
function isWordChar(ch) {
  if (!ch) return false;
  return /[A-Za-z0-9_]/.test(ch);
}
3692
+
3463
3693
  // src/hwpx/parser.ts
3464
3694
  init_page_range();
3465
3695
  init_logger();
@@ -4141,6 +4371,17 @@ function findDescendant(node, targetTag, depth = 0) {
4141
4371
  }
4142
4372
  return null;
4143
4373
  }
4374
/**
 * Returns the first direct element child of `node` whose tag name, with any
 * namespace prefix removed, equals `targetTag`. Only immediate children are
 * inspected.
 *
 * @param {object} node - Node exposing `childNodes`.
 * @param {string} targetTag - Tag name to match, without prefix.
 * @returns {object | null} The matching child element, or null.
 */
function findChildByLocalName(node, targetTag) {
  const kids = node.childNodes;
  if (!kids) return null;
  const ELEMENT_NODE = 1;
  const total = kids.length;
  let index = 0;
  while (index < total) {
    const candidate = kids[index];
    index += 1;
    if (candidate.nodeType !== ELEMENT_NODE) continue;
    const qualified = candidate.tagName || candidate.localName || "";
    const local = qualified.replace(/^[^:]+:/, "");
    if (local === targetTag) return candidate;
  }
  return null;
}
4144
4385
  function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
4145
4386
  const children = drawTextNode.childNodes;
4146
4387
  if (!children) return;
@@ -4243,6 +4484,22 @@ function extractParagraphInfo(para, styleMap) {
4243
4484
  case "shapeComment":
4244
4485
  case "drawText":
4245
4486
  break;
4487
+ // 수식: <hp:equation> 내부의 <hp:script>에 HML/HULK-style 수식 본문이
4488
+ // 들어있음. hwpEquationToLatex로 LaTeX 변환 후 `$...$`로 래핑하여
4489
+ // 본문 텍스트에 인라인 삽입. 변환 실패/빈 결과는 조용히 드롭
4490
+ // (대체 텍스트 "수식입니다." 누출 방지는 기존 정규식이 처리).
4491
+ case "equation": {
4492
+ const script = findChildByLocalName(child, "script");
4493
+ const raw = script ? extractTextFromNode(script) : "";
4494
+ if (raw.trim()) {
4495
+ try {
4496
+ const latex = hwpEquationToLatex(raw).trim();
4497
+ if (latex) text += " $" + latex.replace(/\$/g, "\\$") + "$ ";
4498
+ } catch {
4499
+ }
4500
+ }
4501
+ break;
4502
+ }
4246
4503
  // run 요소에서 charPrIDRef 추출
4247
4504
  case "r": {
4248
4505
  const runCharPr = child.getAttribute("charPrIDRef");
@@ -4309,8 +4566,13 @@ var TAG_CHAR_SHAPE = 68;
4309
4566
  var TAG_CTRL_HEADER = 71;
4310
4567
  var TAG_LIST_HEADER = 72;
4311
4568
  var TAG_TABLE = 77;
4312
- var TAG_DOC_CHAR_SHAPE = 55;
4313
- var TAG_DOC_STYLE = 58;
4569
+ var TAG_EQEDIT = 88;
4570
+ var HWPTAG_BEGIN = 16;
4571
+ var TAG_ID_MAPPINGS = HWPTAG_BEGIN + 1;
4572
+ var TAG_FACE_NAME = HWPTAG_BEGIN + 3;
4573
+ var TAG_DOC_CHAR_SHAPE = HWPTAG_BEGIN + 5;
4574
+ var TAG_DOC_PARA_SHAPE = HWPTAG_BEGIN + 9;
4575
+ var TAG_DOC_STYLE = HWPTAG_BEGIN + 10;
4314
4576
  var CHAR_LINE = 0;
4315
4577
  var CHAR_SECTION_BREAK = 10;
4316
4578
  var CHAR_PARA = 13;
@@ -4468,6 +4730,15 @@ function extractText(data) {
4468
4730
  }
4469
4731
  return result;
4470
4732
  }
4733
/**
 * Decodes the equation script from a record payload.
 *
 * The code reads a 16-bit little-endian character count at byte offset 4 and
 * decodes that many UTF-16LE code units starting at offset 6. NUL characters
 * are removed and the result is trimmed.
 *
 * @param {Buffer} data - Record payload.
 * @returns {string | null} The script text, or null when the payload is too
 *   short, the count is zero or overruns the buffer, or the text is empty.
 */
function extractEquationText(data) {
  const headerBytes = 6;
  if (data.length < headerBytes) return null;
  const charCount = data.readUInt16LE(4);
  if (charCount <= 0) return null;
  const endOffset = headerBytes + charCount * 2;
  if (endOffset > data.length) return null;
  const decoded = data.subarray(headerBytes, endOffset).toString("utf16le");
  const script = decoded.replace(/\0+/g, "").trim();
  return script === "" ? null : script;
}
4471
4742
 
4472
4743
  // src/hwp5/aes.ts
4473
4744
  var S_BOX = new Uint8Array([
@@ -5627,6 +5898,26 @@ function findViewTextSectionsLenient(lcfb, compressed) {
5627
5898
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
5628
5899
  }
5629
5900
  var TAG_SHAPE_COMPONENT = 74;
5901
// Control id string that marks an equation control.
var CTRL_ID_EQEDIT = "deqe";
/**
 * Reports whether a 4-character control id denotes an equation control.
 * Both "deqe" and its reverse "eqed" are accepted.
 *
 * @param {string} ctrlId - Control id read from a control header.
 * @returns {boolean} True when the id is one of the two equation ids.
 */
function isEquationControlId(ctrlId) {
  if (ctrlId === CTRL_ID_EQEDIT) return true;
  return ctrlId === "eqed";
}
5905
/**
 * Converts an equation script to LaTeX and wraps it in `$...$` for inline
 * Markdown math. Dollar signs in the LaTeX are backslash-escaped so they
 * cannot end the math span early.
 *
 * @param {string} equation - Raw equation script.
 * @returns {string} The `$`-delimited LaTeX, or "" when conversion yields nothing.
 */
function formatEquationForMarkdown(equation) {
  const latex = hwpEquationToLatex(equation);
  if (!latex) return "";
  const escaped = latex.replace(/\$/g, "\\$");
  return "$" + escaped + "$";
}
5910
/**
 * Looks for the equation record belonging to the control at `ctrlIdx` and
 * returns it formatted for Markdown.
 *
 * At most the next 9 records are examined, and the search stops at the first
 * record whose level is not deeper than the control's own level. The first
 * record tagged TAG_EQEDIT decides the result, even when it holds no text.
 *
 * @param {Array<{level: number, tagId: number, data: Buffer}>} records - Flat record list.
 * @param {number} ctrlIdx - Index of the control header record.
 * @returns {string | null} Markdown math for the equation, or null when no
 *   usable equation record is found.
 */
function extractEquationFromControl(records, ctrlIdx) {
  const parentLevel = records[ctrlIdx].level;
  const stop = Math.min(records.length, ctrlIdx + 10);
  for (let idx = ctrlIdx + 1; idx < stop; idx += 1) {
    const record = records[idx];
    // Leaving the control's subtree ends the search.
    if (record.level <= parentLevel) return null;
    if (record.tagId === TAG_EQEDIT) {
      const script = extractEquationText(record.data);
      if (!script) return null;
      return formatEquationForMarkdown(script);
    }
  }
  return null;
}
5630
5921
  function extractBinDataId(records, ctrlIdx) {
5631
5922
  const ctrlLevel = records[ctrlIdx].level;
5632
5923
  for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 50; j++) {
@@ -5786,6 +6077,16 @@ function parseSection(records, docInfo, warnings, sectionNum) {
5786
6077
  }
5787
6078
  } else if (ctrlId === " elo" || ctrlId === "ole ") {
5788
6079
  warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
6080
+ } else if (isEquationControlId(ctrlId)) {
6081
+ const equation = extractEquationFromControl(records, i);
6082
+ if (equation) {
6083
+ const lastBlock = blocks[blocks.length - 1];
6084
+ if (lastBlock && lastBlock.type === "paragraph" && lastBlock.text) {
6085
+ lastBlock.text = lastBlock.text + " " + equation;
6086
+ } else {
6087
+ blocks.push({ type: "paragraph", text: equation, pageNumber: sectionNum });
6088
+ }
6089
+ }
5789
6090
  } else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
5790
6091
  const noteText = extractNoteText(records, i);
5791
6092
  if (noteText && blocks.length > 0) {
@@ -5818,6 +6119,13 @@ function extractNoteText(records, ctrlIdx) {
5818
6119
  const t = extractText(r.data).trim();
5819
6120
  if (t) texts.push(t);
5820
6121
  }
6122
+ if (r.tagId === TAG_CTRL_HEADER && r.data.length >= 4) {
6123
+ const innerCtrlId = r.data.subarray(0, 4).toString("ascii");
6124
+ if (isEquationControlId(innerCtrlId)) {
6125
+ const equation = extractEquationFromControl(records, j);
6126
+ if (equation) texts.push(equation);
6127
+ }
6128
+ }
5821
6129
  }
5822
6130
  return texts.length > 0 ? texts.join(" ") : null;
5823
6131
  }
@@ -5831,6 +6139,13 @@ function extractTextBoxText(records, ctrlIdx) {
5831
6139
  const t = extractText(r.data).trim();
5832
6140
  if (t) texts.push(t);
5833
6141
  }
6142
+ if (r.tagId === TAG_CTRL_HEADER && r.data.length >= 4) {
6143
+ const innerCtrlId = r.data.subarray(0, 4).toString("ascii");
6144
+ if (isEquationControlId(innerCtrlId)) {
6145
+ const equation = extractEquationFromControl(records, j);
6146
+ if (equation) texts.push(equation);
6147
+ }
6148
+ }
5834
6149
  }
5835
6150
  return texts.length > 0 ? texts.join("\n") : null;
5836
6151
  }
@@ -5899,6 +6214,12 @@ function parseParagraphWithTables(records, startIdx) {
5899
6214
  i = nextIdx;
5900
6215
  continue;
5901
6216
  }
6217
+ if (isEquationControlId(ctrlId)) {
6218
+ const equation = extractEquationFromControl(records, i);
6219
+ if (equation) {
6220
+ text = text ? text + " " + equation : equation;
6221
+ }
6222
+ }
5902
6223
  }
5903
6224
  i++;
5904
6225
  }
@@ -11208,526 +11529,6 @@ async function markdownToXlsx(markdown, options) {
11208
11529
  return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
11209
11530
  }
11210
11531
 
11211
- // src/convert/index.ts
11212
- import { readFile } from "fs/promises";
11213
-
11214
- // src/convert/libreoffice.ts
11215
- import libre from "libreoffice-convert";
11216
-
11217
- // src/convert/error.ts
11218
- var ConvertError = class extends Error {
11219
- constructor(code, message) {
11220
- super(message);
11221
- this.code = code;
11222
- this.name = "ConvertError";
11223
- }
11224
- };
11225
-
11226
- // src/convert/installer.ts
11227
- import { homedir } from "os";
11228
- import { join as join4, delimiter } from "path";
11229
- import { mkdir, access, symlink, rm } from "fs/promises";
11230
- import { createWriteStream } from "fs";
11231
- import { spawn as spawn2 } from "child_process";
11232
- var installInFlight = null;
11233
- var CACHE_DIR = join4(homedir(), ".cache", "kordoc", "libreoffice");
11234
- var VERSION_FILE = join4(CACHE_DIR, "version");
11235
- var PACKAGES = {
11236
- darwin: {
11237
- url: "https://ftp.osuosl.org/pub/tdf/libreoffice/stable/26.2.3/mac/x86_64/LibreOffice_26.2.3_MacOS_x86-64.dmg",
11238
- binPath: "LibreOffice.app/Contents/MacOS/soffice",
11239
- sizeMb: 300
11240
- },
11241
- linux: {
11242
- url: "https://ftp.osuosl.org/pub/tdf/libreoffice/stable/26.2.3/deb/x86_64/LibreOffice_26.2.3_Linux_x86-64_deb.tar.gz",
11243
- binPath: "opt/libreoffice26.2/program/soffice",
11244
- sizeMb: 210
11245
- },
11246
- win32: {
11247
- url: "https://ftp.osuosl.org/pub/tdf/libreoffice/stable/26.2.3/win/x86_64/LibreOffice_26.2.3_Win_x86-64.msi",
11248
- binPath: "LibreOffice/program/soffice.exe",
11249
- sizeMb: 360
11250
- }
11251
- };
11252
- async function findInPath() {
11253
- return new Promise((resolve4) => {
11254
- const child = spawn2("soffice", ["--version"], { stdio: "ignore" });
11255
- child.on("close", (code) => resolve4(code === 0 ? "soffice" : null));
11256
- child.on("error", () => resolve4(null));
11257
- });
11258
- }
11259
- async function findInCache() {
11260
- const cachedBin = join4(CACHE_DIR, "bin", "soffice");
11261
- try {
11262
- await access(cachedBin);
11263
- return cachedBin;
11264
- } catch {
11265
- return null;
11266
- }
11267
- }
11268
- async function findInDefaultPaths() {
11269
- const platform = process.platform;
11270
- const paths = [];
11271
- if (platform === "darwin") {
11272
- paths.push(
11273
- "/Applications/LibreOffice.app/Contents/MacOS/soffice",
11274
- "/opt/homebrew/bin/soffice",
11275
- "/usr/local/bin/soffice"
11276
- );
11277
- } else if (platform === "linux") {
11278
- paths.push(
11279
- "/usr/bin/soffice",
11280
- "/usr/lib/libreoffice/program/soffice"
11281
- );
11282
- } else if (platform === "win32") {
11283
- const pf = process.env["ProgramFiles"] ?? "C:\\Program Files";
11284
- const pf86 = process.env["ProgramFiles(x86)"] ?? "C:\\Program Files (x86)";
11285
- paths.push(
11286
- join4(pf, "LibreOffice", "program", "soffice.exe"),
11287
- join4(pf86, "LibreOffice", "program", "soffice.exe")
11288
- );
11289
- }
11290
- for (const p of paths) {
11291
- try {
11292
- await access(p);
11293
- return p;
11294
- } catch {
11295
- continue;
11296
- }
11297
- }
11298
- return null;
11299
- }
11300
- async function downloadWithProgress(url, dest, totalBytes, onProgress) {
11301
- const response = await fetch(url);
11302
- if (!response.ok) throw new Error(`\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: HTTP ${response.status} (${url})`);
11303
- if (!response.body) throw new Error("\uB2E4\uC6B4\uB85C\uB4DC \uC2E4\uD328: response body \uC5C6\uC74C");
11304
- const file = createWriteStream(dest);
11305
- const reader = response.body.getReader();
11306
- let downloaded = 0;
11307
- try {
11308
- while (true) {
11309
- const { done, value } = await reader.read();
11310
- if (done) break;
11311
- if (!file.write(value)) {
11312
- await new Promise((resolve4) => file.once("drain", resolve4));
11313
- }
11314
- downloaded += value.length;
11315
- onProgress?.(downloaded, totalBytes);
11316
- }
11317
- } finally {
11318
- reader.releaseLock();
11319
- await new Promise((resolve4, reject) => {
11320
- file.end((err) => err ? reject(err) : resolve4());
11321
- });
11322
- }
11323
- }
11324
- async function installForPlatform(pkg, onProgress) {
11325
- const platform = process.platform;
11326
- await mkdir(CACHE_DIR, { recursive: true });
11327
- const downloadPath = join4(CACHE_DIR, `download-${Date.now()}`);
11328
- await downloadWithProgress(pkg.url, downloadPath, pkg.sizeMb * 1024 * 1024, onProgress);
11329
- try {
11330
- if (platform === "darwin") {
11331
- return await installMacOS(pkg, downloadPath);
11332
- } else if (platform === "linux") {
11333
- return await installLinux(pkg, downloadPath);
11334
- } else if (platform === "win32") {
11335
- return await installWindows(pkg, downloadPath);
11336
- }
11337
- } catch (err) {
11338
- await rm(downloadPath, { force: true });
11339
- throw err;
11340
- }
11341
- throw new ConvertError("UNSUPPORTED_PLATFORM", `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4`);
11342
- }
11343
- async function installMacOS(pkg, downloadPath) {
11344
- const mountPoint = `/Volumes/LibreOffice_${Date.now()}`;
11345
- await new Promise((resolve4, reject) => {
11346
- const stderr = [];
11347
- const child = spawn2("hdiutil", ["attach", "-nobrowse", "-noverify", "-mountpoint", mountPoint, downloadPath]);
11348
- child.stderr?.on("data", (d) => stderr.push(d.toString()));
11349
- child.on(
11350
- "close",
11351
- (code) => code === 0 ? resolve4() : reject(new Error(`dmg \uB9C8\uC6B4\uD2B8 \uC2E4\uD328 (code=${code}): ${stderr.join("").trim()}`))
11352
- );
11353
- });
11354
- try {
11355
- const appSource = join4(mountPoint, "LibreOffice.app");
11356
- const appDest = join4(CACHE_DIR, "LibreOffice.app");
11357
- await new Promise((resolve4, reject) => {
11358
- const child = spawn2("cp", ["-R", appSource, appDest]);
11359
- child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(".app \uBCF5\uC0AC \uC2E4\uD328")));
11360
- });
11361
- } finally {
11362
- await new Promise((resolve4) => {
11363
- const child = spawn2("hdiutil", ["detach", mountPoint]);
11364
- child.on("close", () => resolve4());
11365
- });
11366
- }
11367
- await rm(downloadPath, { force: true });
11368
- return await createSymlink(join4(CACHE_DIR, pkg.binPath));
11369
- }
11370
- async function installLinux(pkg, downloadPath) {
11371
- const extractDir = join4(CACHE_DIR, `extract-${Date.now()}`);
11372
- await mkdir(extractDir, { recursive: true });
11373
- await new Promise((resolve4, reject) => {
11374
- const child = spawn2("tar", ["xzf", downloadPath, "-C", extractDir]);
11375
- child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("\uC555\uCD95 \uD574\uC81C \uC2E4\uD328")));
11376
- });
11377
- const debsDir = join4(extractDir, "DEBS");
11378
- try {
11379
- await access(debsDir);
11380
- const entries = await (await import("fs/promises")).readdir(debsDir);
11381
- for (const entry of entries) {
11382
- if (entry.endsWith(".deb")) {
11383
- await new Promise((resolve4, reject) => {
11384
- const child = spawn2("dpkg-deb", ["-x", join4(debsDir, entry), CACHE_DIR]);
11385
- child.on("close", (code) => code === 0 ? resolve4() : reject(new Error(`${entry} \uCD94\uCD9C \uC2E4\uD328`)));
11386
- });
11387
- }
11388
- }
11389
- } catch {
11390
- }
11391
- await rm(downloadPath, { force: true });
11392
- await rm(extractDir, { recursive: true, force: true });
11393
- return await createSymlink(join4(CACHE_DIR, pkg.binPath));
11394
- }
11395
- async function installWindows(pkg, downloadPath) {
11396
- await new Promise((resolve4, reject) => {
11397
- const child = spawn2("msiexec", ["/a", downloadPath, "/qn", `TARGETDIR=${CACHE_DIR}`]);
11398
- child.on("close", (code) => code === 0 ? resolve4() : reject(new Error("MSI \uC124\uCE58 \uC2E4\uD328")));
11399
- });
11400
- await rm(downloadPath, { force: true });
11401
- return join4(CACHE_DIR, pkg.binPath);
11402
- }
11403
- async function createSymlink(actualBin) {
11404
- const binDir = join4(CACHE_DIR, "bin");
11405
- await mkdir(binDir, { recursive: true });
11406
- const linkBin = join4(binDir, "soffice");
11407
- try {
11408
- await symlink(actualBin, linkBin);
11409
- } catch {
11410
- }
11411
- process.env.PATH = `${binDir}${delimiter}${process.env.PATH}`;
11412
- return linkBin;
11413
- }
11414
- async function installLibreOffice(onProgress) {
11415
- const platform = process.platform;
11416
- const pkg = PACKAGES[platform];
11417
- if (!pkg) {
11418
- throw new ConvertError(
11419
- "UNSUPPORTED_PLATFORM",
11420
- `${platform}\uC740 \uC790\uB3D9 \uC124\uCE58\uB97C \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4. \uC218\uB3D9\uC73C\uB85C LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694.`
11421
- );
11422
- }
11423
- return await installForPlatform(pkg, onProgress);
11424
- }
11425
- async function resolveSoffice(emitter, autoInstall = true) {
11426
- emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
11427
- const inPath = await findInPath();
11428
- if (inPath) {
11429
- emitter.validate("soffice_found", "\uC2DC\uC2A4\uD15C PATH\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inPath });
11430
- return inPath;
11431
- }
11432
- const inCache = await findInCache();
11433
- if (inCache) {
11434
- emitter.validate("soffice_found", "\uCE90\uC2DC\uB41C LibreOffice \uBC1C\uACAC", { sofficePath: inCache });
11435
- return inCache;
11436
- }
11437
- const inDefault = await findInDefaultPaths();
11438
- if (inDefault) {
11439
- emitter.validate("soffice_found", "\uAE30\uBCF8 \uACBD\uB85C\uC5D0\uC11C LibreOffice \uBC1C\uACAC", { sofficePath: inDefault });
11440
- return inDefault;
11441
- }
11442
- if (!autoInstall) {
11443
- emitter.error(
11444
- "validate",
11445
- "SOFFICE_NOT_FOUND",
11446
- "LibreOffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4",
11447
- "\uC218\uB3D9\uC73C\uB85C \uC124\uCE58\uD558\uAC70\uB098 autoInstallLibreOffice: true \uC635\uC158\uC744 \uC0AC\uC6A9\uD558\uC138\uC694."
11448
- );
11449
- throw new ConvertError("SOFFICE_NOT_FOUND", "LibreOffice\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4");
11450
- }
11451
- if (installInFlight) {
11452
- return installInFlight;
11453
- }
11454
- emitter.install("install_start", "LibreOffice \uC790\uB3D9 \uC124\uCE58\uB97C \uC2DC\uC791\uD569\uB2C8\uB2E4...");
11455
- installInFlight = (async () => {
11456
- try {
11457
- const installed = await installLibreOffice((downloaded, total) => {
11458
- const percent = Math.round(downloaded / total * 100);
11459
- emitter.install("download_progress", `\uB2E4\uC6B4\uB85C\uB4DC \uC911... ${percent}%`, {
11460
- percent,
11461
- downloadedBytes: downloaded,
11462
- totalBytes: total
11463
- });
11464
- });
11465
- emitter.install("install_complete", "\uC124\uCE58 \uC644\uB8CC", { installedPath: installed });
11466
- return installed;
11467
- } catch (err) {
11468
- const errorMsg = err instanceof Error ? err.message : String(err);
11469
- emitter.install("install_failed", "\uC124\uCE58 \uC2E4\uD328", { error: errorMsg });
11470
- throw err;
11471
- } finally {
11472
- installInFlight = null;
11473
- }
11474
- })();
11475
- return installInFlight;
11476
- }
11477
-
11478
- // src/convert/libreoffice.ts
11479
- var libreConvert = libre.convert;
11480
- var libreConvertWithOptions = libre.convertWithOptions;
11481
- async function convertBuffer(buffer, targetExt, timeoutMs = 6e4, sofficePath) {
11482
- return new Promise((resolve4, reject) => {
11483
- const timer = setTimeout(() => {
11484
- reject(
11485
- new ConvertError("TIMEOUT", `\uBCC0\uD658 \uD0C0\uC784\uC544\uC6C3 (${timeoutMs}ms \uCD08\uACFC)`)
11486
- );
11487
- }, timeoutMs);
11488
- const cb = (err, done) => {
11489
- clearTimeout(timer);
11490
- if (err || !done) {
11491
- reject(
11492
- new ConvertError(
11493
- "CONVERT_FAILED",
11494
- err?.message ?? "LibreOffice \uBCC0\uD658 \uC2E4\uD328"
11495
- )
11496
- );
11497
- return;
11498
- }
11499
- resolve4(done);
11500
- };
11501
- if (sofficePath) {
11502
- libreConvertWithOptions(buffer, targetExt, void 0, { sofficeBinaryPaths: [sofficePath] }, cb);
11503
- } else {
11504
- libreConvert(buffer, targetExt, void 0, cb);
11505
- }
11506
- });
11507
- }
11508
-
11509
- // src/convert/events.ts
11510
- var ConvertEventEmitter = class {
11511
- listener = null;
11512
- /** 이벤트 리스너 등록 */
11513
- setListener(listener) {
11514
- this.listener = listener;
11515
- }
11516
- /** 이벤트 발송 */
11517
- emit(event) {
11518
- try {
11519
- this.listener?.(event);
11520
- } catch {
11521
- }
11522
- }
11523
- /** 타입 안전한 헬퍼: detect 이벤트 */
11524
- detect(stage, message, meta) {
11525
- this.emit({ type: "detect", stage, message, ...meta });
11526
- }
11527
- /** 타입 안전한 헬퍼: validate 이벤트 */
11528
- validate(stage, message, meta) {
11529
- this.emit({ type: "validate", stage, message, ...meta });
11530
- }
11531
- /** 타입 안전한 헬퍼: install 이벤트 */
11532
- install(stage, message, meta) {
11533
- this.emit({ type: "install", stage, message, ...meta });
11534
- }
11535
- /** 타입 안전한 헬퍼: convert 진행 이벤트 */
11536
- progress(percent, message) {
11537
- this.emit({ type: "convert", stage: "convert_progress", message, percent });
11538
- }
11539
- /** 타입 안전한 헬퍼: convert 시작 */
11540
- convertStart(message) {
11541
- this.emit({ type: "convert", stage: "convert_start", message, percent: 0 });
11542
- }
11543
- /** 타입 안전한 헬퍼: convert 완료 */
11544
- convertDone(message) {
11545
- this.emit({ type: "convert", stage: "convert_done", message, percent: 100 });
11546
- }
11547
- /** 타입 안전한 헬퍼: 완료 이벤트 */
11548
- complete(result) {
11549
- this.emit({ type: "complete", stage: "success", message: "\uBCC0\uD658 \uC644\uB8CC", result });
11550
- }
11551
- /** 타입 안전한 헬퍼: 에러 이벤트 */
11552
- error(stage, code, message, suggestion) {
11553
- this.emit({ type: "error", stage, code, message, recoverable: true, suggestion });
11554
- }
11555
- };
11556
-
11557
- // src/convert/index.ts
11558
- var isConverting = false;
11559
- var queue = [];
11560
- async function acquireConvertLock() {
11561
- if (!isConverting) {
11562
- isConverting = true;
11563
- return () => {
11564
- isConverting = false;
11565
- const next = queue.shift();
11566
- next?.();
11567
- };
11568
- }
11569
- return new Promise((resolve4) => {
11570
- queue.push(() => {
11571
- isConverting = true;
11572
- resolve4(() => {
11573
- isConverting = false;
11574
- const next = queue.shift();
11575
- next?.();
11576
- });
11577
- });
11578
- });
11579
- }
11580
- async function convertToPdf(input, options) {
11581
- const emitter = new ConvertEventEmitter();
11582
- if (options?.onEvent) {
11583
- emitter.setListener(options.onEvent);
11584
- }
11585
- if (options?.onProgress) {
11586
- const legacyProgress = options.onProgress;
11587
- emitter.setListener((event) => {
11588
- if (event.type === "convert" && event.stage === "convert_progress") {
11589
- legacyProgress(event.percent, event.message);
11590
- }
11591
- });
11592
- }
11593
- try {
11594
- emitter.detect("reading", "\uC785\uB825 \uD30C\uC77C \uC77D\uB294 \uC911...");
11595
- let buffer;
11596
- try {
11597
- if (typeof input === "string") {
11598
- buffer = await readFile(input);
11599
- } else if (Buffer.isBuffer(input)) {
11600
- buffer = input;
11601
- } else {
11602
- buffer = Buffer.from(input);
11603
- }
11604
- } catch (err) {
11605
- emitter.error(
11606
- "detect",
11607
- "PARSE_ERROR",
11608
- `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`
11609
- );
11610
- return {
11611
- success: false,
11612
- code: "PARSE_ERROR",
11613
- error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
11614
- stage: "detect"
11615
- };
11616
- }
11617
- const MAX_FILE_SIZE = 500 * 1024 * 1024;
11618
- if (buffer.length > MAX_FILE_SIZE) {
11619
- emitter.error(
11620
- "detect",
11621
- "FILE_TOO_LARGE",
11622
- `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`
11623
- );
11624
- return {
11625
- success: false,
11626
- code: "FILE_TOO_LARGE",
11627
- error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
11628
- stage: "detect"
11629
- };
11630
- }
11631
- const format = detectFormat(toArrayBuffer(buffer));
11632
- emitter.detect("format_detected", `\uD3EC\uB9F7 \uAC10\uC9C0 \uC644\uB8CC: ${format}`, { format });
11633
- if (format !== "hwp" && format !== "hwpx") {
11634
- emitter.error("detect", "UNSUPPORTED_FORMAT", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`);
11635
- return {
11636
- success: false,
11637
- code: "UNSUPPORTED_FORMAT",
11638
- error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
11639
- stage: "detect"
11640
- };
11641
- }
11642
- emitter.validate("soffice_check", "LibreOffice \uAC00\uC6A9\uC131 \uD655\uC778 \uC911...");
11643
- let sofficePath;
11644
- try {
11645
- sofficePath = await resolveSoffice(emitter, options?.autoInstallLibreOffice ?? true);
11646
- } catch (err) {
11647
- if (err instanceof ConvertError) {
11648
- return {
11649
- success: false,
11650
- code: err.code,
11651
- error: err.message,
11652
- stage: "validate"
11653
- };
11654
- }
11655
- throw err;
11656
- }
11657
- const releaseLock = await acquireConvertLock();
11658
- try {
11659
- emitter.convertStart("\uBCC0\uD658 \uC2DC\uC791...");
11660
- emitter.progress(10, "\uBCC0\uD658 \uC911...");
11661
- const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs, sofficePath);
11662
- emitter.progress(100, "\uBCC0\uD658 \uC644\uB8CC");
11663
- emitter.convertDone("\uBCC0\uD658 \uC644\uB8CC");
11664
- const result = {
11665
- success: true,
11666
- pdf: new Uint8Array(pdf),
11667
- sourceFormat: format
11668
- };
11669
- emitter.complete({
11670
- sourceFormat: format,
11671
- pdfSize: pdf.length
11672
- });
11673
- return result;
11674
- } catch (err) {
11675
- if (err instanceof ConvertError) {
11676
- emitter.error("convert", err.code, err.message);
11677
- return {
11678
- success: false,
11679
- code: err.code,
11680
- error: err.message,
11681
- stage: "convert"
11682
- };
11683
- }
11684
- const errorMsg = err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328";
11685
- emitter.error("convert", classifyError(err), errorMsg);
11686
- return {
11687
- success: false,
11688
- code: classifyError(err),
11689
- error: errorMsg,
11690
- stage: "convert"
11691
- };
11692
- } finally {
11693
- releaseLock();
11694
- }
11695
- } catch (unexpectedErr) {
11696
- const errorMsg = unexpectedErr instanceof Error ? unexpectedErr.message : "\uC608\uC0C1\uCE58 \uBABB\uD55C \uC624\uB958";
11697
- emitter.error("convert", "PARSE_ERROR", errorMsg);
11698
- return {
11699
- success: false,
11700
- code: "PARSE_ERROR",
11701
- error: errorMsg,
11702
- stage: "convert"
11703
- };
11704
- }
11705
- }
11706
- async function convertHwpToPdf(input, options) {
11707
- const result = await convertToPdf(input, options);
11708
- if (result.success && result.sourceFormat !== "hwp") {
11709
- return {
11710
- success: false,
11711
- code: "UNSUPPORTED_FORMAT",
11712
- error: `HWP 5.x \uD3EC\uB9F7\uC774 \uC544\uB2D9\uB2C8\uB2E4: ${result.sourceFormat}`,
11713
- stage: "detect"
11714
- };
11715
- }
11716
- return result;
11717
- }
11718
- async function convertHwpxToPdf(input, options) {
11719
- const result = await convertToPdf(input, options);
11720
- if (result.success && result.sourceFormat !== "hwpx") {
11721
- return {
11722
- success: false,
11723
- code: "UNSUPPORTED_FORMAT",
11724
- error: `HWPX \uD3EC\uB9F7\uC774 \uC544\uB2D9\uB2C8\uB2E4: ${result.sourceFormat}`,
11725
- stage: "detect"
11726
- };
11727
- }
11728
- return result;
11729
- }
11730
-
11731
11532
  // src/ocr/api-key-rotation.ts
11732
11533
  var AllKeysCoolingDownError = class extends Error {
11733
11534
  waitMs;
@@ -11822,9 +11623,9 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
11822
11623
  };
11823
11624
 
11824
11625
  // src/pipeline/unified-ocr.ts
11825
- import { mkdir as mkdir2, readdir, readFile as readFile2, stat, writeFile as writeFile2 } from "fs/promises";
11826
- import { basename as basename2, delimiter as delimiter2, dirname as dirname3, extname, join as join5, resolve as resolve3 } from "path";
11827
- import { spawn as spawn3 } from "child_process";
11626
+ import { mkdir, readdir, readFile, stat, writeFile } from "fs/promises";
11627
+ import { basename as basename2, dirname as dirname3, extname, join as join4, resolve as resolve3 } from "path";
11628
+ import { spawn as spawn2 } from "child_process";
11828
11629
  import { performance } from "perf_hooks";
11829
11630
  init_logger();
11830
11631
 
@@ -11960,13 +11761,13 @@ function elapsedMs(startAt) {
11960
11761
  async function runUnifiedOcrPipeline(inputPath, options = {}) {
11961
11762
  const absInput = resolve3(inputPath);
11962
11763
  const stem = basename2(absInput, extname(absInput));
11963
- const workspaceDir = resolve3(options.workspaceDir ?? join5(dirname3(absInput), `${stem}_ocr_workspace`));
11964
- const imagesDir = join5(workspaceDir, "images");
11965
- const rawDir = join5(workspaceDir, "ocr", "raw");
11966
- const diffDir = join5(workspaceDir, "ocr", "diff");
11967
- const outputPath = resolve3(options.outputPath ?? join5(dirname3(absInput), `${stem}.md`));
11968
- const reportPath = join5(workspaceDir, "run-report.json");
11969
- const modelCachePath = join5(dirname3(absInput), ".kordoc-model-cache.json");
11764
+ const workspaceDir = resolve3(options.workspaceDir ?? join4(dirname3(absInput), `${stem}_ocr_workspace`));
11765
+ const imagesDir = join4(workspaceDir, "images");
11766
+ const rawDir = join4(workspaceDir, "ocr", "raw");
11767
+ const diffDir = join4(workspaceDir, "ocr", "diff");
11768
+ const outputPath = resolve3(options.outputPath ?? join4(dirname3(absInput), `${stem}.md`));
11769
+ const reportPath = join4(workspaceDir, "run-report.json");
11770
+ const modelCachePath = join4(dirname3(absInput), ".kordoc-model-cache.json");
11970
11771
  const baseUrl = options.baseUrl ?? "https://integrate.api.nvidia.com/v1/chat/completions";
11971
11772
  const timeoutMs = options.timeoutMs ?? 6e4;
11972
11773
  const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
@@ -11977,12 +11778,11 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11977
11778
  const models = sortModelsByCache(modelsInput, modelCache);
11978
11779
  const modelMaxTokens = { ...DEFAULT_MODEL_MAX_TOKENS, ...options.modelMaxTokens ?? {} };
11979
11780
  const stageWeights = normalizeWeights({ ...DEFAULT_STAGE_WEIGHTS, ...options.stageWeights ?? {} });
11980
- const keyPool = ApiKeyRotationPool.fromEnv();
11981
11781
  const runId = options.runId ?? generateRunId("ocr");
11982
11782
  const logger = (options.logger ?? createLoggerFromEnv()).withRun(runId).child({ component: "pipeline/unified-ocr.ts" });
11983
- await mkdir2(imagesDir, { recursive: true });
11984
- await mkdir2(rawDir, { recursive: true });
11985
- await mkdir2(diffDir, { recursive: true });
11783
+ await mkdir(imagesDir, { recursive: true });
11784
+ await mkdir(rawDir, { recursive: true });
11785
+ await mkdir(diffDir, { recursive: true });
11986
11786
  const timingsMs = {};
11987
11787
  const markStageStart = (stage, message) => emitProgress(options.onEvent, stage, 0, stageWeights, { message, type: "stage_start" });
11988
11788
  const markStageProgress = (stage, stagePercent, current, total, message, model) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message, model });
@@ -11993,51 +11793,57 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11993
11793
  };
11994
11794
  try {
11995
11795
  ensureSupportedInput(absInput);
11996
- let workingPdfPath = absInput;
11997
11796
  const convertStart = performance.now();
11998
11797
  currentStage = "convert";
11999
- markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
12000
- logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
12001
11798
  if (extname(absInput).toLowerCase() !== ".pdf") {
12002
- const convertEmitter = new ConvertEventEmitter();
12003
- if (options.onEvent) {
12004
- convertEmitter.setListener((evt) => {
12005
- if (evt.type === "install" || evt.type === "validate" || evt.type === "error") {
12006
- try {
12007
- ;
12008
- options.onEvent(evt);
12009
- } catch {
12010
- }
12011
- }
12012
- });
12013
- }
12014
- let resolvedSofficePath;
12015
- if (options.sofficePath) {
12016
- const sofficeDir = dirname3(options.sofficePath);
12017
- process.env.PATH = `${sofficeDir}${delimiter2}${process.env.PATH ?? ""}`;
12018
- convertEmitter.validate("soffice_found", "\uC9C1\uC811 \uC9C0\uC815\uB41C LibreOffice \uACBD\uB85C \uC0AC\uC6A9", { sofficePath: options.sofficePath });
12019
- resolvedSofficePath = options.sofficePath;
12020
- } else {
12021
- resolvedSofficePath = await resolveSoffice(convertEmitter, options.autoInstallLibreOffice ?? false);
12022
- }
12023
- workingPdfPath = join5(workspaceDir, `${stem}.pdf`);
12024
- const inputBuffer = await readFile2(absInput);
12025
- const out = await convertBuffer(inputBuffer, ".pdf", 5 * 6e4, resolvedSofficePath);
12026
- await writeFile2(workingPdfPath, out);
11799
+ markStageStart("convert", "\uC790\uCCB4 \uD30C\uC11C\uB85C Markdown \uBCC0\uD658 \uC911");
11800
+ logStage("info", "convert", "start", "\uC790\uCCB4 \uD30C\uC11C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
11801
+ const inputBuffer = await readFile(absInput);
11802
+ const parsed = await parseNativeDocument(inputBuffer);
11803
+ timingsMs.convert = elapsedMs(convertStart);
11804
+ markStageDone("convert", "\uC790\uCCB4 \uD30C\uC11C \uBCC0\uD658 \uC644\uB8CC");
11805
+ logStage("info", "convert", "done", "\uC790\uCCB4 \uD30C\uC11C \uBCC0\uD658 \uC644\uB8CC", { format: parsed.fileType, elapsedMs: timingsMs.convert });
11806
+ const mergeStart2 = performance.now();
11807
+ currentStage = "merge";
11808
+ markStageStart("merge", "Markdown \uC800\uC7A5 \uC911");
11809
+ await writeFile(outputPath, parsed.markdown, "utf-8");
11810
+ timingsMs.merge = elapsedMs(mergeStart2);
11811
+ markStageDone("merge", "Markdown \uC800\uC7A5 \uC644\uB8CC");
11812
+ logStage("info", "merge", "done", "Markdown \uC800\uC7A5 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
11813
+ const report2 = {
11814
+ inputPath: absInput,
11815
+ outputPath,
11816
+ workspaceDir,
11817
+ selectedModel: "native-parser",
11818
+ probeImage: "",
11819
+ probeResults: [],
11820
+ pageCount: parsed.pageCount,
11821
+ sourceFormat: parsed.fileType,
11822
+ keyHealth: [],
11823
+ timingsMs,
11824
+ modelCachePath
11825
+ };
11826
+ await writeFile(reportPath, JSON.stringify(report2, null, 2), "utf-8");
11827
+ logStage("info", "finalize", "done", "native parse run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
11828
+ return { outputPath, reportPath, selectedModel: "native-parser" };
12027
11829
  }
11830
+ const workingPdfPath = absInput;
11831
+ markStageStart("convert", "PDF \uC785\uB825 \uD655\uC778 \uC911");
11832
+ logStage("info", "convert", "start", "PDF \uC785\uB825 \uD655\uC778", { input: absInput });
12028
11833
  timingsMs.convert = elapsedMs(convertStart);
12029
- markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
12030
- logStage("info", "convert", "done", "PDF \uBCC0\uD658 \uC644\uB8CC", { elapsedMs: timingsMs.convert });
11834
+ markStageDone("convert", "PDF \uC785\uB825 \uD655\uC778 \uC644\uB8CC");
11835
+ logStage("info", "convert", "done", "PDF \uC785\uB825 \uD655\uC778 \uC644\uB8CC", { elapsedMs: timingsMs.convert });
11836
+ const keyPool = ApiKeyRotationPool.fromEnv();
12031
11837
  const renderStart = performance.now();
12032
11838
  currentStage = "render";
12033
11839
  const totalPages = await getPdfPageCount(workingPdfPath).catch(() => 0);
12034
11840
  if (totalPages === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC218\uB97C \uD655\uC778\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
12035
11841
  markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
12036
11842
  logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi, totalPages });
12037
- await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, join5(imagesDir, "page")]);
11843
+ await runCommand("pdftoppm", ["-png", "-r", String(dpi), "-f", "1", "-l", "1", workingPdfPath, join4(imagesDir, "page")]);
12038
11844
  const firstFiles = (await readdir(imagesDir)).filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
12039
11845
  if (firstFiles.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uCCAB \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328");
12040
- const probeImage = join5(imagesDir, firstFiles[0]);
11846
+ const probeImage = join4(imagesDir, firstFiles[0]);
12041
11847
  markStageProgress("render", Math.round(1 / totalPages * 100), 1, totalPages, `\uD398\uC774\uC9C0 1/${totalPages} \uB80C\uB354\uB9C1`);
12042
11848
  const probeStart = performance.now();
12043
11849
  currentStage = "probe";
@@ -12073,7 +11879,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
12073
11879
  const keyCount = keyPool.snapshot().length;
12074
11880
  const workerCount = Math.max(1, keyCount * concurrencyPerKey);
12075
11881
  const queueCapacity = workerCount * 2;
12076
- const queue2 = new BoundedQueue(queueCapacity);
11882
+ const queue = new BoundedQueue(queueCapacity);
12077
11883
  const ocrStart = performance.now();
12078
11884
  currentStage = "ocr";
12079
11885
  markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (\uC6CC\uCEE4 ${workerCount}\uAC1C)`);
@@ -12081,17 +11887,17 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
12081
11887
  let renderDone = 1;
12082
11888
  const renderProducer = (async () => {
12083
11889
  try {
12084
- await queue2.enqueue({ pageNumber: 1, imagePath: probeImage });
11890
+ await queue.enqueue({ pageNumber: 1, imagePath: probeImage });
12085
11891
  if (totalPages > 1) {
12086
- for await (const item of renderPdfToPngStream(workingPdfPath, join5(imagesDir, "page"), dpi, totalPages, 2)) {
12087
- await queue2.enqueue(item);
11892
+ for await (const item of renderPdfToPngStream(workingPdfPath, join4(imagesDir, "page"), dpi, totalPages, 2)) {
11893
+ await queue.enqueue(item);
12088
11894
  renderDone++;
12089
11895
  markStageProgress("render", Math.round(renderDone / totalPages * 100), renderDone, totalPages, `\uD398\uC774\uC9C0 ${renderDone}/${totalPages} \uB80C\uB354\uB9C1`);
12090
11896
  logStage("debug", "render", "progress", "\uD398\uC774\uC9C0 \uB80C\uB354 \uC644\uB8CC", { page: item.pageNumber });
12091
11897
  }
12092
11898
  }
12093
11899
  } finally {
12094
- queue2.close();
11900
+ queue.close();
12095
11901
  timingsMs.render = elapsedMs(renderStart);
12096
11902
  markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
12097
11903
  logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", { pages: renderDone, elapsedMs: timingsMs.render });
@@ -12100,7 +11906,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
12100
11906
  const [, pageResultsMap] = await Promise.all([
12101
11907
  renderProducer,
12102
11908
  ocrWorkerPool({
12103
- queue: queue2,
11909
+ queue,
12104
11910
  workerCount,
12105
11911
  totalPages,
12106
11912
  ocrInput: {
@@ -12133,8 +11939,8 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
12133
11939
  const sortedEntries = Array.from(pageResultsMap.entries()).sort((a, b) => a[0] - b[0]);
12134
11940
  const rawPagePaths = [];
12135
11941
  for (const [pageNum, markdown] of sortedEntries) {
12136
- const pagePath = join5(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
12137
- await writeFile2(pagePath, markdown, "utf-8");
11942
+ const pagePath = join4(rawDir, `page_${String(pageNum).padStart(4, "0")}.md`);
11943
+ await writeFile(pagePath, markdown, "utf-8");
12138
11944
  rawPagePaths.push(pagePath);
12139
11945
  }
12140
11946
  const mergeStart = performance.now();
@@ -12142,7 +11948,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
12142
11948
  markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
12143
11949
  logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: rawPagePaths.length });
12144
11950
  const merged = await mergeMarkdownPages(rawPagePaths);
12145
- await writeFile2(outputPath, merged, "utf-8");
11951
+ await writeFile(outputPath, merged, "utf-8");
12146
11952
  timingsMs.merge = elapsedMs(mergeStart);
12147
11953
  markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
12148
11954
  logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
@@ -12158,7 +11964,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
12158
11964
  timingsMs,
12159
11965
  modelCachePath
12160
11966
  };
12161
- await writeFile2(reportPath, JSON.stringify(report, null, 2), "utf-8");
11967
+ await writeFile(reportPath, JSON.stringify(report, null, 2), "utf-8");
12162
11968
  logStage("info", "finalize", "done", "run-report \uC800\uC7A5 \uC644\uB8CC", { reportPath });
12163
11969
  return { outputPath, reportPath, selectedModel };
12164
11970
  } catch (err) {
@@ -12249,7 +12055,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
12249
12055
  ]);
12250
12056
  const files = await readdir(imagesDir);
12251
12057
  const pageFiles = files.filter((f) => f.endsWith(".png")).sort((a, b) => naturalPageSort(a, b));
12252
- const imagePath = join5(imagesDir, pageFiles[pageFiles.length - 1]);
12058
+ const imagePath = join4(imagesDir, pageFiles[pageFiles.length - 1]);
12253
12059
  yield { pageNumber: page, imagePath };
12254
12060
  } catch (err) {
12255
12061
  yield {
@@ -12262,7 +12068,7 @@ async function* renderPdfToPngStream(pdfPath, prefixPath, dpi, totalPages, start
12262
12068
  }
12263
12069
  async function runCommand(cmd, args) {
12264
12070
  await new Promise((resolvePromise, reject) => {
12265
- const child = spawn3(cmd, args, { stdio: "pipe" });
12071
+ const child = spawn2(cmd, args, { stdio: "pipe" });
12266
12072
  let stderr = "";
12267
12073
  child.stderr.on("data", (d) => {
12268
12074
  stderr += String(d);
@@ -12276,7 +12082,7 @@ async function runCommand(cmd, args) {
12276
12082
  }
12277
12083
  async function runCommandWithStdout(cmd, args) {
12278
12084
  return await new Promise((resolvePromise, reject) => {
12279
- const child = spawn3(cmd, args, { stdio: "pipe" });
12085
+ const child = spawn2(cmd, args, { stdio: "pipe" });
12280
12086
  let stdout = "";
12281
12087
  let stderr = "";
12282
12088
  child.stdout.on("data", (d) => {
@@ -12292,6 +12098,32 @@ async function runCommandWithStdout(cmd, args) {
12292
12098
  });
12293
12099
  });
12294
12100
  }
12101
+ async function parseNativeDocument(buffer) {
12102
+ const arrayBuffer = toArrayBuffer(buffer);
12103
+ const format = detectFormat(arrayBuffer);
12104
+ let result;
12105
+ let fileType;
12106
+ if (format === "hwp") {
12107
+ result = parseHwp5Document(buffer);
12108
+ fileType = "hwp";
12109
+ } else if (format === "hwpx") {
12110
+ const { format: zipFormat, zip } = await detectZipFormat(arrayBuffer);
12111
+ if (zipFormat === "xlsx") {
12112
+ result = await parseXlsxDocument(arrayBuffer, void 0, zip ?? void 0);
12113
+ fileType = "xlsx";
12114
+ } else if (zipFormat === "docx") {
12115
+ result = await parseDocxDocument(arrayBuffer, void 0, zip ?? void 0);
12116
+ fileType = "docx";
12117
+ } else {
12118
+ result = await parseHwpxDocument(arrayBuffer, void 0, zip ?? void 0);
12119
+ fileType = "hwpx";
12120
+ }
12121
+ } else {
12122
+ throw new UnifiedOcrError("UNSUPPORTED_INPUT", "convert", `\uC790\uCCB4 \uD30C\uC11C\uB85C \uCC98\uB9AC\uD560 \uC218 \uC5C6\uB294 \uC785\uB825 \uD3EC\uB9F7: ${format}`);
12123
+ }
12124
+ const pageCount = result.metadata?.pageCount ?? Math.max(1, ...result.blocks.map((block) => block.pageNumber ?? 1));
12125
+ return { markdown: result.markdown, fileType, pageCount };
12126
+ }
12295
12127
  function naturalPageSort(a, b) {
12296
12128
  const na = Number((a.match(/\d+/g) || ["0"]).at(-1) || 0);
12297
12129
  const nb = Number((b.match(/\d+/g) || ["0"]).at(-1) || 0);
@@ -12365,7 +12197,7 @@ function startParallelProbeRuns(input) {
12365
12197
  }
12366
12198
  async function loadModelCache(path) {
12367
12199
  try {
12368
- const raw = await readFile2(path, "utf-8");
12200
+ const raw = await readFile(path, "utf-8");
12369
12201
  return JSON.parse(raw);
12370
12202
  } catch {
12371
12203
  return null;
@@ -12396,15 +12228,15 @@ async function updateModelCache(path, probes) {
12396
12228
  }
12397
12229
  }
12398
12230
  current.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
12399
- await writeFile2(path, JSON.stringify(current, null, 2), "utf-8");
12231
+ await writeFile(path, JSON.stringify(current, null, 2), "utf-8");
12400
12232
  }
12401
12233
  async function ocrWorkerPool(input) {
12402
- const { queue: queue2, workerCount, ocrInput, onPageDone } = input;
12234
+ const { queue, workerCount, ocrInput, onPageDone } = input;
12403
12235
  const results = /* @__PURE__ */ new Map();
12404
12236
  let completedCount = 0;
12405
12237
  async function worker() {
12406
12238
  while (true) {
12407
- const item = await queue2.dequeue();
12239
+ const item = await queue.dequeue();
12408
12240
  if (item === QUEUE_DONE) break;
12409
12241
  const { pageNumber, imagePath, error } = item;
12410
12242
  if (imagePath === null) {
@@ -12456,7 +12288,7 @@ async function ocrImageWithFallback(input) {
12456
12288
  async function mergeMarkdownPages(paths) {
12457
12289
  const out = [];
12458
12290
  for (let i = 0; i < paths.length; i++) {
12459
- const txt = (await readFile2(paths[i], "utf-8")).trim();
12291
+ const txt = (await readFile(paths[i], "utf-8")).trim();
12460
12292
  if (!txt) continue;
12461
12293
  out.push(txt);
12462
12294
  }
@@ -12572,7 +12404,7 @@ async function ocrImageViaNim(input) {
12572
12404
  throw new UnifiedOcrError("OCR_FAILED", "ocr", `OCR \uC7AC\uC2DC\uB3C4 \uCD08\uACFC: ${lastErr}`);
12573
12405
  }
12574
12406
  async function encodeBase64(path) {
12575
- const b = await readFile2(path);
12407
+ const b = await readFile(path);
12576
12408
  return b.toString("base64");
12577
12409
  }
12578
12410
  function stripCodeFence3(text) {
@@ -12592,16 +12424,6 @@ function ensureSupportedInput(path) {
12592
12424
  }
12593
12425
  function normalizePipelineError(err, stage) {
12594
12426
  if (err instanceof UnifiedOcrError) return err;
12595
- if (err instanceof ConvertError) {
12596
- const codeMap = {
12597
- SOFFICE_NOT_FOUND: "SOFFICE_NOT_FOUND",
12598
- CONVERT_FAILED: "CONVERT_FAILED",
12599
- TIMEOUT: "CONVERT_FAILED",
12600
- UNSUPPORTED_PLATFORM: "CONVERT_FAILED",
12601
- UNSUPPORTED_FORMAT: "UNSUPPORTED_INPUT"
12602
- };
12603
- return new UnifiedOcrError(codeMap[err.code] ?? "CONVERT_FAILED", stage, err.message);
12604
- }
12605
12427
  const message = err instanceof Error ? err.message : String(err);
12606
12428
  const codeByStage = {
12607
12429
  convert: "CONVERT_FAILED",
@@ -12621,7 +12443,7 @@ async function parse2(input, options) {
12621
12443
  let buffer;
12622
12444
  if (typeof input === "string") {
12623
12445
  try {
12624
- const buf = await readFile3(input);
12446
+ const buf = await readFile2(input);
12625
12447
  buffer = toArrayBuffer(buf);
12626
12448
  } catch (err) {
12627
12449
  const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
@@ -12780,9 +12602,6 @@ export {
12780
12602
  VERSION,
12781
12603
  blocksToMarkdown,
12782
12604
  compare,
12783
- convertHwpToPdf,
12784
- convertHwpxToPdf,
12785
- convertToPdf,
12786
12605
  detectFormat,
12787
12606
  detectZipFormat,
12788
12607
  diffBlocks,