kordoc 2.2.5 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/README.md +16 -4
  2. package/dist/{chunk-UU2O6D3R.js → chunk-JFTFC2BB.js} +2 -2
  3. package/dist/{chunk-JH5XLWJQ.js.map → chunk-JFTFC2BB.js.map} +1 -1
  4. package/dist/{chunk-5Y2Q3BRW.js → chunk-M3E3C5GS.js} +8 -1
  5. package/dist/chunk-M3E3C5GS.js.map +1 -0
  6. package/dist/{chunk-RQWICKON.js → chunk-OEJJPCMM.js} +369 -73
  7. package/dist/chunk-OEJJPCMM.js.map +1 -0
  8. package/dist/{chunk-JH5XLWJQ.js → chunk-Z7UPTVMX.js} +2 -2
  9. package/dist/{chunk-UU2O6D3R.js.map → chunk-Z7UPTVMX.js.map} +1 -1
  10. package/dist/{chunk-OJ4QR33V.cjs → chunk-ZNJPRRIA.cjs} +2 -2
  11. package/dist/{chunk-OJ4QR33V.cjs.map → chunk-ZNJPRRIA.cjs.map} +1 -1
  12. package/dist/cli.js +7 -4
  13. package/dist/cli.js.map +1 -1
  14. package/dist/{detect-GYK3HKD5.js → detect-I7YIS4Q6.js} +4 -2
  15. package/dist/index.cjs +463 -160
  16. package/dist/index.cjs.map +1 -1
  17. package/dist/index.d.cts +4 -2
  18. package/dist/index.d.ts +4 -2
  19. package/dist/index.js +387 -84
  20. package/dist/index.js.map +1 -1
  21. package/dist/mcp.js +5 -5
  22. package/dist/{parser-OIRWPKIQ.js → parser-25LF2S2J.js} +45 -42
  23. package/dist/{parser-OIRWPKIQ.js.map → parser-25LF2S2J.js.map} +1 -1
  24. package/dist/{parser-PXD73E4H.js → parser-4LKJXBPP.js} +45 -42
  25. package/dist/{parser-PXD73E4H.js.map → parser-4LKJXBPP.js.map} +1 -1
  26. package/dist/{parser-CYBX5MP4.cjs → parser-KBQZB3QY.cjs} +61 -58
  27. package/dist/{parser-CYBX5MP4.cjs.map → parser-KBQZB3QY.cjs.map} +1 -1
  28. package/dist/{watch-NSBABJ4A.js → watch-GXRBLW3Y.js} +4 -4
  29. package/package.json +2 -2
  30. package/dist/chunk-5Y2Q3BRW.js.map +0 -1
  31. package/dist/chunk-RQWICKON.js.map +0 -1
  32. package/dist/{detect-GYK3HKD5.js.map → detect-I7YIS4Q6.js.map} +0 -0
  33. package/dist/{watch-NSBABJ4A.js.map → watch-GXRBLW3Y.js.map} +0 -0
package/dist/index.js CHANGED
@@ -16,7 +16,7 @@ import {
16
16
  sanitizeHref,
17
17
  stripDtd,
18
18
  toArrayBuffer
19
- } from "./chunk-UU2O6D3R.js";
19
+ } from "./chunk-JFTFC2BB.js";
20
20
  import {
21
21
  parsePageRange
22
22
  } from "./chunk-SBVRCJFH.js";
@@ -44,11 +44,17 @@ function isPdfFile(buffer) {
44
44
  const b = magicBytes(buffer);
45
45
  return b[0] === 37 && b[1] === 80 && b[2] === 68 && b[3] === 70;
46
46
  }
47
+ function isHwpmlFile(buffer) {
48
+ const bytes = new Uint8Array(buffer, 0, Math.min(512, buffer.byteLength));
49
+ const head = new TextDecoder("utf-8", { fatal: false }).decode(bytes).replace(/^\uFEFF/, "");
50
+ return head.trimStart().startsWith("<?xml") && head.includes("<HWPML");
51
+ }
47
52
  function detectFormat(buffer) {
48
53
  if (buffer.byteLength < 4) return "unknown";
49
54
  if (isZipFile(buffer)) return "hwpx";
50
55
  if (isOldHwpFile(buffer)) return "hwp";
51
56
  if (isPdfFile(buffer)) return "pdf";
57
+ if (isHwpmlFile(buffer)) return "hwpml";
52
58
  return "unknown";
53
59
  }
54
60
  async function detectZipFormat(buffer) {
@@ -184,6 +190,7 @@ async function parseHwpxDocument(buffer, options) {
184
190
  const pageFilter = options?.pages ? parsePageRange(options.pages, sectionPaths.length) : null;
185
191
  const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
186
192
  const blocks = [];
193
+ const nestedTableCounter = { count: 0 };
187
194
  let parsedSections = 0;
188
195
  for (let si = 0; si < sectionPaths.length; si++) {
189
196
  if (pageFilter && !pageFilter.has(si + 1)) continue;
@@ -193,7 +200,7 @@ async function parseHwpxDocument(buffer, options) {
193
200
  const xml = await file.async("text");
194
201
  decompressed.total += xml.length * 2;
195
202
  if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new KordocError("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
196
- blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1));
203
+ blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, nestedTableCounter));
197
204
  parsedSections++;
198
205
  options?.onProgress?.(parsedSections, totalTarget);
199
206
  } catch (secErr) {
@@ -254,8 +261,20 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
254
261
  ref
255
262
  // 절대 경로일 수도 있음
256
263
  ];
264
+ let resolvedPath = null;
265
+ if (!ref.includes(".")) {
266
+ const prefixes = [`BinData/${ref}`, `Contents/BinData/${ref}`];
267
+ for (const prefix of prefixes) {
268
+ const match = zip.file(new RegExp(`^${prefix.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\.[a-zA-Z0-9]+$`));
269
+ if (match.length > 0) {
270
+ resolvedPath = match[0].name;
271
+ break;
272
+ }
273
+ }
274
+ }
257
275
  let found = false;
258
- for (const path of candidates) {
276
+ const allCandidates = resolvedPath ? [resolvedPath, ...candidates] : candidates;
277
+ for (const path of allCandidates) {
259
278
  if (isPathTraversal(path)) continue;
260
279
  const file = zip.file(path);
261
280
  if (!file) continue;
@@ -263,7 +282,8 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
263
282
  const data = await file.async("uint8array");
264
283
  decompressed.total += data.length;
265
284
  if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new KordocError("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
266
- const ext = ref.includes(".") ? ref.split(".").pop() || "png" : "png";
285
+ const actualPath = path;
286
+ const ext = actualPath.includes(".") ? actualPath.split(".").pop() || "png" : "png";
267
287
  const mimeType = imageExtToMime(ext);
268
288
  imageIndex++;
269
289
  const filename = `image_${String(imageIndex).padStart(3, "0")}.${mimeToExt(mimeType)}`;
@@ -336,6 +356,7 @@ function extractFromBrokenZip(buffer) {
336
356
  let totalDecompressed = 0;
337
357
  let entryCount = 0;
338
358
  let sectionNum = 0;
359
+ const nestedTableCounter = { count: 0 };
339
360
  while (pos < data.length - 30) {
340
361
  if (data[pos] !== 80 || data[pos + 1] !== 75 || data[pos + 2] !== 3 || data[pos + 3] !== 4) {
341
362
  pos++;
@@ -382,7 +403,7 @@ function extractFromBrokenZip(buffer) {
382
403
  totalDecompressed += content.length * 2;
383
404
  if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new KordocError("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
384
405
  sectionNum++;
385
- blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum));
406
+ blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, nestedTableCounter));
386
407
  } catch {
387
408
  continue;
388
409
  }
@@ -467,12 +488,40 @@ function detectHwpxHeadings(blocks, styleMap) {
467
488
  }
468
489
  }
469
490
  }
470
- function parseSectionXml(xml, styleMap, warnings, sectionNum) {
491
+ function makeNestedTableMarker(counter, rows) {
492
+ counter.count++;
493
+ const firstRow = rows[0] ?? [];
494
+ const hint = firstRow.map((c) => c.text.trim().replace(/\n/g, " ")).filter(Boolean).join(" | ");
495
+ const hintChars = [...hint];
496
+ const truncated = hintChars.length > 60 ? hintChars.slice(0, 60).join("") + "\u2026" : hint;
497
+ return truncated ? `[\uC911\uCCA9 \uD14C\uC774\uBE14 #${counter.count}: ${truncated}]` : `[\uC911\uCCA9 \uD14C\uC774\uBE14 #${counter.count}]`;
498
+ }
499
+ function handleNestedTable(newTable, tableStack, blocks, ctx) {
500
+ const parentTable = tableStack.pop();
501
+ let nestedCols = 0;
502
+ for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
503
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
504
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: ctx.sectionNum });
505
+ if (parentTable.cell) {
506
+ const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
507
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker;
508
+ }
509
+ } else {
510
+ const nestedText = convertTableToText(newTable.rows);
511
+ if (parentTable.cell) {
512
+ const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
513
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker + "\n" + nestedText;
514
+ }
515
+ }
516
+ return parentTable;
517
+ }
518
+ function parseSectionXml(xml, styleMap, warnings, sectionNum, counter) {
471
519
  const parser = createXmlParser(warnings);
472
520
  const doc = parser.parseFromString(stripDtd(xml), "text/xml");
473
521
  if (!doc.documentElement) return [];
474
522
  const blocks = [];
475
- walkSection(doc.documentElement, blocks, null, [], styleMap, warnings, sectionNum);
523
+ const ctx = { styleMap, warnings, sectionNum, counter };
524
+ walkSection(doc.documentElement, blocks, null, [], ctx);
476
525
  return blocks;
477
526
  }
478
527
  function extractImageRef(el) {
@@ -493,7 +542,7 @@ function extractImageRef(el) {
493
542
  if (directRef) return directRef;
494
543
  return null;
495
544
  }
496
- function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth = 0) {
545
+ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
497
546
  if (depth > MAX_XML_DEPTH) return;
498
547
  const children = node.childNodes;
499
548
  if (!children) return;
@@ -506,23 +555,12 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
506
555
  case "tbl": {
507
556
  if (tableCtx) tableStack.push(tableCtx);
508
557
  const newTable = { rows: [], currentRow: [], cell: null };
509
- walkSection(el, blocks, newTable, tableStack, styleMap, warnings, sectionNum, depth + 1);
558
+ walkSection(el, blocks, newTable, tableStack, ctx, depth + 1);
510
559
  if (newTable.rows.length > 0) {
511
560
  if (tableStack.length > 0) {
512
- const parentTable = tableStack.pop();
513
- let nestedCols = 0;
514
- for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
515
- if (newTable.rows.length >= 3 && nestedCols >= 2) {
516
- blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
517
- } else {
518
- const nestedText = convertTableToText(newTable.rows);
519
- if (parentTable.cell) {
520
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
521
- }
522
- }
523
- tableCtx = parentTable;
561
+ tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
524
562
  } else {
525
- blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
563
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: ctx.sectionNum });
526
564
  tableCtx = null;
527
565
  }
528
566
  } else {
@@ -533,7 +571,7 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
533
571
  case "tr":
534
572
  if (tableCtx) {
535
573
  tableCtx.currentRow = [];
536
- walkSection(el, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth + 1);
574
+ walkSection(el, blocks, tableCtx, tableStack, ctx, depth + 1);
537
575
  if (tableCtx.currentRow.length > 0) tableCtx.rows.push(tableCtx.currentRow);
538
576
  tableCtx.currentRow = [];
539
577
  }
@@ -541,7 +579,7 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
541
579
  case "tc":
542
580
  if (tableCtx) {
543
581
  tableCtx.cell = { text: "", colSpan: 1, rowSpan: 1 };
544
- walkSection(el, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth + 1);
582
+ walkSection(el, blocks, tableCtx, tableStack, ctx, depth + 1);
545
583
  if (tableCtx.cell) {
546
584
  tableCtx.currentRow.push(tableCtx.cell);
547
585
  tableCtx.cell = null;
@@ -567,19 +605,19 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
567
605
  }
568
606
  break;
569
607
  case "p": {
570
- const { text, href, footnote, style } = extractParagraphInfo(el, styleMap);
608
+ const { text, href, footnote, style } = extractParagraphInfo(el, ctx.styleMap);
571
609
  if (text) {
572
610
  if (tableCtx?.cell) {
573
611
  tableCtx.cell.text += (tableCtx.cell.text ? "\n" : "") + text;
574
612
  } else if (!tableCtx) {
575
- const block = { type: "paragraph", text, pageNumber: sectionNum };
613
+ const block = { type: "paragraph", text, pageNumber: ctx.sectionNum };
576
614
  if (style) block.style = style;
577
615
  if (href) block.href = href;
578
616
  if (footnote) block.footnoteText = footnote;
579
617
  blocks.push(block);
580
618
  }
581
619
  }
582
- tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth + 1);
620
+ tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, ctx, depth + 1);
583
621
  break;
584
622
  }
585
623
  // 이미지/그림 — 경로 추출 또는 경고
@@ -588,19 +626,19 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
588
626
  case "drawingObject": {
589
627
  const imgRef = extractImageRef(el);
590
628
  if (imgRef) {
591
- blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
592
- } else if (warnings && sectionNum) {
593
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
629
+ blocks.push({ type: "image", text: imgRef, pageNumber: ctx.sectionNum });
630
+ } else if (ctx.warnings && ctx.sectionNum) {
631
+ ctx.warnings.push({ page: ctx.sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
594
632
  }
595
633
  break;
596
634
  }
597
635
  default:
598
- walkSection(el, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth + 1);
636
+ walkSection(el, blocks, tableCtx, tableStack, ctx, depth + 1);
599
637
  break;
600
638
  }
601
639
  }
602
640
  }
603
- function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth = 0) {
641
+ function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
604
642
  if (depth > MAX_XML_DEPTH) return tableCtx;
605
643
  const children = node.childNodes;
606
644
  if (!children) return tableCtx;
@@ -616,23 +654,12 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
616
654
  if (localTag === "tbl") {
617
655
  if (tableCtx) tableStack.push(tableCtx);
618
656
  const newTable = { rows: [], currentRow: [], cell: null };
619
- walkSection(el, blocks, newTable, tableStack, styleMap, warnings, sectionNum, d + 1);
657
+ walkSection(el, blocks, newTable, tableStack, ctx, d + 1);
620
658
  if (newTable.rows.length > 0) {
621
659
  if (tableStack.length > 0) {
622
- const parentTable = tableStack.pop();
623
- let nestedCols = 0;
624
- for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
625
- if (newTable.rows.length >= 3 && nestedCols >= 2) {
626
- blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
627
- } else {
628
- const nestedText = convertTableToText(newTable.rows);
629
- if (parentTable.cell) {
630
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
631
- }
632
- }
633
- tableCtx = parentTable;
660
+ tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
634
661
  } else {
635
- blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
662
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: ctx.sectionNum });
636
663
  tableCtx = null;
637
664
  }
638
665
  } else {
@@ -641,21 +668,21 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
641
668
  } else if (localTag === "pic" || localTag === "shape" || localTag === "drawingObject") {
642
669
  const drawTextChild = findDescendant(el, "drawText");
643
670
  if (drawTextChild) {
644
- extractDrawTextBlocks(drawTextChild, blocks, styleMap, sectionNum);
671
+ extractDrawTextBlocks(drawTextChild, blocks, ctx.styleMap, ctx.sectionNum);
645
672
  } else {
646
673
  const imgRef = extractImageRef(el);
647
674
  if (imgRef) {
648
- blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
649
- } else if (warnings && sectionNum) {
650
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
675
+ blocks.push({ type: "image", text: imgRef, pageNumber: ctx.sectionNum });
676
+ } else if (ctx.warnings && ctx.sectionNum) {
677
+ ctx.warnings.push({ page: ctx.sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
651
678
  }
652
679
  }
653
680
  } else if (localTag === "drawText") {
654
- extractDrawTextBlocks(el, blocks, styleMap, sectionNum);
681
+ extractDrawTextBlocks(el, blocks, ctx.styleMap, ctx.sectionNum);
655
682
  } else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
656
683
  walkChildren(el, d + 1);
657
684
  } else if (localTag === "run") {
658
- tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth + 1);
685
+ tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, ctx, depth + 1);
659
686
  }
660
687
  }
661
688
  };
@@ -1928,6 +1955,7 @@ function parseHwp5Document(buffer, options) {
1928
1955
  const pageFilter = options?.pages ? parsePageRange(options.pages, sections.length) : null;
1929
1956
  const totalTarget = pageFilter ? pageFilter.size : sections.length;
1930
1957
  const blocks = [];
1958
+ const nestedTableCounter = { count: 0 };
1931
1959
  let totalDecompressed = 0;
1932
1960
  let parsedSections = 0;
1933
1961
  for (let si = 0; si < sections.length; si++) {
@@ -1938,7 +1966,7 @@ function parseHwp5Document(buffer, options) {
1938
1966
  totalDecompressed += data.length;
1939
1967
  if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new KordocError("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
1940
1968
  const records = readRecords(data);
1941
- const sectionBlocks = parseSection(records, docInfo, warnings, si + 1);
1969
+ const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, nestedTableCounter);
1942
1970
  blocks.push(...sectionBlocks);
1943
1971
  parsedSections++;
1944
1972
  options?.onProgress?.(parsedSections, totalTarget);
@@ -2258,13 +2286,13 @@ function extractHwp5ImagesLenient(lcfb, blocks, compressed, warnings) {
2258
2286
  }
2259
2287
  return images;
2260
2288
  }
2261
- function parseSection(records, docInfo, warnings, sectionNum) {
2289
+ function parseSection(records, docInfo, warnings, sectionNum, counter) {
2262
2290
  const blocks = [];
2263
2291
  let i = 0;
2264
2292
  while (i < records.length) {
2265
2293
  const rec = records[i];
2266
2294
  if (rec.tagId === TAG_PARA_HEADER && rec.level === 0) {
2267
- const { paragraph, tables, nextIdx, charShapeIds, paraShapeId } = parseParagraphWithTables(records, i);
2295
+ const { paragraph, tables, nextIdx, charShapeIds, paraShapeId } = parseParagraphWithTables(records, i, counter);
2268
2296
  if (paragraph) {
2269
2297
  const block = { type: "paragraph", text: paragraph, pageNumber: sectionNum };
2270
2298
  if (docInfo && charShapeIds.length > 0) {
@@ -2287,7 +2315,7 @@ function parseSection(records, docInfo, warnings, sectionNum) {
2287
2315
  if (rec.tagId === TAG_CTRL_HEADER && rec.level <= 1 && rec.data.length >= 4) {
2288
2316
  const ctrlId = rec.data.subarray(0, 4).toString("ascii");
2289
2317
  if (ctrlId === " lbt" || ctrlId === "tbl ") {
2290
- const { table, nextIdx } = parseTableBlock(records, i);
2318
+ const { table, nextIdx } = parseTableBlock(records, i, counter);
2291
2319
  if (table) blocks.push({ type: "table", table, pageNumber: sectionNum });
2292
2320
  i = nextIdx;
2293
2321
  continue;
@@ -2392,7 +2420,7 @@ function resolveCharStyle(charShapeIds, docInfo) {
2392
2420
  if (cs.attrFlags & 2) style.bold = true;
2393
2421
  return style.fontSize || style.bold || style.italic ? style : void 0;
2394
2422
  }
2395
- function parseParagraphWithTables(records, startIdx) {
2423
+ function parseParagraphWithTables(records, startIdx, counter) {
2396
2424
  const startLevel = records[startIdx].level;
2397
2425
  let text = "";
2398
2426
  const tables = [];
@@ -2414,7 +2442,7 @@ function parseParagraphWithTables(records, startIdx) {
2414
2442
  if (rec.tagId === TAG_CTRL_HEADER && rec.data.length >= 4) {
2415
2443
  const ctrlId = rec.data.subarray(0, 4).toString("ascii");
2416
2444
  if (ctrlId === " lbt" || ctrlId === "tbl ") {
2417
- const { table, nextIdx } = parseTableBlock(records, i);
2445
+ const { table, nextIdx } = parseTableBlock(records, i, counter);
2418
2446
  if (table) tables.push(table);
2419
2447
  i = nextIdx;
2420
2448
  continue;
@@ -2425,7 +2453,7 @@ function parseParagraphWithTables(records, startIdx) {
2425
2453
  const trimmed = text.trim();
2426
2454
  return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds, paraShapeId };
2427
2455
  }
2428
- function parseTableBlock(records, startIdx) {
2456
+ function parseTableBlock(records, startIdx, counter) {
2429
2457
  const tableLevel = records[startIdx].level;
2430
2458
  let i = startIdx + 1;
2431
2459
  let rows = 0, cols = 0;
@@ -2439,7 +2467,7 @@ function parseTableBlock(records, startIdx) {
2439
2467
  cols = Math.min(rec.data.readUInt16LE(6), MAX_COLS);
2440
2468
  }
2441
2469
  if (rec.tagId === TAG_LIST_HEADER) {
2442
- const { cell, nextIdx } = parseCellBlock(records, i, tableLevel);
2470
+ const { cell, nextIdx } = parseCellBlock(records, i, tableLevel, counter);
2443
2471
  if (cell) cells.push(cell);
2444
2472
  i = nextIdx;
2445
2473
  continue;
@@ -2460,7 +2488,7 @@ function parseTableBlock(records, startIdx) {
2460
2488
  const cellRows = arrangeCells(rows, cols, cells);
2461
2489
  return { table: buildTable(cellRows), nextIdx: i };
2462
2490
  }
2463
- function parseCellBlock(records, startIdx, tableLevel) {
2491
+ function parseCellBlock(records, startIdx, tableLevel, counter) {
2464
2492
  const rec = records[startIdx];
2465
2493
  const cellLevel = rec.level;
2466
2494
  const texts = [];
@@ -2485,6 +2513,17 @@ function parseCellBlock(records, startIdx, tableLevel) {
2485
2513
  const t = extractText(r.data).trim();
2486
2514
  if (t) texts.push(t);
2487
2515
  }
2516
+ if (r.tagId === TAG_CTRL_HEADER && r.data.length >= 4) {
2517
+ const ctrlId = r.data.subarray(0, 4).toString("ascii");
2518
+ if (ctrlId === " lbt" || ctrlId === "tbl ") {
2519
+ if (counter) {
2520
+ counter.count++;
2521
+ texts.push(`[\uC911\uCCA9 \uD14C\uC774\uBE14 #${counter.count}]`);
2522
+ } else {
2523
+ texts.push("[\uC911\uCCA9 \uD14C\uC774\uBE14]");
2524
+ }
2525
+ }
2526
+ }
2488
2527
  i++;
2489
2528
  }
2490
2529
  return { cell: { text: texts.join("\n"), colSpan, rowSpan, colAddr, rowAddr }, nextIdx: i };
@@ -2811,21 +2850,21 @@ async function parseXlsxDocument(buffer, options) {
2811
2850
  import JSZip4 from "jszip";
2812
2851
  import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
2813
2852
  var MAX_DECOMPRESS_SIZE4 = 100 * 1024 * 1024;
2814
- function getChildElements(parent, localName2) {
2853
+ function getChildElements(parent, localName3) {
2815
2854
  const result = [];
2816
2855
  const children = parent.childNodes;
2817
2856
  for (let i = 0; i < children.length; i++) {
2818
2857
  const node = children[i];
2819
2858
  if (node.nodeType === 1) {
2820
2859
  const el = node;
2821
- if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
2860
+ if (el.localName === localName3 || el.tagName?.endsWith(`:${localName3}`)) {
2822
2861
  result.push(el);
2823
2862
  }
2824
2863
  }
2825
2864
  }
2826
2865
  return result;
2827
2866
  }
2828
- function findElements(parent, localName2) {
2867
+ function findElements(parent, localName3) {
2829
2868
  const result = [];
2830
2869
  const walk = (node) => {
2831
2870
  const children = node.childNodes;
@@ -2833,7 +2872,7 @@ function findElements(parent, localName2) {
2833
2872
  const child = children[i];
2834
2873
  if (child.nodeType === 1) {
2835
2874
  const el = child;
2836
- if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
2875
+ if (el.localName === localName3 || el.tagName?.endsWith(`:${localName3}`)) {
2837
2876
  result.push(el);
2838
2877
  }
2839
2878
  walk(el);
@@ -2843,11 +2882,11 @@ function findElements(parent, localName2) {
2843
2882
  walk(parent);
2844
2883
  return result;
2845
2884
  }
2846
- function getAttr(el, localName2) {
2885
+ function getAttr(el, localName3) {
2847
2886
  const attrs = el.attributes;
2848
2887
  for (let i = 0; i < attrs.length; i++) {
2849
2888
  const attr = attrs[i];
2850
- if (attr.localName === localName2 || attr.name === localName2) return attr.value;
2889
+ if (attr.localName === localName3 || attr.name === localName3) return attr.value;
2851
2890
  }
2852
2891
  return null;
2853
2892
  }
@@ -3194,11 +3233,11 @@ async function parseDocxDocument(buffer, options) {
3194
3233
  const node = children[i];
3195
3234
  if (node.nodeType !== 1) continue;
3196
3235
  const el = node;
3197
- const localName2 = el.localName ?? el.tagName?.split(":").pop();
3198
- if (localName2 === "p") {
3236
+ const localName3 = el.localName ?? el.tagName?.split(":").pop();
3237
+ if (localName3 === "p") {
3199
3238
  const block = parseParagraph(el, styles, numbering, footnotes, rels);
3200
3239
  if (block) blocks.push(block);
3201
- } else if (localName2 === "tbl") {
3240
+ } else if (localName3 === "tbl") {
3202
3241
  const block = parseTable(el, styles, numbering, footnotes, rels);
3203
3242
  if (block) blocks.push(block);
3204
3243
  }
@@ -3236,6 +3275,259 @@ async function parseDocxDocument(buffer, options) {
3236
3275
  };
3237
3276
  }
3238
3277
 
3278
+ // src/hwpml/parser.ts
3279
+ import { DOMParser as DOMParser4 } from "@xmldom/xmldom";
3280
+ var MAX_XML_DEPTH2 = 200;
3281
+ var MAX_TABLE_ROWS = 5e3;
3282
+ var MAX_TABLE_COLS = 500;
3283
+ var MAX_HWPML_BYTES = 50 * 1024 * 1024;
3284
+ function parseHwpmlDocument(buffer, options) {
3285
+ if (buffer.byteLength > MAX_HWPML_BYTES) {
3286
+ throw new Error(`HWPML \uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC (${(buffer.byteLength / 1024 / 1024).toFixed(1)}MB > 50MB)`);
3287
+ }
3288
+ const text = new TextDecoder("utf-8").decode(buffer).replace(/^\uFEFF/, "");
3289
+ const normalized = text.replace(/&nbsp;/g, "&#160;");
3290
+ const xml = stripDtd(normalized);
3291
+ const warnings = [];
3292
+ const parser = new DOMParser4({
3293
+ onError: (_level, msg) => {
3294
+ warnings.push({ message: `HWPML XML \uD30C\uC2F1 \uACBD\uACE0: ${msg}`, code: "MALFORMED_XML" });
3295
+ }
3296
+ });
3297
+ const doc = parser.parseFromString(xml, "text/xml");
3298
+ if (!doc.documentElement) {
3299
+ return { markdown: "", blocks: [], warnings };
3300
+ }
3301
+ const root = doc.documentElement;
3302
+ const metadata = {};
3303
+ const docSummary = findChild(root, "DOCSUMMARY");
3304
+ if (docSummary) {
3305
+ const title = findChild(docSummary, "TITLE");
3306
+ const author = findChild(docSummary, "AUTHOR");
3307
+ const date = findChild(docSummary, "DATE");
3308
+ if (title) metadata.title = textContent(title).trim();
3309
+ if (author) metadata.author = textContent(author).trim();
3310
+ if (date) metadata.createdAt = textContent(date).trim() || void 0;
3311
+ }
3312
+ const paraShapeMap = buildParaShapeMap(root);
3313
+ const body = findChild(root, "BODY");
3314
+ if (!body) {
3315
+ return { markdown: "", blocks: [], metadata, warnings };
3316
+ }
3317
+ const blocks = [];
3318
+ const pageFilter = options?.pages ? parsePageRange(options.pages, countSections(body)) : null;
3319
+ let sectionIdx = 0;
3320
+ const children = body.childNodes;
3321
+ for (let i = 0; i < children.length; i++) {
3322
+ const el = children[i];
3323
+ if (el.nodeType !== 1) continue;
3324
+ if (localName(el) !== "SECTION") continue;
3325
+ sectionIdx++;
3326
+ if (pageFilter && !pageFilter.has(sectionIdx)) continue;
3327
+ parseSection2(el, blocks, paraShapeMap, sectionIdx, warnings);
3328
+ }
3329
+ const outline = blocks.filter((b) => b.type === "heading" && b.text).map((b) => ({ level: b.level ?? 1, text: b.text, pageNumber: b.pageNumber }));
3330
+ const markdown = blocksToMarkdown(blocks);
3331
+ return {
3332
+ markdown,
3333
+ blocks,
3334
+ metadata: Object.keys(metadata).length > 0 ? metadata : void 0,
3335
+ outline: outline.length > 0 ? outline : void 0,
3336
+ warnings: warnings.length > 0 ? warnings : void 0
3337
+ };
3338
+ }
3339
+ function buildParaShapeMap(root) {
3340
+ const map = /* @__PURE__ */ new Map();
3341
+ const head = findChild(root, "HEAD");
3342
+ if (!head) return map;
3343
+ const mappingTable = findChild(head, "MAPPINGTABLE");
3344
+ if (!mappingTable) return map;
3345
+ const paraShapeList = findChild(mappingTable, "PARASHAPELIST");
3346
+ if (!paraShapeList) return map;
3347
+ const children = paraShapeList.childNodes;
3348
+ for (let i = 0; i < children.length; i++) {
3349
+ const el = children[i];
3350
+ if (el.nodeType !== 1 || localName(el) !== "PARASHAPE") continue;
3351
+ const id = el.getAttribute("Id") ?? "";
3352
+ const headingType = el.getAttribute("HeadingType") ?? "None";
3353
+ const level = parseInt(el.getAttribute("Level") ?? "0", 10);
3354
+ let headingLevel = null;
3355
+ if (headingType === "Outline") {
3356
+ const safeLevel = isNaN(level) ? 0 : Math.max(0, level);
3357
+ headingLevel = Math.min(safeLevel + 1, 6);
3358
+ }
3359
+ map.set(id, { headingLevel });
3360
+ }
3361
+ return map;
3362
+ }
3363
+ function parseSection2(section, blocks, paraShapeMap, sectionNum, warnings) {
3364
+ walkContent(section, blocks, paraShapeMap, sectionNum, warnings, false);
3365
+ }
3366
+ function walkContent(node, blocks, paraShapeMap, sectionNum, warnings, inHeaderFooter, depth = 0) {
3367
+ if (depth > MAX_XML_DEPTH2) return;
3368
+ const children = node.childNodes;
3369
+ for (let i = 0; i < children.length; i++) {
3370
+ const el = children[i];
3371
+ if (el.nodeType !== 1) continue;
3372
+ const tag = localName(el);
3373
+ if (tag === "HEADER" || tag === "FOOTER") {
3374
+ continue;
3375
+ }
3376
+ if (tag === "P") {
3377
+ if (!inHeaderFooter) {
3378
+ parseParagraph2(el, blocks, paraShapeMap, sectionNum);
3379
+ }
3380
+ continue;
3381
+ }
3382
+ if (tag === "TABLE") {
3383
+ if (!inHeaderFooter) {
3384
+ parseTable2(el, blocks, paraShapeMap, sectionNum, warnings);
3385
+ }
3386
+ continue;
3387
+ }
3388
+ if (tag === "PARALIST" || tag === "SECTION" || tag === "COLDEF") {
3389
+ walkContent(el, blocks, paraShapeMap, sectionNum, warnings, inHeaderFooter, depth + 1);
3390
+ continue;
3391
+ }
3392
+ walkContent(el, blocks, paraShapeMap, sectionNum, warnings, inHeaderFooter, depth + 1);
3393
+ }
3394
+ }
3395
+ function parseParagraph2(el, blocks, paraShapeMap, sectionNum) {
3396
+ const paraShapeId = el.getAttribute("ParaShape") ?? "";
3397
+ const shapeInfo = paraShapeMap.get(paraShapeId);
3398
+ const text = extractParagraphText(el);
3399
+ if (!text) return;
3400
+ if (shapeInfo?.headingLevel != null) {
3401
+ blocks.push({ type: "heading", text, level: shapeInfo.headingLevel, pageNumber: sectionNum });
3402
+ } else {
3403
+ blocks.push({ type: "paragraph", text, pageNumber: sectionNum });
3404
+ }
3405
+ }
3406
+ function extractParagraphText(p) {
3407
+ const parts = [];
3408
+ collectCharText(p, parts);
3409
+ return parts.join("").trim();
3410
+ }
3411
/**
 * Collects the text of all CHAR elements under `node`, in document order.
 *
 * Subtrees rooted at TABLE, PICTURE, SHAPEOBJECT or AUTONUM are not descended,
 * so any text inside them is excluded. Every other element is recursed into.
 *
 * @param {object} node - Element whose descendants are scanned.
 * @param {string[]} parts - Output list; non-empty CHAR texts are appended.
 * @param {number} [depth=0] - Current recursion depth, bounded by MAX_XML_DEPTH2.
 * @returns {void}
 */
function collectCharText(node, parts, depth = 0) {
  if (depth > MAX_XML_DEPTH2) return;
  const children = node.childNodes;
  for (let i = 0; i < children.length; i++) {
    const el = children[i];
    if (el.nodeType !== 1) continue;
    switch (localName(el)) {
      case "CHAR": {
        const t = textContent(el);
        if (t) parts.push(t);
        break;
      }
      case "TABLE":
      case "PICTURE":
      case "SHAPEOBJECT":
      case "AUTONUM":
        // Deliberately ignored: these subtrees contribute no paragraph text.
        break;
      default:
        collectCharText(el, parts, depth + 1);
    }
  }
}
3428
/**
 * Parses a TABLE element into a table block and appends it to `blocks`.
 *
 * Steps:
 *   1. Read `RowCount` / `ColCount`; bail out when they are missing,
 *      non-numeric or not positive.
 *   2. Tables larger than MAX_TABLE_ROWS x MAX_TABLE_COLS are skipped and a
 *      `TRUNCATED_TABLE` warning is recorded instead.
 *   3. Collect every CELL that is a direct child of a direct-child ROW.
 *   4. Place cells on a RowCount x ColCount grid by their `RowAddr`/`ColAddr`;
 *      positions covered by a span get an empty placeholder cell.
 *   5. Fill remaining holes with empty cells and hand the grid to `buildTable`.
 *
 * Cells whose address is non-numeric, negative, or outside the declared grid
 * are ignored. (Negative addresses previously slipped past the bounds check
 * and crashed on `grid[-1][c]`.)
 *
 * @param {object} el - TABLE element.
 * @param {Array<object>} blocks - Output list that receives the table block.
 * @param {Map<string, object>} paraShapeMap - Unused here; kept for signature parity with the other parsers.
 * @param {number} sectionNum - Stored as `pageNumber` on the block.
 * @param {Array<object>} warnings - Output list that receives warnings.
 * @returns {void}
 */
function parseTable2(el, blocks, paraShapeMap, sectionNum, warnings) {
  const rowCount = parseInt(el.getAttribute("RowCount") ?? "0", 10);
  const colCount = parseInt(el.getAttribute("ColCount") ?? "0", 10);
  // `<= 0` (not `=== 0`): a negative count would otherwise yield an empty grid
  // and an empty table block.
  if (Number.isNaN(rowCount) || Number.isNaN(colCount) || rowCount <= 0 || colCount <= 0) return;
  if (rowCount > MAX_TABLE_ROWS || colCount > MAX_TABLE_COLS) {
    warnings.push({ message: `\uD14C\uC774\uBE14 \uD06C\uAE30 \uCD08\uACFC (${rowCount}x${colCount}) \u2014 \uC2A4\uD0B5`, code: "TRUNCATED_TABLE" });
    return;
  }

  const cells = [];
  const children = el.childNodes;
  for (let i = 0; i < children.length; i++) {
    const rowEl = children[i];
    if (rowEl.nodeType !== 1 || localName(rowEl) !== "ROW") continue;
    const rowCells = rowEl.childNodes;
    for (let j = 0; j < rowCells.length; j++) {
      const cellEl = rowCells[j];
      if (cellEl.nodeType !== 1 || localName(cellEl) !== "CELL") continue;
      const colAddr = parseInt(cellEl.getAttribute("ColAddr") ?? "0", 10);
      const rowAddr = parseInt(cellEl.getAttribute("RowAddr") ?? "0", 10);
      // Spans are clamped to [1, MAX]; unparsable or zero spans fall back to 1.
      const colSpan = Math.min(Math.max(1, parseInt(cellEl.getAttribute("ColSpan") ?? "1", 10) || 1), MAX_TABLE_COLS);
      const rowSpan = Math.min(Math.max(1, parseInt(cellEl.getAttribute("RowSpan") ?? "1", 10) || 1), MAX_TABLE_ROWS);
      const cellText = extractCellText(cellEl);
      cells.push({ text: cellText, colSpan, rowSpan, colAddr, rowAddr });
    }
  }
  if (cells.length === 0) return;

  const grid = Array.from({ length: rowCount }, () => Array(colCount).fill(null));
  for (const cell of cells) {
    const r = cell.rowAddr;
    const c = cell.colAddr;
    if (Number.isNaN(r) || Number.isNaN(c)) continue;
    // Reject addresses on either side of the grid, not just past the end.
    if (r < 0 || c < 0 || r >= rowCount || c >= colCount) continue;
    grid[r][c] = cell;
    for (let dr = 0; dr < cell.rowSpan; dr++) {
      for (let dc = 0; dc < cell.colSpan; dc++) {
        if (dr === 0 && dc === 0) continue;
        if (r + dr < rowCount && c + dc < colCount) {
          grid[r + dr][c + dc] = { text: "", colSpan: 1, rowSpan: 1 };
        }
      }
    }
  }

  const cellRows = grid.map(
    (row) => row.map((cell) => cell ?? { text: "", colSpan: 1, rowSpan: 1 })
  );
  const table = buildTable(cellRows);
  blocks.push({ type: "table", table, pageNumber: sectionNum });
}
3475
/**
 * Returns the text of a table cell: the non-empty fragments gathered by
 * `collectCellText`, joined with newlines and trimmed.
 *
 * @param {object} cellEl - Cell element.
 * @returns {string} Cell text; empty string when nothing was collected.
 */
function extractCellText(cellEl) {
  const collected = [];
  collectCellText(cellEl, collected, 0);
  const nonEmpty = collected.filter((fragment) => Boolean(fragment));
  return nonEmpty.join("\n").trim();
}
3480
/**
 * Gathers the text fragments of a table cell, one per paragraph.
 *
 * For each child element: a P contributes its paragraph text (when
 * non-empty), a TABLE contributes a fixed nested-table placeholder string and
 * is not descended, and any other element is recursed into. Recursion stops
 * once `depth` exceeds 20.
 *
 * @param {object} node - Element whose children are scanned.
 * @param {string[]} parts - Output list that receives the fragments.
 * @param {number} depth - Current recursion depth.
 * @returns {void}
 */
function collectCellText(node, parts, depth) {
  const depthLimit = 20;
  if (depth > depthLimit) return;
  const kids = node.childNodes;
  for (let idx = 0; idx < kids.length; idx += 1) {
    const child = kids[idx];
    if (child.nodeType !== 1) continue;
    switch (localName(child)) {
      case "P": {
        const paragraphText = extractParagraphText(child);
        if (paragraphText) parts.push(paragraphText);
        break;
      }
      case "TABLE":
        parts.push("[\uC911\uCCA9 \uD14C\uC774\uBE14]");
        break;
      default:
        collectCellText(child, parts, depth + 1);
    }
  }
}
3497
/**
 * Returns an element's tag name with its namespace prefix removed.
 *
 * The name is taken from `tagName`, falling back to `localName`, then to an
 * empty string. Everything up to and including the first colon is dropped,
 * provided at least one character precedes that colon.
 *
 * @param {object} el - Element-like object.
 * @returns {string} Unprefixed tag name.
 */
function localName(el) {
  const qualified = el.tagName || el.localName || "";
  const colonAt = qualified.indexOf(":");
  // A leading colon (index 0) has no prefix in front of it and is left as is.
  if (colonAt > 0) {
    return qualified.slice(colonAt + 1);
  }
  return qualified;
}
3500
/**
 * Finds the first direct child element of `parent` whose unprefixed tag name
 * equals `tag`.
 *
 * @param {object} parent - Element whose direct children are searched.
 * @param {string} tag - Tag name to match, without namespace prefix.
 * @returns {object|null} The matching child element, or `null` if none exists.
 */
function findChild(parent, tag) {
  const isMatch = (candidate) => candidate.nodeType === 1 && localName(candidate) === tag;
  const match = Array.from(parent.childNodes).find(isMatch);
  return match ?? null;
}
3508
/**
 * Returns the concatenated text of every text node beneath `el`, in document
 * order.
 *
 * Implemented with an explicit stack rather than recursion so that deeply
 * nested input cannot overflow the call stack.
 *
 * @param {object} el - Element whose descendants are read.
 * @returns {string} Concatenated text; empty string when there is none.
 */
function textContent(el) {
  const parts = [];
  const pending = [];
  // Children are pushed in reverse so that popping yields document order.
  const pushChildren = (node) => {
    const children = node.childNodes;
    for (let i = children.length - 1; i >= 0; i--) {
      pending.push(children[i]);
    }
  };
  pushChildren(el);
  while (pending.length > 0) {
    const node = pending.pop();
    if (node.nodeType === 3) {
      parts.push(node.nodeValue || "");
    } else if (node.nodeType === 1) {
      pushChildren(node);
    }
    // Other node types (comments, processing instructions, ...) carry no text here.
  }
  return parts.join("");
}
3521
/**
 * Counts the direct child elements of `body` whose unprefixed tag name is
 * `SECTION`.
 *
 * @param {object} body - Element whose direct children are inspected.
 * @returns {number} Number of SECTION children.
 */
function countSections(body) {
  const isSection = (child) => child.nodeType === 1 && localName(child) === "SECTION";
  return Array.from(body.childNodes).filter(isSection).length;
}
3530
+
3239
3531
  // src/form/recognize.ts
3240
3532
  var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
3241
3533
  "\uC131\uBA85",
@@ -3570,7 +3862,7 @@ function fillInlineFields(text, values, filled, matchedLabels) {
3570
3862
 
3571
3863
  // src/form/filler-hwpx.ts
3572
3864
  import JSZip5 from "jszip";
3573
- import { DOMParser as DOMParser4, XMLSerializer } from "@xmldom/xmldom";
3865
+ import { DOMParser as DOMParser5, XMLSerializer } from "@xmldom/xmldom";
3574
3866
  async function fillHwpx(hwpxBuffer, values) {
3575
3867
  const zip = await JSZip5.loadAsync(hwpxBuffer);
3576
3868
  const filled = [];
@@ -3580,7 +3872,7 @@ async function fillHwpx(hwpxBuffer, values) {
3580
3872
  if (sectionFiles.length === 0) {
3581
3873
  throw new KordocError("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
3582
3874
  }
3583
- const xmlParser = new DOMParser4();
3875
+ const xmlParser = new DOMParser5();
3584
3876
  const xmlSerializer = new XMLSerializer();
3585
3877
  for (const sectionPath of sectionFiles) {
3586
3878
  const zipEntry = zip.file(sectionPath);
@@ -3612,10 +3904,10 @@ async function fillHwpx(hwpxBuffer, values) {
3612
3904
  const trEl = rows[rowIdx];
3613
3905
  const cells = findDirectChildren(trEl, "tc");
3614
3906
  for (let colIdx = 0; colIdx < cells.length - 1; colIdx++) {
3615
- const labelText = extractCellText(cells[colIdx]);
3907
+ const labelText = extractCellText2(cells[colIdx]);
3616
3908
  if (!isLabelCell(labelText)) continue;
3617
3909
  const valueCell = cells[colIdx + 1];
3618
- const valueText = extractCellText(valueCell);
3910
+ const valueText = extractCellText2(valueCell);
3619
3911
  if (isKeywordLabel(valueText)) continue;
3620
3912
  const normalizedCellLabel = normalizeLabel(labelText);
3621
3913
  if (!normalizedCellLabel) continue;
@@ -3640,14 +3932,14 @@ async function fillHwpx(hwpxBuffer, values) {
3640
3932
  if (rows.length >= 2) {
3641
3933
  const headerCells = findDirectChildren(rows[0], "tc");
3642
3934
  const allLabels = headerCells.every((cell) => {
3643
- const t = extractCellText(cell).trim();
3935
+ const t = extractCellText2(cell).trim();
3644
3936
  return t.length > 0 && t.length <= 20 && isLabelCell(t);
3645
3937
  });
3646
3938
  if (allLabels) {
3647
3939
  for (let rowIdx = 1; rowIdx < rows.length; rowIdx++) {
3648
3940
  const dataCells = findDirectChildren(rows[rowIdx], "tc");
3649
3941
  for (let colIdx = 0; colIdx < Math.min(headerCells.length, dataCells.length); colIdx++) {
3650
- const headerLabel = normalizeLabel(extractCellText(headerCells[colIdx]));
3942
+ const headerLabel = normalizeLabel(extractCellText2(headerCells[colIdx]));
3651
3943
  const matchKey = findMatchingKey(headerLabel, normalizedValues);
3652
3944
  if (matchKey === void 0) continue;
3653
3945
  if (matchedLabels.has(matchKey)) continue;
@@ -3655,7 +3947,7 @@ async function fillHwpx(hwpxBuffer, values) {
3655
3947
  replaceCellText(dataCells[colIdx], newValue);
3656
3948
  matchedLabels.add(matchKey);
3657
3949
  filled.push({
3658
- label: extractCellText(headerCells[colIdx]).trim(),
3950
+ label: extractCellText2(headerCells[colIdx]).trim(),
3659
3951
  value: newValue,
3660
3952
  row: rowIdx,
3661
3953
  col: colIdx
@@ -3697,7 +3989,7 @@ async function fillHwpx(hwpxBuffer, values) {
3697
3989
  const buffer = await zip.generateAsync({ type: "arraybuffer" });
3698
3990
  return { buffer, filled, unmatched };
3699
3991
  }
3700
- function localName(el) {
3992
+ function localName2(el) {
3701
3993
  return (el.tagName || el.localName || "").replace(/^[^:]+:/, "");
3702
3994
  }
3703
3995
  function findAllElements(node, tagLocalName) {
@@ -3708,7 +4000,7 @@ function findAllElements(node, tagLocalName) {
3708
4000
  for (let i = 0; i < children.length; i++) {
3709
4001
  const child = children[i];
3710
4002
  if (child.nodeType !== 1) continue;
3711
- if (localName(child) === tagLocalName) result.push(child);
4003
+ if (localName2(child) === tagLocalName) result.push(child);
3712
4004
  walk(child);
3713
4005
  }
3714
4006
  };
@@ -3721,7 +4013,7 @@ function findDirectChildren(parent, tagLocalName) {
3721
4013
  if (!children) return result;
3722
4014
  for (let i = 0; i < children.length; i++) {
3723
4015
  const child = children[i];
3724
- if (child.nodeType === 1 && localName(child) === tagLocalName) {
4016
+ if (child.nodeType === 1 && localName2(child) === tagLocalName) {
3725
4017
  result.push(child);
3726
4018
  }
3727
4019
  }
@@ -3730,12 +4022,12 @@ function findDirectChildren(parent, tagLocalName) {
3730
4022
  function isInsideTable(el) {
3731
4023
  let parent = el.parentNode;
3732
4024
  while (parent) {
3733
- if (parent.nodeType === 1 && localName(parent) === "tbl") return true;
4025
+ if (parent.nodeType === 1 && localName2(parent) === "tbl") return true;
3734
4026
  parent = parent.parentNode;
3735
4027
  }
3736
4028
  return false;
3737
4029
  }
3738
- function extractCellText(tcEl) {
4030
+ function extractCellText2(tcEl) {
3739
4031
  const parts = [];
3740
4032
  const walk = (node) => {
3741
4033
  const children = node.childNodes;
@@ -3745,7 +4037,7 @@ function extractCellText(tcEl) {
3745
4037
  if (child.nodeType === 3) {
3746
4038
  parts.push(child.textContent || "");
3747
4039
  } else if (child.nodeType === 1) {
3748
- const tag = localName(child);
4040
+ const tag = localName2(child);
3749
4041
  if (tag === "t") walk(child);
3750
4042
  else if (tag === "run" || tag === "r" || tag === "p" || tag === "subList") walk(child);
3751
4043
  else if (tag === "tab") parts.push(" ");
@@ -4470,6 +4762,8 @@ async function parse(input, options) {
4470
4762
  }
4471
4763
  case "hwp":
4472
4764
  return parseHwp(buffer, options);
4765
+ case "hwpml":
4766
+ return parseHwpml(buffer, options);
4473
4767
  case "pdf":
4474
4768
  return parsePdf(buffer, options);
4475
4769
  default:
@@ -4495,7 +4789,7 @@ async function parseHwp(buffer, options) {
4495
4789
  async function parsePdf(buffer, options) {
4496
4790
  let parsePdfDocument;
4497
4791
  try {
4498
- const mod = await import("./parser-OIRWPKIQ.js");
4792
+ const mod = await import("./parser-25LF2S2J.js");
4499
4793
  parsePdfDocument = mod.parsePdfDocument;
4500
4794
  } catch {
4501
4795
  return {
@@ -4529,6 +4823,14 @@ async function parseDocx(buffer, options) {
4529
4823
  return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: classifyError(err) };
4530
4824
  }
4531
4825
  }
4826
/**
 * Parses an HWPML document and wraps the outcome in a result object.
 *
 * On success the fields returned by `parseHwpmlDocument` are forwarded with
 * `success: true`. Anything thrown by `parseHwpmlDocument` is converted into
 * a `success: false` result carrying the error message (or a fixed fallback
 * message for non-Error throwables) and the code from `classifyError`.
 *
 * @param {*} buffer - Document data, passed through to `parseHwpmlDocument`.
 * @param {*} options - Parse options, passed through to `parseHwpmlDocument`.
 * @returns {Promise<object>} Success or failure result with `fileType: "hwpml"`.
 */
async function parseHwpml(buffer, options) {
  const fileType = "hwpml";
  try {
    const { markdown, blocks, metadata, outline, warnings } = parseHwpmlDocument(buffer, options);
    return { success: true, fileType, markdown, blocks, metadata, outline, warnings };
  } catch (err) {
    let error = "HWPML \uD30C\uC2F1 \uC2E4\uD328";
    if (err instanceof Error) {
      error = err.message;
    }
    return { success: false, fileType, error, code: classifyError(err) };
  }
}
4532
4834
  async function fillForm(input, values, outputFormat = "markdown") {
4533
4835
  let buffer;
4534
4836
  if (typeof input === "string") {
@@ -4588,6 +4890,7 @@ export {
4588
4890
  parse,
4589
4891
  parseDocx,
4590
4892
  parseHwp,
4893
+ parseHwpml,
4591
4894
  parseHwpx,
4592
4895
  parsePdf,
4593
4896
  parseXlsx