kordoc 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/utils.ts
4
- var VERSION = true ? "2.0.1" : "0.0.0-dev";
4
+ var VERSION = true ? "2.0.2" : "0.0.0-dev";
5
5
  function toArrayBuffer(buf) {
6
6
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
7
7
  return buf.buffer;
@@ -90,4 +90,4 @@ export {
90
90
  sanitizeHref,
91
91
  classifyError
92
92
  };
93
- //# sourceMappingURL=chunk-L4OFASDS.js.map
93
+ //# sourceMappingURL=chunk-EVWOJ4T5.js.map
@@ -6,7 +6,7 @@ import {
6
6
  precheckZipSize,
7
7
  sanitizeHref,
8
8
  toArrayBuffer
9
- } from "./chunk-L4OFASDS.js";
9
+ } from "./chunk-EVWOJ4T5.js";
10
10
  import {
11
11
  parsePageRange
12
12
  } from "./chunk-MOL7MDBG.js";
@@ -224,8 +224,11 @@ function blocksToMarkdown(blocks) {
224
224
  if (lines.length > 0 && lines[lines.length - 1] !== "") {
225
225
  lines.push("");
226
226
  }
227
- lines.push(tableToMarkdown(block.table));
228
- lines.push("");
227
+ const tableMd = tableToMarkdown(block.table);
228
+ if (tableMd) {
229
+ lines.push(tableMd);
230
+ lines.push("");
231
+ }
229
232
  }
230
233
  }
231
234
  return lines.join("\n").trim();
@@ -235,6 +238,7 @@ function tableToMarkdown(table) {
235
238
  const { cells, rows: numRows, cols: numCols } = table;
236
239
  if (numRows === 1 && numCols === 1) {
237
240
  const content = sanitizeText(cells[0][0].text);
241
+ if (!content) return "";
238
242
  return content.split(/\n/).map((line) => {
239
243
  const trimmed = line.trim();
240
244
  if (!trimmed) return "";
@@ -271,9 +275,9 @@ function tableToMarkdown(table) {
271
275
  const row = display[r];
272
276
  const isEmptyPlaceholder = row.every((cell) => cell === "");
273
277
  if (isEmptyPlaceholder) continue;
274
- const hasSkippedCols = row.some((cell, c) => cell === "" && skip.has(`${r},${c}`));
275
278
  const nonEmptyCols = row.filter((cell) => cell !== "");
276
- if (!hasSkippedCols && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
279
+ const hasSkipInRow = row.some((_, c) => skip.has(`${r},${c}`));
280
+ if (!hasSkipInRow && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
277
281
  pendingFirstCol = row[0];
278
282
  continue;
279
283
  }
@@ -705,7 +709,8 @@ function detectHwpxHeadings(blocks, styleMap) {
705
709
  else if (ratio >= HEADING_RATIO_H2) level = 2;
706
710
  else if (ratio >= HEADING_RATIO_H3) level = 3;
707
711
  }
708
- if (/^제\d+[조장절편]/.test(text) && text.length <= 50) {
712
+ const compactText = text.replace(/\s+/g, "");
713
+ if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
709
714
  if (level === 0) level = 3;
710
715
  }
711
716
  if (level > 0) {
@@ -757,9 +762,14 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
757
762
  if (newTable.rows.length > 0) {
758
763
  if (tableStack.length > 0) {
759
764
  const parentTable = tableStack.pop();
760
- const nestedText = convertTableToText(newTable.rows);
761
- if (parentTable.cell) {
762
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
765
+ const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
766
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
767
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
768
+ } else {
769
+ const nestedText = convertTableToText(newTable.rows);
770
+ if (parentTable.cell) {
771
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
772
+ }
763
773
  }
764
774
  tableCtx = parentTable;
765
775
  } else {
@@ -859,9 +869,14 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
859
869
  if (newTable.rows.length > 0) {
860
870
  if (tableStack.length > 0) {
861
871
  const parentTable = tableStack.pop();
862
- const nestedText = convertTableToText(newTable.rows);
863
- if (parentTable.cell) {
864
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
872
+ const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
873
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
874
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
875
+ } else {
876
+ const nestedText = convertTableToText(newTable.rows);
877
+ if (parentTable.cell) {
878
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
879
+ }
865
880
  }
866
881
  tableCtx = parentTable;
867
882
  } else {
@@ -872,13 +887,20 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
872
887
  tableCtx = tableStack.length > 0 ? tableStack.pop() : null;
873
888
  }
874
889
  } else if (localTag === "pic" || localTag === "shape" || localTag === "drawingObject") {
875
- const imgRef = extractImageRef(el);
876
- if (imgRef) {
877
- blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
878
- } else if (warnings && sectionNum) {
879
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
890
+ const drawTextChild = findDescendant(el, "drawText");
891
+ if (drawTextChild) {
892
+ extractDrawTextBlocks(drawTextChild, blocks, styleMap, sectionNum);
893
+ } else {
894
+ const imgRef = extractImageRef(el);
895
+ if (imgRef) {
896
+ blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
897
+ } else if (warnings && sectionNum) {
898
+ warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
899
+ }
880
900
  }
881
- } else if (localTag === "r" || localTag === "run" || localTag === "ctrl") {
901
+ } else if (localTag === "drawText") {
902
+ extractDrawTextBlocks(el, blocks, styleMap, sectionNum);
903
+ } else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
882
904
  walkChildren(el, d + 1);
883
905
  }
884
906
  }
@@ -886,6 +908,40 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
886
908
  walkChildren(node, depth);
887
909
  return tableCtx;
888
910
  }
911
/**
 * Depth-first, document-order search for the first descendant element whose
 * tag name (with any `prefix:` namespace part removed) equals `targetTag`.
 *
 * @param {object} node - DOM-like node; only its `childNodes` list is read.
 * @param {string} targetTag - Prefix-less tag name to look for.
 * @param {number} [depth=0] - Current recursion depth; the search stops
 *   descending once this exceeds 5.
 * @returns {object|null} The first matching element, or `null` when none is
 *   found within the depth limit.
 */
function findDescendant(node, targetTag, depth = 0) {
  if (depth > 5) return null;
  const kids = node.childNodes;
  if (!kids) return null;
  // Array.from accepts array-like node lists that may not be iterable.
  for (const kid of Array.from(kids)) {
    // Only element nodes (nodeType 1) are considered.
    if (kid.nodeType !== 1) continue;
    const rawName = kid.tagName || kid.localName || "";
    const bareName = rawName.replace(/^[^:]+:/, "");
    if (bareName === targetTag) return kid;
    const hit = findDescendant(kid, targetTag, depth + 1);
    if (hit) return hit;
  }
  return null;
}
925
/**
 * Appends one paragraph block to `blocks` for every non-empty `p` / `para`
 * element found directly under `drawTextNode`, recursing through `subList`
 * wrapper elements. Other element tags and non-element nodes are ignored.
 *
 * @param {object} drawTextNode - DOM-like node whose `childNodes` are scanned.
 * @param {Array<object>} blocks - Output list; mutated in place.
 * @param {*} styleMap - Passed through unchanged to `extractParagraphInfo`.
 * @param {*} sectionNum - Stored as `pageNumber` on every pushed block.
 * @returns {void}
 */
function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
  const kids = drawTextNode.childNodes;
  if (!kids) return;
  for (const kid of Array.from(kids)) {
    if (kid.nodeType !== 1) continue;
    const bareName = (kid.tagName || kid.localName || "").replace(/^[^:]+:/, "");
    if (bareName === "subList") {
      // Descend into the wrapper and scan its children the same way.
      extractDrawTextBlocks(kid, blocks, styleMap, sectionNum);
      continue;
    }
    if (bareName !== "p" && bareName !== "para") continue;
    const paragraph = extractParagraphInfo(kid, styleMap);
    const text = paragraph.text.trim();
    if (!text) continue;
    blocks.push({ type: "paragraph", text, style: paragraph.style ?? void 0, pageNumber: sectionNum });
  }
}
889
945
  function extractParagraphInfo(para, styleMap) {
890
946
  let text = "";
891
947
  let href;
@@ -904,11 +960,18 @@ function extractParagraphInfo(para, styleMap) {
904
960
  const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
905
961
  switch (tag) {
906
962
  case "t":
907
- text += child.textContent || "";
963
+ walk(child);
908
964
  break;
909
- case "tab":
910
- text += " ";
965
+ // 자식 순회 (tab 등 하위 요소 처리)
966
+ case "tab": {
967
+ const leader = child.getAttribute("leader");
968
+ if (leader && leader !== "0") {
969
+ text += "";
970
+ } else {
971
+ text += " ";
972
+ }
911
973
  break;
974
+ }
912
975
  case "br":
913
976
  if ((child.getAttribute("type") || "line") === "line") text += "\n";
914
977
  break;
@@ -975,6 +1038,8 @@ function extractParagraphInfo(para, styleMap) {
975
1038
  }
976
1039
  };
977
1040
  walk(para);
1041
+ const leaderIdx = text.indexOf("");
1042
+ if (leaderIdx >= 0) text = text.substring(0, leaderIdx);
978
1043
  let cleanText = text.replace(/[ \t]+/g, " ").trim();
979
1044
  if (/^그림입니다\.?\s*원본\s*그림의\s*(이름|크기)/.test(cleanText)) cleanText = "";
980
1045
  cleanText = cleanText.replace(/그림입니다\.?\s*원본\s*그림의\s*(이름|크기)[^\n]*(\n[^\n]*원본\s*그림의\s*(이름|크기)[^\n]*)*/g, "").trim();
@@ -2458,7 +2523,10 @@ function parseSection(records, docInfo, warnings, sectionNum) {
2458
2523
  if (binId >= 0) {
2459
2524
  blocks.push({ type: "image", text: String(binId), pageNumber: sectionNum });
2460
2525
  } else {
2461
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
2526
+ const boxText = extractTextBoxText(records, i);
2527
+ if (boxText) {
2528
+ blocks.push({ type: "paragraph", text: boxText, pageNumber: sectionNum });
2529
+ }
2462
2530
  }
2463
2531
  } else if (ctrlId === " elo" || ctrlId === "ole ") {
2464
2532
  warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
@@ -2497,6 +2565,19 @@ function extractNoteText(records, ctrlIdx) {
2497
2565
  }
2498
2566
  return texts.length > 0 ? texts.join(" ") : null;
2499
2567
  }
2568
/**
 * Gathers the text of the records nested under the control record at
 * `ctrlIdx`.
 *
 * Scanning starts at the record after `ctrlIdx` and stops at the first record
 * whose `level` is not greater than the control's level, at the end of
 * `records`, or once index `ctrlIdx + 200` is reached, whichever comes first.
 * Only records whose `tagId` equals `TAG_PARA_TEXT` contribute; each one's
 * data is decoded with `extractText` and trimmed, and empty results are
 * dropped.
 *
 * @param {Array<object>} records - Record list; `level`, `tagId` and `data` are read.
 * @param {number} ctrlIdx - Index of the control record in `records`.
 * @returns {string|null} Collected texts joined with "\n", or `null` when
 *   nothing was collected.
 */
function extractTextBoxText(records, ctrlIdx) {
  const ownerLevel = records[ctrlIdx].level;
  // Look at no more than the 199 records that follow the control.
  const stop = Math.min(records.length, ctrlIdx + 200);
  const pieces = [];
  for (let pos = ctrlIdx + 1; pos < stop; pos++) {
    const rec = records[pos];
    if (rec.level <= ownerLevel) break;
    if (rec.tagId !== TAG_PARA_TEXT) continue;
    const piece = extractText(rec.data).trim();
    if (piece) pieces.push(piece);
  }
  return pieces.length === 0 ? null : pieces.join("\n");
}
2500
2581
  function extractHyperlinkUrl(data) {
2501
2582
  try {
2502
2583
  const httpSig = Buffer.from("http", "utf16le");
@@ -5365,4 +5446,4 @@ export {
5365
5446
  extractFormFields,
5366
5447
  parse
5367
5448
  };
5368
- //# sourceMappingURL=chunk-JJ65GKUH.js.map
5449
+ //# sourceMappingURL=chunk-XJYM2AUA.js.map