kordoc 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -138,7 +138,7 @@ import { inflateRawSync } from "zlib";
138
138
  import { DOMParser } from "@xmldom/xmldom";
139
139
 
140
140
  // src/utils.ts
141
- var VERSION = true ? "2.0.0" : "0.0.0-dev";
141
+ var VERSION = true ? "2.0.2" : "0.0.0-dev";
142
142
  function toArrayBuffer(buf) {
143
143
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
144
144
  return buf.buffer;
@@ -388,8 +388,11 @@ function blocksToMarkdown(blocks) {
388
388
  if (lines.length > 0 && lines[lines.length - 1] !== "") {
389
389
  lines.push("");
390
390
  }
391
- lines.push(tableToMarkdown(block.table));
392
- lines.push("");
391
+ const tableMd = tableToMarkdown(block.table);
392
+ if (tableMd) {
393
+ lines.push(tableMd);
394
+ lines.push("");
395
+ }
393
396
  }
394
397
  }
395
398
  return lines.join("\n").trim();
@@ -399,6 +402,7 @@ function tableToMarkdown(table) {
399
402
  const { cells, rows: numRows, cols: numCols } = table;
400
403
  if (numRows === 1 && numCols === 1) {
401
404
  const content = sanitizeText(cells[0][0].text);
405
+ if (!content) return "";
402
406
  return content.split(/\n/).map((line) => {
403
407
  const trimmed = line.trim();
404
408
  if (!trimmed) return "";
@@ -435,9 +439,9 @@ function tableToMarkdown(table) {
435
439
  const row = display[r];
436
440
  const isEmptyPlaceholder = row.every((cell) => cell === "");
437
441
  if (isEmptyPlaceholder) continue;
438
- const hasSkippedCols = row.some((cell, c) => cell === "" && skip.has(`${r},${c}`));
439
442
  const nonEmptyCols = row.filter((cell) => cell !== "");
440
- if (!hasSkippedCols && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
443
+ const hasSkipInRow = row.some((_, c) => skip.has(`${r},${c}`));
444
+ if (!hasSkipInRow && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
441
445
  pendingFirstCol = row[0];
442
446
  continue;
443
447
  }
@@ -852,7 +856,8 @@ function detectHwpxHeadings(blocks, styleMap) {
852
856
  else if (ratio >= HEADING_RATIO_H2) level = 2;
853
857
  else if (ratio >= HEADING_RATIO_H3) level = 3;
854
858
  }
855
- if (/^제\d+[조장절편]/.test(text) && text.length <= 50) {
859
+ const compactText = text.replace(/\s+/g, "");
860
+ if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
856
861
  if (level === 0) level = 3;
857
862
  }
858
863
  if (level > 0) {
@@ -904,9 +909,14 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
904
909
  if (newTable.rows.length > 0) {
905
910
  if (tableStack.length > 0) {
906
911
  const parentTable = tableStack.pop();
907
- const nestedText = convertTableToText(newTable.rows);
908
- if (parentTable.cell) {
909
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
912
+ const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
913
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
914
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
915
+ } else {
916
+ const nestedText = convertTableToText(newTable.rows);
917
+ if (parentTable.cell) {
918
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
919
+ }
910
920
  }
911
921
  tableCtx = parentTable;
912
922
  } else {
@@ -1006,9 +1016,14 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1006
1016
  if (newTable.rows.length > 0) {
1007
1017
  if (tableStack.length > 0) {
1008
1018
  const parentTable = tableStack.pop();
1009
- const nestedText = convertTableToText(newTable.rows);
1010
- if (parentTable.cell) {
1011
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
1019
+ const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
1020
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
1021
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
1022
+ } else {
1023
+ const nestedText = convertTableToText(newTable.rows);
1024
+ if (parentTable.cell) {
1025
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
1026
+ }
1012
1027
  }
1013
1028
  tableCtx = parentTable;
1014
1029
  } else {
@@ -1019,13 +1034,20 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1019
1034
  tableCtx = tableStack.length > 0 ? tableStack.pop() : null;
1020
1035
  }
1021
1036
  } else if (localTag === "pic" || localTag === "shape" || localTag === "drawingObject") {
1022
- const imgRef = extractImageRef(el);
1023
- if (imgRef) {
1024
- blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
1025
- } else if (warnings && sectionNum) {
1026
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
1037
+ const drawTextChild = findDescendant(el, "drawText");
1038
+ if (drawTextChild) {
1039
+ extractDrawTextBlocks(drawTextChild, blocks, styleMap, sectionNum);
1040
+ } else {
1041
+ const imgRef = extractImageRef(el);
1042
+ if (imgRef) {
1043
+ blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
1044
+ } else if (warnings && sectionNum) {
1045
+ warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
1046
+ }
1027
1047
  }
1028
- } else if (localTag === "r" || localTag === "run" || localTag === "ctrl") {
1048
+ } else if (localTag === "drawText") {
1049
+ extractDrawTextBlocks(el, blocks, styleMap, sectionNum);
1050
+ } else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
1029
1051
  walkChildren(el, d + 1);
1030
1052
  }
1031
1053
  }
@@ -1033,6 +1055,40 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1033
1055
  walkChildren(node, depth);
1034
1056
  return tableCtx;
1035
1057
  }
1058
+ function findDescendant(node, targetTag, depth = 0) {
1059
+ if (depth > 5) return null;
1060
+ const children = node.childNodes;
1061
+ if (!children) return null;
1062
+ for (let i = 0; i < children.length; i++) {
1063
+ const child = children[i];
1064
+ if (child.nodeType !== 1) continue;
1065
+ const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1066
+ if (tag === targetTag) return child;
1067
+ const found = findDescendant(child, targetTag, depth + 1);
1068
+ if (found) return found;
1069
+ }
1070
+ return null;
1071
+ }
1072
+ function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
1073
+ const children = drawTextNode.childNodes;
1074
+ if (!children) return;
1075
+ for (let i = 0; i < children.length; i++) {
1076
+ const child = children[i];
1077
+ if (child.nodeType !== 1) continue;
1078
+ const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1079
+ if (tag === "subList" || tag === "p" || tag === "para") {
1080
+ if (tag === "subList") {
1081
+ extractDrawTextBlocks(child, blocks, styleMap, sectionNum);
1082
+ } else {
1083
+ const info = extractParagraphInfo(child, styleMap);
1084
+ const text = info.text.trim();
1085
+ if (text) {
1086
+ blocks.push({ type: "paragraph", text, style: info.style ?? void 0, pageNumber: sectionNum });
1087
+ }
1088
+ }
1089
+ }
1090
+ }
1091
+ }
1036
1092
  function extractParagraphInfo(para, styleMap) {
1037
1093
  let text = "";
1038
1094
  let href;
@@ -1051,11 +1107,18 @@ function extractParagraphInfo(para, styleMap) {
1051
1107
  const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1052
1108
  switch (tag) {
1053
1109
  case "t":
1054
- text += child.textContent || "";
1110
+ walk(child);
1055
1111
  break;
1056
- case "tab":
1057
- text += " ";
1112
+ // 자식 순회 (tab 등 하위 요소 처리)
1113
+ case "tab": {
1114
+ const leader = child.getAttribute("leader");
1115
+ if (leader && leader !== "0") {
1116
+ text += "";
1117
+ } else {
1118
+ text += " ";
1119
+ }
1058
1120
  break;
1121
+ }
1059
1122
  case "br":
1060
1123
  if ((child.getAttribute("type") || "line") === "line") text += "\n";
1061
1124
  break;
@@ -1122,6 +1185,8 @@ function extractParagraphInfo(para, styleMap) {
1122
1185
  }
1123
1186
  };
1124
1187
  walk(para);
1188
+ const leaderIdx = text.indexOf("");
1189
+ if (leaderIdx >= 0) text = text.substring(0, leaderIdx);
1125
1190
  let cleanText = text.replace(/[ \t]+/g, " ").trim();
1126
1191
  if (/^그림입니다\.?\s*원본\s*그림의\s*(이름|크기)/.test(cleanText)) cleanText = "";
1127
1192
  cleanText = cleanText.replace(/그림입니다\.?\s*원본\s*그림의\s*(이름|크기)[^\n]*(\n[^\n]*원본\s*그림의\s*(이름|크기)[^\n]*)*/g, "").trim();
@@ -1950,18 +2015,20 @@ function decryptDistributePayload(payload) {
1950
2015
  if (payload.length < 256) throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
1951
2016
  const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
1952
2017
  const lcg = new MsvcLcg(seed);
1953
- const result = new Uint8Array(256);
1954
- result[0] = payload[0];
1955
- result[1] = payload[1];
1956
- result[2] = payload[2];
1957
- result[3] = payload[3];
1958
- let i = 4;
2018
+ const result = new Uint8Array(payload.subarray(0, 256));
2019
+ let i = 0;
2020
+ let n = 0;
2021
+ let key = 0;
1959
2022
  while (i < 256) {
1960
- const keyByte = lcg.rand() & 255;
1961
- const n = (lcg.rand() & 15) + 1;
1962
- for (let j = 0; j < n && i < 256; j++, i++) {
1963
- result[i] = payload[i] ^ keyByte;
2023
+ if (n === 0) {
2024
+ key = lcg.rand() & 255;
2025
+ n = (lcg.rand() & 15) + 1;
2026
+ }
2027
+ if (i >= 4) {
2028
+ result[i] ^= key;
1964
2029
  }
2030
+ i++;
2031
+ n--;
1965
2032
  }
1966
2033
  return result;
1967
2034
  }
@@ -1985,7 +2052,7 @@ function parseRecordHeader(data, offset) {
1985
2052
  }
1986
2053
  return { tagId, size, headerSize };
1987
2054
  }
1988
- var TAG_DISTRIBUTE_DOC_DATA = 16 + 28;
2055
+ var TAG_DISTRIBUTE_DOC_DATA = 16 + 12;
1989
2056
  function decryptViewText(viewTextRaw, compressed) {
1990
2057
  const data = new Uint8Array(viewTextRaw);
1991
2058
  const rec = parseRecordHeader(data, 0);
@@ -2590,7 +2657,10 @@ function parseSection(records, docInfo, warnings, sectionNum) {
2590
2657
  if (binId >= 0) {
2591
2658
  blocks.push({ type: "image", text: String(binId), pageNumber: sectionNum });
2592
2659
  } else {
2593
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
2660
+ const boxText = extractTextBoxText(records, i);
2661
+ if (boxText) {
2662
+ blocks.push({ type: "paragraph", text: boxText, pageNumber: sectionNum });
2663
+ }
2594
2664
  }
2595
2665
  } else if (ctrlId === " elo" || ctrlId === "ole ") {
2596
2666
  warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
@@ -2629,6 +2699,19 @@ function extractNoteText(records, ctrlIdx) {
2629
2699
  }
2630
2700
  return texts.length > 0 ? texts.join(" ") : null;
2631
2701
  }
2702
+ function extractTextBoxText(records, ctrlIdx) {
2703
+ const ctrlLevel = records[ctrlIdx].level;
2704
+ const texts = [];
2705
+ for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 200; j++) {
2706
+ const r = records[j];
2707
+ if (r.level <= ctrlLevel) break;
2708
+ if (r.tagId === TAG_PARA_TEXT) {
2709
+ const t = extractText(r.data).trim();
2710
+ if (t) texts.push(t);
2711
+ }
2712
+ }
2713
+ return texts.length > 0 ? texts.join("\n") : null;
2714
+ }
2632
2715
  function extractHyperlinkUrl(data) {
2633
2716
  try {
2634
2717
  const httpSig = Buffer.from("http", "utf16le");