kordoc 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2,11 +2,11 @@
2
2
  import {
3
3
  detectFormat,
4
4
  parse
5
- } from "./chunk-UUHAAZYN.js";
5
+ } from "./chunk-XJYM2AUA.js";
6
6
  import {
7
7
  VERSION,
8
8
  toArrayBuffer
9
- } from "./chunk-UMO6QQO5.js";
9
+ } from "./chunk-EVWOJ4T5.js";
10
10
  import "./chunk-MOL7MDBG.js";
11
11
 
12
12
  // src/cli.ts
@@ -92,7 +92,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
92
92
  process.stdout.write(output + "\n");
93
93
  }
94
94
  } catch (err) {
95
- const { sanitizeError } = await import("./utils-K23YMTIM.js");
95
+ const { sanitizeError } = await import("./utils-6JEIFBCJ.js");
96
96
  process.stderr.write(`
97
97
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
98
98
  `);
@@ -101,7 +101,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
101
101
  }
102
102
  });
103
103
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
104
- const { watchDirectory } = await import("./watch-CGG7CCHJ.js");
104
+ const { watchDirectory } = await import("./watch-BCPDLGOE.js");
105
105
  await watchDirectory({
106
106
  dir,
107
107
  outDir: opts.outDir,
package/dist/index.cjs CHANGED
@@ -182,7 +182,7 @@ var import_zlib = require("zlib");
182
182
  var import_xmldom = require("@xmldom/xmldom");
183
183
 
184
184
  // src/utils.ts
185
- var VERSION = true ? "2.0.0" : "0.0.0-dev";
185
+ var VERSION = true ? "2.0.2" : "0.0.0-dev";
186
186
  function toArrayBuffer(buf) {
187
187
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
188
188
  return buf.buffer;
@@ -432,8 +432,11 @@ function blocksToMarkdown(blocks) {
432
432
  if (lines.length > 0 && lines[lines.length - 1] !== "") {
433
433
  lines.push("");
434
434
  }
435
- lines.push(tableToMarkdown(block.table));
436
- lines.push("");
435
+ const tableMd = tableToMarkdown(block.table);
436
+ if (tableMd) {
437
+ lines.push(tableMd);
438
+ lines.push("");
439
+ }
437
440
  }
438
441
  }
439
442
  return lines.join("\n").trim();
@@ -443,6 +446,7 @@ function tableToMarkdown(table) {
443
446
  const { cells, rows: numRows, cols: numCols } = table;
444
447
  if (numRows === 1 && numCols === 1) {
445
448
  const content = sanitizeText(cells[0][0].text);
449
+ if (!content) return "";
446
450
  return content.split(/\n/).map((line) => {
447
451
  const trimmed = line.trim();
448
452
  if (!trimmed) return "";
@@ -479,9 +483,9 @@ function tableToMarkdown(table) {
479
483
  const row = display[r];
480
484
  const isEmptyPlaceholder = row.every((cell) => cell === "");
481
485
  if (isEmptyPlaceholder) continue;
482
- const hasSkippedCols = row.some((cell, c) => cell === "" && skip.has(`${r},${c}`));
483
486
  const nonEmptyCols = row.filter((cell) => cell !== "");
484
- if (!hasSkippedCols && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
487
+ const hasSkipInRow = row.some((_, c) => skip.has(`${r},${c}`));
488
+ if (!hasSkipInRow && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
485
489
  pendingFirstCol = row[0];
486
490
  continue;
487
491
  }
@@ -896,7 +900,8 @@ function detectHwpxHeadings(blocks, styleMap) {
896
900
  else if (ratio >= HEADING_RATIO_H2) level = 2;
897
901
  else if (ratio >= HEADING_RATIO_H3) level = 3;
898
902
  }
899
- if (/^제\d+[조장절편]/.test(text) && text.length <= 50) {
903
+ const compactText = text.replace(/\s+/g, "");
904
+ if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
900
905
  if (level === 0) level = 3;
901
906
  }
902
907
  if (level > 0) {
@@ -948,9 +953,14 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
948
953
  if (newTable.rows.length > 0) {
949
954
  if (tableStack.length > 0) {
950
955
  const parentTable = tableStack.pop();
951
- const nestedText = convertTableToText(newTable.rows);
952
- if (parentTable.cell) {
953
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
956
+ const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
957
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
958
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
959
+ } else {
960
+ const nestedText = convertTableToText(newTable.rows);
961
+ if (parentTable.cell) {
962
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
963
+ }
954
964
  }
955
965
  tableCtx = parentTable;
956
966
  } else {
@@ -1050,9 +1060,14 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1050
1060
  if (newTable.rows.length > 0) {
1051
1061
  if (tableStack.length > 0) {
1052
1062
  const parentTable = tableStack.pop();
1053
- const nestedText = convertTableToText(newTable.rows);
1054
- if (parentTable.cell) {
1055
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
1063
+ const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
1064
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
1065
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
1066
+ } else {
1067
+ const nestedText = convertTableToText(newTable.rows);
1068
+ if (parentTable.cell) {
1069
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
1070
+ }
1056
1071
  }
1057
1072
  tableCtx = parentTable;
1058
1073
  } else {
@@ -1063,13 +1078,20 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1063
1078
  tableCtx = tableStack.length > 0 ? tableStack.pop() : null;
1064
1079
  }
1065
1080
  } else if (localTag === "pic" || localTag === "shape" || localTag === "drawingObject") {
1066
- const imgRef = extractImageRef(el);
1067
- if (imgRef) {
1068
- blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
1069
- } else if (warnings && sectionNum) {
1070
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
1081
+ const drawTextChild = findDescendant(el, "drawText");
1082
+ if (drawTextChild) {
1083
+ extractDrawTextBlocks(drawTextChild, blocks, styleMap, sectionNum);
1084
+ } else {
1085
+ const imgRef = extractImageRef(el);
1086
+ if (imgRef) {
1087
+ blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
1088
+ } else if (warnings && sectionNum) {
1089
+ warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
1090
+ }
1071
1091
  }
1072
- } else if (localTag === "r" || localTag === "run" || localTag === "ctrl") {
1092
+ } else if (localTag === "drawText") {
1093
+ extractDrawTextBlocks(el, blocks, styleMap, sectionNum);
1094
+ } else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
1073
1095
  walkChildren(el, d + 1);
1074
1096
  }
1075
1097
  }
@@ -1077,6 +1099,40 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1077
1099
  walkChildren(node, depth);
1078
1100
  return tableCtx;
1079
1101
  }
1102
+ function findDescendant(node, targetTag, depth = 0) {
1103
+ if (depth > 5) return null;
1104
+ const children = node.childNodes;
1105
+ if (!children) return null;
1106
+ for (let i = 0; i < children.length; i++) {
1107
+ const child = children[i];
1108
+ if (child.nodeType !== 1) continue;
1109
+ const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1110
+ if (tag === targetTag) return child;
1111
+ const found = findDescendant(child, targetTag, depth + 1);
1112
+ if (found) return found;
1113
+ }
1114
+ return null;
1115
+ }
1116
+ function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
1117
+ const children = drawTextNode.childNodes;
1118
+ if (!children) return;
1119
+ for (let i = 0; i < children.length; i++) {
1120
+ const child = children[i];
1121
+ if (child.nodeType !== 1) continue;
1122
+ const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1123
+ if (tag === "subList" || tag === "p" || tag === "para") {
1124
+ if (tag === "subList") {
1125
+ extractDrawTextBlocks(child, blocks, styleMap, sectionNum);
1126
+ } else {
1127
+ const info = extractParagraphInfo(child, styleMap);
1128
+ const text = info.text.trim();
1129
+ if (text) {
1130
+ blocks.push({ type: "paragraph", text, style: info.style ?? void 0, pageNumber: sectionNum });
1131
+ }
1132
+ }
1133
+ }
1134
+ }
1135
+ }
1080
1136
  function extractParagraphInfo(para, styleMap) {
1081
1137
  let text = "";
1082
1138
  let href;
@@ -1095,11 +1151,18 @@ function extractParagraphInfo(para, styleMap) {
1095
1151
  const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1096
1152
  switch (tag) {
1097
1153
  case "t":
1098
- text += child.textContent || "";
1154
+ walk(child);
1099
1155
  break;
1100
- case "tab":
1101
- text += " ";
1156
+ // 자식 순회 (tab 등 하위 요소 처리)
1157
+ case "tab": {
1158
+ const leader = child.getAttribute("leader");
1159
+ if (leader && leader !== "0") {
1160
+ text += "";
1161
+ } else {
1162
+ text += " ";
1163
+ }
1102
1164
  break;
1165
+ }
1103
1166
  case "br":
1104
1167
  if ((child.getAttribute("type") || "line") === "line") text += "\n";
1105
1168
  break;
@@ -1166,6 +1229,8 @@ function extractParagraphInfo(para, styleMap) {
1166
1229
  }
1167
1230
  };
1168
1231
  walk(para);
1232
+ const leaderIdx = text.indexOf("");
1233
+ if (leaderIdx >= 0) text = text.substring(0, leaderIdx);
1169
1234
  let cleanText = text.replace(/[ \t]+/g, " ").trim();
1170
1235
  if (/^그림입니다\.?\s*원본\s*그림의\s*(이름|크기)/.test(cleanText)) cleanText = "";
1171
1236
  cleanText = cleanText.replace(/그림입니다\.?\s*원본\s*그림의\s*(이름|크기)[^\n]*(\n[^\n]*원본\s*그림의\s*(이름|크기)[^\n]*)*/g, "").trim();
@@ -1994,18 +2059,20 @@ function decryptDistributePayload(payload) {
1994
2059
  if (payload.length < 256) throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
1995
2060
  const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
1996
2061
  const lcg = new MsvcLcg(seed);
1997
- const result = new Uint8Array(256);
1998
- result[0] = payload[0];
1999
- result[1] = payload[1];
2000
- result[2] = payload[2];
2001
- result[3] = payload[3];
2002
- let i = 4;
2062
+ const result = new Uint8Array(payload.subarray(0, 256));
2063
+ let i = 0;
2064
+ let n = 0;
2065
+ let key = 0;
2003
2066
  while (i < 256) {
2004
- const keyByte = lcg.rand() & 255;
2005
- const n = (lcg.rand() & 15) + 1;
2006
- for (let j = 0; j < n && i < 256; j++, i++) {
2007
- result[i] = payload[i] ^ keyByte;
2067
+ if (n === 0) {
2068
+ key = lcg.rand() & 255;
2069
+ n = (lcg.rand() & 15) + 1;
2070
+ }
2071
+ if (i >= 4) {
2072
+ result[i] ^= key;
2008
2073
  }
2074
+ i++;
2075
+ n--;
2009
2076
  }
2010
2077
  return result;
2011
2078
  }
@@ -2029,7 +2096,7 @@ function parseRecordHeader(data, offset) {
2029
2096
  }
2030
2097
  return { tagId, size, headerSize };
2031
2098
  }
2032
- var TAG_DISTRIBUTE_DOC_DATA = 16 + 28;
2099
+ var TAG_DISTRIBUTE_DOC_DATA = 16 + 12;
2033
2100
  function decryptViewText(viewTextRaw, compressed) {
2034
2101
  const data = new Uint8Array(viewTextRaw);
2035
2102
  const rec = parseRecordHeader(data, 0);
@@ -2635,7 +2702,10 @@ function parseSection(records, docInfo, warnings, sectionNum) {
2635
2702
  if (binId >= 0) {
2636
2703
  blocks.push({ type: "image", text: String(binId), pageNumber: sectionNum });
2637
2704
  } else {
2638
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
2705
+ const boxText = extractTextBoxText(records, i);
2706
+ if (boxText) {
2707
+ blocks.push({ type: "paragraph", text: boxText, pageNumber: sectionNum });
2708
+ }
2639
2709
  }
2640
2710
  } else if (ctrlId === " elo" || ctrlId === "ole ") {
2641
2711
  warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
@@ -2674,6 +2744,19 @@ function extractNoteText(records, ctrlIdx) {
2674
2744
  }
2675
2745
  return texts.length > 0 ? texts.join(" ") : null;
2676
2746
  }
2747
+ function extractTextBoxText(records, ctrlIdx) {
2748
+ const ctrlLevel = records[ctrlIdx].level;
2749
+ const texts = [];
2750
+ for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 200; j++) {
2751
+ const r = records[j];
2752
+ if (r.level <= ctrlLevel) break;
2753
+ if (r.tagId === TAG_PARA_TEXT) {
2754
+ const t = extractText(r.data).trim();
2755
+ if (t) texts.push(t);
2756
+ }
2757
+ }
2758
+ return texts.length > 0 ? texts.join("\n") : null;
2759
+ }
2677
2760
  function extractHyperlinkUrl(data) {
2678
2761
  try {
2679
2762
  const httpSig = Buffer.from("http", "utf16le");