kordoc 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2,11 +2,11 @@
2
2
  import {
3
3
  detectFormat,
4
4
  parse
5
- } from "./chunk-JJ65GKUH.js";
5
+ } from "./chunk-XJYM2AUA.js";
6
6
  import {
7
7
  VERSION,
8
8
  toArrayBuffer
9
- } from "./chunk-L4OFASDS.js";
9
+ } from "./chunk-EVWOJ4T5.js";
10
10
  import "./chunk-MOL7MDBG.js";
11
11
 
12
12
  // src/cli.ts
@@ -92,7 +92,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
92
92
  process.stdout.write(output + "\n");
93
93
  }
94
94
  } catch (err) {
95
- const { sanitizeError } = await import("./utils-4HVKHULU.js");
95
+ const { sanitizeError } = await import("./utils-6JEIFBCJ.js");
96
96
  process.stderr.write(`
97
97
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
98
98
  `);
@@ -101,7 +101,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
101
101
  }
102
102
  });
103
103
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
104
- const { watchDirectory } = await import("./watch-RNZ3KESY.js");
104
+ const { watchDirectory } = await import("./watch-BCPDLGOE.js");
105
105
  await watchDirectory({
106
106
  dir,
107
107
  outDir: opts.outDir,
package/dist/index.cjs CHANGED
@@ -182,7 +182,7 @@ var import_zlib = require("zlib");
182
182
  var import_xmldom = require("@xmldom/xmldom");
183
183
 
184
184
  // src/utils.ts
185
- var VERSION = true ? "2.0.1" : "0.0.0-dev";
185
+ var VERSION = true ? "2.0.2" : "0.0.0-dev";
186
186
  function toArrayBuffer(buf) {
187
187
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
188
188
  return buf.buffer;
@@ -432,8 +432,11 @@ function blocksToMarkdown(blocks) {
432
432
  if (lines.length > 0 && lines[lines.length - 1] !== "") {
433
433
  lines.push("");
434
434
  }
435
- lines.push(tableToMarkdown(block.table));
436
- lines.push("");
435
+ const tableMd = tableToMarkdown(block.table);
436
+ if (tableMd) {
437
+ lines.push(tableMd);
438
+ lines.push("");
439
+ }
437
440
  }
438
441
  }
439
442
  return lines.join("\n").trim();
@@ -443,6 +446,7 @@ function tableToMarkdown(table) {
443
446
  const { cells, rows: numRows, cols: numCols } = table;
444
447
  if (numRows === 1 && numCols === 1) {
445
448
  const content = sanitizeText(cells[0][0].text);
449
+ if (!content) return "";
446
450
  return content.split(/\n/).map((line) => {
447
451
  const trimmed = line.trim();
448
452
  if (!trimmed) return "";
@@ -479,9 +483,9 @@ function tableToMarkdown(table) {
479
483
  const row = display[r];
480
484
  const isEmptyPlaceholder = row.every((cell) => cell === "");
481
485
  if (isEmptyPlaceholder) continue;
482
- const hasSkippedCols = row.some((cell, c) => cell === "" && skip.has(`${r},${c}`));
483
486
  const nonEmptyCols = row.filter((cell) => cell !== "");
484
- if (!hasSkippedCols && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
487
+ const hasSkipInRow = row.some((_, c) => skip.has(`${r},${c}`));
488
+ if (!hasSkipInRow && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
485
489
  pendingFirstCol = row[0];
486
490
  continue;
487
491
  }
@@ -896,7 +900,8 @@ function detectHwpxHeadings(blocks, styleMap) {
896
900
  else if (ratio >= HEADING_RATIO_H2) level = 2;
897
901
  else if (ratio >= HEADING_RATIO_H3) level = 3;
898
902
  }
899
- if (/^제\d+[조장절편]/.test(text) && text.length <= 50) {
903
+ const compactText = text.replace(/\s+/g, "");
904
+ if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
900
905
  if (level === 0) level = 3;
901
906
  }
902
907
  if (level > 0) {
@@ -948,9 +953,14 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
948
953
  if (newTable.rows.length > 0) {
949
954
  if (tableStack.length > 0) {
950
955
  const parentTable = tableStack.pop();
951
- const nestedText = convertTableToText(newTable.rows);
952
- if (parentTable.cell) {
953
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
956
+ const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
957
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
958
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
959
+ } else {
960
+ const nestedText = convertTableToText(newTable.rows);
961
+ if (parentTable.cell) {
962
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
963
+ }
954
964
  }
955
965
  tableCtx = parentTable;
956
966
  } else {
@@ -1050,9 +1060,14 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1050
1060
  if (newTable.rows.length > 0) {
1051
1061
  if (tableStack.length > 0) {
1052
1062
  const parentTable = tableStack.pop();
1053
- const nestedText = convertTableToText(newTable.rows);
1054
- if (parentTable.cell) {
1055
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
1063
+ const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
1064
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
1065
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
1066
+ } else {
1067
+ const nestedText = convertTableToText(newTable.rows);
1068
+ if (parentTable.cell) {
1069
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
1070
+ }
1056
1071
  }
1057
1072
  tableCtx = parentTable;
1058
1073
  } else {
@@ -1063,13 +1078,20 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1063
1078
  tableCtx = tableStack.length > 0 ? tableStack.pop() : null;
1064
1079
  }
1065
1080
  } else if (localTag === "pic" || localTag === "shape" || localTag === "drawingObject") {
1066
- const imgRef = extractImageRef(el);
1067
- if (imgRef) {
1068
- blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
1069
- } else if (warnings && sectionNum) {
1070
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
1081
+ const drawTextChild = findDescendant(el, "drawText");
1082
+ if (drawTextChild) {
1083
+ extractDrawTextBlocks(drawTextChild, blocks, styleMap, sectionNum);
1084
+ } else {
1085
+ const imgRef = extractImageRef(el);
1086
+ if (imgRef) {
1087
+ blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
1088
+ } else if (warnings && sectionNum) {
1089
+ warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
1090
+ }
1071
1091
  }
1072
- } else if (localTag === "r" || localTag === "run" || localTag === "ctrl") {
1092
+ } else if (localTag === "drawText") {
1093
+ extractDrawTextBlocks(el, blocks, styleMap, sectionNum);
1094
+ } else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
1073
1095
  walkChildren(el, d + 1);
1074
1096
  }
1075
1097
  }
@@ -1077,6 +1099,40 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1077
1099
  walkChildren(node, depth);
1078
1100
  return tableCtx;
1079
1101
  }
1102
+ function findDescendant(node, targetTag, depth = 0) {
1103
+ if (depth > 5) return null;
1104
+ const children = node.childNodes;
1105
+ if (!children) return null;
1106
+ for (let i = 0; i < children.length; i++) {
1107
+ const child = children[i];
1108
+ if (child.nodeType !== 1) continue;
1109
+ const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1110
+ if (tag === targetTag) return child;
1111
+ const found = findDescendant(child, targetTag, depth + 1);
1112
+ if (found) return found;
1113
+ }
1114
+ return null;
1115
+ }
1116
+ function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
1117
+ const children = drawTextNode.childNodes;
1118
+ if (!children) return;
1119
+ for (let i = 0; i < children.length; i++) {
1120
+ const child = children[i];
1121
+ if (child.nodeType !== 1) continue;
1122
+ const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1123
+ if (tag === "subList" || tag === "p" || tag === "para") {
1124
+ if (tag === "subList") {
1125
+ extractDrawTextBlocks(child, blocks, styleMap, sectionNum);
1126
+ } else {
1127
+ const info = extractParagraphInfo(child, styleMap);
1128
+ const text = info.text.trim();
1129
+ if (text) {
1130
+ blocks.push({ type: "paragraph", text, style: info.style ?? void 0, pageNumber: sectionNum });
1131
+ }
1132
+ }
1133
+ }
1134
+ }
1135
+ }
1080
1136
  function extractParagraphInfo(para, styleMap) {
1081
1137
  let text = "";
1082
1138
  let href;
@@ -1095,11 +1151,18 @@ function extractParagraphInfo(para, styleMap) {
1095
1151
  const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1096
1152
  switch (tag) {
1097
1153
  case "t":
1098
- text += child.textContent || "";
1154
+ walk(child);
1099
1155
  break;
1100
- case "tab":
1101
- text += " ";
1156
+ // 자식 순회 (tab 등 하위 요소 처리)
1157
+ case "tab": {
1158
+ const leader = child.getAttribute("leader");
1159
+ if (leader && leader !== "0") {
1160
+ text += "";
1161
+ } else {
1162
+ text += " ";
1163
+ }
1102
1164
  break;
1165
+ }
1103
1166
  case "br":
1104
1167
  if ((child.getAttribute("type") || "line") === "line") text += "\n";
1105
1168
  break;
@@ -1166,6 +1229,8 @@ function extractParagraphInfo(para, styleMap) {
1166
1229
  }
1167
1230
  };
1168
1231
  walk(para);
1232
+ const leaderIdx = text.indexOf("");
1233
+ if (leaderIdx >= 0) text = text.substring(0, leaderIdx);
1169
1234
  let cleanText = text.replace(/[ \t]+/g, " ").trim();
1170
1235
  if (/^그림입니다\.?\s*원본\s*그림의\s*(이름|크기)/.test(cleanText)) cleanText = "";
1171
1236
  cleanText = cleanText.replace(/그림입니다\.?\s*원본\s*그림의\s*(이름|크기)[^\n]*(\n[^\n]*원본\s*그림의\s*(이름|크기)[^\n]*)*/g, "").trim();
@@ -2637,7 +2702,10 @@ function parseSection(records, docInfo, warnings, sectionNum) {
2637
2702
  if (binId >= 0) {
2638
2703
  blocks.push({ type: "image", text: String(binId), pageNumber: sectionNum });
2639
2704
  } else {
2640
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
2705
+ const boxText = extractTextBoxText(records, i);
2706
+ if (boxText) {
2707
+ blocks.push({ type: "paragraph", text: boxText, pageNumber: sectionNum });
2708
+ }
2641
2709
  }
2642
2710
  } else if (ctrlId === " elo" || ctrlId === "ole ") {
2643
2711
  warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
@@ -2676,6 +2744,19 @@ function extractNoteText(records, ctrlIdx) {
2676
2744
  }
2677
2745
  return texts.length > 0 ? texts.join(" ") : null;
2678
2746
  }
2747
+ function extractTextBoxText(records, ctrlIdx) {
2748
+ const ctrlLevel = records[ctrlIdx].level;
2749
+ const texts = [];
2750
+ for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 200; j++) {
2751
+ const r = records[j];
2752
+ if (r.level <= ctrlLevel) break;
2753
+ if (r.tagId === TAG_PARA_TEXT) {
2754
+ const t = extractText(r.data).trim();
2755
+ if (t) texts.push(t);
2756
+ }
2757
+ }
2758
+ return texts.length > 0 ? texts.join("\n") : null;
2759
+ }
2679
2760
  function extractHyperlinkUrl(data) {
2680
2761
  try {
2681
2762
  const httpSig = Buffer.from("http", "utf16le");