kordoc 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/{chunk-UMO6QQO5.js → chunk-EVWOJ4T5.js} +2 -2
- package/dist/{chunk-UUHAAZYN.js → chunk-XJYM2AUA.js} +117 -34
- package/dist/chunk-XJYM2AUA.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/index.cjs +116 -33
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +116 -33
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-K23YMTIM.js → utils-6JEIFBCJ.js} +2 -2
- package/dist/{watch-CGG7CCHJ.js → watch-BCPDLGOE.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-UUHAAZYN.js.map +0 -1
- /package/dist/{chunk-UMO6QQO5.js.map → chunk-EVWOJ4T5.js.map} +0 -0
- /package/dist/{utils-K23YMTIM.js.map → utils-6JEIFBCJ.js.map} +0 -0
- /package/dist/{watch-CGG7CCHJ.js.map → watch-BCPDLGOE.js.map} +0 -0
package/dist/cli.js
CHANGED
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
import {
|
|
3
3
|
detectFormat,
|
|
4
4
|
parse
|
|
5
|
-
} from "./chunk-UUHAAZYN.js";
|
|
5
|
+
} from "./chunk-XJYM2AUA.js";
|
|
6
6
|
import {
|
|
7
7
|
VERSION,
|
|
8
8
|
toArrayBuffer
|
|
9
|
-
} from "./chunk-UMO6QQO5.js";
|
|
9
|
+
} from "./chunk-EVWOJ4T5.js";
|
|
10
10
|
import "./chunk-MOL7MDBG.js";
|
|
11
11
|
|
|
12
12
|
// src/cli.ts
|
|
@@ -92,7 +92,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
|
|
|
92
92
|
process.stdout.write(output + "\n");
|
|
93
93
|
}
|
|
94
94
|
} catch (err) {
|
|
95
|
-
const { sanitizeError } = await import("./utils-K23YMTIM.js");
|
|
95
|
+
const { sanitizeError } = await import("./utils-6JEIFBCJ.js");
|
|
96
96
|
process.stderr.write(`
|
|
97
97
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
98
98
|
`);
|
|
@@ -101,7 +101,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
|
|
|
101
101
|
}
|
|
102
102
|
});
|
|
103
103
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
104
|
-
const { watchDirectory } = await import("./watch-CGG7CCHJ.js");
|
|
104
|
+
const { watchDirectory } = await import("./watch-BCPDLGOE.js");
|
|
105
105
|
await watchDirectory({
|
|
106
106
|
dir,
|
|
107
107
|
outDir: opts.outDir,
|
package/dist/index.cjs
CHANGED
|
@@ -182,7 +182,7 @@ var import_zlib = require("zlib");
|
|
|
182
182
|
var import_xmldom = require("@xmldom/xmldom");
|
|
183
183
|
|
|
184
184
|
// src/utils.ts
|
|
185
|
-
var VERSION = true ? "2.0.0" : "0.0.0-dev";
|
|
185
|
+
var VERSION = true ? "2.0.2" : "0.0.0-dev";
|
|
186
186
|
function toArrayBuffer(buf) {
|
|
187
187
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
188
188
|
return buf.buffer;
|
|
@@ -432,8 +432,11 @@ function blocksToMarkdown(blocks) {
|
|
|
432
432
|
if (lines.length > 0 && lines[lines.length - 1] !== "") {
|
|
433
433
|
lines.push("");
|
|
434
434
|
}
|
|
435
|
-
|
|
436
|
-
|
|
435
|
+
const tableMd = tableToMarkdown(block.table);
|
|
436
|
+
if (tableMd) {
|
|
437
|
+
lines.push(tableMd);
|
|
438
|
+
lines.push("");
|
|
439
|
+
}
|
|
437
440
|
}
|
|
438
441
|
}
|
|
439
442
|
return lines.join("\n").trim();
|
|
@@ -443,6 +446,7 @@ function tableToMarkdown(table) {
|
|
|
443
446
|
const { cells, rows: numRows, cols: numCols } = table;
|
|
444
447
|
if (numRows === 1 && numCols === 1) {
|
|
445
448
|
const content = sanitizeText(cells[0][0].text);
|
|
449
|
+
if (!content) return "";
|
|
446
450
|
return content.split(/\n/).map((line) => {
|
|
447
451
|
const trimmed = line.trim();
|
|
448
452
|
if (!trimmed) return "";
|
|
@@ -479,9 +483,9 @@ function tableToMarkdown(table) {
|
|
|
479
483
|
const row = display[r];
|
|
480
484
|
const isEmptyPlaceholder = row.every((cell) => cell === "");
|
|
481
485
|
if (isEmptyPlaceholder) continue;
|
|
482
|
-
const hasSkippedCols = row.some((cell, c) => cell === "" && skip.has(`${r},${c}`));
|
|
483
486
|
const nonEmptyCols = row.filter((cell) => cell !== "");
|
|
484
|
-
|
|
487
|
+
const hasSkipInRow = row.some((_, c) => skip.has(`${r},${c}`));
|
|
488
|
+
if (!hasSkipInRow && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
|
|
485
489
|
pendingFirstCol = row[0];
|
|
486
490
|
continue;
|
|
487
491
|
}
|
|
@@ -896,7 +900,8 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
896
900
|
else if (ratio >= HEADING_RATIO_H2) level = 2;
|
|
897
901
|
else if (ratio >= HEADING_RATIO_H3) level = 3;
|
|
898
902
|
}
|
|
899
|
-
|
|
903
|
+
const compactText = text.replace(/\s+/g, "");
|
|
904
|
+
if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
|
|
900
905
|
if (level === 0) level = 3;
|
|
901
906
|
}
|
|
902
907
|
if (level > 0) {
|
|
@@ -948,9 +953,14 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
|
|
|
948
953
|
if (newTable.rows.length > 0) {
|
|
949
954
|
if (tableStack.length > 0) {
|
|
950
955
|
const parentTable = tableStack.pop();
|
|
951
|
-
const
|
|
952
|
-
if (
|
|
953
|
-
|
|
956
|
+
const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
|
|
957
|
+
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
958
|
+
blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
|
|
959
|
+
} else {
|
|
960
|
+
const nestedText = convertTableToText(newTable.rows);
|
|
961
|
+
if (parentTable.cell) {
|
|
962
|
+
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
|
|
963
|
+
}
|
|
954
964
|
}
|
|
955
965
|
tableCtx = parentTable;
|
|
956
966
|
} else {
|
|
@@ -1050,9 +1060,14 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
|
|
|
1050
1060
|
if (newTable.rows.length > 0) {
|
|
1051
1061
|
if (tableStack.length > 0) {
|
|
1052
1062
|
const parentTable = tableStack.pop();
|
|
1053
|
-
const
|
|
1054
|
-
if (
|
|
1055
|
-
|
|
1063
|
+
const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
|
|
1064
|
+
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
1065
|
+
blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
|
|
1066
|
+
} else {
|
|
1067
|
+
const nestedText = convertTableToText(newTable.rows);
|
|
1068
|
+
if (parentTable.cell) {
|
|
1069
|
+
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
|
|
1070
|
+
}
|
|
1056
1071
|
}
|
|
1057
1072
|
tableCtx = parentTable;
|
|
1058
1073
|
} else {
|
|
@@ -1063,13 +1078,20 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
|
|
|
1063
1078
|
tableCtx = tableStack.length > 0 ? tableStack.pop() : null;
|
|
1064
1079
|
}
|
|
1065
1080
|
} else if (localTag === "pic" || localTag === "shape" || localTag === "drawingObject") {
|
|
1066
|
-
const
|
|
1067
|
-
if (
|
|
1068
|
-
|
|
1069
|
-
} else
|
|
1070
|
-
|
|
1081
|
+
const drawTextChild = findDescendant(el, "drawText");
|
|
1082
|
+
if (drawTextChild) {
|
|
1083
|
+
extractDrawTextBlocks(drawTextChild, blocks, styleMap, sectionNum);
|
|
1084
|
+
} else {
|
|
1085
|
+
const imgRef = extractImageRef(el);
|
|
1086
|
+
if (imgRef) {
|
|
1087
|
+
blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
|
|
1088
|
+
} else if (warnings && sectionNum) {
|
|
1089
|
+
warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
|
|
1090
|
+
}
|
|
1071
1091
|
}
|
|
1072
|
-
} else if (localTag === "
|
|
1092
|
+
} else if (localTag === "drawText") {
|
|
1093
|
+
extractDrawTextBlocks(el, blocks, styleMap, sectionNum);
|
|
1094
|
+
} else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
|
|
1073
1095
|
walkChildren(el, d + 1);
|
|
1074
1096
|
}
|
|
1075
1097
|
}
|
|
@@ -1077,6 +1099,40 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
|
|
|
1077
1099
|
walkChildren(node, depth);
|
|
1078
1100
|
return tableCtx;
|
|
1079
1101
|
}
|
|
1102
|
+
function findDescendant(node, targetTag, depth = 0) {
|
|
1103
|
+
if (depth > 5) return null;
|
|
1104
|
+
const children = node.childNodes;
|
|
1105
|
+
if (!children) return null;
|
|
1106
|
+
for (let i = 0; i < children.length; i++) {
|
|
1107
|
+
const child = children[i];
|
|
1108
|
+
if (child.nodeType !== 1) continue;
|
|
1109
|
+
const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
|
|
1110
|
+
if (tag === targetTag) return child;
|
|
1111
|
+
const found = findDescendant(child, targetTag, depth + 1);
|
|
1112
|
+
if (found) return found;
|
|
1113
|
+
}
|
|
1114
|
+
return null;
|
|
1115
|
+
}
|
|
1116
|
+
function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
|
|
1117
|
+
const children = drawTextNode.childNodes;
|
|
1118
|
+
if (!children) return;
|
|
1119
|
+
for (let i = 0; i < children.length; i++) {
|
|
1120
|
+
const child = children[i];
|
|
1121
|
+
if (child.nodeType !== 1) continue;
|
|
1122
|
+
const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
|
|
1123
|
+
if (tag === "subList" || tag === "p" || tag === "para") {
|
|
1124
|
+
if (tag === "subList") {
|
|
1125
|
+
extractDrawTextBlocks(child, blocks, styleMap, sectionNum);
|
|
1126
|
+
} else {
|
|
1127
|
+
const info = extractParagraphInfo(child, styleMap);
|
|
1128
|
+
const text = info.text.trim();
|
|
1129
|
+
if (text) {
|
|
1130
|
+
blocks.push({ type: "paragraph", text, style: info.style ?? void 0, pageNumber: sectionNum });
|
|
1131
|
+
}
|
|
1132
|
+
}
|
|
1133
|
+
}
|
|
1134
|
+
}
|
|
1135
|
+
}
|
|
1080
1136
|
function extractParagraphInfo(para, styleMap) {
|
|
1081
1137
|
let text = "";
|
|
1082
1138
|
let href;
|
|
@@ -1095,11 +1151,18 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1095
1151
|
const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
|
|
1096
1152
|
switch (tag) {
|
|
1097
1153
|
case "t":
|
|
1098
|
-
|
|
1154
|
+
walk(child);
|
|
1099
1155
|
break;
|
|
1100
|
-
|
|
1101
|
-
|
|
1156
|
+
// 자식 순회 (tab 등 하위 요소 처리)
|
|
1157
|
+
case "tab": {
|
|
1158
|
+
const leader = child.getAttribute("leader");
|
|
1159
|
+
if (leader && leader !== "0") {
|
|
1160
|
+
text += "";
|
|
1161
|
+
} else {
|
|
1162
|
+
text += " ";
|
|
1163
|
+
}
|
|
1102
1164
|
break;
|
|
1165
|
+
}
|
|
1103
1166
|
case "br":
|
|
1104
1167
|
if ((child.getAttribute("type") || "line") === "line") text += "\n";
|
|
1105
1168
|
break;
|
|
@@ -1166,6 +1229,8 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1166
1229
|
}
|
|
1167
1230
|
};
|
|
1168
1231
|
walk(para);
|
|
1232
|
+
const leaderIdx = text.indexOf("");
|
|
1233
|
+
if (leaderIdx >= 0) text = text.substring(0, leaderIdx);
|
|
1169
1234
|
let cleanText = text.replace(/[ \t]+/g, " ").trim();
|
|
1170
1235
|
if (/^그림입니다\.?\s*원본\s*그림의\s*(이름|크기)/.test(cleanText)) cleanText = "";
|
|
1171
1236
|
cleanText = cleanText.replace(/그림입니다\.?\s*원본\s*그림의\s*(이름|크기)[^\n]*(\n[^\n]*원본\s*그림의\s*(이름|크기)[^\n]*)*/g, "").trim();
|
|
@@ -1994,18 +2059,20 @@ function decryptDistributePayload(payload) {
|
|
|
1994
2059
|
if (payload.length < 256) throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
|
|
1995
2060
|
const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
|
|
1996
2061
|
const lcg = new MsvcLcg(seed);
|
|
1997
|
-
const result = new Uint8Array(256);
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
result[3] = payload[3];
|
|
2002
|
-
let i = 4;
|
|
2062
|
+
const result = new Uint8Array(payload.subarray(0, 256));
|
|
2063
|
+
let i = 0;
|
|
2064
|
+
let n = 0;
|
|
2065
|
+
let key = 0;
|
|
2003
2066
|
while (i < 256) {
|
|
2004
|
-
|
|
2005
|
-
|
|
2006
|
-
|
|
2007
|
-
|
|
2067
|
+
if (n === 0) {
|
|
2068
|
+
key = lcg.rand() & 255;
|
|
2069
|
+
n = (lcg.rand() & 15) + 1;
|
|
2070
|
+
}
|
|
2071
|
+
if (i >= 4) {
|
|
2072
|
+
result[i] ^= key;
|
|
2008
2073
|
}
|
|
2074
|
+
i++;
|
|
2075
|
+
n--;
|
|
2009
2076
|
}
|
|
2010
2077
|
return result;
|
|
2011
2078
|
}
|
|
@@ -2029,7 +2096,7 @@ function parseRecordHeader(data, offset) {
|
|
|
2029
2096
|
}
|
|
2030
2097
|
return { tagId, size, headerSize };
|
|
2031
2098
|
}
|
|
2032
|
-
var TAG_DISTRIBUTE_DOC_DATA = 16 +
|
|
2099
|
+
var TAG_DISTRIBUTE_DOC_DATA = 16 + 12;
|
|
2033
2100
|
function decryptViewText(viewTextRaw, compressed) {
|
|
2034
2101
|
const data = new Uint8Array(viewTextRaw);
|
|
2035
2102
|
const rec = parseRecordHeader(data, 0);
|
|
@@ -2635,7 +2702,10 @@ function parseSection(records, docInfo, warnings, sectionNum) {
|
|
|
2635
2702
|
if (binId >= 0) {
|
|
2636
2703
|
blocks.push({ type: "image", text: String(binId), pageNumber: sectionNum });
|
|
2637
2704
|
} else {
|
|
2638
|
-
|
|
2705
|
+
const boxText = extractTextBoxText(records, i);
|
|
2706
|
+
if (boxText) {
|
|
2707
|
+
blocks.push({ type: "paragraph", text: boxText, pageNumber: sectionNum });
|
|
2708
|
+
}
|
|
2639
2709
|
}
|
|
2640
2710
|
} else if (ctrlId === " elo" || ctrlId === "ole ") {
|
|
2641
2711
|
warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
|
|
@@ -2674,6 +2744,19 @@ function extractNoteText(records, ctrlIdx) {
|
|
|
2674
2744
|
}
|
|
2675
2745
|
return texts.length > 0 ? texts.join(" ") : null;
|
|
2676
2746
|
}
|
|
2747
|
+
function extractTextBoxText(records, ctrlIdx) {
|
|
2748
|
+
const ctrlLevel = records[ctrlIdx].level;
|
|
2749
|
+
const texts = [];
|
|
2750
|
+
for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 200; j++) {
|
|
2751
|
+
const r = records[j];
|
|
2752
|
+
if (r.level <= ctrlLevel) break;
|
|
2753
|
+
if (r.tagId === TAG_PARA_TEXT) {
|
|
2754
|
+
const t = extractText(r.data).trim();
|
|
2755
|
+
if (t) texts.push(t);
|
|
2756
|
+
}
|
|
2757
|
+
}
|
|
2758
|
+
return texts.length > 0 ? texts.join("\n") : null;
|
|
2759
|
+
}
|
|
2677
2760
|
function extractHyperlinkUrl(data) {
|
|
2678
2761
|
try {
|
|
2679
2762
|
const httpSig = Buffer.from("http", "utf16le");
|