kordoc 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/{chunk-UMO6QQO5.js → chunk-EVWOJ4T5.js} +2 -2
- package/dist/{chunk-UUHAAZYN.js → chunk-XJYM2AUA.js} +117 -34
- package/dist/chunk-XJYM2AUA.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/index.cjs +116 -33
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +116 -33
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-K23YMTIM.js → utils-6JEIFBCJ.js} +2 -2
- package/dist/{watch-CGG7CCHJ.js → watch-BCPDLGOE.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-UUHAAZYN.js.map +0 -1
- /package/dist/{chunk-UMO6QQO5.js.map → chunk-EVWOJ4T5.js.map} +0 -0
- /package/dist/{utils-K23YMTIM.js.map → utils-6JEIFBCJ.js.map} +0 -0
- /package/dist/{watch-CGG7CCHJ.js.map → watch-BCPDLGOE.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -138,7 +138,7 @@ import { inflateRawSync } from "zlib";
|
|
|
138
138
|
import { DOMParser } from "@xmldom/xmldom";
|
|
139
139
|
|
|
140
140
|
// src/utils.ts
|
|
141
|
-
var VERSION = true ? "2.0.
|
|
141
|
+
// Version string for this build. The condition is the literal `true`, so this
// always evaluates to "2.0.2"; the "0.0.0-dev" branch is never taken here.
var VERSION = true ? "2.0.2" : "0.0.0-dev";
|
|
142
142
|
function toArrayBuffer(buf) {
|
|
143
143
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
144
144
|
return buf.buffer;
|
|
@@ -388,8 +388,11 @@ function blocksToMarkdown(blocks) {
|
|
|
388
388
|
if (lines.length > 0 && lines[lines.length - 1] !== "") {
|
|
389
389
|
lines.push("");
|
|
390
390
|
}
|
|
391
|
-
|
|
392
|
-
|
|
391
|
+
const tableMd = tableToMarkdown(block.table);
|
|
392
|
+
if (tableMd) {
|
|
393
|
+
lines.push(tableMd);
|
|
394
|
+
lines.push("");
|
|
395
|
+
}
|
|
393
396
|
}
|
|
394
397
|
}
|
|
395
398
|
return lines.join("\n").trim();
|
|
@@ -399,6 +402,7 @@ function tableToMarkdown(table) {
|
|
|
399
402
|
const { cells, rows: numRows, cols: numCols } = table;
|
|
400
403
|
if (numRows === 1 && numCols === 1) {
|
|
401
404
|
const content = sanitizeText(cells[0][0].text);
|
|
405
|
+
if (!content) return "";
|
|
402
406
|
return content.split(/\n/).map((line) => {
|
|
403
407
|
const trimmed = line.trim();
|
|
404
408
|
if (!trimmed) return "";
|
|
@@ -435,9 +439,9 @@ function tableToMarkdown(table) {
|
|
|
435
439
|
const row = display[r];
|
|
436
440
|
const isEmptyPlaceholder = row.every((cell) => cell === "");
|
|
437
441
|
if (isEmptyPlaceholder) continue;
|
|
438
|
-
const hasSkippedCols = row.some((cell, c) => cell === "" && skip.has(`${r},${c}`));
|
|
439
442
|
const nonEmptyCols = row.filter((cell) => cell !== "");
|
|
440
|
-
|
|
443
|
+
const hasSkipInRow = row.some((_, c) => skip.has(`${r},${c}`));
|
|
444
|
+
if (!hasSkipInRow && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
|
|
441
445
|
pendingFirstCol = row[0];
|
|
442
446
|
continue;
|
|
443
447
|
}
|
|
@@ -852,7 +856,8 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
852
856
|
else if (ratio >= HEADING_RATIO_H2) level = 2;
|
|
853
857
|
else if (ratio >= HEADING_RATIO_H3) level = 3;
|
|
854
858
|
}
|
|
855
|
-
|
|
859
|
+
const compactText = text.replace(/\s+/g, "");
|
|
860
|
+
if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
|
|
856
861
|
if (level === 0) level = 3;
|
|
857
862
|
}
|
|
858
863
|
if (level > 0) {
|
|
@@ -904,9 +909,14 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
|
|
|
904
909
|
if (newTable.rows.length > 0) {
|
|
905
910
|
if (tableStack.length > 0) {
|
|
906
911
|
const parentTable = tableStack.pop();
|
|
907
|
-
const
|
|
908
|
-
if (
|
|
909
|
-
|
|
912
|
+
const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
|
|
913
|
+
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
914
|
+
blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
|
|
915
|
+
} else {
|
|
916
|
+
const nestedText = convertTableToText(newTable.rows);
|
|
917
|
+
if (parentTable.cell) {
|
|
918
|
+
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
|
|
919
|
+
}
|
|
910
920
|
}
|
|
911
921
|
tableCtx = parentTable;
|
|
912
922
|
} else {
|
|
@@ -1006,9 +1016,14 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
|
|
|
1006
1016
|
if (newTable.rows.length > 0) {
|
|
1007
1017
|
if (tableStack.length > 0) {
|
|
1008
1018
|
const parentTable = tableStack.pop();
|
|
1009
|
-
const
|
|
1010
|
-
if (
|
|
1011
|
-
|
|
1019
|
+
const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
|
|
1020
|
+
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
1021
|
+
blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
|
|
1022
|
+
} else {
|
|
1023
|
+
const nestedText = convertTableToText(newTable.rows);
|
|
1024
|
+
if (parentTable.cell) {
|
|
1025
|
+
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
|
|
1026
|
+
}
|
|
1012
1027
|
}
|
|
1013
1028
|
tableCtx = parentTable;
|
|
1014
1029
|
} else {
|
|
@@ -1019,13 +1034,20 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
|
|
|
1019
1034
|
tableCtx = tableStack.length > 0 ? tableStack.pop() : null;
|
|
1020
1035
|
}
|
|
1021
1036
|
} else if (localTag === "pic" || localTag === "shape" || localTag === "drawingObject") {
|
|
1022
|
-
const
|
|
1023
|
-
if (
|
|
1024
|
-
|
|
1025
|
-
} else
|
|
1026
|
-
|
|
1037
|
+
const drawTextChild = findDescendant(el, "drawText");
|
|
1038
|
+
if (drawTextChild) {
|
|
1039
|
+
extractDrawTextBlocks(drawTextChild, blocks, styleMap, sectionNum);
|
|
1040
|
+
} else {
|
|
1041
|
+
const imgRef = extractImageRef(el);
|
|
1042
|
+
if (imgRef) {
|
|
1043
|
+
blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
|
|
1044
|
+
} else if (warnings && sectionNum) {
|
|
1045
|
+
warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
|
|
1046
|
+
}
|
|
1027
1047
|
}
|
|
1028
|
-
} else if (localTag === "
|
|
1048
|
+
} else if (localTag === "drawText") {
|
|
1049
|
+
extractDrawTextBlocks(el, blocks, styleMap, sectionNum);
|
|
1050
|
+
} else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
|
|
1029
1051
|
walkChildren(el, d + 1);
|
|
1030
1052
|
}
|
|
1031
1053
|
}
|
|
@@ -1033,6 +1055,40 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
|
|
|
1033
1055
|
walkChildren(node, depth);
|
|
1034
1056
|
return tableCtx;
|
|
1035
1057
|
}
|
|
1058
|
+
/**
 * Depth-first search for the first descendant element whose tag name, with
 * any namespace prefix (`prefix:`) stripped, equals `targetTag`.
 *
 * Only element nodes (`nodeType === 1`) are inspected; each element is tested
 * before its own subtree is searched, so the result is the first match in
 * document order.
 *
 * @param {object|null|undefined} node - Node whose descendants are searched.
 *   A nullish node, or one without `childNodes`, yields `null`.
 * @param {string} targetTag - Tag name to match, without namespace prefix.
 * @param {number} [depth=0] - Current recursion depth (used internally).
 * @param {number} [maxDepth=5] - Recursion stops once `depth` exceeds this
 *   value. With the defaults, descendants up to six levels below `node` are
 *   examined.
 * @returns {object|null} The matching element, or `null` when none is found
 *   within the depth limit.
 */
function findDescendant(node, targetTag, depth = 0, maxDepth = 5) {
  if (depth > maxDepth) return null;
  // Tolerate a missing node the same way a missing child list is tolerated.
  const children = node?.childNodes;
  if (!children) return null;
  for (let i = 0; i < children.length; i++) {
    const child = children[i];
    if (child.nodeType !== 1) continue;
    // Compare on the local part only, e.g. "hp:drawText" -> "drawText".
    const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
    if (tag === targetTag) return child;
    const found = findDescendant(child, targetTag, depth + 1, maxDepth);
    if (found) return found;
  }
  return null;
}
|
|
1072
|
+
/**
 * Appends a paragraph block to `blocks` for every non-empty `p` / `para`
 * element found directly under `drawTextNode`, descending through any
 * `subList` children along the way.
 *
 * Tag names are compared with their namespace prefix removed. Non-element
 * children and elements with other tag names are ignored.
 *
 * @param {object} drawTextNode - Element whose child nodes are scanned.
 * @param {Array<object>} blocks - Output list; mutated in place.
 * @param {*} styleMap - Passed through to `extractParagraphInfo`.
 * @param {*} sectionNum - Stored as `pageNumber` on each pushed block.
 * @returns {void}
 */
function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
  const kids = drawTextNode.childNodes;
  if (!kids) return;

  for (let idx = 0; idx < kids.length; idx += 1) {
    const kid = kids[idx];
    if (kid.nodeType !== 1) continue;

    const localTag = (kid.tagName || kid.localName || "").replace(/^[^:]+:/, "");
    switch (localTag) {
      case "subList":
        // A subList is only a wrapper: process its children the same way.
        extractDrawTextBlocks(kid, blocks, styleMap, sectionNum);
        break;
      case "p":
      case "para": {
        const paragraph = extractParagraphInfo(kid, styleMap);
        const text = paragraph.text.trim();
        if (!text) break; // whitespace-only paragraphs produce no block
        blocks.push({
          type: "paragraph",
          text,
          style: paragraph.style ?? undefined,
          pageNumber: sectionNum,
        });
        break;
      }
      default:
        break;
    }
  }
}
|
|
1036
1092
|
function extractParagraphInfo(para, styleMap) {
|
|
1037
1093
|
let text = "";
|
|
1038
1094
|
let href;
|
|
@@ -1051,11 +1107,18 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1051
1107
|
const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
|
|
1052
1108
|
switch (tag) {
|
|
1053
1109
|
case "t":
|
|
1054
|
-
|
|
1110
|
+
walk(child);
|
|
1055
1111
|
break;
|
|
1056
|
-
|
|
1057
|
-
|
|
1112
|
+
// 자식 순회 (tab 등 하위 요소 처리)
|
|
1113
|
+
case "tab": {
|
|
1114
|
+
const leader = child.getAttribute("leader");
|
|
1115
|
+
if (leader && leader !== "0") {
|
|
1116
|
+
text += "";
|
|
1117
|
+
} else {
|
|
1118
|
+
text += " ";
|
|
1119
|
+
}
|
|
1058
1120
|
break;
|
|
1121
|
+
}
|
|
1059
1122
|
case "br":
|
|
1060
1123
|
if ((child.getAttribute("type") || "line") === "line") text += "\n";
|
|
1061
1124
|
break;
|
|
@@ -1122,6 +1185,8 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1122
1185
|
}
|
|
1123
1186
|
};
|
|
1124
1187
|
walk(para);
|
|
1188
|
+
const leaderIdx = text.indexOf("");
|
|
1189
|
+
if (leaderIdx >= 0) text = text.substring(0, leaderIdx);
|
|
1125
1190
|
let cleanText = text.replace(/[ \t]+/g, " ").trim();
|
|
1126
1191
|
if (/^그림입니다\.?\s*원본\s*그림의\s*(이름|크기)/.test(cleanText)) cleanText = "";
|
|
1127
1192
|
cleanText = cleanText.replace(/그림입니다\.?\s*원본\s*그림의\s*(이름|크기)[^\n]*(\n[^\n]*원본\s*그림의\s*(이름|크기)[^\n]*)*/g, "").trim();
|
|
@@ -1950,18 +2015,20 @@ function decryptDistributePayload(payload) {
|
|
|
1950
2015
|
if (payload.length < 256) throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
|
|
1951
2016
|
const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
|
|
1952
2017
|
const lcg = new MsvcLcg(seed);
|
|
1953
|
-
const result = new Uint8Array(256);
|
|
1954
|
-
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
result[3] = payload[3];
|
|
1958
|
-
let i = 4;
|
|
2018
|
+
const result = new Uint8Array(payload.subarray(0, 256));
|
|
2019
|
+
let i = 0;
|
|
2020
|
+
let n = 0;
|
|
2021
|
+
let key = 0;
|
|
1959
2022
|
while (i < 256) {
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
2023
|
+
if (n === 0) {
|
|
2024
|
+
key = lcg.rand() & 255;
|
|
2025
|
+
n = (lcg.rand() & 15) + 1;
|
|
2026
|
+
}
|
|
2027
|
+
if (i >= 4) {
|
|
2028
|
+
result[i] ^= key;
|
|
1964
2029
|
}
|
|
2030
|
+
i++;
|
|
2031
|
+
n--;
|
|
1965
2032
|
}
|
|
1966
2033
|
return result;
|
|
1967
2034
|
}
|
|
@@ -1985,7 +2052,7 @@ function parseRecordHeader(data, offset) {
|
|
|
1985
2052
|
}
|
|
1986
2053
|
return { tagId, size, headerSize };
|
|
1987
2054
|
}
|
|
1988
|
-
var TAG_DISTRIBUTE_DOC_DATA = 16 +
|
|
2055
|
+
// Evaluates to 28. NOTE(review): presumably a record tag id expressed as a
// base of 16 plus an offset of 12 — confirm against the record-format spec.
var TAG_DISTRIBUTE_DOC_DATA = 16 + 12;
|
|
1989
2056
|
function decryptViewText(viewTextRaw, compressed) {
|
|
1990
2057
|
const data = new Uint8Array(viewTextRaw);
|
|
1991
2058
|
const rec = parseRecordHeader(data, 0);
|
|
@@ -2590,7 +2657,10 @@ function parseSection(records, docInfo, warnings, sectionNum) {
|
|
|
2590
2657
|
if (binId >= 0) {
|
|
2591
2658
|
blocks.push({ type: "image", text: String(binId), pageNumber: sectionNum });
|
|
2592
2659
|
} else {
|
|
2593
|
-
|
|
2660
|
+
const boxText = extractTextBoxText(records, i);
|
|
2661
|
+
if (boxText) {
|
|
2662
|
+
blocks.push({ type: "paragraph", text: boxText, pageNumber: sectionNum });
|
|
2663
|
+
}
|
|
2594
2664
|
}
|
|
2595
2665
|
} else if (ctrlId === " elo" || ctrlId === "ole ") {
|
|
2596
2666
|
warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
|
|
@@ -2629,6 +2699,19 @@ function extractNoteText(records, ctrlIdx) {
|
|
|
2629
2699
|
}
|
|
2630
2700
|
return texts.length > 0 ? texts.join(" ") : null;
|
|
2631
2701
|
}
|
|
2702
|
+
/**
 * Gathers the text of the `TAG_PARA_TEXT` records that follow the control
 * record at `ctrlIdx` and sit at a deeper level than it.
 *
 * Scanning starts at `ctrlIdx + 1` and ends at the first record whose level
 * is less than or equal to the control record's level, at the end of
 * `records`, or just before index `ctrlIdx + 200`, whichever comes first.
 * Each matching record's data is run through `extractText` and trimmed;
 * empty results are dropped.
 *
 * @param {Array<object>} records - Records with `level`, `tagId` and `data`.
 * @param {number} ctrlIdx - Index of the control record in `records`.
 * @returns {string|null} The collected texts joined with "\n", or `null`
 *   when nothing was collected.
 */
function extractTextBoxText(records, ctrlIdx) {
  const { level: ownerLevel } = records[ctrlIdx];
  const scanEnd = Math.min(records.length, ctrlIdx + 200);
  const collected = [];

  let cursor = ctrlIdx + 1;
  while (cursor < scanEnd) {
    const record = records[cursor];
    cursor += 1;
    if (record.level <= ownerLevel) break; // left the control's subtree
    if (record.tagId !== TAG_PARA_TEXT) continue;
    const content = extractText(record.data).trim();
    if (content) collected.push(content);
  }

  if (collected.length === 0) return null;
  return collected.join("\n");
}
|
|
2632
2715
|
function extractHyperlinkUrl(data) {
|
|
2633
2716
|
try {
|
|
2634
2717
|
const httpSig = Buffer.from("http", "utf16le");
|