kordoc 2.9.1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -0
- package/dist/-K5SLEFZD.js +71 -0
- package/dist/-K5SLEFZD.js.map +1 -0
- package/dist/{chunk-GQQNAYZA.js → chunk-326STEDU.js} +6684 -4061
- package/dist/chunk-326STEDU.js.map +1 -0
- package/dist/{chunk-FWAXCTSX.cjs → chunk-3WRJQQIO.cjs} +185 -16
- package/dist/chunk-3WRJQQIO.cjs.map +1 -0
- package/dist/chunk-MUOQXDZ4.cjs.map +1 -1
- package/dist/{chunk-Z6TLTWYK.js → chunk-NHXKJWR7.js} +182 -13
- package/dist/chunk-NHXKJWR7.js.map +1 -0
- package/dist/{chunk-ODF24QXC.js → chunk-SA2PERJ5.js} +182 -13
- package/dist/chunk-SA2PERJ5.js.map +1 -0
- package/dist/cli.js +42 -3
- package/dist/cli.js.map +1 -1
- package/dist/formula-XGG6ZP42.cjs.map +1 -1
- package/dist/index.cjs +3247 -822
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +61 -2
- package/dist/index.d.ts +61 -2
- package/dist/index.js +3025 -600
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +3 -3
- package/dist/page-range-3C7UGGEK.cjs.map +1 -1
- package/dist/{parser-BKYM3LKN.js → parser-4IVYHKSL.js} +677 -85
- package/dist/parser-4IVYHKSL.js.map +1 -0
- package/dist/{parser-BTIPAEDZ.cjs → parser-5KHU732L.cjs} +689 -97
- package/dist/parser-5KHU732L.cjs.map +1 -0
- package/dist/{parser-FJNQEW7K.js → parser-AU2NLC44.js} +677 -85
- package/dist/parser-AU2NLC44.js.map +1 -0
- package/dist/provider-SNONEZNW.cjs.map +1 -1
- package/dist/{watch-SBLSWHL7.js → watch-5DDN4BUI.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-FWAXCTSX.cjs.map +0 -1
- package/dist/chunk-GQQNAYZA.js.map +0 -1
- package/dist/chunk-ODF24QXC.js.map +0 -1
- package/dist/chunk-Z6TLTWYK.js.map +0 -1
- package/dist/parser-BKYM3LKN.js.map +0 -1
- package/dist/parser-BTIPAEDZ.cjs.map +0 -1
- package/dist/parser-FJNQEW7K.js.map +0 -1
- /package/dist/{watch-SBLSWHL7.js.map → watch-5DDN4BUI.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -12,11 +12,12 @@ import {
|
|
|
12
12
|
convertTableToText,
|
|
13
13
|
flattenLayoutTables,
|
|
14
14
|
isPathTraversal,
|
|
15
|
+
mapPuaText,
|
|
15
16
|
precheckZipSize,
|
|
16
17
|
sanitizeHref,
|
|
17
18
|
stripDtd,
|
|
18
19
|
toArrayBuffer
|
|
19
|
-
} from "./chunk-
|
|
20
|
+
} from "./chunk-NHXKJWR7.js";
|
|
20
21
|
import {
|
|
21
22
|
parsePageRange
|
|
22
23
|
} from "./chunk-SBVRCJFH.js";
|
|
@@ -818,6 +819,9 @@ function clampSpan(val, max) {
|
|
|
818
819
|
return Math.max(1, Math.min(val, max));
|
|
819
820
|
}
|
|
820
821
|
var MAX_XML_DEPTH = 200;
|
|
822
|
+
/**
 * Builds a brand-new, empty shared-state object for section parsing.
 *
 * @returns {{numState: Map, pageText: {headers: Array, footers: Array}, track: {deleteDepth: number, warned: boolean}}}
 *   - `numState`: empty Map (resolveParaHeading keeps its per-numbering counters here).
 *   - `pageText`: empty `headers` / `footers` arrays (consumed by applyPageText).
 *   - `track`: `deleteDepth` starts at 0 and `warned` at false.
 */
function createSectionShared() {
  const numState = /* @__PURE__ */ new Map();
  const pageText = { headers: [], footers: [] };
  const track = { deleteDepth: 0, warned: false };
  return { numState, pageText, track };
}
|
|
821
825
|
function createXmlParser(warnings) {
|
|
822
826
|
return new DOMParser({
|
|
823
827
|
onError(level, msg) {
|
|
@@ -829,7 +833,10 @@ function createXmlParser(warnings) {
|
|
|
829
833
|
async function extractHwpxStyles(zip, decompressed) {
|
|
830
834
|
const result = {
|
|
831
835
|
charProperties: /* @__PURE__ */ new Map(),
|
|
832
|
-
styles: /* @__PURE__ */ new Map()
|
|
836
|
+
styles: /* @__PURE__ */ new Map(),
|
|
837
|
+
numberings: /* @__PURE__ */ new Map(),
|
|
838
|
+
bullets: /* @__PURE__ */ new Map(),
|
|
839
|
+
paraHeadings: /* @__PURE__ */ new Map()
|
|
833
840
|
};
|
|
834
841
|
const headerPaths = ["Contents/header.xml", "header.xml", "Contents/head.xml", "head.xml"];
|
|
835
842
|
for (const hp of headerPaths) {
|
|
@@ -847,6 +854,10 @@ async function extractHwpxStyles(zip, decompressed) {
|
|
|
847
854
|
if (!doc.documentElement) continue;
|
|
848
855
|
parseCharProperties(doc, result.charProperties);
|
|
849
856
|
parseStyleElements(doc, result.styles);
|
|
857
|
+
const domDoc = doc;
|
|
858
|
+
parseNumberings(domDoc, result.numberings);
|
|
859
|
+
parseBullets(domDoc, result.bullets);
|
|
860
|
+
parseParaHeadings(domDoc, result.paraHeadings);
|
|
850
861
|
break;
|
|
851
862
|
} catch {
|
|
852
863
|
continue;
|
|
@@ -904,6 +915,162 @@ function parseStyleElements(doc, map) {
|
|
|
904
915
|
}
|
|
905
916
|
}
|
|
906
917
|
}
|
|
918
|
+
/**
 * Collects numbering definitions from a parsed header document into `map`.
 *
 * Looks for `hh:numbering` elements first and falls back to unprefixed
 * `numbering` elements only if the map is still empty afterwards.
 * For every numbering element that has an `id`, each direct `paraHead` child
 * (namespace prefix ignored) with a `level` in 1..10 contributes one entry:
 * `{ numFormat, text, start }`, where `numFormat` defaults to "DIGIT",
 * `text` is the child's text content, and `start` defaults to 1.
 * Numbering elements that yield no valid head are not stored.
 *
 * @param {object} doc - DOM-like document exposing `getElementsByTagName`.
 * @param {Map} map - Receives `id -> { heads: Map<level, head> }`; mutated in place.
 * @returns {void}
 */
function parseNumberings(doc, map) {
  for (const qualifiedName of ["hh:numbering", "numbering"]) {
    const numberingEls = doc.getElementsByTagName(qualifiedName);
    for (let idx = 0; idx < numberingEls.length; idx++) {
      const numberingEl = numberingEls[idx];
      const numberingId = numberingEl.getAttribute("id") || "";
      if (!numberingId) continue;

      const heads = /* @__PURE__ */ new Map();
      const kids = numberingEl.childNodes;
      for (let k = 0; k < kids.length; k++) {
        const kid = kids[k];
        // Only element nodes can be paraHead entries.
        if (kid.nodeType !== 1) continue;
        const localName = (kid.tagName || kid.localName || "").replace(/^[^:]+:/, "");
        if (localName !== "paraHead") continue;

        const level = parseInt(kid.getAttribute("level") || "", 10);
        const levelIsValid = !isNaN(level) && level >= 1 && level <= 10;
        if (!levelIsValid) continue;

        const parsedStart = parseInt(kid.getAttribute("start") || "1", 10);
        const numFormat = kid.getAttribute("numFormat") || "DIGIT";
        const text = kid.textContent || "";
        heads.set(level, {
          numFormat,
          text,
          start: isNaN(parsedStart) ? 1 : parsedStart
        });
      }

      if (heads.size > 0) map.set(numberingId, { heads });
    }
    // Stop at the first tag spelling that produced any definition.
    if (map.size > 0) return;
  }
}
|
|
947
|
+
/**
 * Collects bullet definitions from a parsed header document into `map`.
 *
 * Tries `hh:bullet` elements first and unprefixed `bullet` elements only if
 * nothing has been stored yet. An element is stored only when both its `id`
 * and `char` attributes are non-empty.
 *
 * @param {object} doc - DOM-like document exposing `getElementsByTagName`.
 * @param {Map} map - Receives `id -> char`; mutated in place.
 * @returns {void}
 */
function parseBullets(doc, map) {
  const candidates = ["hh:bullet", "bullet"];
  for (let t = 0; t < candidates.length; t++) {
    const bulletEls = doc.getElementsByTagName(candidates[t]);
    for (let i = 0; i < bulletEls.length; i++) {
      const bulletEl = bulletEls[i];
      const bulletId = bulletEl.getAttribute("id") || "";
      const bulletChar = bulletEl.getAttribute("char") || "";
      if (!bulletId || !bulletChar) continue;
      map.set(bulletId, bulletChar);
    }
    // Stop at the first tag spelling that produced any bullet.
    if (map.size > 0) return;
  }
}
|
|
960
|
+
/**
 * Collects paragraph-property heading references from a parsed header
 * document into `map`.
 *
 * Tries `hh:paraPr` elements first and unprefixed `paraPr` elements only if
 * nothing has been stored yet. A paraPr is stored when it has an `id` and a
 * `heading` child whose `type` is "NUMBER", "BULLET" or "OUTLINE". The stored
 * value is `{ type, idRef, level }`, where `idRef` defaults to "0" and
 * `level` is clamped to 0..9 (0 when it cannot be parsed).
 *
 * @param {object} doc - DOM-like document exposing `getElementsByTagName`.
 * @param {Map} map - Receives `paraPr id -> { type, idRef, level }`; mutated in place.
 * @returns {void}
 */
function parseParaHeadings(doc, map) {
  const headingTypes = ["NUMBER", "BULLET", "OUTLINE"];
  for (const qualifiedName of ["hh:paraPr", "paraPr"]) {
    const paraPrEls = doc.getElementsByTagName(qualifiedName);
    for (let i = 0; i < paraPrEls.length; i++) {
      const paraPrEl = paraPrEls[i];
      const paraPrId = paraPrEl.getAttribute("id") || "";
      if (!paraPrId) continue;

      const headingEl = findChildByLocalName(paraPrEl, "heading");
      if (!headingEl) continue;

      const type = headingEl.getAttribute("type") || "NONE";
      if (!headingTypes.includes(type)) continue;

      const rawLevel = parseInt(headingEl.getAttribute("level") || "0", 10);
      const level = isNaN(rawLevel) ? 0 : Math.max(0, Math.min(rawLevel, 9));
      const idRef = headingEl.getAttribute("idRef") || "0";
      map.set(paraPrId, { type, idRef, level });
    }
    // Stop at the first tag spelling that produced any entry.
    if (map.size > 0) return;
  }
}
|
|
982
|
+
// Fourteen Hangul syllables (U+AC00 … U+D558) indexed by formatHeadNumber
// for the HANGUL_SYLLABLE numbering formats.
var HANGUL_SYLLABLE_SEQ = [
  "\uAC00", "\uB098", "\uB2E4", "\uB77C", "\uB9C8", "\uBC14", "\uC0AC",
  "\uC544", "\uC790", "\uCC28", "\uCE74", "\uD0C0", "\uD30C", "\uD558"
].join("");
// Fourteen Hangul compatibility jamo (U+3131 … U+314E) indexed by
// formatHeadNumber for the HANGUL_JAMO numbering formats.
var HANGUL_JAMO_SEQ = [
  "\u3131", "\u3134", "\u3137", "\u3139", "\u3141", "\u3142", "\u3145",
  "\u3147", "\u3148", "\u314A", "\u314B", "\u314C", "\u314D", "\u314E"
].join("");
|
|
984
|
+
/**
 * Converts a number to an upper-case Roman numeral.
 *
 * @param {number} n - Value to convert.
 * @returns {string} The numeral for 1..3999; for `n <= 0` or `n >= 4000`
 *   the plain decimal string `String(n)` is returned instead.
 */
function toRoman(n) {
  if (n <= 0 || n >= 4e3) return String(n);
  const values = [1e3, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1];
  const symbols = ["M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I"];
  let remaining = n;
  let roman = "";
  let i = 0;
  // Greedy subtraction: stay on the current symbol while it still fits,
  // otherwise move on to the next smaller one.
  while (i < values.length) {
    if (remaining >= values[i]) {
      roman += symbols[i];
      remaining -= values[i];
    } else {
      i++;
    }
  }
  return roman;
}
|
|
1010
|
+
/**
 * Renders a counter value in the given numbering format.
 *
 * Values `<= 0` are treated as 1. Cyclic formats (Hangul sequences, Latin
 * letters) wrap around; circled formats use the circled glyph while the value
 * is within the glyph range (20 digits, 14 Hangul, 26 Latin) and otherwise
 * fall back to `(n)` for digits or the uncircled character for the others.
 * Unknown formats are rendered like "DIGIT".
 *
 * @param {number} n - Counter value.
 * @param {string} numFormat - Format name, e.g. "DIGIT", "ROMAN_SMALL".
 * @returns {string} The formatted number.
 */
function formatHeadNumber(n, numFormat) {
  const num = n <= 0 ? 1 : n;
  const cycleThrough = (seq, period) => seq[(num - 1) % period];
  const latinLetter = (firstCharCode) => String.fromCharCode(firstCharCode + (num - 1) % 26);
  const circledOr = (glyphCount, firstCodePoint, fallback) =>
    num <= glyphCount ? String.fromCodePoint(firstCodePoint + num - 1) : fallback();

  // Dispatch table keyed by format name; anything not listed is plain decimal.
  const formatters = new Map([
    ["CIRCLED_DIGIT", () => circledOr(20, 9312, () => `(${num})`)],
    ["HANGUL_SYLLABLE", () => cycleThrough(HANGUL_SYLLABLE_SEQ, HANGUL_SYLLABLE_SEQ.length)],
    ["CIRCLED_HANGUL_SYLLABLE", () => circledOr(14, 12910, () => cycleThrough(HANGUL_SYLLABLE_SEQ, 14))],
    ["HANGUL_JAMO", () => cycleThrough(HANGUL_JAMO_SEQ, HANGUL_JAMO_SEQ.length)],
    ["CIRCLED_HANGUL_JAMO", () => circledOr(14, 12896, () => cycleThrough(HANGUL_JAMO_SEQ, 14))],
    ["LATIN_CAPITAL", () => latinLetter(65)],
    ["LATIN_SMALL", () => latinLetter(97)],
    ["CIRCLED_LATIN_CAPITAL", () => circledOr(26, 9398, () => latinLetter(65))],
    ["CIRCLED_LATIN_SMALL", () => circledOr(26, 9424, () => latinLetter(97))],
    ["ROMAN_CAPITAL", () => toRoman(num)],
    ["ROMAN_SMALL", () => toRoman(num).toLowerCase()]
  ]);
  const format = formatters.get(numFormat);
  return format ? format() : String(num);
}
|
|
1041
|
+
/**
 * Resolves the list/outline heading that applies to a paragraph element and
 * advances the matching numbering counter.
 *
 * The paragraph's `paraPrIDRef` is looked up in `ctx.styleMap.paraHeadings`:
 * - "BULLET": returns `{ prefix }` with the bullet character, or null when
 *   the bullet id is unknown.
 * - "NUMBER" / "OUTLINE": the numbering definition is `ref.idRef` for NUMBER
 *   and `ctx.outlineNumId || "1"` for OUTLINE. If no definition exists, an
 *   OUTLINE still yields `{ headingLevel }`; otherwise null. With a
 *   definition, the counter for the paragraph's level is advanced, all
 *   deeper levels are reset, and the level's format text (default `^N.`)
 *   has each `^1`..`^10` placeholder replaced by the formatted counter of
 *   that level.
 *
 * Counters live in `ctx.shared.numState` (one 11-slot array per numbering
 * id, index = level) and use 0 as the "not started" sentinel.
 *
 * @param {object} paraEl - Paragraph element exposing `getAttribute`.
 * @param {object} ctx - Parse context; reads `styleMap`, `outlineNumId`, `shared.numState`.
 * @returns {{prefix?: string, headingLevel?: number}|null} Heading info, or
 *   null when the paragraph has no resolvable heading. `headingLevel`
 *   (1..6) is only set for OUTLINE headings.
 */
function resolveParaHeading(paraEl, ctx) {
  const styles = ctx.styleMap;
  if (!styles) return null;
  const paraPrId = paraEl.getAttribute("paraPrIDRef");
  if (!paraPrId) return null;
  const ref = styles.paraHeadings.get(paraPrId);
  if (!ref) return null;

  if (ref.type === "BULLET") {
    const bulletChar = styles.bullets.get(ref.idRef);
    return bulletChar ? { prefix: bulletChar } : null;
  }

  const isOutline = ref.type === "OUTLINE";
  const numId = isOutline ? ctx.outlineNumId || "1" : ref.idRef;
  // ref.level is zero-based; numbering heads are keyed 1..10.
  const level = Math.min(ref.level + 1, 10);
  const headingLevel = isOutline ? Math.min(ref.level + 1, 6) : void 0;
  const numDef = styles.numberings.get(numId);
  if (!numDef) return headingLevel ? { headingLevel } : null;

  let counters = ctx.shared.numState.get(numId);
  if (!counters) {
    counters = new Array(11).fill(0);
    ctx.shared.numState.set(numId, counters);
  }

  const head = numDef.heads.get(level);
  if (counters[level] === 0) {
    // Seed from the declared start, but never below 1: 0 is the "not
    // started" sentinel, so a start of 0 (or a negative start that counts up
    // to 0) would be re-seeded forever and the number would never advance.
    // formatHeadNumber renders values <= 0 as 1 anyway, so the first
    // rendered number is unchanged by this clamp.
    const declaredStart = head?.start ?? 1;
    counters[level] = declaredStart >= 1 ? declaredStart : 1;
  } else {
    counters[level] += 1;
  }
  // A new item at this level restarts every deeper level.
  for (let deeper = level + 1; deeper <= 10; deeper++) counters[deeper] = 0;

  const template = head?.text?.trim() || `^${level}.`;
  const prefix = template.replace(/\^(10|[1-9])/g, (_match, digits) => {
    const refLevel = parseInt(digits, 10);
    const refHead = numDef.heads.get(refLevel);
    // Levels that have not been counted yet fall back to their start value.
    const value = counters[refLevel] || refHead?.start || 1;
    return formatHeadNumber(value, refHead?.numFormat || "DIGIT");
  });
  return { prefix, headingLevel };
}
|
|
907
1074
|
async function parseHwpxDocument(buffer, options) {
|
|
908
1075
|
precheckZipSize(buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
|
|
909
1076
|
let zip;
|
|
@@ -940,7 +1107,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
940
1107
|
const pageFilter = options?.pages ? parsePageRange(options.pages, sectionPaths.length) : null;
|
|
941
1108
|
const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
|
|
942
1109
|
const blocks = [];
|
|
943
|
-
const
|
|
1110
|
+
const shared = createSectionShared();
|
|
944
1111
|
let parsedSections = 0;
|
|
945
1112
|
for (let si = 0; si < sectionPaths.length; si++) {
|
|
946
1113
|
if (pageFilter && !pageFilter.has(si + 1)) continue;
|
|
@@ -950,7 +1117,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
950
1117
|
const xml = await file.async("text");
|
|
951
1118
|
decompressed.total += xml.length * 2;
|
|
952
1119
|
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new KordocError("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
953
|
-
blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1,
|
|
1120
|
+
blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, shared));
|
|
954
1121
|
parsedSections++;
|
|
955
1122
|
options?.onProgress?.(parsedSections, totalTarget);
|
|
956
1123
|
} catch (secErr) {
|
|
@@ -958,12 +1125,22 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
958
1125
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
959
1126
|
}
|
|
960
1127
|
}
|
|
1128
|
+
applyPageText(blocks, shared);
|
|
961
1129
|
const images = await extractImagesFromZip(zip, blocks, decompressed, warnings);
|
|
962
1130
|
detectHwpxHeadings(blocks, styleMap);
|
|
963
1131
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
964
1132
|
const markdown = blocksToMarkdown(blocks);
|
|
965
1133
|
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
966
1134
|
}
|
|
1135
|
+
/**
 * Adds the collected page header/footer texts to the block list in place.
 *
 * Each header text becomes a paragraph block with `pageNumber: 1`, inserted
 * at the front in the original order; each footer text becomes a paragraph
 * block (without a `pageNumber`) appended at the end.
 *
 * @param {Array<object>} blocks - Document blocks; mutated.
 * @param {object} shared - Shared state; reads `shared.pageText.headers` / `.footers`.
 * @returns {void}
 */
function applyPageText(blocks, shared) {
  const headerTexts = shared.pageText.headers;
  const footerTexts = shared.pageText.footers;
  if (headerTexts.length > 0) {
    const headerBlocks = headerTexts.map((text) => ({ type: "paragraph", text, pageNumber: 1 }));
    blocks.splice(0, 0, ...headerBlocks);
  }
  if (footerTexts.length > 0) {
    const footerBlocks = footerTexts.map((text) => ({ type: "paragraph", text }));
    blocks.push(...footerBlocks);
  }
}
|
|
967
1144
|
function imageExtToMime(ext) {
|
|
968
1145
|
switch (ext.toLowerCase()) {
|
|
969
1146
|
case "jpg":
|
|
@@ -999,10 +1176,26 @@ function mimeToExt(mime) {
|
|
|
999
1176
|
if (mime.includes("svg")) return "svg";
|
|
1000
1177
|
return "bin";
|
|
1001
1178
|
}
|
|
1179
|
+
/**
 * Recursively gathers every image block, including images nested inside
 * table cells, together with the cell that owns it.
 *
 * @param {Array<object>} blocks - Blocks to scan.
 * @param {Array<{block: object, ownerCell: (object|undefined)}>} out - Receives one entry per image block; mutated.
 * @param {object} [ownerCell] - Cell whose `blocks` are being scanned; undefined for top-level blocks.
 * @param {number} [depth=0] - Current recursion depth; scanning stops beyond MAX_XML_DEPTH.
 * @returns {void}
 */
function collectImageBlocks(blocks, out, ownerCell, depth = 0) {
  if (depth > MAX_XML_DEPTH) return;
  for (const candidate of blocks) {
    if (candidate.type === "image") {
      out.push({ block: candidate, ownerCell });
      continue;
    }
    const isTableWithData = candidate.type === "table" && candidate.table;
    if (!isTableWithData) continue;
    for (const cellRow of candidate.table.cells) {
      for (const tableCell of cellRow) {
        // Only cells that carry structured child blocks can contain images.
        if (!tableCell.blocks?.length) continue;
        collectImageBlocks(tableCell.blocks, out, tableCell, depth + 1);
      }
    }
  }
}
|
|
1002
1193
|
async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
1003
1194
|
const images = [];
|
|
1004
1195
|
let imageIndex = 0;
|
|
1005
|
-
|
|
1196
|
+
const imageBlocks = [];
|
|
1197
|
+
collectImageBlocks(blocks, imageBlocks);
|
|
1198
|
+
for (const { block, ownerCell } of imageBlocks) {
|
|
1006
1199
|
if (block.type !== "image" || !block.text) continue;
|
|
1007
1200
|
const ref = block.text;
|
|
1008
1201
|
const candidates = [
|
|
@@ -1040,6 +1233,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
1040
1233
|
images.push({ filename, data, mimeType });
|
|
1041
1234
|
block.text = filename;
|
|
1042
1235
|
block.imageData = { data, mimeType, filename: ref };
|
|
1236
|
+
if (ownerCell) ownerCell.text = ownerCell.text.replace(``, ``);
|
|
1043
1237
|
found = true;
|
|
1044
1238
|
break;
|
|
1045
1239
|
} catch (err) {
|
|
@@ -1050,6 +1244,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
1050
1244
|
warnings?.push({ page: block.pageNumber, message: `\uC774\uBBF8\uC9C0 \uD30C\uC77C \uC5C6\uC74C: ${ref}`, code: "SKIPPED_IMAGE" });
|
|
1051
1245
|
block.type = "paragraph";
|
|
1052
1246
|
block.text = `[\uC774\uBBF8\uC9C0: ${ref}]`;
|
|
1247
|
+
if (ownerCell) ownerCell.text = ownerCell.text.replace(``, `[\uC774\uBBF8\uC9C0: ${ref}]`);
|
|
1053
1248
|
}
|
|
1054
1249
|
}
|
|
1055
1250
|
return images;
|
|
@@ -1106,7 +1301,7 @@ function extractFromBrokenZip(buffer) {
|
|
|
1106
1301
|
let totalDecompressed = 0;
|
|
1107
1302
|
let entryCount = 0;
|
|
1108
1303
|
let sectionNum = 0;
|
|
1109
|
-
const
|
|
1304
|
+
const shared = createSectionShared();
|
|
1110
1305
|
while (pos < data.length - 30) {
|
|
1111
1306
|
if (data[pos] !== 80 || data[pos + 1] !== 75 || data[pos + 2] !== 3 || data[pos + 3] !== 4) {
|
|
1112
1307
|
pos++;
|
|
@@ -1153,12 +1348,13 @@ function extractFromBrokenZip(buffer) {
|
|
|
1153
1348
|
totalDecompressed += content.length * 2;
|
|
1154
1349
|
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new KordocError("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
|
|
1155
1350
|
sectionNum++;
|
|
1156
|
-
blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum,
|
|
1351
|
+
blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, shared));
|
|
1157
1352
|
} catch {
|
|
1158
1353
|
continue;
|
|
1159
1354
|
}
|
|
1160
1355
|
}
|
|
1161
1356
|
if (blocks.length === 0) throw new KordocError("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
1357
|
+
applyPageText(blocks, shared);
|
|
1162
1358
|
const markdown = blocksToMarkdown(blocks);
|
|
1163
1359
|
return { markdown, blocks, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
1164
1360
|
}
|
|
@@ -1203,6 +1399,7 @@ function parseSectionPathsFromManifest(xml) {
|
|
|
1203
1399
|
return Array.from(idToHref.entries()).filter(([id]) => isSectionId(id)).sort((a, b) => a[0].localeCompare(b[0])).map(([, href]) => href);
|
|
1204
1400
|
}
|
|
1205
1401
|
function detectHwpxHeadings(blocks, styleMap) {
|
|
1402
|
+
if (blocks.some((b) => b.type === "heading")) return;
|
|
1206
1403
|
let baseFontSize = 0;
|
|
1207
1404
|
const sizeFreq = /* @__PURE__ */ new Map();
|
|
1208
1405
|
for (const b of blocks) {
|
|
@@ -1238,39 +1435,73 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
1238
1435
|
}
|
|
1239
1436
|
}
|
|
1240
1437
|
}
|
|
1241
|
-
function
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
const
|
|
1245
|
-
const
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1438
|
+
/**
 * Builds the table IR from the collected rows and then copies per-cell
 * metadata (`blocks`, `isHeader`) from the source cells onto the IR cells.
 *
 * Only source cells that are headers, or that have `hasStructure` with a
 * non-empty `blocks` array, are transferred. The matching IR cell is found
 * by `rowAddr` / `colAddr` when both are set and the text agrees; otherwise
 * the first not-yet-claimed IR cell with the same trimmed text, colSpan and
 * rowSpan is used. Each IR cell is claimed at most once; source cells with
 * no match are skipped.
 *
 * @param {object} state - Table parse state; reads `rows` and optional `caption`.
 * @returns {object} The table returned by buildTable, with `caption` and cell metadata applied.
 */
function buildTableWithCellMeta(state) {
  const table = buildTable(state.rows);
  if (state.caption) table.caption = state.caption;
  const claimed = /* @__PURE__ */ new Set();

  // Preferred lookup: the address recorded on the source cell.
  const findByAddress = (src, text) => {
    if (src.rowAddr === void 0 || src.colAddr === void 0) return void 0;
    const cand = table.cells[src.rowAddr]?.[src.colAddr];
    return cand && cand.text === text && !claimed.has(cand) ? cand : void 0;
  };

  // Fallback lookup: first unclaimed cell with identical text and spans.
  const findByContent = (src, text) => {
    for (const irRow of table.cells) {
      for (const cand of irRow) {
        if (claimed.has(cand)) continue;
        if (cand.text !== text || cand.colSpan !== src.colSpan || cand.rowSpan !== src.rowSpan) continue;
        return cand;
      }
    }
    return void 0;
  };

  for (const srcRow of state.rows) {
    for (const src of srcRow) {
      const needsBlocks = src.hasStructure && src.blocks && src.blocks.length > 0;
      if (!needsBlocks && !src.isHeader) continue;
      const trimmed = src.text.trim();
      const target = findByAddress(src, trimmed) || findByContent(src, trimmed);
      if (!target) continue;
      claimed.add(target);
      if (needsBlocks) target.blocks = src.blocks;
      if (src.isHeader) target.isHeader = true;
    }
  }
  return table;
}
|
|
1470
|
+
/**
 * Finalizes a table that has just been walked and restores the enclosing
 * table context.
 *
 * - A table without rows emits only its caption (if any) as a paragraph.
 * - A table nested in a parent cell is attached to that cell's `blocks`,
 *   marks the cell with `hasStructure`, and appends a flattened text form
 *   (caption first, then the converted rows) to the cell text.
 * - Any other table is pushed to `blocks` as a table block.
 *
 * @param {object} newTable - State of the finished table (`rows`, optional `caption`).
 * @param {Array<object>} tableStack - Stack of enclosing table contexts; its top entry is popped.
 * @param {Array<object>} blocks - Output block list; mutated.
 * @param {object} ctx - Parse context; reads `sectionNum`.
 * @returns {object|null} The popped parent table context, or null when the stack was empty.
 */
function completeTable(newTable, tableStack, blocks, ctx) {
  const parentTable = tableStack.length > 0 ? tableStack.pop() : null;

  if (newTable.rows.length === 0) {
    if (newTable.caption) {
      blocks.push({ type: "paragraph", text: newTable.caption, pageNumber: ctx.sectionNum });
    }
    return parentTable;
  }

  const tableBlock = { type: "table", table: buildTableWithCellMeta(newTable), pageNumber: ctx.sectionNum };
  const hostCell = parentTable?.cell;
  if (!hostCell) {
    blocks.push(tableBlock);
    return parentTable;
  }

  // Nested table: keep the structured block and a flat text rendition.
  if (hostCell.blocks == null) hostCell.blocks = [];
  hostCell.blocks.push(tableBlock);
  hostCell.hasStructure = true;
  let flattened = convertTableToText(newTable.rows);
  if (newTable.caption) {
    flattened = newTable.caption + (flattened ? "\n" + flattened : "");
  }
  if (flattened) {
    hostCell.text += (hostCell.text ? "\n" : "") + flattened;
  }
  return parentTable;
}
|
|
1268
|
-
function parseSectionXml(xml, styleMap, warnings, sectionNum,
|
|
1490
|
+
/**
 * Parses one section XML string into a list of blocks.
 *
 * Builds the walk context (`styleMap`, `warnings`, `sectionNum`, `shared`),
 * resets `shared.track.deleteDepth` to 0, records the section's
 * `outlineShapeIDRef` (from the first `hp:secPr`, or the first `secPr` if no
 * prefixed element exists) as `ctx.outlineNumId`, and then walks the
 * document element.
 *
 * @param {string} xml - Section XML text.
 * @param {object|undefined} styleMap - Style information passed through to the walker.
 * @param {Array<object>|undefined} warnings - Warning sink passed to the XML parser and the walker.
 * @param {number} sectionNum - Section number stored on the context.
 * @param {object} [shared] - State shared across sections; a fresh one is created when nullish.
 * @returns {Array<object>} The blocks produced by walkSection; empty when the document has no root element.
 */
function parseSectionXml(xml, styleMap, warnings, sectionNum, shared) {
  const doc = createXmlParser(warnings).parseFromString(stripDtd(xml), "text/xml");
  if (!doc.documentElement) return [];

  const ctx = { styleMap, warnings, sectionNum, shared: shared ?? createSectionShared() };
  ctx.shared.track.deleteDepth = 0;

  // Prefer the namespaced element; only query the bare name if none exists.
  let secPrEls = doc.getElementsByTagName("hp:secPr");
  if (secPrEls.length === 0) secPrEls = doc.getElementsByTagName("secPr");
  if (secPrEls.length > 0) {
    const outlineId = secPrEls[0].getAttribute("outlineShapeIDRef");
    if (outlineId) ctx.outlineNumId = outlineId;
  }

  const blocks = [];
  walkSection(doc.documentElement, blocks, null, [], ctx);
  return blocks;
}
|
|
@@ -1306,18 +1537,16 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
1306
1537
|
if (tableCtx) tableStack.push(tableCtx);
|
|
1307
1538
|
const newTable = { rows: [], currentRow: [], cell: null };
|
|
1308
1539
|
walkSection(el, blocks, newTable, tableStack, ctx, depth + 1);
|
|
1309
|
-
|
|
1310
|
-
if (tableStack.length > 0) {
|
|
1311
|
-
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
1312
|
-
} else {
|
|
1313
|
-
blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: ctx.sectionNum });
|
|
1314
|
-
tableCtx = null;
|
|
1315
|
-
}
|
|
1316
|
-
} else {
|
|
1317
|
-
tableCtx = tableStack.length > 0 ? tableStack.pop() : null;
|
|
1318
|
-
}
|
|
1540
|
+
tableCtx = completeTable(newTable, tableStack, blocks, ctx);
|
|
1319
1541
|
break;
|
|
1320
1542
|
}
|
|
1543
|
+
// 표/도표 캡션 — IRTable.caption으로 보존 (v3.0, 기존 무음 드롭 수정)
|
|
1544
|
+
case "caption":
|
|
1545
|
+
if (tableCtx) {
|
|
1546
|
+
const capText = collectSubListText(el, ctx);
|
|
1547
|
+
if (capText) tableCtx.caption = (tableCtx.caption ? tableCtx.caption + "\n" : "") + capText;
|
|
1548
|
+
}
|
|
1549
|
+
break;
|
|
1321
1550
|
case "tr":
|
|
1322
1551
|
if (tableCtx) {
|
|
1323
1552
|
tableCtx.currentRow = [];
|
|
@@ -1329,6 +1558,7 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
1329
1558
|
case "tc":
|
|
1330
1559
|
if (tableCtx) {
|
|
1331
1560
|
tableCtx.cell = { text: "", colSpan: 1, rowSpan: 1 };
|
|
1561
|
+
if (el.getAttribute("header") === "1" || el.getAttribute("header") === "true") tableCtx.cell.isHeader = true;
|
|
1332
1562
|
walkSection(el, blocks, tableCtx, tableStack, ctx, depth + 1);
|
|
1333
1563
|
if (tableCtx.cell) {
|
|
1334
1564
|
tableCtx.currentRow.push(tableCtx.cell);
|
|
@@ -1355,30 +1585,52 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
1355
1585
|
}
|
|
1356
1586
|
break;
|
|
1357
1587
|
case "p": {
|
|
1358
|
-
const { text, href, footnote, style } = extractParagraphInfo(el, ctx.styleMap);
|
|
1588
|
+
const { text: rawText, href, footnote, style } = extractParagraphInfo(el, ctx.styleMap, ctx);
|
|
1589
|
+
let text = rawText;
|
|
1590
|
+
let headingLevel;
|
|
1591
|
+
if (text) {
|
|
1592
|
+
const ph = resolveParaHeading(el, ctx);
|
|
1593
|
+
if (ph?.prefix) text = ph.prefix + " " + text;
|
|
1594
|
+
headingLevel = ph?.headingLevel;
|
|
1595
|
+
}
|
|
1359
1596
|
if (text) {
|
|
1360
1597
|
if (tableCtx?.cell) {
|
|
1361
|
-
|
|
1598
|
+
const cell = tableCtx.cell;
|
|
1599
|
+
if (footnote) text += ` (\uC8FC: ${footnote})`;
|
|
1600
|
+
cell.text += (cell.text ? "\n" : "") + text;
|
|
1601
|
+
(cell.blocks ??= []).push({ type: "paragraph", text, pageNumber: ctx.sectionNum });
|
|
1362
1602
|
} else if (!tableCtx) {
|
|
1363
|
-
const block = { type: "paragraph", text, pageNumber: ctx.sectionNum };
|
|
1603
|
+
const block = { type: headingLevel ? "heading" : "paragraph", text, pageNumber: ctx.sectionNum };
|
|
1604
|
+
if (headingLevel) block.level = headingLevel;
|
|
1364
1605
|
if (style) block.style = style;
|
|
1365
1606
|
if (href) block.href = href;
|
|
1366
1607
|
if (footnote) block.footnoteText = footnote;
|
|
1367
1608
|
blocks.push(block);
|
|
1609
|
+
} else {
|
|
1610
|
+
blocks.push({ type: "paragraph", text, pageNumber: ctx.sectionNum });
|
|
1368
1611
|
}
|
|
1369
1612
|
}
|
|
1370
1613
|
tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, ctx, depth + 1);
|
|
1371
1614
|
break;
|
|
1372
1615
|
}
|
|
1373
|
-
//
|
|
1616
|
+
// 이미지/그림/글상자 — 이미지·텍스트·캡션 병행 추출
|
|
1374
1617
|
case "pic":
|
|
1375
1618
|
case "shape":
|
|
1376
1619
|
case "drawingObject": {
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1620
|
+
if (tableCtx?.cell) {
|
|
1621
|
+
const sink = [];
|
|
1622
|
+
handleShape(el, sink, ctx);
|
|
1623
|
+
mergeBlocksIntoCell(tableCtx.cell, sink);
|
|
1624
|
+
} else {
|
|
1625
|
+
handleShape(el, blocks, ctx);
|
|
1626
|
+
}
|
|
1627
|
+
break;
|
|
1628
|
+
}
|
|
1629
|
+
// 메모 — 본문 혼입 차단 (v3.0)
|
|
1630
|
+
case "memogroup":
|
|
1631
|
+
case "memo": {
|
|
1632
|
+
if (ctx.warnings && extractTextFromNode(el)) {
|
|
1633
|
+
ctx.warnings.push({ page: ctx.sectionNum, message: "\uBA54\uBAA8 \uD14D\uC2A4\uD2B8 \uBCF8\uBB38 \uC81C\uC678: memogroup", code: "HIDDEN_TEXT_FILTERED" });
|
|
1382
1634
|
}
|
|
1383
1635
|
break;
|
|
1384
1636
|
}
|
|
@@ -1388,6 +1640,73 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
1388
1640
|
}
|
|
1389
1641
|
}
|
|
1390
1642
|
}
|
|
1643
|
+
/**
 * Extracts everything usable from a picture/shape/drawing element into `sink`.
 *
 * In order: an image block when the element has an image reference (with the
 * user's shape comment as `footnoteText` when userShapeComment returns one),
 * the blocks of a descendant `drawText`, and a paragraph for the element's
 * `caption` child. When the element has neither an image reference nor a
 * `drawText`, a "SKIPPED_IMAGE" warning is recorded (only if `ctx.warnings`
 * and `ctx.sectionNum` are both truthy).
 *
 * @param {object} el - The shape element.
 * @param {Array<object>} sink - Receives the produced blocks; mutated.
 * @param {object} ctx - Parse context; reads `sectionNum` and `warnings`.
 * @returns {void}
 */
function handleShape(el, sink, ctx) {
  const imageRef = extractImageRef(el);
  const drawTextNode = findDescendant(el, "drawText");
  const page = ctx.sectionNum;

  if (imageRef) {
    const imageBlock = { type: "image", text: imageRef, pageNumber: page };
    const altText = userShapeComment(el);
    if (altText) imageBlock.footnoteText = altText;
    sink.push(imageBlock);
  }

  if (drawTextNode) extractDrawTextBlocks(drawTextNode, sink, ctx);

  const captionEl = findChildByLocalName(el, "caption");
  if (captionEl) {
    const captionText = collectSubListText(captionEl, ctx);
    if (captionText) sink.push({ type: "paragraph", text: captionText, pageNumber: ctx.sectionNum });
  }

  const nothingExtracted = !(imageRef || drawTextNode);
  if (nothingExtracted && ctx.warnings && ctx.sectionNum) {
    const localTag = (el.tagName || el.localName || "").replace(/^[^:]+:/, "");
    ctx.warnings.push({ page: ctx.sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
  }
}
|
|
1665
|
+
/**
 * Returns the text of the element's `shapeComment` child when it looks like
 * a user-written description.
 *
 * The comment is discarded when it is missing or empty, when it starts with
 * "그림입니다", or when the whole text is a short "<name>입니다" sentence
 * (1–20 characters before "입니다", optionally preceded by
 * "모서리가 둥근 " / "둥근 " and followed by a period).
 * NOTE(review): those discarded patterns look like auto-generated default
 * comments — confirm against the producing application.
 *
 * @param {object} el - Shape element to inspect.
 * @returns {string|undefined} The comment text, or undefined when discarded.
 */
function userShapeComment(el) {
  const commentNode = findChildByLocalName(el, "shapeComment");
  if (commentNode) {
    const comment = extractTextFromNode(commentNode);
    if (
      comment &&
      !/^그림입니다/.test(comment) &&
      !/^(?:모서리가 둥근 |둥근 )?[^\n]{1,20}입니다\.?$/.test(comment)
    ) {
      return comment;
    }
  }
  return void 0;
}
|
|
1674
|
+
/**
 * Merges blocks collected for a shape into the table cell that contains it.
 *
 * - Paragraph/heading blocks with text: the text is appended to `cell.text`
 *   on a new line and the block is added to `cell.blocks`.
 * - Image/table blocks: the block is added to `cell.blocks` and the cell is
 *   flagged with `hasStructure`; an image with text also appends a
 *   placeholder line to `cell.text`.
 * - Anything else (including text-less paragraphs) is ignored.
 *
 * @param {object} cell - Target cell; `text`, `blocks` and `hasStructure` are mutated.
 * @param {Array<object>} sink - Blocks to merge, in order.
 * @returns {void}
 */
function mergeBlocksIntoCell(cell, sink) {
  const appendLine = (line) => {
    cell.text += (cell.text ? "\n" : "") + line;
  };
  const attach = (block) => {
    (cell.blocks ??= []).push(block);
  };

  for (const item of sink) {
    switch (item.type) {
      case "paragraph":
      case "heading":
        if (item.text) {
          appendLine(item.text);
          attach(item);
        }
        break;
      case "image":
        // NOTE(review): the placeholder literal is empty in this view of the
        // file — possibly stripped by the tool that rendered it; confirm
        // against the published package before relying on it.
        if (item.text) appendLine(``);
        attach(item);
        cell.hasStructure = true;
        break;
      case "table":
        attach(item);
        cell.hasStructure = true;
        break;
      default:
        break;
    }
  }
}
|
|
1689
|
+
/**
 * Gathers the paragraph text found beneath an element, one paragraph per line.
 *
 * Direct `p` / `para` children (namespace prefix ignored) contribute the text
 * returned by extractParagraphInfo; `tbl` children are skipped; every other
 * element child is searched recursively. Recursion stops once `depth`
 * exceeds 10.
 *
 * @param {object} el - Element whose children are scanned.
 * @param {object} ctx - Parse context; `ctx.styleMap` and `ctx` are passed to extractParagraphInfo.
 * @param {number} [depth=0] - Current recursion depth.
 * @returns {string} The non-empty pieces joined with "\n" and trimmed; "" when nothing was found.
 */
function collectSubListText(el, ctx, depth = 0) {
  if (depth > 10) return "";
  const nodes = el.childNodes;
  if (!nodes) return "";

  const pieces = [];
  for (let idx = 0; idx < nodes.length; idx++) {
    const node = nodes[idx];
    if (node.nodeType !== 1) continue;
    const localName = (node.tagName || node.localName || "").replace(/^[^:]+:/, "");
    if (localName === "tbl") continue;

    const isParagraph = localName === "p" || localName === "para";
    const piece = isParagraph
      ? extractParagraphInfo(node, ctx.styleMap, ctx).text
      : collectSubListText(node, ctx, depth + 1);
    if (piece) pieces.push(piece);
  }
  return pieces.join("\n").trim();
}
|
|
1391
1710
|
function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
1392
1711
|
if (depth > MAX_XML_DEPTH) return tableCtx;
|
|
1393
1712
|
const children = node.childNodes;
|
|
@@ -1405,34 +1724,25 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth =
|
|
|
1405
1724
|
if (tableCtx) tableStack.push(tableCtx);
|
|
1406
1725
|
const newTable = { rows: [], currentRow: [], cell: null };
|
|
1407
1726
|
walkSection(el, blocks, newTable, tableStack, ctx, d + 1);
|
|
1408
|
-
|
|
1409
|
-
if (tableStack.length > 0) {
|
|
1410
|
-
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
1411
|
-
} else {
|
|
1412
|
-
blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: ctx.sectionNum });
|
|
1413
|
-
tableCtx = null;
|
|
1414
|
-
}
|
|
1415
|
-
} else {
|
|
1416
|
-
tableCtx = tableStack.length > 0 ? tableStack.pop() : null;
|
|
1417
|
-
}
|
|
1727
|
+
tableCtx = completeTable(newTable, tableStack, blocks, ctx);
|
|
1418
1728
|
} else if (localTag === "pic" || localTag === "shape" || localTag === "drawingObject") {
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1729
|
+
if (tableCtx?.cell) {
|
|
1730
|
+
const sink = [];
|
|
1731
|
+
handleShape(el, sink, ctx);
|
|
1732
|
+
mergeBlocksIntoCell(tableCtx.cell, sink);
|
|
1422
1733
|
} else {
|
|
1423
|
-
|
|
1424
|
-
if (imgRef) {
|
|
1425
|
-
blocks.push({ type: "image", text: imgRef, pageNumber: ctx.sectionNum });
|
|
1426
|
-
} else if (ctx.warnings && ctx.sectionNum) {
|
|
1427
|
-
ctx.warnings.push({ page: ctx.sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
|
|
1428
|
-
}
|
|
1734
|
+
handleShape(el, blocks, ctx);
|
|
1429
1735
|
}
|
|
1430
1736
|
} else if (localTag === "drawText") {
|
|
1431
|
-
|
|
1737
|
+
if (tableCtx?.cell) {
|
|
1738
|
+
const sink = [];
|
|
1739
|
+
extractDrawTextBlocks(el, sink, ctx);
|
|
1740
|
+
mergeBlocksIntoCell(tableCtx.cell, sink);
|
|
1741
|
+
} else {
|
|
1742
|
+
extractDrawTextBlocks(el, blocks, ctx);
|
|
1743
|
+
}
|
|
1432
1744
|
} else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
|
|
1433
1745
|
walkChildren(el, d + 1);
|
|
1434
|
-
} else if (localTag === "run") {
|
|
1435
|
-
tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, ctx, depth + 1);
|
|
1436
1746
|
}
|
|
1437
1747
|
}
|
|
1438
1748
|
};
|
|
@@ -1453,7 +1763,7 @@ function findDescendant(node, targetTag, depth = 0) {
|
|
|
1453
1763
|
}
|
|
1454
1764
|
return null;
|
|
1455
1765
|
}
|
|
1456
|
-
function extractDrawTextBlocks(drawTextNode, blocks,
|
|
1766
|
+
function extractDrawTextBlocks(drawTextNode, blocks, ctx) {
|
|
1457
1767
|
const children = drawTextNode.childNodes;
|
|
1458
1768
|
if (!children) return;
|
|
1459
1769
|
for (let i = 0; i < children.length; i++) {
|
|
@@ -1462,29 +1772,136 @@ function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
|
|
|
1462
1772
|
const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
|
|
1463
1773
|
if (tag === "subList" || tag === "p" || tag === "para") {
|
|
1464
1774
|
if (tag === "subList") {
|
|
1465
|
-
extractDrawTextBlocks(child, blocks,
|
|
1775
|
+
extractDrawTextBlocks(child, blocks, ctx);
|
|
1466
1776
|
} else {
|
|
1467
|
-
const info = extractParagraphInfo(child, styleMap);
|
|
1468
|
-
|
|
1777
|
+
const info = extractParagraphInfo(child, ctx.styleMap, ctx);
|
|
1778
|
+
let text = info.text.trim();
|
|
1469
1779
|
if (text) {
|
|
1470
|
-
|
|
1780
|
+
const ph = resolveParaHeading(child, ctx);
|
|
1781
|
+
if (ph?.prefix) text = ph.prefix + " " + text;
|
|
1782
|
+
const block = { type: "paragraph", text, style: info.style ?? void 0, pageNumber: ctx.sectionNum };
|
|
1783
|
+
if (info.href) block.href = info.href;
|
|
1784
|
+
if (info.footnote) block.footnoteText = info.footnote;
|
|
1785
|
+
blocks.push(block);
|
|
1471
1786
|
}
|
|
1787
|
+
walkParagraphChildren(child, blocks, null, [], ctx);
|
|
1472
1788
|
}
|
|
1473
1789
|
}
|
|
1474
1790
|
}
|
|
1475
1791
|
}
|
|
1476
|
-
function
|
|
1792
|
+
/**
 * Pulls the hyperlink target out of a `fieldBegin` element.
 *
 * Only fields whose `type` attribute is HYPERLINK (case-insensitive) are
 * considered. The URL is read from the first `stringParam` child named "Path"
 * under the field's `parameters` element that yields a non-empty value accepted
 * by `sanitizeHref`.
 *
 * @param {Element} fieldBegin - The field-begin element to inspect.
 * @returns {string|undefined} The sanitized URL, or `undefined` when the field
 *   is not a hyperlink or carries no usable Path parameter.
 */
function extractHyperlinkHref(fieldBegin) {
  const fieldType = (fieldBegin.getAttribute("type") || "").toUpperCase();
  if (fieldType !== "HYPERLINK") return void 0;

  const paramList = findChildByLocalName(fieldBegin, "parameters");
  if (!paramList) return void 0;

  const nodes = paramList.childNodes;
  if (!nodes) return void 0;

  for (let idx = 0; idx < nodes.length; idx++) {
    const node = nodes[idx];
    if (node.nodeType !== 1) continue;

    // Compare on the local name so namespace prefixes do not matter.
    const localName = (node.tagName || node.localName || "").replace(/^[^:]+:/, "");
    const isPathParam = localName === "stringParam" && node.getAttribute("name") === "Path";
    if (!isPathParam) continue;

    const rawUrl = (node.textContent || "").trim();
    if (!rawUrl) continue;

    // Drop a duplicated leading scheme such as "http://https://host".
    const deduped = rawUrl.replace(/^https?:\/\/(?=https?:\/\/)/i, "");
    const safe = sanitizeHref(deduped);
    if (safe) return safe;
  }
  return void 0;
}
|
|
1811
|
+
/**
 * Tells whether the walker is currently inside a tracked-change deletion range.
 *
 * @param {object} [ctx] - Parse context; a missing context counts as "not deleted".
 * @returns {boolean} True when `ctx.shared.track.deleteDepth` is above zero.
 */
function isInDeletedRange(ctx) {
  const depth = ctx?.shared.track.deleteDepth ?? 0;
  return depth > 0;
}
|
|
1814
|
+
function extractParagraphInfo(para, styleMap, ctx) {
|
|
1477
1815
|
let text = "";
|
|
1478
1816
|
let href;
|
|
1479
1817
|
let footnote;
|
|
1480
1818
|
let charPrId;
|
|
1819
|
+
const handleCtrl = (ctrlEl) => {
|
|
1820
|
+
const kids2 = ctrlEl.childNodes;
|
|
1821
|
+
if (!kids2) return;
|
|
1822
|
+
for (let j = 0; j < kids2.length; j++) {
|
|
1823
|
+
const k = kids2[j];
|
|
1824
|
+
if (k.nodeType !== 1) continue;
|
|
1825
|
+
const ktag = (k.tagName || k.localName || "").replace(/^[^:]+:/, "");
|
|
1826
|
+
switch (ktag) {
|
|
1827
|
+
// 머리말/꼬리말 — 문서당 1회 수집, 본문 앞/뒤 배치
|
|
1828
|
+
case "header":
|
|
1829
|
+
case "footer": {
|
|
1830
|
+
if (!ctx) break;
|
|
1831
|
+
const t = collectSubListText(k, ctx);
|
|
1832
|
+
if (t) {
|
|
1833
|
+
const bucket = ktag === "header" ? ctx.shared.pageText.headers : ctx.shared.pageText.footers;
|
|
1834
|
+
if (!bucket.includes(t)) bucket.push(t);
|
|
1835
|
+
}
|
|
1836
|
+
break;
|
|
1837
|
+
}
|
|
1838
|
+
// 각주/미주 — 해당 문단의 footnote로 인라인 보존
|
|
1839
|
+
case "footNote":
|
|
1840
|
+
case "endNote": {
|
|
1841
|
+
const noteText = extractTextFromNode(k);
|
|
1842
|
+
if (noteText) footnote = (footnote ? footnote + "; " : "") + noteText;
|
|
1843
|
+
break;
|
|
1844
|
+
}
|
|
1845
|
+
// 하이퍼링크 — fieldBegin type=HYPERLINK의 Path 파라미터
|
|
1846
|
+
case "fieldBegin": {
|
|
1847
|
+
const url = extractHyperlinkHref(k);
|
|
1848
|
+
if (url && !href) href = url;
|
|
1849
|
+
break;
|
|
1850
|
+
}
|
|
1851
|
+
case "fieldEnd":
|
|
1852
|
+
break;
|
|
1853
|
+
// 변경추적 — 삭제 구간(deleteBegin~End)의 텍스트는 출력 제외 (최종본 상태 재현)
|
|
1854
|
+
case "deleteBegin":
|
|
1855
|
+
if (ctx) ctx.shared.track.deleteDepth++;
|
|
1856
|
+
break;
|
|
1857
|
+
case "deleteEnd":
|
|
1858
|
+
if (ctx && ctx.shared.track.deleteDepth > 0) ctx.shared.track.deleteDepth--;
|
|
1859
|
+
break;
|
|
1860
|
+
case "insertBegin":
|
|
1861
|
+
case "insertEnd":
|
|
1862
|
+
break;
|
|
1863
|
+
// 삽입분은 최종본에 포함
|
|
1864
|
+
// 숨은 설명 — 본문 혼입 차단
|
|
1865
|
+
case "hiddenComment": {
|
|
1866
|
+
if (ctx?.warnings && extractTextFromNode(k)) {
|
|
1867
|
+
ctx.warnings.push({ page: ctx.sectionNum, message: "\uC228\uC740 \uC124\uBA85 \uD14D\uC2A4\uD2B8 \uC81C\uC678: hiddenComment", code: "HIDDEN_TEXT_FILTERED" });
|
|
1868
|
+
}
|
|
1869
|
+
break;
|
|
1870
|
+
}
|
|
1871
|
+
// 콘텐츠 없는 제어 요소 — 스킵
|
|
1872
|
+
case "bookmark":
|
|
1873
|
+
case "pageNum":
|
|
1874
|
+
case "pageNumCtrl":
|
|
1875
|
+
case "pageHiding":
|
|
1876
|
+
case "newNum":
|
|
1877
|
+
case "autoNum":
|
|
1878
|
+
case "indexmark":
|
|
1879
|
+
case "colPr":
|
|
1880
|
+
break;
|
|
1881
|
+
// 미지원 요소 — 텍스트를 가졌으면 무음 손실 대신 경고
|
|
1882
|
+
default: {
|
|
1883
|
+
if (ctx?.warnings && extractTextFromNode(k)) {
|
|
1884
|
+
ctx.warnings.push({ page: ctx.sectionNum, message: `\uBBF8\uC9C0\uC6D0 \uC81C\uC5B4 \uC694\uC18C\uC758 \uD14D\uC2A4\uD2B8 \uC190\uC2E4: ${ktag}`, code: "UNSUPPORTED_ELEMENT" });
|
|
1885
|
+
}
|
|
1886
|
+
}
|
|
1887
|
+
}
|
|
1888
|
+
}
|
|
1889
|
+
};
|
|
1481
1890
|
const walk = (node) => {
|
|
1482
1891
|
const children = node.childNodes;
|
|
1483
1892
|
if (!children) return;
|
|
1484
1893
|
for (let i = 0; i < children.length; i++) {
|
|
1485
1894
|
const child = children[i];
|
|
1486
1895
|
if (child.nodeType === 3) {
|
|
1487
|
-
|
|
1896
|
+
const t = child.textContent || "";
|
|
1897
|
+
if (isInDeletedRange(ctx)) {
|
|
1898
|
+
if (t && ctx && !ctx.shared.track.warned) {
|
|
1899
|
+
ctx.shared.track.warned = true;
|
|
1900
|
+
ctx.warnings?.push({ page: ctx.sectionNum, message: "\uBCC0\uACBD\uCD94\uC801 \uC0AD\uC81C \uD14D\uC2A4\uD2B8 \uCD9C\uB825 \uC81C\uC678", code: "HIDDEN_TEXT_FILTERED" });
|
|
1901
|
+
}
|
|
1902
|
+
} else {
|
|
1903
|
+
text += t;
|
|
1904
|
+
}
|
|
1488
1905
|
continue;
|
|
1489
1906
|
}
|
|
1490
1907
|
if (child.nodeType !== 1) continue;
|
|
@@ -1506,6 +1923,10 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1506
1923
|
case "br":
|
|
1507
1924
|
if ((child.getAttribute("type") || "line") === "line") text += "\n";
|
|
1508
1925
|
break;
|
|
1926
|
+
case "lineBreak":
|
|
1927
|
+
text += "\n";
|
|
1928
|
+
break;
|
|
1929
|
+
// 강제 줄바꿈 — ref 추출기·소스맵 스캐너와 동일 모델
|
|
1509
1930
|
case "fwSpace":
|
|
1510
1931
|
case "hwSpace":
|
|
1511
1932
|
text += " ";
|
|
@@ -1532,9 +1953,26 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1532
1953
|
if (noteText) footnote = (footnote ? footnote + "; " : "") + noteText;
|
|
1533
1954
|
break;
|
|
1534
1955
|
}
|
|
1535
|
-
// 제어 요소 —
|
|
1956
|
+
// 제어 요소 — 선별 순회 (머리말/꼬리말/각주/하이퍼링크/변경추적, v3.0)
|
|
1536
1957
|
case "ctrl":
|
|
1537
|
-
|
|
1958
|
+
handleCtrl(child);
|
|
1959
|
+
break;
|
|
1960
|
+
// run 직계 fieldBegin (비표준 경로) — 하이퍼링크 URL만 추출
|
|
1961
|
+
case "fieldBegin": {
|
|
1962
|
+
const url = extractHyperlinkHref(child);
|
|
1963
|
+
if (url && !href) href = url;
|
|
1964
|
+
break;
|
|
1965
|
+
}
|
|
1966
|
+
// run 직계 변경추적 마커 (비표준 경로)
|
|
1967
|
+
case "deleteBegin":
|
|
1968
|
+
if (ctx) ctx.shared.track.deleteDepth++;
|
|
1969
|
+
break;
|
|
1970
|
+
case "deleteEnd":
|
|
1971
|
+
if (ctx && ctx.shared.track.deleteDepth > 0) ctx.shared.track.deleteDepth--;
|
|
1972
|
+
break;
|
|
1973
|
+
case "insertBegin":
|
|
1974
|
+
case "insertEnd":
|
|
1975
|
+
break;
|
|
1538
1976
|
case "fieldEnd":
|
|
1539
1977
|
case "parameters":
|
|
1540
1978
|
case "stringParam":
|
|
@@ -1548,7 +1986,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1548
1986
|
case "linesegarray":
|
|
1549
1987
|
case "lineseg":
|
|
1550
1988
|
// 레이아웃 정보
|
|
1551
|
-
// 도형/이미지 요소 — 대체텍스트("사각형입니다." 등) 누출 방지
|
|
1989
|
+
// 도형/이미지 요소 — 대체텍스트("사각형입니다." 등) 누출 방지 (walkParagraphChildren에서 처리)
|
|
1552
1990
|
case "pic":
|
|
1553
1991
|
case "shape":
|
|
1554
1992
|
case "drawingObject":
|
|
@@ -1635,8 +2073,14 @@ var TAG_CHAR_SHAPE = 68;
|
|
|
1635
2073
|
var TAG_CTRL_HEADER = 71;
|
|
1636
2074
|
var TAG_LIST_HEADER = 72;
|
|
1637
2075
|
var TAG_TABLE = 77;
|
|
2076
|
+
var TAG_SHAPE_COMPONENT = 76;
|
|
2077
|
+
var TAG_SHAPE_COMPONENT_PICTURE = 85;
|
|
2078
|
+
var TAG_SHAPE_COMPONENT_CONTAINER = 86;
|
|
1638
2079
|
var TAG_EQEDIT = 88;
|
|
2080
|
+
var TAG_BIN_DATA = 18;
|
|
1639
2081
|
var TAG_DOC_CHAR_SHAPE = 21;
|
|
2082
|
+
var TAG_NUMBERING = 23;
|
|
2083
|
+
var TAG_BULLET = 24;
|
|
1640
2084
|
var TAG_DOC_PARA_SHAPE = 25;
|
|
1641
2085
|
var TAG_DOC_STYLE = 26;
|
|
1642
2086
|
var CHAR_LINE = 0;
|
|
@@ -1692,15 +2136,76 @@ function parseFileHeader(data) {
|
|
|
1692
2136
|
flags: data.readUInt32LE(36)
|
|
1693
2137
|
};
|
|
1694
2138
|
}
|
|
2139
|
+
/**
 * Reads a length-prefixed UTF-16LE string from a buffer.
 *
 * The string is stored as a little-endian uint16 character count followed by
 * that many 2-byte code units.
 *
 * @param {Buffer} data - Source bytes.
 * @param {number} offset - Byte offset of the length prefix.
 * @returns {{value: string, next: number}} The decoded text and the offset just
 *   past it. An empty or truncated string yields `value: ""` with `next`
 *   pointing right after the length prefix. If even the prefix does not fit,
 *   `next` is the buffer length.
 */
function readHwpString(data, offset) {
  const total = data.length;
  if (offset + 2 > total) return { value: "", next: total };

  const charCount = data.readUInt16LE(offset);
  const bodyStart = offset + 2;
  const bodyEnd = bodyStart + charCount * 2;

  const readable = charCount !== 0 && bodyEnd <= total;
  if (!readable) return { value: "", next: bodyStart };

  return { value: data.toString("utf16le", bodyStart, bodyEnd), next: bodyEnd };
}
|
|
1695
2147
|
function parseDocInfo(records) {
|
|
1696
2148
|
const charShapes = [];
|
|
1697
2149
|
const paraShapes = [];
|
|
1698
2150
|
const styles = [];
|
|
2151
|
+
const binData = [];
|
|
2152
|
+
const numberings = [];
|
|
2153
|
+
const bullets = [];
|
|
1699
2154
|
for (const rec of records) {
|
|
1700
2155
|
if (rec.tagId === TAG_DOC_PARA_SHAPE && rec.data.length >= 4) {
|
|
1701
|
-
const
|
|
1702
|
-
const
|
|
1703
|
-
|
|
2156
|
+
const attr1 = rec.data.readUInt32LE(0);
|
|
2157
|
+
const headType = attr1 >>> 23 & 3;
|
|
2158
|
+
const paraLevel = attr1 >>> 25 & 7;
|
|
2159
|
+
const numberingId = rec.data.length >= 32 ? rec.data.readUInt16LE(30) : 0;
|
|
2160
|
+
paraShapes.push({ headType, paraLevel, numberingId });
|
|
2161
|
+
}
|
|
2162
|
+
if (rec.tagId === TAG_BIN_DATA && rec.data.length >= 2) {
|
|
2163
|
+
const attr = rec.data.readUInt16LE(0);
|
|
2164
|
+
const typeBits = attr & 15;
|
|
2165
|
+
if (typeBits === 0) {
|
|
2166
|
+
binData.push({ kind: "link", storageId: 0, extension: "" });
|
|
2167
|
+
} else {
|
|
2168
|
+
const storageId = rec.data.length >= 4 ? rec.data.readUInt16LE(2) : 0;
|
|
2169
|
+
const { value: extension } = readHwpString(rec.data, 4);
|
|
2170
|
+
binData.push({ kind: typeBits === 2 ? "storage" : "embed", storageId, extension });
|
|
2171
|
+
}
|
|
2172
|
+
}
|
|
2173
|
+
if (rec.tagId === TAG_NUMBERING && rec.data.length >= 14) {
|
|
2174
|
+
const levelFormats = [];
|
|
2175
|
+
const numberFormats = [];
|
|
2176
|
+
const startNumbers = [1, 1, 1, 1, 1, 1, 1];
|
|
2177
|
+
let offset = 0;
|
|
2178
|
+
for (let level = 0; level < 7; level++) {
|
|
2179
|
+
if (offset + 12 > rec.data.length) {
|
|
2180
|
+
levelFormats.push("");
|
|
2181
|
+
numberFormats.push(0);
|
|
2182
|
+
continue;
|
|
2183
|
+
}
|
|
2184
|
+
const attr = rec.data.readUInt32LE(offset);
|
|
2185
|
+
numberFormats.push(attr >>> 5 & 15);
|
|
2186
|
+
offset += 12;
|
|
2187
|
+
const { value, next } = readHwpString(rec.data, offset);
|
|
2188
|
+
levelFormats.push(value);
|
|
2189
|
+
offset = next;
|
|
2190
|
+
}
|
|
2191
|
+
let baseStart = 1;
|
|
2192
|
+
if (offset + 2 <= rec.data.length) {
|
|
2193
|
+
baseStart = rec.data.readUInt16LE(offset) || 1;
|
|
2194
|
+
offset += 2;
|
|
2195
|
+
}
|
|
2196
|
+
for (let level = 0; level < 7; level++) {
|
|
2197
|
+
if (offset + 4 <= rec.data.length) {
|
|
2198
|
+
startNumbers[level] = rec.data.readUInt32LE(offset) || 1;
|
|
2199
|
+
offset += 4;
|
|
2200
|
+
} else {
|
|
2201
|
+
startNumbers[level] = baseStart;
|
|
2202
|
+
}
|
|
2203
|
+
}
|
|
2204
|
+
numberings.push({ levelFormats, numberFormats, startNumbers });
|
|
2205
|
+
}
|
|
2206
|
+
if (rec.tagId === TAG_BULLET && rec.data.length >= 14) {
|
|
2207
|
+
const code = rec.data.readUInt16LE(12);
|
|
2208
|
+
bullets.push({ char: code > 0 ? String.fromCharCode(code) : "\u2022" });
|
|
1704
2209
|
}
|
|
1705
2210
|
if (rec.tagId === TAG_DOC_CHAR_SHAPE && rec.data.length >= 18) {
|
|
1706
2211
|
if (rec.data.length >= 50) {
|
|
@@ -1731,7 +2236,7 @@ function parseDocInfo(records) {
|
|
|
1731
2236
|
}
|
|
1732
2237
|
const type = offset < rec.data.length ? rec.data.readUInt8(offset) : 0;
|
|
1733
2238
|
offset += 1;
|
|
1734
|
-
offset +=
|
|
2239
|
+
offset += 1;
|
|
1735
2240
|
offset += 2;
|
|
1736
2241
|
const paraShapeId = offset + 2 <= rec.data.length ? rec.data.readUInt16LE(offset) : 0;
|
|
1737
2242
|
offset += 2;
|
|
@@ -1741,11 +2246,25 @@ function parseDocInfo(records) {
|
|
|
1741
2246
|
}
|
|
1742
2247
|
}
|
|
1743
2248
|
}
|
|
1744
|
-
return { charShapes, paraShapes, styles };
|
|
2249
|
+
return { charShapes, paraShapes, styles, binData, numberings, bullets };
|
|
2250
|
+
}
|
|
2251
|
+
/**
 * Builds a fresh accumulator for paragraph text extraction.
 *
 * @returns {{text: string, ctrlIdx: number, fieldStack: Array, fieldRanges: Array}}
 *   An empty state: no text yet, control index at zero, no open or closed fields.
 */
function createParaTextState() {
  const state = {
    text: "",
    ctrlIdx: 0,
    fieldStack: [],
    fieldRanges: [],
  };
  return state;
}
|
|
1746
|
-
function
|
|
2254
|
+
/**
 * Classifies a control character code as belonging to the "extended" group.
 *
 * The codes covered are 1-3, 11-12, 14-18 and 21-23.
 *
 * @param {number} ch - Control character code.
 * @returns {boolean} True when `ch` falls in one of those ranges.
 */
function isExtendedOnlyCtrlChar(ch) {
  if (ch >= 1 && ch <= 3) return true;
  if (ch >= 11 && ch <= 12) return true;
  if (ch >= 14 && ch <= 18) return true;
  return ch >= 21 && ch <= 23;
}
|
|
2257
|
+
function appendParaText(state, data, resolveControl) {
|
|
1747
2258
|
let result = "";
|
|
1748
2259
|
let i = 0;
|
|
2260
|
+
const base = state.text.length;
|
|
2261
|
+
const resolveAt = (byteOffset, extended) => {
|
|
2262
|
+
const ctrlId = data.readUInt32LE(byteOffset);
|
|
2263
|
+
const idx = extended ? state.ctrlIdx : -1;
|
|
2264
|
+
const replacement = resolveControl?.(idx, ctrlId);
|
|
2265
|
+
if (replacement) result += replacement;
|
|
2266
|
+
if (extended) state.ctrlIdx++;
|
|
2267
|
+
};
|
|
1749
2268
|
while (i + 1 < data.length) {
|
|
1750
2269
|
const ch = data.readUInt16LE(i);
|
|
1751
2270
|
i += 2;
|
|
@@ -1756,9 +2275,7 @@ function extractTextWithControls(data, resolveControl) {
|
|
|
1756
2275
|
break;
|
|
1757
2276
|
case CHAR_SECTION_BREAK: {
|
|
1758
2277
|
if (i + 16 <= data.length && data.readUInt16LE(i) === 11) {
|
|
1759
|
-
|
|
1760
|
-
const replacement = resolveControl?.(ctrlId);
|
|
1761
|
-
if (replacement) result += replacement;
|
|
2278
|
+
resolveAt(i + 2, true);
|
|
1762
2279
|
i += 16;
|
|
1763
2280
|
break;
|
|
1764
2281
|
}
|
|
@@ -1790,12 +2307,18 @@ function extractTextWithControls(data, resolveControl) {
|
|
|
1790
2307
|
break;
|
|
1791
2308
|
default:
|
|
1792
2309
|
if (ch >= 1 && ch <= 31) {
|
|
1793
|
-
const isExtended = ch
|
|
2310
|
+
const isExtended = isExtendedOnlyCtrlChar(ch);
|
|
1794
2311
|
const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
|
|
1795
2312
|
if ((isExtended || isInline) && i + 14 <= data.length) {
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
if (
|
|
2313
|
+
if (ch === 3) {
|
|
2314
|
+
state.fieldStack.push({ start: base + result.length, ctrlIdx: state.ctrlIdx });
|
|
2315
|
+
} else if (ch === 4) {
|
|
2316
|
+
const open = state.fieldStack.pop();
|
|
2317
|
+
if (open) {
|
|
2318
|
+
state.fieldRanges.push({ start: open.start, end: base + result.length, ctrlIdx: open.ctrlIdx });
|
|
2319
|
+
}
|
|
2320
|
+
}
|
|
2321
|
+
resolveAt(i, isExtended);
|
|
1799
2322
|
i += 14;
|
|
1800
2323
|
}
|
|
1801
2324
|
} else if (ch >= 32) {
|
|
@@ -1813,7 +2336,7 @@ function extractTextWithControls(data, resolveControl) {
|
|
|
1813
2336
|
break;
|
|
1814
2337
|
}
|
|
1815
2338
|
}
|
|
1816
|
-
|
|
2339
|
+
state.text += result;
|
|
1817
2340
|
}
|
|
1818
2341
|
function extractEquationText(data) {
|
|
1819
2342
|
if (data.length < 6) return null;
|
|
@@ -1825,100 +2348,407 @@ function extractEquationText(data) {
|
|
|
1825
2348
|
return equation || null;
|
|
1826
2349
|
}
|
|
1827
2350
|
|
|
1828
|
-
// src/hwp5/
|
|
1829
|
-
var
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
|
|
1912
|
-
0
|
|
1913
|
-
|
|
1914
|
-
|
|
1915
|
-
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
2351
|
+
// src/hwp5/numbering.ts
|
|
2352
|
+
/**
 * Tracks outline-numbering counters across paragraphs.
 *
 * One set of seven per-level counters is live at a time, keyed by the numbering
 * id last seen. Counters of ids that were switched away from are remembered so
 * numbering resumes when the id comes back.
 */
var NumberingState = class {
  currentId = 0;
  counters = [0, 0, 0, 0, 0, 0, 0];
  history = /* @__PURE__ */ new Map();

  /**
   * Registers one numbered paragraph and returns the counters after it.
   *
   * @param {number} numberingId - Numbering definition the paragraph uses.
   * @param {number} level - Outline level; clamped to 0..6.
   * @returns {number[]} A copy of the seven counters after the update.
   */
  advance(numberingId, level) {
    const lv = Math.min(Math.max(level, 0), 6);
    const switching = this.currentId !== numberingId;

    if (switching) {
      // Id 0 is the initial "no numbering yet" state and is never stored.
      if (this.currentId !== 0) this.history.set(this.currentId, this.counters.slice());

      const remembered = this.history.get(numberingId);
      // A never-seen id inherits the ancestors' counters (levels above `lv`)
      // and starts from zero at its own level and below.
      this.counters = remembered
        ? remembered.slice()
        : this.counters.map((value, position) => (position < lv ? value : 0));
      this.currentId = numberingId;
    }

    this.counters[lv]++;
    // Advancing a level restarts every deeper level.
    for (let deeper = lv + 1; deeper < 7; deeper++) this.counters[deeper] = 0;
    return this.counters.slice();
  }
};
|
|
2376
|
+
/**
 * Maps a paragraph-head number format code to an internal format name.
 *
 * @param {number} code - Numeric format code.
 * @returns {string} The matching format name; any code without an entry
 *   (including non-numbers) maps to "digit".
 */
function headFormatToNumFmt(code) {
  // Position in the list is the code; holes fall back to "digit".
  const namesByCode = [
    undefined,
    "circled",
    "romanUpper",
    "romanLower",
    "latinUpper",
    "latinLower",
    undefined,
    undefined,
    "ganada",
    "circledGanada",
    "jamo",
    "circledJamo",
    "hangulNum",
    "hanjaNum",
  ];
  // The type guard keeps numeric strings such as "1" from matching.
  const name = typeof code === "number" ? namesByCode[code] : undefined;
  return name ?? "digit";
}
|
|
2404
|
+
/**
 * Maps a shape number format code to an internal format name.
 *
 * @param {number} code - Numeric format code.
 * @returns {string} The matching format name; any code outside 1..8
 *   (including non-numbers) maps to "digit".
 */
function shapeFormatToNumFmt(code) {
  // Position in the list is the code; index 0 is deliberately empty.
  const namesByCode = [
    undefined,
    "circled",
    "romanUpper",
    "romanLower",
    "latinUpper",
    "latinLower",
    "ganada",
    "hangulNum",
    "hanjaNum",
  ];
  // The type guard keeps numeric strings such as "1" from matching.
  const name = typeof code === "number" ? namesByCode[code] : undefined;
  return name ?? "digit";
}
|
|
2426
|
+
var CIRCLED_DIGITS = "\u2460\u2461\u2462\u2463\u2464\u2465\u2466\u2467\u2468\u2469\u246A\u246B\u246C\u246D\u246E\u246F\u2470\u2471\u2472\u2473";
|
|
2427
|
+
var GANADA = "\uAC00\uB098\uB2E4\uB77C\uB9C8\uBC14\uC0AC\uC544\uC790\uCC28\uCE74\uD0C0\uD30C\uD558";
|
|
2428
|
+
var CIRCLED_GANADA = "\u326E\u326F\u3270\u3271\u3272\u3273\u3274\u3275\u3276\u3277\u3278\u3279\u327A\u327B";
|
|
2429
|
+
var JAMO = "\u3131\u3134\u3137\u3139\u3141\u3142\u3145\u3147\u3148\u314A\u314B\u314C\u314D\u314E";
|
|
2430
|
+
var CIRCLED_JAMO = "\u3260\u3261\u3262\u3263\u3264\u3265\u3266\u3267\u3268\u3269\u326A\u326B\u326C\u326D";
|
|
2431
|
+
/**
 * Looks up the 1-based `n`-th symbol in a glyph table.
 *
 * @param {number} n - 1-based position.
 * @param {string} table - Glyphs, one UTF-16 code unit each.
 * @returns {string} The glyph, or the decimal text of `n` when it is out of range.
 */
function fromTable(n, table) {
  const withinTable = n >= 1 && n <= table.length;
  if (!withinTable) return String(n);
  return table[n - 1];
}
|
|
2434
|
+
/**
 * Renders a number as a Roman numeral.
 *
 * @param {number} n - Value to render.
 * @param {boolean} upper - Upper-case letters when truthy, lower-case otherwise.
 * @returns {string} The numeral, or the decimal text of `n` when it is outside 1..3999.
 */
function formatRoman(n, upper) {
  if (n <= 0 || n > 3999) return String(n);

  // Largest value first, subtractive pairs included, so a greedy pass suffices.
  const numerals = [
    [1e3, "M"], [900, "CM"], [500, "D"], [400, "CD"],
    [100, "C"], [90, "XC"], [50, "L"], [40, "XL"],
    [10, "X"], [9, "IX"], [5, "V"], [4, "IV"], [1, "I"],
  ];

  let remaining = n;
  let text = "";
  for (const [value, glyph] of numerals) {
    while (remaining >= value) {
      text += upper ? glyph : glyph.toLowerCase();
      remaining -= value;
    }
  }
  return text;
}
|
|
2448
|
+
/**
 * Renders a number as spreadsheet-style letters (1 -> A, 26 -> Z, 27 -> AA).
 *
 * @param {number} n - Value to render.
 * @param {boolean} upper - Upper-case letters when truthy, lower-case otherwise.
 * @returns {string} The letters, or an empty string when `n` is zero or negative.
 */
function formatLatin(n, upper) {
  if (n <= 0) return "";

  const firstLetter = upper ? 65 : 97;
  const letters = [];
  for (let rest = n; rest > 0; rest = Math.floor(rest / 26)) {
    // The scheme has no zero digit, so shift to 0-based before taking the remainder.
    rest--;
    letters.unshift(String.fromCharCode(firstLetter + rest % 26));
  }
  return letters.join("");
}
|
|
2459
|
+
/**
 * Spells a number out with positional unit characters (tens, hundreds, ...).
 *
 * @param {number} n - Value to render.
 * @param {string[]} digits - Glyph per digit value, indexed 0..9.
 * @param {string[]} units - Glyph per decimal place, indexed from the ones place.
 * @param {string} zero - Text returned when `n` is exactly 0.
 * @returns {string} The spelled-out number, or the decimal text of `n` when it
 *   is negative or above 99999.
 */
function formatEastAsianNumber(n, digits, units, zero) {
  if (n === 0) return zero;
  if (n < 0 || n > 99999) return String(n);

  let text = "";
  let place = 0;
  for (let rest = n; rest > 0; rest = Math.floor(rest / 10), place++) {
    const digit = rest % 10;
    // A zero digit contributes neither a digit glyph nor its unit.
    if (!(digit > 0)) continue;
    // A leading "one" is left implicit in front of a unit.
    const lead = digit === 1 && place > 0 ? "" : digits[digit];
    text = lead + units[place] + text;
  }
  return text;
}
|
|
2476
|
+
var HANGUL_DIGITS = ["", "\uC77C", "\uC774", "\uC0BC", "\uC0AC", "\uC624", "\uC721", "\uCE60", "\uD314", "\uAD6C"];
|
|
2477
|
+
var HANGUL_UNITS = ["", "\uC2ED", "\uBC31", "\uCC9C", "\uB9CC"];
|
|
2478
|
+
var HANJA_DIGITS = ["", "\u4E00", "\u4E8C", "\u4E09", "\u56DB", "\u4E94", "\u516D", "\u4E03", "\u516B", "\u4E5D"];
|
|
2479
|
+
var HANJA_UNITS = ["", "\u5341", "\u767E", "\u5343", "\u842C"];
|
|
2480
|
+
/**
 * Renders a counter value in the requested numbering format.
 *
 * @param {number} n - Counter value.
 * @param {string} fmt - Format name, e.g. "circled", "romanUpper", "ganada".
 * @returns {string} The formatted number; unknown formats give plain decimal text.
 */
function formatNumber(n, fmt) {
  if (fmt === "romanUpper" || fmt === "romanLower") {
    return formatRoman(n, fmt === "romanUpper");
  }
  if (fmt === "latinUpper" || fmt === "latinLower") {
    // Letters cannot express zero or negatives; fall back to digits then.
    return formatLatin(n, fmt === "latinUpper") || String(n);
  }
  if (fmt === "hangulNum") {
    return formatEastAsianNumber(n, HANGUL_DIGITS, HANGUL_UNITS, "\uC601");
  }
  if (fmt === "hanjaNum") {
    return formatEastAsianNumber(n, HANJA_DIGITS, HANJA_UNITS, "\u96F6");
  }

  // The remaining formats are plain glyph-table lookups.
  const glyphs =
    fmt === "circled" ? CIRCLED_DIGITS
    : fmt === "ganada" ? GANADA
    : fmt === "circledGanada" ? CIRCLED_GANADA
    : fmt === "jamo" ? JAMO
    : fmt === "circledJamo" ? CIRCLED_JAMO
    : void 0;
  return glyphs === void 0 ? String(n) : fromTable(n, glyphs);
}
|
|
2508
|
+
/**
 * Expands `^1`..`^7` placeholders in a numbering format string.
 *
 * Each placeholder is replaced by the formatted number of the referenced level.
 * All other characters are copied through unchanged.
 *
 * @param {string} formatStr - Format string, e.g. "^1.^2)".
 * @param {number[]} counters - Per-level counters; a missing entry counts as 0.
 * @param {{startNumbers: number[], numberFormats: number[]}} numbering -
 *   Per-level start numbers (default 1) and format codes (default 0).
 * @returns {string} The format string with every placeholder substituted.
 */
function expandNumberingFormat(formatStr, counters, numbering) {
  return formatStr.replace(/\^([1-7])/g, (_placeholder, levelDigit) => {
    const idx = Number(levelDigit) - 1;
    const counterVal = counters[idx] ?? 0;
    const start = numbering.startNumbers[idx] ?? 1;
    // A level that has not been counted yet shows its start number as-is.
    const num = counterVal > 0 ? start - 1 + counterVal : start;
    return formatNumber(num, headFormatToNumFmt(numbering.numberFormats[idx] ?? 0));
  });
}
|
|
2528
|
+
|
|
2529
|
+
// src/hwp5/images.ts
|
|
2530
|
+
/**
 * Sniffs an image MIME type from the leading magic bytes.
 *
 * @param {Uint8Array} data - Raw payload bytes.
 * @returns {string|null} One of image/png, image/jpeg, image/gif, image/bmp,
 *   image/wmf, image/emf, or `null` when the payload is shorter than four bytes
 *   or matches no known signature.
 */
function detectImageMime(data) {
  if (data.length < 4) return null;

  // Checked in order; the first signature that matches wins.
  const signatures = [
    [[137, 80, 78, 71], "image/png"],
    [[255, 216, 255], "image/jpeg"],
    [[71, 73, 70], "image/gif"],
    [[66, 77], "image/bmp"],
    [[215, 205, 198, 154], "image/wmf"],
    [[1, 0, 0, 0], "image/emf"],
  ];
  const hit = signatures.find(([magic]) => magic.every((byte, pos) => data[pos] === byte));
  return hit ? hit[1] : null;
}
|
|
2540
|
+
/**
 * Returns a bin-data payload in a directly usable form.
 *
 * A payload that already starts with a known image signature is returned as-is.
 * Otherwise one decompression attempt is made, and its result is used only if
 * it is non-empty.
 *
 * @param {Uint8Array} data - Raw stream bytes.
 * @returns {Uint8Array} The original bytes or their decompressed form.
 */
function normalizeBinPayload(data) {
  let payload = data;
  if (!detectImageMime(data)) {
    try {
      const inflated = decompressStream(data);
      if (inflated.length > 0) payload = inflated;
    } catch {
      // Best effort: a decompression failure means we keep the raw bytes.
    }
  }
  return payload;
}
|
|
2549
|
+
var BIN_ENTRY_RE = /(?:^|\/)BIN([0-9A-Fa-f]{4,8})(?:\.[^./\\]*)?$/;
|
|
2550
|
+
/**
 * Gathers every image block in a block tree, in document order.
 *
 * Descends into table cells (`cell.blocks`) and into `children` of any block.
 *
 * @param {object[]} blocks - Blocks to scan.
 * @param {object[]} out - Array that receives the image blocks (mutated).
 * @returns {void}
 */
function collectImageBlocks2(blocks, out) {
  for (const block of blocks) {
    if (block.type === "image") out.push(block);

    const rows = block.table ? block.table.cells : [];
    for (const row of rows) {
      for (const cell of row) {
        if (cell.blocks) collectImageBlocks2(cell.blocks, out);
      }
    }

    if (block.children) collectImageBlocks2(block.children, out);
  }
}
|
|
2563
|
+
/**
 * Invokes a callback for every table cell in a block tree.
 *
 * Each cell is visited before any tables nested inside it. Tables reachable
 * through `children` of a block are visited as well.
 *
 * @param {object[]} blocks - Blocks to scan.
 * @param {(cell: object) => void} fn - Called once per cell.
 * @returns {void}
 */
function forEachTableCell(blocks, fn) {
  for (const block of blocks) {
    const rows = block.table ? block.table.cells : [];
    for (const row of rows) {
      for (const cell of row) {
        fn(cell);
        if (cell.blocks) forEachTableCell(cell.blocks, fn);
      }
    }

    if (block.children) forEachTableCell(block.children, fn);
  }
}
|
|
2576
|
+
var CELL_IMAGE_SENTINEL_RE = /!\[image\]\(hwp5bin:(\d+)\)/g;
|
|
2577
|
+
/**
 * Rewrites in-cell image placeholders once the final image filenames are known.
 *
 * Cell text may contain sentinels of the form `![image](hwp5bin:<id>)`. Each one
 * becomes a Markdown image reference to the extracted file when `renamed` has an
 * entry for that id, and the "[이미지]" placeholder otherwise.
 *
 * @param {object[]} blocks - Block tree whose table cells are rewritten in place.
 * @param {Map<number, string>} renamed - Storage id to output filename.
 * @returns {void}
 */
function resolveCellImageSentinels(blocks, renamed) {
  forEachTableCell(blocks, (cell) => {
    // Cheap pre-check; also skips cells that carry no text at all.
    if (typeof cell.text !== "string" || !cell.text.includes("hwp5bin:")) return;
    cell.text = cell.text.replace(CELL_IMAGE_SENTINEL_RE, (_m, idStr) => {
      const filename = renamed.get(Number(idStr));
      // Keep the Markdown image shape of the sentinel and point it at the file.
      return filename ? `![image](${filename})` : "[\uC774\uBBF8\uC9C0]";
    });
  });
}
|
|
2586
|
+
/**
 * Binds image blocks to their binary payloads and assigns output filenames.
 *
 * An image block's `text` holds a decimal storage id. Blocks whose payload is
 * missing or not a recognised image format are turned into placeholder
 * paragraphs, and a SKIPPED_IMAGE warning is recorded for each. The rest get a
 * sequential `image_NNN.<ext>` filename and an `imageData` record. In-cell
 * image sentinels are resolved at the end with the same id-to-filename mapping.
 *
 * @param {Map<number, {data: Uint8Array, name: string}>} binDataMap - Payloads by storage id.
 * @param {object[]} blocks - Block tree; mutated in place.
 * @param {object[]} warnings - Receives warnings for skipped images.
 * @returns {Array<{filename: string, data: Uint8Array, mimeType: string}>}
 *   The extracted images, or an empty array when the tree has no image blocks.
 */
function resolveImageBlocks(binDataMap, blocks, warnings) {
  const found = [];
  collectImageBlocks2(blocks, found);
  if (found.length === 0) return [];

  const images = [];
  const renamed = /* @__PURE__ */ new Map();
  // First MIME fragment that matches decides the extension; anything else is "bin".
  const extByMimePart = [["jpeg", "jpg"], ["png", "png"], ["gif", "gif"], ["bmp", "bmp"]];

  // Turns an unusable image block into a text placeholder and records why.
  const degrade = (block, label, message) => {
    warnings.push({ page: block.pageNumber, message, code: "SKIPPED_IMAGE" });
    block.type = "paragraph";
    block.text = `[\uC774\uBBF8\uC9C0: ${label}]`;
  };

  for (const block of found) {
    if (!block.text) continue;
    const storageId = Number.parseInt(block.text, 10);
    if (Number.isNaN(storageId)) continue;

    const bin = binDataMap.get(storageId);
    if (!bin) {
      degrade(block, `BinData ${storageId}`, `BinData ${storageId} \uC5C6\uC74C`);
      continue;
    }

    const mime = detectImageMime(bin.data);
    if (!mime) {
      degrade(block, bin.name, `BinData ${storageId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`);
      continue;
    }

    const known = extByMimePart.find(([part]) => mime.includes(part));
    const ext = known ? known[1] : "bin";
    // Numbering counts only the images that were actually extracted.
    const filename = `image_${String(images.length + 1).padStart(3, "0")}.${ext}`;
    images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
    renamed.set(storageId, filename);
    block.text = filename;
    block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
  }

  resolveCellImageSentinels(blocks, renamed);
  return images;
}
|
|
2622
|
+
/**
 * Extracts embedded images using a parsed compound-file index.
 *
 * Entries whose name ends in `BIN<hex id>` (with an optional extension) are
 * collected by id, then handed to `resolveImageBlocks`. When no such entry
 * exists, in-cell image sentinels are still resolved against an empty mapping.
 *
 * @param {Array<{name: string, content: ArrayLike<number>}>|null|undefined} fileIndex -
 *   Container entries; may be absent.
 * @param {object[]} blocks - Block tree; mutated in place.
 * @param {object[]} warnings - Receives warnings for skipped images.
 * @returns {object[]} The extracted images (empty when no payload was found).
 */
function extractHwp5Images(fileIndex, blocks, warnings) {
  const binDataMap = /* @__PURE__ */ new Map();

  for (const entry of fileIndex || []) {
    const hit = entry?.name && entry.content ? entry.name.match(BIN_ENTRY_RE) : null;
    if (!hit) continue;
    // The numeric part of the entry name is a hexadecimal storage id.
    binDataMap.set(parseInt(hit[1], 16), {
      data: normalizeBinPayload(Buffer.from(entry.content)),
      name: entry.name,
    });
  }

  if (binDataMap.size > 0) return resolveImageBlocks(binDataMap, blocks, warnings);

  resolveCellImageSentinels(blocks, /* @__PURE__ */ new Map());
  return [];
}
|
|
2640
|
+
/**
 * Extracts embedded images through a lenient compound-file reader.
 *
 * Streams named `BIN<hex id>` (optionally followed by an extension) are looked
 * up with `lcfb.findStream` and collected by id, then handed to
 * `resolveImageBlocks`. When none is found, in-cell image sentinels are still
 * resolved against an empty mapping.
 *
 * @param {{entries: () => Iterable<{name: string}>, findStream: (name: string) => Uint8Array|null}} lcfb -
 *   Lenient container reader.
 * @param {object[]} blocks - Block tree; mutated in place.
 * @param {object[]} warnings - Receives warnings for skipped images.
 * @returns {object[]} The extracted images (empty when no payload was found).
 */
function extractHwp5ImagesLenient(lcfb, blocks, warnings) {
  const namePattern = /^BIN([0-9A-Fa-f]{4,8})(?:\.|$)/;
  const binDataMap = /* @__PURE__ */ new Map();

  for (const item of lcfb.entries()) {
    const hit = item.name.match(namePattern);
    // Only touch the stream once the name is known to be a bin-data entry.
    const raw = hit ? lcfb.findStream(item.name) : null;
    if (!raw) continue;
    binDataMap.set(parseInt(hit[1], 16), { data: normalizeBinPayload(raw), name: item.name });
  }

  if (binDataMap.size > 0) return resolveImageBlocks(binDataMap, blocks, warnings);

  resolveCellImageSentinels(blocks, /* @__PURE__ */ new Map());
  return [];
}
|
|
2657
|
+
|
|
2658
|
+
// src/hwp5/aes.ts
|
|
2659
|
+
var S_BOX = new Uint8Array([
|
|
2660
|
+
99,
|
|
2661
|
+
124,
|
|
2662
|
+
119,
|
|
2663
|
+
123,
|
|
2664
|
+
242,
|
|
2665
|
+
107,
|
|
2666
|
+
111,
|
|
2667
|
+
197,
|
|
2668
|
+
48,
|
|
2669
|
+
1,
|
|
2670
|
+
103,
|
|
2671
|
+
43,
|
|
2672
|
+
254,
|
|
2673
|
+
215,
|
|
2674
|
+
171,
|
|
2675
|
+
118,
|
|
2676
|
+
202,
|
|
2677
|
+
130,
|
|
2678
|
+
201,
|
|
2679
|
+
125,
|
|
2680
|
+
250,
|
|
2681
|
+
89,
|
|
2682
|
+
71,
|
|
2683
|
+
240,
|
|
2684
|
+
173,
|
|
2685
|
+
212,
|
|
2686
|
+
162,
|
|
2687
|
+
175,
|
|
2688
|
+
156,
|
|
2689
|
+
164,
|
|
2690
|
+
114,
|
|
2691
|
+
192,
|
|
2692
|
+
183,
|
|
2693
|
+
253,
|
|
2694
|
+
147,
|
|
2695
|
+
38,
|
|
2696
|
+
54,
|
|
2697
|
+
63,
|
|
2698
|
+
247,
|
|
2699
|
+
204,
|
|
2700
|
+
52,
|
|
2701
|
+
165,
|
|
2702
|
+
229,
|
|
2703
|
+
241,
|
|
2704
|
+
113,
|
|
2705
|
+
216,
|
|
2706
|
+
49,
|
|
2707
|
+
21,
|
|
2708
|
+
4,
|
|
2709
|
+
199,
|
|
2710
|
+
35,
|
|
2711
|
+
195,
|
|
2712
|
+
24,
|
|
2713
|
+
150,
|
|
2714
|
+
5,
|
|
2715
|
+
154,
|
|
2716
|
+
7,
|
|
2717
|
+
18,
|
|
2718
|
+
128,
|
|
2719
|
+
226,
|
|
2720
|
+
235,
|
|
2721
|
+
39,
|
|
2722
|
+
178,
|
|
2723
|
+
117,
|
|
2724
|
+
9,
|
|
2725
|
+
131,
|
|
2726
|
+
44,
|
|
2727
|
+
26,
|
|
2728
|
+
27,
|
|
2729
|
+
110,
|
|
2730
|
+
90,
|
|
2731
|
+
160,
|
|
2732
|
+
82,
|
|
2733
|
+
59,
|
|
2734
|
+
214,
|
|
2735
|
+
179,
|
|
2736
|
+
41,
|
|
2737
|
+
227,
|
|
2738
|
+
47,
|
|
2739
|
+
132,
|
|
2740
|
+
83,
|
|
2741
|
+
209,
|
|
2742
|
+
0,
|
|
2743
|
+
237,
|
|
2744
|
+
32,
|
|
2745
|
+
252,
|
|
2746
|
+
177,
|
|
2747
|
+
91,
|
|
2748
|
+
106,
|
|
2749
|
+
203,
|
|
2750
|
+
190,
|
|
2751
|
+
57,
|
|
1922
2752
|
74,
|
|
1923
2753
|
76,
|
|
1924
2754
|
88,
|
|
@@ -2751,6 +3581,66 @@ var require2 = createRequire(import.meta.url);
|
|
|
2751
3581
|
// "cfb" package, loaded through the createRequire-based require2.
var CFB = require2("cfb");
// NOTE(review): presumably the cap on the number of sections parsed per document —
// its use is not visible in this part of the file; confirm.
var MAX_SECTIONS = 100;
// Ceiling (100 MiB) on the cumulative size of decompressed section data;
// parseHwp5Document throws once the running total exceeds it.
var MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024;
// Maximum nesting depth for recursively parsed content (list-header paragraphs,
// table controls); parsers return an empty result once ctx.depth reaches it.
var MAX_NEST_DEPTH = 8;
|
|
3585
|
+
/**
 * Packs a control tag into an unsigned 32-bit identifier, first character in
 * the most significant byte.
 *
 * Tags shorter than four characters are right-padded with spaces before
 * packing. Without the padding, charCodeAt() returns NaN for the missing
 * positions, which the bitwise operators coerce to 0, producing an ID that
 * can never match a space-padded tag read from a file.
 *
 * @param {string} s - Control tag, e.g. "tbl " or "%hlk"; only the first four characters are used.
 * @returns {number} Unsigned 32-bit control identifier.
 */
function cid(s) {
  const tag = s.padEnd(4, " ");
  return (tag.charCodeAt(0) << 24 | tag.charCodeAt(1) << 16 | tag.charCodeAt(2) << 8 | tag.charCodeAt(3)) >>> 0;
}
|
|
3588
|
+
// Control identifiers: character tags packed into uint32 values by cid().
// The per-constant notes describe how applyCtrlEffect treats each control.
// NOTE(review): control tags are expected to be four characters long; confirm
// that the "fn " / "en " literals below carry their full space padding.
var CTRL_TBL = cid("tbl "); // table -> parseTableControl
var CTRL_GSO = cid("gso "); // handled by parseGsoControl
var CTRL_EQED = cid("eqed"); // equation -> inline equation text
var CTRL_HEAD = cid("head"); // header -> applyHeaderFooterEffect
var CTRL_FOOT = cid("foot"); // footer -> applyHeaderFooterEffect
var CTRL_FN = cid("fn "); // note using auto-counter type 1 -> applyNoteEffect
var CTRL_EN = cid("en "); // note using auto-counter type 2 -> applyNoteEffect
var CTRL_ATNO = cid("atno"); // auto number: emits the current counter as inline text
var CTRL_NWNO = cid("nwno"); // new number: overwrites an auto counter
var CTRL_PGNP = cid("pgnp"); // ignored by applyCtrlEffect
var CTRL_PGHD = cid("pghd"); // ignored by applyCtrlEffect
var CTRL_IDXM = cid("idxm"); // ignored by applyCtrlEffect
var CTRL_BOKM = cid("bokm"); // ignored by applyCtrlEffect
var CTRL_TCPS = cid("tcps"); // ignored by applyCtrlEffect
var CTRL_TDUT = cid("tdut"); // ignored by applyCtrlEffect
var CTRL_TCMT = cid("tcmt"); // ignored by applyCtrlEffect
var CTRL_SECD = cid("secd"); // section definition: supplies the outline numbering id
var CTRL_COLD = cid("cold"); // ignored by applyCtrlEffect
var CTRL_FORM = cid("form"); // ignored by applyCtrlEffect
var CTRL_OLE = cid("ole "); // OLE object: skipped with a SKIPPED_OLE warning
// Field controls start with "%" (see isFieldCtrlId).
var FIELD_HLK = cid("%hlk"); // hyperlink field; applyFieldEffect extracts its URL
var FIELD_CLK = cid("%clk"); // NOTE(review): not referenced in the visible code — confirm use
// Every non-field control ID above; normalizeCtrlId uses this set to decide
// whether a raw ID must be byte-swapped.
var KNOWN_CTRL_IDS = /* @__PURE__ */ new Set([
  CTRL_TBL,
  CTRL_GSO,
  CTRL_EQED,
  CTRL_HEAD,
  CTRL_FOOT,
  CTRL_FN,
  CTRL_EN,
  CTRL_ATNO,
  CTRL_NWNO,
  CTRL_PGNP,
  CTRL_PGHD,
  CTRL_IDXM,
  CTRL_BOKM,
  CTRL_TCPS,
  CTRL_TDUT,
  CTRL_TCMT,
  CTRL_SECD,
  CTRL_COLD,
  CTRL_FORM,
  CTRL_OLE
]);
|
|
3632
|
+
/**
 * Tells whether a packed control ID denotes a field control, i.e. whether its
 * most significant byte is the "%" character (code 37).
 *
 * @param {number} id - Packed 32-bit control identifier.
 * @returns {boolean} True when the top byte equals 37.
 */
function isFieldCtrlId(id) {
  const PERCENT_SIGN = 37;
  const topByte = id >>> 24;
  return topByte === PERCENT_SIGN;
}
|
|
3635
|
+
/**
 * Reverses the byte order of a 32-bit value.
 *
 * @param {number} id - 32-bit integer.
 * @returns {number} The byte-swapped value as an unsigned 32-bit integer.
 */
function swap32(id) {
  const byte0 = id & 255;
  const byte1 = (id >>> 8) & 255;
  const byte2 = (id >>> 16) & 255;
  const byte3 = (id >>> 24) & 255;
  // ">>> 0" keeps the result unsigned when byte0 lands in the sign bit.
  return ((byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3) >>> 0;
}
|
|
3638
|
+
/**
 * Maps a raw control ID to the byte order used by the CTRL_* constants.
 * The raw value wins if it is already recognised; otherwise its byte-swapped
 * form is tried; unrecognised IDs are returned unchanged.
 *
 * @param {number} raw - Control ID as read from the record.
 * @returns {number} The recognised ID (raw or byte-swapped), or raw itself.
 */
function normalizeCtrlId(raw) {
  const recognised = (value) => KNOWN_CTRL_IDS.has(value) || isFieldCtrlId(value);
  if (recognised(raw)) {
    return raw;
  }
  const flipped = swap32(raw);
  return recognised(flipped) ? flipped : raw;
}
|
|
2754
3644
|
function parseHwp5Document(buffer, options) {
|
|
2755
3645
|
let cfb = null;
|
|
2756
3646
|
let lenientCfb = null;
|
|
@@ -2790,8 +3680,8 @@ function parseHwp5Document(buffer, options) {
|
|
|
2790
3680
|
metadata.pageCount = sections.length;
|
|
2791
3681
|
const pageFilter = options?.pages ? parsePageRange(options.pages, sections.length) : null;
|
|
2792
3682
|
const totalTarget = pageFilter ? pageFilter.size : sections.length;
|
|
2793
|
-
const
|
|
2794
|
-
const
|
|
3683
|
+
const bodyBlocks = [];
|
|
3684
|
+
const doc = createHwp5DocState();
|
|
2795
3685
|
let totalDecompressed = 0;
|
|
2796
3686
|
let parsedSections = 0;
|
|
2797
3687
|
for (let si = 0; si < sections.length; si++) {
|
|
@@ -2802,8 +3692,8 @@ function parseHwp5Document(buffer, options) {
|
|
|
2802
3692
|
totalDecompressed += data.length;
|
|
2803
3693
|
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new KordocError("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2804
3694
|
const records = readRecords(data);
|
|
2805
|
-
const sectionBlocks = parseSection(records, docInfo, warnings, si + 1,
|
|
2806
|
-
|
|
3695
|
+
const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, doc);
|
|
3696
|
+
bodyBlocks.push(...sectionBlocks);
|
|
2807
3697
|
parsedSections++;
|
|
2808
3698
|
options?.onProgress?.(parsedSections, totalTarget);
|
|
2809
3699
|
} catch (secErr) {
|
|
@@ -2811,7 +3701,8 @@ function parseHwp5Document(buffer, options) {
|
|
|
2811
3701
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
2812
3702
|
}
|
|
2813
3703
|
}
|
|
2814
|
-
const
|
|
3704
|
+
const blocks = [...doc.headerBlocks, ...bodyBlocks, ...doc.footerBlocks];
|
|
3705
|
+
const images = cfb ? extractHwp5Images(cfb.FileIndex, blocks, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, warnings);
|
|
2815
3706
|
const flatBlocks = flattenLayoutTables(blocks);
|
|
2816
3707
|
if (docInfo) {
|
|
2817
3708
|
detectHwp5Headings(flatBlocks, docInfo);
|
|
@@ -2842,28 +3733,28 @@ function parseDocInfoFromStream(raw, compressed) {
|
|
|
2842
3733
|
}
|
|
2843
3734
|
function detectHwp5Headings(blocks, docInfo) {
|
|
2844
3735
|
let baseFontSize = 0;
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
if (
|
|
2848
|
-
|
|
2849
|
-
if (cs?.fontSize > 0) {
|
|
2850
|
-
baseFontSize = cs.fontSize / 10;
|
|
2851
|
-
break;
|
|
2852
|
-
}
|
|
3736
|
+
const sizeFreq = /* @__PURE__ */ new Map();
|
|
3737
|
+
for (const b of blocks) {
|
|
3738
|
+
if (b.style?.fontSize && b.text) {
|
|
3739
|
+
sizeFreq.set(b.style.fontSize, (sizeFreq.get(b.style.fontSize) || 0) + b.text.length);
|
|
2853
3740
|
}
|
|
2854
3741
|
}
|
|
2855
|
-
|
|
2856
|
-
|
|
2857
|
-
|
|
2858
|
-
|
|
2859
|
-
|
|
2860
|
-
}
|
|
3742
|
+
let maxWeight = 0;
|
|
3743
|
+
for (const [size, weight] of sizeFreq) {
|
|
3744
|
+
if (weight > maxWeight) {
|
|
3745
|
+
maxWeight = weight;
|
|
3746
|
+
baseFontSize = size;
|
|
2861
3747
|
}
|
|
2862
|
-
|
|
2863
|
-
|
|
2864
|
-
|
|
2865
|
-
|
|
2866
|
-
|
|
3748
|
+
}
|
|
3749
|
+
if (baseFontSize === 0) {
|
|
3750
|
+
for (const style of docInfo.styles) {
|
|
3751
|
+
const name = (style.nameKo || style.name).toLowerCase();
|
|
3752
|
+
if (name.includes("\uBC14\uD0D5") || name.includes("\uBCF8\uBB38") || name === "normal" || name === "body") {
|
|
3753
|
+
const cs = docInfo.charShapes[style.charShapeId];
|
|
3754
|
+
if (cs?.fontSize > 0) {
|
|
3755
|
+
baseFontSize = cs.fontSize / 10;
|
|
3756
|
+
break;
|
|
3757
|
+
}
|
|
2867
3758
|
}
|
|
2868
3759
|
}
|
|
2869
3760
|
}
|
|
@@ -3001,414 +3892,397 @@ function findViewTextSectionsLenient(lcfb, compressed) {
|
|
|
3001
3892
|
}
|
|
3002
3893
|
return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
|
|
3003
3894
|
}
|
|
3004
|
-
var TAG_SHAPE_COMPONENT = 74;
|
|
3005
|
-
var CTRL_ID_EQEDIT = "deqe";
|
|
3006
|
-
function extractBinDataId(records, ctrlIdx) {
|
|
3007
|
-
const ctrlLevel = records[ctrlIdx].level;
|
|
3008
|
-
for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 50; j++) {
|
|
3009
|
-
const r = records[j];
|
|
3010
|
-
if (r.level <= ctrlLevel) break;
|
|
3011
|
-
if (r.data.length >= 2) {
|
|
3012
|
-
if (r.tagId > TAG_SHAPE_COMPONENT && r.level > ctrlLevel + 1 && r.data.length >= 4) {
|
|
3013
|
-
const possibleId = r.data.readUInt16LE(0);
|
|
3014
|
-
if (possibleId < 1e4) return possibleId;
|
|
3015
|
-
}
|
|
3016
|
-
}
|
|
3017
|
-
}
|
|
3018
|
-
return -1;
|
|
3019
|
-
}
|
|
3020
|
-
function isEquationControlId(ctrlId) {
|
|
3021
|
-
return ctrlId === CTRL_ID_EQEDIT || ctrlId === "eqed";
|
|
3022
|
-
}
|
|
3023
3895
|
/**
 * Converts an equation script to LaTeX via hwpEquationToLatex and wraps it in
 * single-dollar inline-math delimiters, escaping any "$" inside the LaTeX.
 *
 * @param {string} equation - Equation script.
 * @returns {string} "$…$" text, or "" when the conversion yields nothing.
 */
function formatEquationForMarkdown(equation) {
  const latex = hwpEquationToLatex(equation);
  if (latex) {
    const escaped = latex.replace(/\$/g, "\\$");
    return `$${escaped}$`;
  }
  return "";
}
|
|
3028
|
-
function
|
|
3029
|
-
|
|
3030
|
-
|
|
3031
|
-
const
|
|
3032
|
-
if (r.level <= ctrlLevel) break;
|
|
3033
|
-
if (r.tagId !== TAG_EQEDIT) continue;
|
|
3034
|
-
const equation = extractEquationText(r.data);
|
|
3900
|
+
/**
 * Finds the first TAG_EQEDIT record in records[start, end) and renders its
 * equation text for Markdown. Only that first record is considered.
 *
 * @param {Array} records - Flat record list.
 * @param {number} start - First index to inspect (inclusive).
 * @param {number} end - Index at which to stop (exclusive).
 * @returns {string|null} Formatted equation, or null if there is no equation
 *   record or its text is empty.
 */
function extractEquationFromSlice(records, start, end) {
  let cursor = start;
  while (cursor < end && records[cursor].tagId !== TAG_EQEDIT) {
    cursor++;
  }
  if (cursor >= end) {
    return null;
  }
  const script = extractEquationText(records[cursor].data);
  if (!script) {
    return null;
  }
  return formatEquationForMarkdown(script);
}
|
|
3039
|
-
function
|
|
3040
|
-
|
|
3041
|
-
|
|
3042
|
-
|
|
3043
|
-
|
|
3044
|
-
|
|
3908
|
+
/**
 * Creates the mutable state shared by all sections of one document parse.
 *
 * @returns {object} Fresh state: numbering tracker, outline numbering id,
 *   auto-number counters, the set of header/footer texts already seen, and
 *   the collected header and footer blocks.
 */
function createHwp5DocState() {
  const state = {};
  state.numbering = new NumberingState();
  state.outlineNumberingId = 0;
  state.autoCounters = new Map();
  state.headerTexts = new Set();
  state.headerBlocks = [];
  state.footerBlocks = [];
  return state;
}
|
|
3046
|
-
function
|
|
3047
|
-
|
|
3048
|
-
|
|
3049
|
-
if (data[0] === 255 && data[1] === 216 && data[2] === 255) return "image/jpeg";
|
|
3050
|
-
if (data[0] === 71 && data[1] === 73 && data[2] === 70) return "image/gif";
|
|
3051
|
-
if (data[0] === 66 && data[1] === 77) return "image/bmp";
|
|
3052
|
-
if (data[0] === 215 && data[1] === 205 && data[2] === 198 && data[3] === 154) return "image/wmf";
|
|
3053
|
-
if (data[0] === 1 && data[1] === 0 && data[2] === 0 && data[3] === 0) return "image/emf";
|
|
3054
|
-
return null;
|
|
3918
|
+
/**
 * Parses every paragraph of one section into blocks.
 *
 * @param {Array} records - All records of the section.
 * @param {object|null} docInfo - Parsed DocInfo, if available.
 * @param {Array} warnings - Warning sink.
 * @param {number} sectionNum - 1-based section number, used as pageNumber on blocks.
 * @param {object} [doc] - Document-wide state; a fresh one is created when omitted.
 * @returns {Array} Blocks produced for the section.
 */
function parseSection(records, docInfo, warnings, sectionNum, doc) {
  const sharedState = doc ?? createHwp5DocState();
  const rootCtx = { docInfo, warnings, sectionNum, doc: sharedState, depth: 0 };
  return parseParagraphList(records, 0, records.length, rootCtx);
}
|
|
3056
|
-
function
|
|
3057
|
-
const
|
|
3058
|
-
|
|
3059
|
-
|
|
3060
|
-
|
|
3061
|
-
|
|
3062
|
-
|
|
3063
|
-
|
|
3064
|
-
|
|
3065
|
-
|
|
3066
|
-
|
|
3067
|
-
|
|
3068
|
-
data = decompressStream(data);
|
|
3069
|
-
} catch {
|
|
3070
|
-
}
|
|
3071
|
-
}
|
|
3072
|
-
binDataMap.set(idx, { data, name: entry.name });
|
|
3922
|
+
/**
 * Parses the paragraphs found in records[start, end).
 * Each TAG_PARA_HEADER record opens a paragraph that extends over all following
 * records with a deeper level; records outside any paragraph are skipped.
 *
 * @param {Array} records - Flat record list.
 * @param {number} start - First index to inspect (inclusive).
 * @param {number} end - Index at which to stop (exclusive).
 * @param {object} ctx - Parse context passed through to parseParagraph.
 * @returns {Array} Blocks of all paragraphs, in order.
 */
function parseParagraphList(records, start, end, ctx) {
  const collected = [];
  for (let pos = start; pos < end; ) {
    const head = records[pos];
    if (head.tagId !== TAG_PARA_HEADER) {
      pos += 1;
      continue;
    }
    // The paragraph owns every subsequent record nested below its header.
    const headLevel = head.level;
    let stop = pos + 1;
    while (stop < end && records[stop].level > headLevel) {
      stop += 1;
    }
    collected.push(...parseParagraph(records, pos, stop, ctx));
    pos = stop;
  }
  return collected;
}
|
|
3938
|
+
/**
 * Parses one paragraph — the header at records[start] plus its nested records
 * up to `end` — into zero or more blocks.
 *
 * Steps: collect the paragraph's direct children (text, char-shape and control
 * records), let each control compute its effect, assemble the text with inline
 * control output, wrap hyperlink field ranges as Markdown links, derive heading
 * level and list marker from the paragraph shape, then emit the paragraph block
 * followed by any blocks contributed by controls.
 *
 * @param {Array} records - Flat record list.
 * @param {number} start - Index of the paragraph's TAG_PARA_HEADER record.
 * @param {number} end - Index one past the paragraph's last record.
 * @param {object} ctx - Parse context (docInfo, warnings, sectionNum, doc, depth).
 * @returns {Array} Blocks for this paragraph; may be empty.
 */
function parseParagraph(records, start, end, ctx) {
  const header = records[start];
  const baseLevel = header.level;
  // Paragraph shape id is the uint16 at header offset 8; -1 when the header is too short.
  const paraShapeId = header.data.length >= 10 ? header.data.readUInt16LE(8) : -1;
  const textRecords = [];
  const charShapeIds = [];
  const ctrls = [];
  let i = start + 1;
  while (i < end) {
    const rec = records[i];
    if (rec.tagId === TAG_CTRL_HEADER && rec.level === baseLevel + 1 && rec.data.length >= 4) {
      // A control owns every following record nested deeper than itself.
      const childStart = i + 1;
      let j = childStart;
      while (j < end && records[j].level > baseLevel + 1) j++;
      const idRaw = rec.data.readUInt32LE(0);
      ctrls.push({ id: normalizeCtrlId(idRaw), idRaw, data: rec.data, childStart, childEnd: j });
      // Skip the control's children; they are parsed by the control's handler.
      i = j;
      continue;
    }
    if (rec.tagId === TAG_PARA_TEXT && rec.level === baseLevel + 1) {
      textRecords.push(rec.data);
    } else if (rec.tagId === TAG_CHAR_SHAPE && rec.level === baseLevel + 1 && rec.data.length >= 8) {
      // 8-byte entries; the char-shape id is the uint32 in the second half of each entry.
      for (let offset = 0; offset + 7 < rec.data.length; offset += 8) {
        charShapeIds.push(rec.data.readUInt32LE(offset + 4));
      }
    }
    i++;
  }
  // Run control handlers before text assembly so inlineText/href/footnote are available.
  for (const ctrl of ctrls) {
    applyCtrlEffect(ctrl, records, ctx);
  }
  const state = createParaTextState();
  // Resolves a control reference from the text stream to that control's inline text.
  // Prefers the control at the given index; if its id does not match, falls back to
  // the first not-yet-resolved control with that id.
  const resolver = (idx, id) => {
    let ctrl = idx >= 0 && idx < ctrls.length ? ctrls[idx] : void 0;
    if (!ctrl || ctrl.idRaw !== id && ctrl.id !== id) {
      ctrl = ctrls.find((c) => !c.resolved && (c.idRaw === id || c.id === id));
    }
    if (!ctrl) return null;
    ctrl.resolved = true;
    return ctrl.inlineText ?? null;
  };
  for (const data of textRecords) {
    appendParaText(state, data, resolver);
  }
  let text = state.text;
  if (state.fieldRanges.length > 0) {
    // Apply from the rightmost range so earlier offsets stay valid after each splice.
    const ranges = [...state.fieldRanges].sort((a, b) => b.start - a.start);
    const applied = [];
    for (const r of ranges) {
      const ctrl = ctrls[r.ctrlIdx];
      if (!ctrl?.href || r.end <= r.start) continue;
      // Skip ranges overlapping one that has already been turned into a link.
      if (applied.some(([s, e]) => r.start < e && r.end > s)) continue;
      const href = sanitizeHref(ctrl.href);
      if (!href) continue;
      const anchor = text.slice(r.start, r.end);
      if (!anchor.trim()) continue;
      text = text.slice(0, r.start) + `[${anchor}](${href})` + text.slice(r.end);
      applied.push([r.start, r.end]);
    }
  }
  // Separate adjacent "$$" with a space — presumably so two back-to-back inline
  // equations are not read as a display-math delimiter.
  const trimmed = text.replace(/\$\$/g, "$ $").trim();
  let headingLevel = 0;
  let headMarker = null;
  const ps = ctx.docInfo && paraShapeId >= 0 && paraShapeId < ctx.docInfo.paraShapes.length ? ctx.docInfo.paraShapes[paraShapeId] : null;
  // NOTE(review): headType 1/2/3 look like outline / numbered / bulleted paragraphs — confirm.
  if (ps && ps.headType > 0) {
    if (ps.headType === 1) {
      // Heading level is the paragraph level + 1, capped at 6.
      headingLevel = Math.min(ps.paraLevel + 1, 6);
    }
    if (ps.headType === 1 || ps.headType === 2) {
      // headType 1 falls back to the document's outline numbering id when the shape has none.
      const nid = ps.numberingId || (ps.headType === 1 ? ctx.doc.outlineNumberingId : 0);
      // Numbering ids are 1-based indexes into docInfo.numberings.
      const numbering = nid >= 1 ? ctx.docInfo?.numberings[nid - 1] : void 0;
      if (numbering) {
        const counters = ctx.doc.numbering.advance(nid, ps.paraLevel);
        const fmt = numbering.levelFormats[Math.min(ps.paraLevel, 6)];
        if (fmt) {
          const headText = expandNumberingFormat(fmt, counters, numbering);
          if (headText) headMarker = headText;
        }
      }
    } else if (ps.headType === 3) {
      // Bullet ids are 1-based indexes into docInfo.bullets; "\uFFFF" is not used as a marker.
      const bullet = ps.numberingId >= 1 ? ctx.docInfo?.bullets[ps.numberingId - 1] : void 0;
      if (bullet && bullet.char !== "\uFFFF") headMarker = bullet.char;
    }
  }
  const blocks = [];
  const footnotes = ctrls.filter((c) => c.footnote).map((c) => c.footnote);
  if (trimmed) {
    const block = {
      type: headingLevel > 0 ? "heading" : "paragraph",
      text: headMarker ? `${headMarker} ${trimmed}` : trimmed,
      pageNumber: ctx.sectionNum
    };
    if (headingLevel > 0) block.level = headingLevel;
    if (ctx.docInfo && charShapeIds.length > 0) {
      const style = resolveCharStyle(charShapeIds, ctx.docInfo);
      if (style) block.style = style;
    }
    if (footnotes.length > 0) block.footnoteText = footnotes.join("; ");
    blocks.push(block);
  } else if (footnotes.length > 0) {
    // No body text: keep the note content as a standalone paragraph.
    blocks.push({ type: "paragraph", text: `(\uC8FC: ${footnotes.join("; ")})`, pageNumber: ctx.sectionNum });
  }
  // Blocks produced by controls (tables, drawing objects, nested lists) follow the paragraph.
  for (const ctrl of ctrls) {
    if (ctrl.afterBlocks) blocks.push(...ctrl.afterBlocks);
  }
  return blocks;
}
|
|
3220
|
-
function
|
|
3221
|
-
|
|
3222
|
-
|
|
3223
|
-
|
|
3224
|
-
|
|
3225
|
-
|
|
3226
|
-
|
|
3227
|
-
|
|
3228
|
-
|
|
3229
|
-
|
|
3230
|
-
|
|
3231
|
-
|
|
3232
|
-
|
|
3233
|
-
|
|
3234
|
-
|
|
3235
|
-
|
|
3236
|
-
}
|
|
3237
|
-
|
|
3238
|
-
|
|
3239
|
-
|
|
3240
|
-
|
|
3241
|
-
|
|
3242
|
-
|
|
3243
|
-
|
|
3244
|
-
|
|
4045
|
+
/**
 * Computes the effect of one control and stores it on the control object or in
 * the shared document state. Depending on the control this sets
 * ctrl.afterBlocks, ctrl.inlineText, ctrl.footnote or ctrl.href, updates
 * ctx.doc (auto counters, outline numbering id, header/footer blocks), or
 * pushes a warning.
 *
 * @param {object} ctrl - Control descriptor ({ id, idRaw, data, childStart, childEnd }).
 * @param {Array} records - Flat record list the control's child range refers to.
 * @param {object} ctx - Parse context (docInfo, warnings, sectionNum, doc, depth).
 * @returns {void}
 */
function applyCtrlEffect(ctrl, records, ctx) {
  switch (ctrl.id) {
    case CTRL_TBL: {
      const table = parseTableControl(ctrl, records, ctx);
      if (table) ctrl.afterBlocks = [{ type: "table", table, pageNumber: ctx.sectionNum }];
      return;
    }
    case CTRL_GSO: {
      const blocks = parseGsoControl(ctrl, records, ctx);
      if (blocks.length > 0) ctrl.afterBlocks = blocks;
      return;
    }
    case CTRL_EQED: {
      const eq = extractEquationFromSlice(records, ctrl.childStart, ctrl.childEnd);
      if (eq) ctrl.inlineText = eq;
      return;
    }
    case CTRL_FN:
    case CTRL_EN: {
      // Last argument selects the auto counter: 1 for CTRL_FN, 2 for CTRL_EN.
      applyNoteEffect(ctrl, records, ctx, ctrl.id === CTRL_FN ? 1 : 2);
      return;
    }
    case CTRL_HEAD:
    case CTRL_FOOT: {
      applyHeaderFooterEffect(ctrl, records, ctx, ctrl.id === CTRL_HEAD);
      return;
    }
    case CTRL_ATNO: {
      if (ctrl.data.length >= 8) {
        // Attribute word: low 4 bits select the counter type, the next 8 bits the number format.
        const attr = ctrl.data.readUInt32LE(4);
        const type = attr & 15;
        const format = attr >>> 4 & 255;
        // Counters start at 1; consume the current value and advance.
        const num = ctx.doc.autoCounters.get(type) ?? 1;
        ctx.doc.autoCounters.set(type, num + 1);
        // Optional decoration characters at offsets 12 and 14, when the record is long enough.
        const prefix = ctrl.data.length >= 14 ? wcharAt(ctrl.data, 12) : "";
        const suffix = ctrl.data.length >= 16 ? wcharAt(ctrl.data, 14) : "";
        ctrl.inlineText = `${prefix}${formatNumber(num, shapeFormatToNumFmt(format))}${suffix}`;
      }
      return;
    }
    case CTRL_NWNO: {
      if (ctrl.data.length >= 10) {
        const attr = ctrl.data.readUInt32LE(4);
        const type = attr & 15;
        const num = ctrl.data.readUInt16LE(8);
        // Restart the counter of this type at the given number; 0 is ignored.
        if (num > 0) ctx.doc.autoCounters.set(type, num);
      }
      return;
    }
    case CTRL_SECD: {
      if (ctrl.data.length >= 20) {
        // uint16 at offset 18 becomes the document's outline numbering id.
        ctx.doc.outlineNumberingId = ctrl.data.readUInt16LE(18);
      }
      return;
    }
    case CTRL_OLE: {
      ctx.warnings.push({ page: ctx.sectionNum, message: "\uC2A4\uD0B5\uB41C OLE \uAC1C\uCCB4", code: "SKIPPED_OLE" });
      return;
    }
    // Hidden comment / column definition / page-number position / page hiding /
    // index mark / bookmark / overlapping characters / ruby text — these carry
    // no body text or are skipped on purpose.
    case CTRL_TCMT:
    case CTRL_COLD:
    case CTRL_PGNP:
    case CTRL_PGHD:
    case CTRL_IDXM:
    case CTRL_BOKM:
    case CTRL_TCPS:
    case CTRL_TDUT:
    case CTRL_FORM:
      return;
    default: {
      if (isFieldCtrlId(ctrl.id)) {
        applyFieldEffect(ctrl);
        return;
      }
      // Unknown control: salvage any paragraphs nested under its list header.
      const blocks = parseListHeaderParagraphs(ctrl, records, ctx);
      if (blocks.length > 0) ctrl.afterBlocks = blocks;
    }
  }
}
|
|
3302
|
-
function
|
|
3303
|
-
|
|
3304
|
-
|
|
3305
|
-
|
|
3306
|
-
|
|
3307
|
-
|
|
3308
|
-
|
|
3309
|
-
if (
|
|
3310
|
-
|
|
3311
|
-
dominantId = id;
|
|
4125
|
+
/**
 * Reads one little-endian UTF-16 code unit from a buffer and returns it as a
 * one-character string.
 *
 * @param {Buffer} data - Source buffer.
 * @param {number} offset - Byte offset of the code unit.
 * @returns {string} The character, or "" when the code unit is 0.
 */
function wcharAt(data, offset) {
  const unit = data.readUInt16LE(offset);
  if (unit > 0) {
    return String.fromCharCode(unit);
  }
  return "";
}
|
|
4129
|
+
/**
 * Parses the paragraphs that follow the first TAG_LIST_HEADER record inside a
 * control's child range, one nesting level deeper than the current context.
 *
 * @param {object} ctrl - Control descriptor providing childStart / childEnd.
 * @param {Array} records - Flat record list.
 * @param {object} ctx - Parse context; ctx.depth limits recursion.
 * @returns {Array} Parsed blocks, or [] when the depth limit is reached or the
 *   control has no list header.
 */
function parseListHeaderParagraphs(ctrl, records, ctx) {
  if (ctx.depth >= MAX_NEST_DEPTH) {
    return [];
  }
  let at = ctrl.childStart;
  while (at < ctrl.childEnd && records[at].tagId !== TAG_LIST_HEADER) {
    at++;
  }
  if (at >= ctrl.childEnd) {
    return [];
  }
  const nestedCtx = { ...ctx, depth: ctx.depth + 1 };
  return parseParagraphList(records, at + 1, ctrl.childEnd, nestedCtx);
}
|
|
3322
|
-
function
|
|
3323
|
-
const
|
|
3324
|
-
const
|
|
3325
|
-
|
|
3326
|
-
|
|
3327
|
-
|
|
3328
|
-
|
|
3329
|
-
|
|
3330
|
-
|
|
3331
|
-
while (i < records.length) {
|
|
3332
|
-
const rec = records[i];
|
|
3333
|
-
if (rec.tagId === TAG_PARA_HEADER && rec.level <= startLevel) break;
|
|
3334
|
-
if (rec.tagId === TAG_PARA_TEXT) {
|
|
3335
|
-
textRecords.push(rec.data);
|
|
4138
|
+
/**
 * Flattens blocks into a single plain-text string.
 * Image and table blocks and blocks without text are left out; a block's
 * footnote text is appended to it in parentheses.
 *
 * @param {Array<{type: string, text?: string, footnoteText?: string}>} blocks - Blocks to flatten.
 * @param {string} sep - Separator placed between block texts.
 * @returns {string} Joined text with surrounding whitespace trimmed.
 */
function blocksPlainText(blocks, sep) {
  const pieces = [];
  for (const { type, text, footnoteText } of blocks) {
    const isTextual = type !== "image" && type !== "table";
    if (!isTextual || !text) {
      continue;
    }
    pieces.push(footnoteText ? `${text} (\uC8FC: ${footnoteText})` : text);
  }
  return pieces.join(sep).trim();
}
|
|
4151
|
+
/**
 * Handles a note control: builds the note marker from the current auto
 * counter, parses the note body, advances the counter, and stores the marker
 * as the control's inline text and the body as its footnote.
 *
 * @param {object} ctrl - Note control descriptor; receives inlineText and, when
 *   the note has content, footnote.
 * @param {Array} records - Flat record list.
 * @param {object} ctx - Parse context; ctx.doc.autoCounters is updated.
 * @param {number} autoType - Key of the auto counter to use for this note.
 * @returns {void}
 */
function applyNoteEffect(ctrl, records, ctx, autoType) {
  const payload = ctrl.data;
  const num = ctx.doc.autoCounters.get(autoType) ?? 1;
  // Decoration characters around the number are present from 12 bytes on.
  const hasAffixes = payload.length >= 12;
  const before = hasAffixes ? wcharAt(payload, 8) : "";
  const after = hasAffixes ? wcharAt(payload, 10) : "";
  // Number shape is the low byte of the uint32 at offset 12, when present.
  const shape = payload.length >= 16 ? payload.readUInt32LE(12) & 255 : 0;
  const formatted = formatNumber(num, shapeFormatToNumFmt(shape));
  let marker;
  if (before || after) {
    marker = `${before}${formatted}${after}`;
  } else {
    // No decoration in the record: fall back to "N)".
    marker = `${formatted})`;
  }
  const content = blocksPlainText(parseListHeaderParagraphs(ctrl, records, ctx), " ");
  // Re-read the counter: parsing the body may already have moved it. Advance
  // only if it is still at (or below) the number used for this note.
  const current = ctx.doc.autoCounters.get(autoType) ?? 1;
  if (current <= num) {
    ctx.doc.autoCounters.set(autoType, num + 1);
  }
  ctrl.inlineText = marker;
  if (!content) {
    return;
  }
  // Avoid repeating the marker when the body already begins with it.
  ctrl.footnote = content.startsWith(marker) ? content : `${marker} ${content}`;
}
|
|
4172
|
+
/**
 * Extracts the text of a header or footer control and records it once per
 * document as a paragraph block in ctx.doc.headerBlocks / footerBlocks.
 *
 * @param {object} ctrl - Header or footer control descriptor.
 * @param {Array} records - Flat record list.
 * @param {object} ctx - Parse context; ctx.doc is updated.
 * @param {boolean} isHeader - True for a header, false for a footer.
 * @returns {void}
 */
function applyHeaderFooterEffect(ctrl, records, ctx, isHeader) {
  const text = blocksPlainText(parseListHeaderParagraphs(ctrl, records, ctx), "\n");
  if (!text) {
    return;
  }
  // Headers and footers share one "seen" set, kept apart by the key prefix.
  const dedupeKey = `${isHeader ? "h:" : "f:"}${text}`;
  const seen = ctx.doc.headerTexts;
  if (seen.has(dedupeKey)) {
    return;
  }
  seen.add(dedupeKey);
  const target = isHeader ? ctx.doc.headerBlocks : ctx.doc.footerBlocks;
  target.push({ type: "paragraph", text, pageNumber: ctx.sectionNum });
}
|
|
4182
|
+
/**
 * Handles a field control. Only hyperlink fields (FIELD_HLK) have an effect:
 * the URL found in the field command is stored on ctrl.href.
 *
 * @param {object} ctrl - Field control descriptor; may receive href.
 * @returns {void}
 */
function applyFieldEffect(ctrl) {
  if (ctrl.id !== FIELD_HLK) {
    return;
  }
  const command = parseFieldCommand(ctrl.data);
  if (!command) {
    return;
  }
  const target = hyperlinkUrlFromCommand(command);
  if (target) {
    ctrl.href = target;
  }
}
|
|
4191
|
+
/**
 * Reads the command string of a field control record: a uint16 character
 * count at byte offset 9, followed by that many UTF-16LE code units starting
 * at offset 11.
 *
 * @param {Buffer} data - Field control record payload.
 * @returns {string|null} The command with trailing NULs removed, or null when
 *   the record is too short, the count is zero, or the string would run past
 *   the end of the record.
 */
function parseFieldCommand(data) {
  const TEXT_OFFSET = 11;
  if (data.length < TEXT_OFFSET) {
    return null;
  }
  const charCount = data.readUInt16LE(TEXT_OFFSET - 2);
  const textEnd = TEXT_OFFSET + charCount * 2;
  if (charCount === 0 || textEnd > data.length) {
    return null;
  }
  const raw = data.subarray(TEXT_OFFSET, textEnd).toString("utf16le");
  return raw.replace(/\0+$/, "");
}
|
|
4200
|
+
/**
 * Extracts the URL from a hyperlink field command.
 * The URL is everything up to the first unescaped ";"; a backslash escapes the
 * character that follows it (a trailing lone backslash is kept literally).
 *
 * @param {string} command - Field command string.
 * @returns {string|null} Trimmed URL, or null when it is empty or 2000+ characters long.
 */
function hyperlinkUrlFromCommand(command) {
  const kept = [];
  let pos = 0;
  while (pos < command.length) {
    const ch = command[pos];
    const hasNext = pos + 1 < command.length;
    if (ch === "\\" && hasNext) {
      // Escaped character: take it verbatim and skip past the pair.
      kept.push(command[pos + 1]);
      pos += 2;
      continue;
    }
    if (ch === ";") {
      break;
    }
    kept.push(ch);
    pos += 1;
  }
  const url = kept.join("").trim();
  if (url.length === 0 || url.length >= 2e3) {
    return null;
  }
  return url;
}
|
|
3360
|
-
function
|
|
3361
|
-
|
|
3362
|
-
|
|
3363
|
-
let rows = 0
|
|
4215
|
+
/**
 * Builds a table from the child records of a table control.
 *
 * Steps:
 *  1. Find the first TAG_TABLE record (with at least 8 data bytes) among the
 *     children and read the row/column counts, clamped to MAX_ROWS/MAX_COLS.
 *  2. If a TAG_LIST_HEADER precedes the table record, parse the records
 *     between it and the table record as the caption.
 *  3. After the table record, every TAG_LIST_HEADER starts a cell; the cell
 *     extends until a record with a lower level, or a list header / table
 *     record on the same level.
 *  4. Arrange the cells into a grid and emit either the address-based table
 *     object or the result of buildTable.
 *
 * @param ctrl - Control with childStart/childEnd record indices.
 * @param records - Flat record list.
 * @param ctx - Parse context; ctx.depth limits nesting.
 * @returns A table object, or null when nesting is too deep, no usable table
 *   record or cell exists, or buildTable yields zero rows.
 */
function parseTableControl(ctrl, records, ctx) {
  if (ctx.depth >= MAX_NEST_DEPTH) {
    return null;
  }
  const { childStart, childEnd } = ctrl;

  // 1. Locate the table record.
  let tableIdx = -1;
  for (let idx = childStart; idx < childEnd; idx++) {
    const candidate = records[idx];
    if (candidate.tagId === TAG_TABLE && candidate.data.length >= 8) {
      tableIdx = idx;
      break;
    }
  }
  if (tableIdx < 0) {
    return null;
  }
  const tableData = records[tableIdx].data;
  const rows = Math.min(tableData.readUInt16LE(4), MAX_ROWS);
  const cols = Math.min(tableData.readUInt16LE(6), MAX_COLS);
  if (rows === 0 || cols === 0) {
    return null;
  }

  // 2. Optional caption: only the first list header before the table record counts.
  let caption;
  for (let idx = childStart; idx < tableIdx; idx++) {
    if (records[idx].tagId !== TAG_LIST_HEADER) {
      continue;
    }
    const capBlocks = parseParagraphList(records, idx + 1, tableIdx, { ...ctx, depth: ctx.depth + 1 });
    const capText = blocksPlainText(capBlocks, " ");
    if (capText) {
      caption = capText;
    }
    break;
  }

  // 3. Collect the cells.
  const cells = [];
  let cursor = tableIdx + 1;
  while (cursor < childEnd) {
    const header = records[cursor];
    if (header.tagId !== TAG_LIST_HEADER) {
      cursor++;
      continue;
    }
    const cellLevel = header.level;
    let cellEnd = cursor + 1;
    for (; cellEnd < childEnd; cellEnd++) {
      const next = records[cellEnd];
      if (next.level < cellLevel) {
        break;
      }
      const startsSibling = next.tagId === TAG_LIST_HEADER || next.tagId === TAG_TABLE;
      if (next.level === cellLevel && startsSibling) {
        break;
      }
    }
    cells.push(parseCell(records, cursor, cellEnd, ctx));
    cursor = cellEnd;
  }
  if (cells.length === 0) {
    return null;
  }

  // 4. Arrange and emit.
  const addressed = cells.some((c) => c.colAddr !== void 0 && c.rowAddr !== void 0);
  const grid = arrangeCells(rows, cols, cells);
  if (!addressed) {
    const built = buildTable(grid);
    if (built.rows <= 0) {
      return null;
    }
    if (caption) {
      built.caption = caption;
    }
    return built;
  }
  const toIrCell = (c) => {
    const ir = { text: c.text.trim(), colSpan: c.colSpan, rowSpan: c.rowSpan };
    if (c.blocks?.length) ir.blocks = c.blocks;
    if (c.isHeader) ir.isHeader = true;
    return ir;
  };
  const result = {
    rows,
    cols,
    cells: grid.map((row) => row.map((c) => toIrCell(c))),
    hasHeader: rows > 1
  };
  if (caption) {
    result.caption = caption;
  }
  return result;
}
|
|
4277
|
+
function parseCell(records, lhIdx, end, ctx) {
|
|
4278
|
+
const rec = records[lhIdx];
|
|
3407
4279
|
let colSpan = 1;
|
|
3408
4280
|
let rowSpan = 1;
|
|
3409
4281
|
let colAddr;
|
|
3410
4282
|
let rowAddr;
|
|
4283
|
+
let isHeader = false;
|
|
3411
4284
|
if (rec.data.length >= 16) {
|
|
4285
|
+
isHeader = (rec.data.readUInt16LE(6) & 4) !== 0;
|
|
3412
4286
|
colAddr = rec.data.readUInt16LE(8);
|
|
3413
4287
|
rowAddr = rec.data.readUInt16LE(10);
|
|
3414
4288
|
const cs = rec.data.readUInt16LE(12);
|
|
@@ -3416,36 +4290,30 @@ function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
|
3416
4290
|
if (cs > 0) colSpan = Math.min(cs, MAX_COLS);
|
|
3417
4291
|
if (rs > 0) rowSpan = Math.min(rs, MAX_ROWS);
|
|
3418
4292
|
}
|
|
3419
|
-
|
|
3420
|
-
|
|
3421
|
-
|
|
3422
|
-
|
|
3423
|
-
if (
|
|
3424
|
-
|
|
3425
|
-
|
|
3426
|
-
}
|
|
3427
|
-
|
|
3428
|
-
|
|
3429
|
-
|
|
3430
|
-
if (
|
|
3431
|
-
|
|
3432
|
-
if (
|
|
3433
|
-
|
|
3434
|
-
|
|
3435
|
-
} else if (ctrlId === " lbt" || ctrlId === "tbl ") {
|
|
3436
|
-
flushText();
|
|
3437
|
-
if (counter) {
|
|
3438
|
-
counter.count++;
|
|
3439
|
-
texts.push(`[\uC911\uCCA9 \uD14C\uC774\uBE14 #${counter.count}]`);
|
|
3440
|
-
} else {
|
|
3441
|
-
texts.push("[\uC911\uCCA9 \uD14C\uC774\uBE14]");
|
|
3442
|
-
}
|
|
4293
|
+
const blocks = ctx.depth < MAX_NEST_DEPTH ? parseParagraphList(records, lhIdx + 1, end, { ...ctx, depth: ctx.depth + 1 }) : [];
|
|
4294
|
+
const parts = [];
|
|
4295
|
+
let hasStructure = false;
|
|
4296
|
+
for (const b of blocks) {
|
|
4297
|
+
if (b.type === "image" && b.text) {
|
|
4298
|
+
parts.push(``);
|
|
4299
|
+
hasStructure = true;
|
|
4300
|
+
} else if (b.type === "table" && b.table) {
|
|
4301
|
+
const flat = convertTableToText(b.table.cells);
|
|
4302
|
+
if (flat) parts.push(flat);
|
|
4303
|
+
hasStructure = true;
|
|
4304
|
+
} else if (b.text) {
|
|
4305
|
+
let t = b.text;
|
|
4306
|
+
if (b.footnoteText) {
|
|
4307
|
+
t += ` (\uC8FC: ${b.footnoteText})`;
|
|
4308
|
+
hasStructure = true;
|
|
3443
4309
|
}
|
|
4310
|
+
parts.push(t);
|
|
3444
4311
|
}
|
|
3445
|
-
i++;
|
|
3446
4312
|
}
|
|
3447
|
-
|
|
3448
|
-
|
|
4313
|
+
const cell = { text: parts.join("\n"), colSpan, rowSpan, colAddr, rowAddr };
|
|
4314
|
+
if (hasStructure && blocks.length > 0) cell.blocks = blocks;
|
|
4315
|
+
if (isHeader) cell.isHeader = true;
|
|
4316
|
+
return cell;
|
|
3449
4317
|
}
|
|
3450
4318
|
function arrangeCells(rows, cols, cells) {
|
|
3451
4319
|
const grid = Array.from({ length: rows }, () => Array(cols).fill(null));
|
|
@@ -3483,6 +4351,78 @@ function arrangeCells(rows, cols, cells) {
|
|
|
3483
4351
|
}
|
|
3484
4352
|
return grid.map((row) => row.map((c) => c || { text: "", colSpan: 1, rowSpan: 1 }));
|
|
3485
4353
|
}
|
|
4354
|
+
/**
 * Collects the blocks contained in a drawing-object (GSO) control.
 *
 * Output order:
 *  1. paragraphs of the first list header found before the first shape
 *     component / shape-component-container record,
 *  2. image blocks for every picture record between that shape component and
 *     the next list header (or the end of the children),
 *  3. paragraphs of that next list header, up to the end of the children.
 *
 * @param ctrl - Control with childStart/childEnd record indices.
 * @param records - Flat record list.
 * @param ctx - Parse context; ctx.depth limits nesting.
 * @returns Array of blocks; empty when nesting is too deep.
 */
function parseGsoControl(ctrl, records, ctx) {
  if (ctx.depth >= MAX_NEST_DEPTH) {
    return [];
  }
  const { childStart, childEnd } = ctrl;

  // Index of the first record in [from, to) accepted by `accept`, or -1.
  const firstIndex = (from, to, accept) => {
    for (let idx = from; idx < to; idx++) {
      if (accept(records[idx].tagId)) return idx;
    }
    return -1;
  };
  const isListHeader = (tag) => tag === TAG_LIST_HEADER;
  const parseNested = (from, to) => parseParagraphList(records, from, to, { ...ctx, depth: ctx.depth + 1 });

  const blocks = [];
  const scIdx = firstIndex(
    childStart,
    childEnd,
    (tag) => tag === TAG_SHAPE_COMPONENT || tag === TAG_SHAPE_COMPONENT_CONTAINER
  );

  if (scIdx > childStart) {
    const leadingList = firstIndex(childStart, scIdx, isListHeader);
    if (leadingList >= 0) {
      blocks.push(...parseNested(leadingList + 1, scIdx));
    }
  }

  const scanStart = scIdx >= 0 ? scIdx + 1 : childStart;
  const textListIdx = firstIndex(scanStart, childEnd, isListHeader);
  const picEnd = textListIdx >= 0 ? textListIdx : childEnd;

  for (let idx = scanStart; idx < picEnd; idx++) {
    const rec = records[idx];
    if (rec.tagId !== TAG_SHAPE_COMPONENT_PICTURE) {
      continue;
    }
    const img = pictureToImageBlock(rec.data, ctx);
    if (img) {
      blocks.push(img);
    }
  }

  if (textListIdx >= 0) {
    blocks.push(...parseNested(textListIdx + 1, childEnd));
  }
  return blocks;
}
|
|
4394
|
+
/**
 * Turns a picture record payload into an image block.
 *
 * The 16-bit little-endian value at byte offset 71 is the 1-based bin-data
 * id. It is looked up in ctx.docInfo.binData (when docInfo exists):
 *  - an entry of kind "link" is skipped and a SKIPPED_IMAGE warning is pushed;
 *  - an entry with a positive storageId contributes that storageId;
 *  - otherwise the bin-data id itself is used.
 *
 * @param data - Buffer of the picture record.
 * @param ctx - Parse context; reads docInfo and sectionNum, may push to warnings.
 * @returns `{ type: "image", text, pageNumber }`, or null when the payload is
 *   shorter than 73 bytes, the id is 0, or the image is an external link.
 */
function pictureToImageBlock(data, ctx) {
  const ID_OFFSET = 71;
  if (data.length < ID_OFFSET + 2) {
    return null;
  }
  const binDataId = data.readUInt16LE(ID_OFFSET);
  if (binDataId === 0) {
    return null;
  }
  const entry = ctx.docInfo?.binData[binDataId - 1];
  if (entry?.kind === "link") {
    ctx.warnings.push({ page: ctx.sectionNum, message: `\uC678\uBD80 \uC5F0\uACB0 \uC774\uBBF8\uC9C0 (binDataId ${binDataId})`, code: "SKIPPED_IMAGE" });
    return null;
  }
  let storageId = binDataId;
  if (entry && entry.storageId > 0) {
    storageId = entry.storageId;
  }
  return { type: "image", text: String(storageId), pageNumber: ctx.sectionNum };
}
|
|
4406
|
+
/**
 * Derives a paragraph-level style from the most frequent character shape.
 *
 * The dominant id is the one that first reaches the highest occurrence count
 * while scanning charShapeIds in order. Its entry in docInfo.charShapes
 * supplies fontSize (stored value divided by 10) and the italic (bit 0) and
 * bold (bit 1) flags of attrFlags.
 *
 * @param charShapeIds - Character shape ids used by the paragraph.
 * @param docInfo - Object whose charShapes array is indexed by shape id.
 * @returns `{ fontSize?, italic?, bold? }`, or undefined when there is no
 *   input, no matching shape, or nothing worth reporting.
 */
function resolveCharStyle(charShapeIds, docInfo) {
  if (charShapeIds.length === 0 || docInfo.charShapes.length === 0) {
    return void 0;
  }
  const occurrences = /* @__PURE__ */ new Map();
  let dominantId = charShapeIds[0];
  let best = 0;
  for (const shapeId of charShapeIds) {
    const seen = (occurrences.get(shapeId) || 0) + 1;
    occurrences.set(shapeId, seen);
    // Strictly greater: on a tie the id that got there first stays dominant.
    if (seen > best) {
      best = seen;
      dominantId = shapeId;
    }
  }
  const shape = docInfo.charShapes[dominantId];
  if (!shape) {
    return void 0;
  }
  const style = {};
  if (shape.fontSize > 0) {
    style.fontSize = shape.fontSize / 10;
  }
  if (shape.attrFlags & 1) {
    style.italic = true;
  }
  if (shape.attrFlags & 2) {
    style.bold = true;
  }
  if (style.fontSize || style.bold || style.italic) {
    return style;
  }
  return void 0;
}
|
|
3486
4426
|
|
|
3487
4427
|
// src/hwp3/parser.ts
|
|
3488
4428
|
import { inflateRawSync as inflateRawSync3 } from "zlib";
|
|
@@ -15571,7 +16511,7 @@ function parseHwp3Document(buffer, _options) {
|
|
|
15571
16511
|
const ctx = { paragraphs: [], warnings };
|
|
15572
16512
|
try {
|
|
15573
16513
|
skipFontFacesAndStyles(bodyReader);
|
|
15574
|
-
|
|
16514
|
+
parseParagraphList2(bodyReader, ctx);
|
|
15575
16515
|
} catch (err) {
|
|
15576
16516
|
warnings.push({
|
|
15577
16517
|
code: "PARTIAL_PARSE",
|
|
@@ -15603,7 +16543,7 @@ function skipFontFacesAndStyles(reader) {
|
|
|
15603
16543
|
const nStyles = reader.readU16();
|
|
15604
16544
|
reader.skip(nStyles * STYLE_RECORD_SIZE);
|
|
15605
16545
|
}
|
|
15606
|
-
function
|
|
16546
|
+
function parseParagraphList2(reader, ctx) {
|
|
15607
16547
|
for (; ; ) {
|
|
15608
16548
|
if (reader.eof()) return;
|
|
15609
16549
|
const followPrev = reader.readU8();
|
|
@@ -15688,17 +16628,17 @@ function parseCharStream(reader, charCount, ctx) {
|
|
|
15688
16628
|
break;
|
|
15689
16629
|
case 15: {
|
|
15690
16630
|
reader.skip(8);
|
|
15691
|
-
|
|
16631
|
+
parseParagraphList2(reader, ctx);
|
|
15692
16632
|
break;
|
|
15693
16633
|
}
|
|
15694
16634
|
case 16: {
|
|
15695
16635
|
reader.skip(10);
|
|
15696
|
-
|
|
16636
|
+
parseParagraphList2(reader, ctx);
|
|
15697
16637
|
break;
|
|
15698
16638
|
}
|
|
15699
16639
|
case 17: {
|
|
15700
16640
|
reader.skip(14);
|
|
15701
|
-
|
|
16641
|
+
parseParagraphList2(reader, ctx);
|
|
15702
16642
|
break;
|
|
15703
16643
|
}
|
|
15704
16644
|
case 29:
|
|
@@ -15728,9 +16668,9 @@ function parseTableLike(reader, ctx) {
|
|
|
15728
16668
|
}
|
|
15729
16669
|
reader.skip(27 * cellCount);
|
|
15730
16670
|
for (let i = 0; i < cellCount; i++) {
|
|
15731
|
-
|
|
16671
|
+
parseParagraphList2(reader, ctx);
|
|
15732
16672
|
}
|
|
15733
|
-
|
|
16673
|
+
parseParagraphList2(reader, ctx);
|
|
15734
16674
|
return "";
|
|
15735
16675
|
}
|
|
15736
16676
|
function parsePicture(reader, _ctx) {
|
|
@@ -17194,7 +18134,7 @@ function extractRun(r) {
|
|
|
17194
18134
|
}
|
|
17195
18135
|
return { text, bold, italic };
|
|
17196
18136
|
}
|
|
17197
|
-
function
|
|
18137
|
+
function parseParagraph2(p, styles, numbering, footnotes, rels) {
|
|
17198
18138
|
const pPrEls = getChildElements(p, "pPr");
|
|
17199
18139
|
let styleId = "";
|
|
17200
18140
|
let numId = "";
|
|
@@ -17310,7 +18250,7 @@ function parseTable(tbl, styles, numbering, footnotes, rels) {
|
|
|
17310
18250
|
const cellTexts = [];
|
|
17311
18251
|
const pElements = getChildElements(tc, "p");
|
|
17312
18252
|
for (const p of pElements) {
|
|
17313
|
-
const block =
|
|
18253
|
+
const block = parseParagraph2(p, styles, numbering, footnotes, rels);
|
|
17314
18254
|
if (block?.text) cellTexts.push(block.text);
|
|
17315
18255
|
}
|
|
17316
18256
|
row.push({ text: cellTexts.join("\n"), colSpan, rowSpan });
|
|
@@ -17439,7 +18379,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
17439
18379
|
const el = node;
|
|
17440
18380
|
const localName3 = el.localName ?? el.tagName?.split(":").pop();
|
|
17441
18381
|
if (localName3 === "p") {
|
|
17442
|
-
const block =
|
|
18382
|
+
const block = parseParagraph2(el, styles, numbering, footnotes, rels);
|
|
17443
18383
|
if (block) blocks.push(block);
|
|
17444
18384
|
} else if (localName3 === "tbl") {
|
|
17445
18385
|
const block = parseTable(el, styles, numbering, footnotes, rels);
|
|
@@ -17579,7 +18519,7 @@ function walkContent(node, blocks, paraShapeMap, sectionNum, warnings, inHeaderF
|
|
|
17579
18519
|
}
|
|
17580
18520
|
if (tag === "P") {
|
|
17581
18521
|
if (!inHeaderFooter) {
|
|
17582
|
-
|
|
18522
|
+
parseParagraph3(el, blocks, paraShapeMap, sectionNum);
|
|
17583
18523
|
}
|
|
17584
18524
|
continue;
|
|
17585
18525
|
}
|
|
@@ -17596,7 +18536,7 @@ function walkContent(node, blocks, paraShapeMap, sectionNum, warnings, inHeaderF
|
|
|
17596
18536
|
walkContent(el, blocks, paraShapeMap, sectionNum, warnings, inHeaderFooter, depth + 1);
|
|
17597
18537
|
}
|
|
17598
18538
|
}
|
|
17599
|
-
function
|
|
18539
|
+
function parseParagraph3(el, blocks, paraShapeMap, sectionNum) {
|
|
17600
18540
|
const paraShapeId = el.getAttribute("ParaShape") ?? "";
|
|
17601
18541
|
const shapeInfo = paraShapeMap.get(paraShapeId);
|
|
17602
18542
|
const text = extractParagraphText(el);
|
|
@@ -19041,6 +19981,1490 @@ function diffTableCells(a, b) {
|
|
|
19041
19981
|
return result;
|
|
19042
19982
|
}
|
|
19043
19983
|
|
|
19984
|
+
// src/roundtrip/patcher.ts
|
|
19985
|
+
import JSZip7 from "jszip";
|
|
19986
|
+
|
|
19987
|
+
// src/roundtrip/source-map.ts
|
|
19988
|
+
/**
 * Escapes text for use as XML character data.
 *
 * "&" is replaced first so that the ampersands introduced by the "<" and ">"
 * replacements are not escaped a second time.
 *
 * @param text - Raw text.
 * @returns The text with "&", "<" and ">" replaced by "&amp;", "&lt;" and "&gt;".
 */
function escapeXmlText(text) {
  return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
}
|
|
19991
|
+
/**
 * Decodes the five predefined XML entities and numeric character references.
 *
 * Decimal references (`&#65;`) accept decimal digits only and hexadecimal
 * references (`&#x41;`) require the lowercase `x` prefix. Anything that is
 * not a well-formed reference, or whose code point lies outside
 * 0..0x10FFFF, is left in the text unchanged.
 *
 * @param text - Text that may contain entity references.
 * @returns The text with recognised references replaced by their characters.
 */
function decodeXmlEntities(text) {
  const NAMED = { lt: "<", gt: ">", amp: "&", quot: '"', apos: "'" };
  return text.replace(/&(lt|gt|amp|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);/g, (match, ent) => {
    if (ent[0] !== "#") {
      return NAMED[ent];
    }
    // The pattern already guarantees the digits fit the chosen radix, so a
    // malformed reference such as "&#1F;" never reaches parseInt.
    const isHex = ent[1] === "x";
    const code = isHex ? parseInt(ent.slice(2), 16) : parseInt(ent.slice(1), 10);
    // String.fromCodePoint throws outside this range; keep such input as-is.
    if (code >= 0 && code <= 1114111) {
      return String.fromCodePoint(code);
    }
    return match;
  });
}
|
|
20013
|
+
/**
 * Converts the raw inner XML of a text (`t`) element into plain text.
 *
 * Inline whitespace/break elements (tab, fwSpace, hwSpace, br, lineBreak,
 * with or without a namespace prefix) each become a single space, every
 * other tag is dropped, and entity references are decoded last.
 *
 * @param raw - Raw XML between the opening and closing text tags.
 * @returns The decoded plain text.
 */
function tContentToText(raw) {
  const withSpaces = raw.replace(/<\/?(?:[A-Za-z0-9_]+:)?(?:tab|fwSpace|hwSpace|br|lineBreak)(?:\s[^>]*)?\/?>/g, " ");
  const withoutTags = withSpaces.replace(/<[^>]*>/g, "");
  return decodeXmlEntities(withoutTags);
}
|
|
20018
|
+
// Tokenizer for XML markup. Alternatives, in order: comment, CDATA section,
// processing instruction, `<!...>` declaration, closing tag (capture 1 = name),
// opening/self-closing tag (capture 2 = name, 3 = raw attributes, 4 = "/" when
// self-closing). The regex is global, so users must reset lastIndex.
var TAG_RE = /<!--[\s\S]*?-->|<!\[CDATA\[[\s\S]*?\]\]>|<\?[\s\S]*?\?>|<!(?:"[^"]*"|'[^']*'|[^>"'])*>|<\/([^\s>]+)\s*>|<([^\s/>!?]+)((?:"[^"]*"|'[^']*'|[^>"'])*?)(\/?)>/g;
// Local element names that stop the upward search for the paragraph owning a
// text element (see owningPara in scanSectionXml).
var T_BARRIER = /* @__PURE__ */ new Set([
  "tbl",
  "ctrl",
  "caption",
  "pic",
  "shape",
  "drawingObject",
  "drawText",
  "shapeComment",
  "memogroup",
  "memo",
  "hiddenComment",
  "equation",
  "parameters",
  "subList",
  "p"
]);
// Local element names that make a paragraph nested inside them "excluded"
// (see classifyPara in scanSectionXml).
var PARA_CONTAINER = /* @__PURE__ */ new Set([
  "tc",
  "ctrl",
  "caption",
  "drawText",
  "pic",
  "shape",
  "drawingObject",
  "memogroup",
  "memo",
  "hiddenComment",
  "footNote",
  "endNote",
  "fn",
  "en"
  // Footnotes/endnotes: the parser only absorbs them into the host block's footnoteText
]);
// Local element names that make a table nested inside them non-top-level
// (see isTableTopLevel in scanSectionXml).
var TABLE_BARRIER = /* @__PURE__ */ new Set([
  "tbl",
  "ctrl",
  "caption",
  "memogroup",
  "memo",
  "hiddenComment"
]);
|
|
20061
|
+
/**
 * Returns the local part of a qualified XML name.
 *
 * @param qname - Name such as "hp:run" or "run".
 * @returns Everything after the first ":", or the whole name if there is none.
 */
function localOf(qname) {
  const colonAt = qname.indexOf(":");
  if (colonAt < 0) {
    return qname;
  }
  return qname.slice(colonAt + 1);
}
|
|
20065
|
+
/**
 * Returns the namespace prefix of a qualified XML name.
 *
 * @param qname - Name such as "hp:run" or "run".
 * @returns Everything before the first ":", or "" if there is none.
 */
function prefixOf(qname) {
  const colonAt = qname.indexOf(":");
  if (colonAt < 0) {
    return "";
  }
  return qname.slice(0, colonAt);
}
|
|
20069
|
+
/**
 * Scans one section's XML with TAG_RE and builds a source map of it.
 *
 * The scan keeps a stack of open elements and records:
 *  - paragraphs classified as "body" or "draw" (bodyParagraphs), each with the
 *    character offsets of its text (`t`) contents and the accumulated text;
 *  - top-level tables (tables), with rows of cells; cells collect their own
 *    paragraphs and nested tables;
 *  - the joined text of header/footer elements found under a `ctrl` element.
 *
 * @param xml - Section XML string.
 * @param sectionIndex - Index stored on every paragraph/table and in the result.
 * @returns `{ sectionIndex, xml, bodyParagraphs, tables, headerTexts, footerTexts }`.
 */
function scanSectionXml(xml, sectionIndex) {
  // Open elements, innermost last: { local, qname, contentStart }.
  const stack = [];
  const bodyParagraphs = [];
  const tables = [];
  const headerTexts = [];
  const footerTexts = [];
  // Parallel stacks for the paragraphs, tables, rows and cells being built.
  const paraStack = [];
  const tableStack = [];
  const rowStack = [];
  const cellStack = [];
  // Set while inside a non-self-closing `t` element that belongs to a paragraph.
  let pendingT = null;
  // Header/footer collectors currently open under a `ctrl`.
  const ctrlSubStack = [];
  // Classifies a paragraph from its ancestors (innermost first): inside a `tc`
  // it is "cell"; inside any other PARA_CONTAINER it is "excluded"; `drawText`
  // ancestors are skipped over and yield "draw" if nothing else decides.
  const classifyPara = () => {
    let sawDrawText = false;
    for (let i = stack.length - 1; i >= 0; i--) {
      const l = stack[i].local;
      if (l === "tc") return "cell";
      if (l === "drawText") {
        sawDrawText = true;
        continue;
      }
      if (PARA_CONTAINER.has(l)) return "excluded";
    }
    return sawDrawText ? "draw" : "body";
  };
  // Returns the innermost open paragraph if a `p` is reached on the stack
  // before any other T_BARRIER element; otherwise null.
  const owningPara = () => {
    if (paraStack.length === 0) return null;
    for (let i = stack.length - 1; i >= 0; i--) {
      const l = stack[i].local;
      if (l === "p") return paraStack[paraStack.length - 1];
      if (T_BARRIER.has(l)) return null;
    }
    return null;
  };
  // A table is top-level when no ancestor is a TABLE_BARRIER element.
  const isTableTopLevel = () => {
    for (let i = stack.length - 1; i >= 0; i--) {
      if (TABLE_BARRIER.has(stack[i].local)) return false;
    }
    return true;
  };
  const currentCtrlSub = () => ctrlSubStack.length > 0 ? ctrlSubStack[ctrlSubStack.length - 1] : null;
  // TAG_RE is a shared global regex; restart it from the beginning.
  TAG_RE.lastIndex = 0;
  let m;
  while ((m = TAG_RE.exec(xml)) !== null) {
    const [full, closeName, openName, , selfClose] = m;
    // Comments, CDATA, processing instructions and declarations capture no name.
    if (closeName === void 0 && openName === void 0) continue;
    // ---- closing tag ----
    if (closeName !== void 0) {
      const local2 = localOf(closeName);
      if (local2 === "t" && pendingT) {
        // Record the content range of the text element and append its text.
        const { para, contentStart: contentStart2 } = pendingT;
        para.tRanges.push({ contentStart: contentStart2, contentEnd: m.index });
        para.text += tContentToText(xml.slice(contentStart2, m.index));
        pendingT = null;
      }
      // Unwind the stack to the nearest matching open element (this also drops
      // anything left open above it); an unmatched closing tag changes nothing.
      for (let i = stack.length - 1; i >= 0; i--) {
        if (stack[i].local === local2) {
          stack.length = i;
          break;
        }
      }
      if (local2 === "p") {
        const para = paraStack.pop();
        if (para && para.kind === "excluded") {
          // Non-blank text of excluded paragraphs feeds the open header/footer collector, if any.
          const sub = currentCtrlSub();
          if (sub && para.text.trim()) sub.texts.push(para.text);
        }
      } else if (local2 === "tc") {
        const cell = cellStack.pop();
        const row = rowStack[rowStack.length - 1];
        if (cell && row) row.push(cell);
      } else if (local2 === "tr") {
        const row = rowStack[rowStack.length - 1];
        const table = tableStack[tableStack.length - 1];
        // Empty rows are not stored.
        if (row && table && row.length > 0) table.rows.push(row);
        if (rowStack.length > 0) rowStack[rowStack.length - 1] = [];
      } else if (local2 === "tbl") {
        const table = tableStack.pop();
        rowStack.pop();
        if (table) {
          finalizeTable(table);
          if (!table.topLevel) {
            // Non-top-level tables are attached to the cell currently open, if any.
            const cell = cellStack[cellStack.length - 1];
            if (cell) cell.tables.push(table);
          }
        }
      } else if (local2 === "header" || local2 === "footer") {
        const sub = ctrlSubStack[ctrlSubStack.length - 1];
        if (sub) {
          ctrlSubStack.pop();
          const joined = sub.texts.join("\n").trim();
          if (joined) (sub.kind === "header" ? headerTexts : footerTexts).push(joined);
        }
      }
      continue;
    }
    // ---- opening or self-closing tag ----
    const qname = openName;
    const local = localOf(qname);
    const attrsRaw = m[3] || "";
    const isSelfClose = selfClose === "/";
    const contentStart = m.index + full.length;
    if (isSelfClose) {
      // Self-closing elements are never pushed onto the stack.
      if (local === "t") {
        // For a self-closing `t` the recorded range covers the whole tag.
        const para = owningPara();
        if (para) para.tRanges.push({ contentStart: m.index, contentEnd: m.index + full.length, selfClosing: true, prefix: prefixOf(qname) });
      } else if (local === "tab" || local === "fwSpace" || local === "hwSpace" || local === "br" || local === "lineBreak") {
        // Inside an open `t` these are handled by tContentToText on close.
        if (!pendingT) {
          const para = owningPara();
          if (para) para.text += " ";
        }
      } else if (local === "run" || local === "r") {
        // Remember only the first self-closing run of the paragraph.
        const para = owningPara();
        if (para && !para.selfCloseRun) para.selfCloseRun = { start: m.index, end: m.index + full.length };
      } else if (local === "cellAddr") {
        const cell = cellStack[cellStack.length - 1];
        if (cell && insideCurrentTable(stack, tableStack)) {
          const ca = parseInt(getAttr2(attrsRaw, "colAddr") || "", 10);
          const ra = parseInt(getAttr2(attrsRaw, "rowAddr") || "", 10);
          if (!isNaN(ca)) cell.colAddr = ca;
          if (!isNaN(ra)) cell.rowAddr = ra;
        }
      } else if (local === "cellSpan") {
        const cell = cellStack[cellStack.length - 1];
        if (cell && insideCurrentTable(stack, tableStack)) {
          const cs = parseInt(getAttr2(attrsRaw, "colSpan") || "1", 10);
          const rs = parseInt(getAttr2(attrsRaw, "rowSpan") || "1", 10);
          // Missing, non-numeric or non-positive spans fall back to 1.
          cell.colSpan = isNaN(cs) || cs < 1 ? 1 : cs;
          cell.rowSpan = isNaN(rs) || rs < 1 ? 1 : rs;
        }
      }
      continue;
    }
    if (local === "t") {
      // owningPara must run before the `t` itself is pushed.
      const para = owningPara();
      if (para) pendingT = { para, contentStart };
      stack.push({ local, qname, contentStart });
      continue;
    }
    stack.push({ local, qname, contentStart });
    if (local === "p") {
      const para = {
        sectionIndex,
        kind: "excluded",
        // Classification is based on the stack right after the push (excluding the element itself)
        start: m.index,
        tRanges: [],
        text: ""
      };
      // Temporarily remove the paragraph's own frame so only ancestors are inspected.
      stack.pop();
      para.kind = classifyPara();
      stack.push({ local, qname, contentStart });
      paraStack.push(para);
      if (para.kind === "body" || para.kind === "draw") bodyParagraphs.push(para);
      else if (para.kind === "cell") {
        const cell = cellStack[cellStack.length - 1];
        if (cell) cell.paragraphs.push(para);
      }
    } else if (local === "run" || local === "r") {
      // Remember the prefix of the paragraph's first run.
      const para = owningPara();
      if (para && para.runPrefix === void 0) para.runPrefix = prefixOf(qname);
    } else if (local === "tbl") {
      const table = {
        sectionIndex,
        start: m.index,
        topLevel: false,
        rows: [],
        cellByAnchor: /* @__PURE__ */ new Map()
      };
      // Same trick as for paragraphs: judge by ancestors only.
      stack.pop();
      table.topLevel = isTableTopLevel();
      stack.push({ local, qname, contentStart });
      tableStack.push(table);
      rowStack.push([]);
      if (table.topLevel) tables.push(table);
    } else if (local === "tr") {
      if (rowStack.length > 0) rowStack[rowStack.length - 1] = [];
    } else if (local === "tc") {
      cellStack.push({ colSpan: 1, rowSpan: 1, paragraphs: [], tables: [] });
    } else if (local === "cellAddr" || local === "cellSpan") {
      // Non-self-closing variants of the address/span elements.
      const cell = cellStack[cellStack.length - 1];
      if (cell && insideCurrentTable(stack, tableStack)) {
        if (local === "cellAddr") {
          const ca = parseInt(getAttr2(attrsRaw, "colAddr") || "", 10);
          const ra = parseInt(getAttr2(attrsRaw, "rowAddr") || "", 10);
          if (!isNaN(ca)) cell.colAddr = ca;
          if (!isNaN(ra)) cell.rowAddr = ra;
        } else {
          const cs = parseInt(getAttr2(attrsRaw, "colSpan") || "1", 10);
          const rs = parseInt(getAttr2(attrsRaw, "rowSpan") || "1", 10);
          cell.colSpan = isNaN(cs) || cs < 1 ? 1 : cs;
          cell.rowSpan = isNaN(rs) || rs < 1 ? 1 : rs;
        }
      }
    } else if (local === "header" || local === "footer") {
      // Only header/footer elements that sit somewhere under a `ctrl` are collected.
      if (stack.some((f) => f.local === "ctrl")) {
        ctrlSubStack.push({ kind: local, texts: [] });
      }
    } else if (local === "tab" || local === "fwSpace" || local === "hwSpace" || local === "br" || local === "lineBreak") {
      const para = owningPara();
      if (para) para.text += " ";
    }
  }
  // Post-pass: compute run insertion points for paragraphs without text ranges.
  for (const para of bodyParagraphs) fillRunInsertPos(para, xml);
  const fillTableInsertPos = (table, depth = 0) => {
    // Recursion guard for nested tables.
    if (depth > 16) return;
    for (const row of table.rows) {
      for (const cell of row) {
        for (const para of cell.paragraphs) fillRunInsertPos(para, xml);
        for (const nested of cell.tables) fillTableInsertPos(nested, depth + 1);
      }
    }
  };
  for (const table of tables) fillTableInsertPos(table);
  return { sectionIndex, xml, bodyParagraphs, tables, headerTexts, footerTexts };
}
|
|
20283
|
+
/**
 * Reads one attribute value from the raw attribute text of a tag.
 *
 * The name must start the string or follow whitespace, so "colAddr" does not
 * match inside "xcolAddr". Both double- and single-quoted values are
 * supported. The name is matched literally: regex metacharacters in it (for
 * example the "." that XML names may contain) are escaped.
 *
 * @param attrsRaw - Raw attribute text, e.g. ` colAddr="1" rowAddr="2"`.
 * @param name - Attribute name to look up.
 * @returns The attribute value without quotes, or undefined when absent.
 */
function getAttr2(attrsRaw, name) {
  const literalName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const re = new RegExp(`(?:^|\\s)${literalName}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`);
  const m = attrsRaw.match(re);
  return m ? m[1] ?? m[2] : void 0;
}
|
|
20288
|
+
/**
 * Tells whether the innermost table-related ancestor is a cell.
 *
 * Walks the element stack from the innermost frame outwards and stops at the
 * first `tc` (inside a cell → true) or `tbl` (table reached first → false).
 *
 * @param stack - Open-element frames, each with a `local` name.
 * @param tableStack - Tables currently being built; empty means no table.
 * @returns true only when a table is open and a `tc` is met before any `tbl`.
 */
function insideCurrentTable(stack, tableStack) {
  if (tableStack.length === 0) {
    return false;
  }
  let depth = stack.length;
  while (depth-- > 0) {
    const name = stack[depth].local;
    if (name === "tc" || name === "tbl") {
      return name === "tc";
    }
  }
  return false;
}
|
|
20297
|
+
/**
 * For a paragraph without any text range, finds where a new text element
 * could be inserted: just before the closing tag of its first run.
 *
 * Nothing is changed when the paragraph already has text ranges, its element
 * end cannot be found, it contains no `run` element, the first run is
 * self-closing, or that run has no closing tag inside the paragraph.
 *
 * @param para - Paragraph entry; may receive runInsertPos and runPrefix.
 * @param xml - Section XML the paragraph offsets refer to.
 */
function fillRunInsertPos(para, xml) {
  if (para.tRanges.length > 0) {
    return;
  }
  const paraEnd = findElementEnd(xml, para.start);
  if (paraEnd < 0) {
    return;
  }
  const paraXml = xml.slice(para.start, paraEnd);
  const runOpen = paraXml.match(/<((?:[A-Za-z0-9_]+:)?run)(?:\s(?:"[^"]*"|'[^']*'|[^>"'])*?)?(\/?)>/);
  if (!runOpen || runOpen.index === void 0) {
    return;
  }
  const [, runName, selfCloseMark] = runOpen;
  if (selfCloseMark === "/") {
    return;
  }
  const closeAt = paraXml.indexOf(`</${runName}>`, runOpen.index);
  if (closeAt < 0) {
    return;
  }
  // Offsets inside paraXml are relative to the paragraph start.
  para.runInsertPos = para.start + closeAt;
  para.runPrefix = prefixOf(runName);
}
|
|
20311
|
+
/**
 * Finds the offset just past the closing tag of the element starting at `start`.
 *
 * Opening and closing tags with the same qualified name are counted so that
 * nested elements of the same name are skipped; self-closing tags do not
 * affect the depth. The qualified name is matched literally — regex
 * metacharacters that are legal in XML names (such as ".") are escaped so
 * that `<a.b>` is not confused with e.g. `<aXb>`.
 *
 * @param xml - XML string.
 * @param start - Offset of the "<" of an opening tag.
 * @returns Offset after the matching closing tag, or -1 when `start` is not
 *   at an opening tag or no matching closing tag exists.
 */
function findElementEnd(xml, start) {
  const open = xml.slice(start).match(/^<([^\s/>!?]+)/);
  if (!open) return -1;
  const qname = open[1].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const re = new RegExp(`<${qname}(?=[\\s/>])(?:"[^"]*"|'[^']*'|[^>"'])*?(/?)>|</${qname}\\s*>`, "g");
  re.lastIndex = start;
  let depth = 0;
  let mm;
  while ((mm = re.exec(xml)) !== null) {
    if (mm[0].startsWith("</")) {
      depth--;
      if (depth === 0) return mm.index + mm[0].length;
    } else if (mm[1] !== "/") {
      // Self-closing tags (captured "/") neither open nor close a level.
      depth++;
    }
  }
  return -1;
}
|
|
20329
|
+
/**
 * Fills table.cellByAnchor, keyed by "row,col".
 *
 * If at least one cell carries both an explicit rowAddr and colAddr, only the
 * cells with both addresses are indexed and nothing is inferred. Otherwise
 * addresses are inferred from document order: each cell takes the first
 * column of its row not covered by an earlier cell's span, and its
 * rowSpan × colSpan area (clipped to the number of rows) is marked as taken.
 *
 * @param table - Table entry with `rows` (arrays of cells) and a `cellByAnchor`
 *   Map; cells may be mutated (rowAddr/colAddr assigned).
 */
function finalizeTable(table) {
  const { rows, cellByAnchor } = table;
  const hasExplicitAddr = rows.some((row) => row.some((c) => c.colAddr !== void 0 && c.rowAddr !== void 0));
  if (hasExplicitAddr) {
    for (const row of rows) {
      for (const cell of row) {
        if (cell.rowAddr === void 0 || cell.colAddr === void 0) {
          continue;
        }
        cellByAnchor.set(`${cell.rowAddr},${cell.colAddr}`, cell);
      }
    }
    return;
  }
  const rowCount = rows.length;
  // One set of taken column indices per row.
  const taken = Array.from({ length: rowCount }, () => /* @__PURE__ */ new Set());
  for (const [rowIdx, row] of rows.entries()) {
    let colIdx = 0;
    for (const cell of row) {
      while (taken[rowIdx].has(colIdx)) {
        colIdx++;
      }
      cell.rowAddr = rowIdx;
      cell.colAddr = colIdx;
      cellByAnchor.set(`${rowIdx},${colIdx}`, cell);
      const rowLimit = Math.min(rowIdx + cell.rowSpan, rowCount);
      const colLimit = colIdx + cell.colSpan;
      for (let r = rowIdx; r < rowLimit; r++) {
        for (let c = colIdx; c < colLimit; c++) {
          taken[r].add(c);
        }
      }
      colIdx += cell.colSpan;
    }
  }
}
|
|
20359
|
+
/**
 * Computes the text splices that replace a paragraph's text with `newText`.
 *
 * Strategy, in order:
 *  1. The paragraph has text ranges: the first range receives the (escaped)
 *     new text — wrapped in a `t` element when that range is a self-closing
 *     tag — and every later non-empty, non-self-closing range is emptied.
 *  2. A run insertion point is known: insert a new `t` element there.
 *  3. A self-closing run is known (and `xml` is given): expand it into an
 *     open/close pair containing a new `t` element.
 *
 * @param para - Paragraph entry from the source map.
 * @param newText - Replacement text (unescaped).
 * @param xml - Section XML; only needed for strategy 3.
 * @returns Array of `{ start, end, replacement }` (possibly empty), or null
 *   when non-empty text cannot be placed anywhere.
 */
function buildParagraphSplices(para, newText, xml) {
  const escaped = escapeXmlText(newText);
  // Wraps the escaped text in a `t` element; `nsPrefix` already ends with ":" or is "".
  const asTextElement = (nsPrefix) => `<${nsPrefix}t>${escaped}</${nsPrefix}t>`;

  const { tRanges } = para;
  if (tRanges.length > 0) {
    const [first, ...others] = tRanges;
    const replacement = first.selfClosing
      ? asTextElement(first.prefix ? first.prefix + ":" : "")
      : escaped;
    const splices = [{ start: first.contentStart, end: first.contentEnd, replacement }];
    for (const range of others) {
      if (range.selfClosing || range.contentStart >= range.contentEnd) {
        continue;
      }
      splices.push({ start: range.contentStart, end: range.contentEnd, replacement: "" });
    }
    return splices;
  }

  if (para.runInsertPos !== void 0) {
    if (!newText) {
      return [];
    }
    const nsPrefix = para.runPrefix ? para.runPrefix + ":" : "";
    return [{ start: para.runInsertPos, end: para.runInsertPos, replacement: asTextElement(nsPrefix) }];
  }

  if (para.selfCloseRun && xml) {
    if (!newText) {
      return [];
    }
    const { start, end } = para.selfCloseRun;
    const tag = xml.slice(start, end);
    const nameMatch = tag.match(/^<([^\s/>]+)/);
    if (!nameMatch || !tag.endsWith("/>")) {
      return null;
    }
    const qname = nameMatch[1];
    const colonAt = qname.indexOf(":");
    const nsPrefix = colonAt >= 0 ? qname.slice(0, colonAt) + ":" : "";
    // Turn `<run .../>` into `<run ...>` before appending content and close tag.
    const opened = tag.slice(0, tag.length - 2).trimEnd() + ">";
    return [{ start, end, replacement: `${opened}${asTextElement(nsPrefix)}</${qname}>` }];
  }

  return newText ? null : [];
}
|
|
20397
|
+
/**
 * Applies a set of non-overlapping splices to `xml` and returns the patched string.
 *
 * The splices are sorted by `start` (on a copy; the caller's array is untouched)
 * and the output is assembled in one forward pass, so the cost is linear in the
 * document size instead of re-slicing the whole document once per splice.
 * Zero-width splices at the same offset are emitted in their input order.
 *
 * Each splice is assumed to satisfy `start <= end`; that is not validated here.
 *
 * @param {string} xml - Source text the offsets refer to.
 * @param {Array<{start:number,end:number,replacement:string}>} splices - Edits in source coordinates.
 * @returns {string} The text with every splice applied.
 * @throws {Error} When a splice starts before the previous one ends.
 */
function applySplices(xml, splices) {
  const ordered = [...splices].sort((a, b) => a.start - b.start);
  // Validate everything up front so nothing is built for an inconsistent splice set.
  for (let i = 1; i < ordered.length; i++) {
    if (ordered[i].start < ordered[i - 1].end) {
      throw new Error("\uC18C\uC2A4\uB9F5 splice \uBC94\uC704 \uACB9\uCE68 \u2014 \uB0B4\uBD80 \uC624\uB958");
    }
  }
  let output = "";
  let cursor = 0;
  for (const splice of ordered) {
    // Copy the untouched gap before this splice, then its replacement.
    output += xml.slice(cursor, splice.start) + splice.replacement;
    cursor = splice.end;
  }
  return output + xml.slice(cursor);
}
|
|
20411
|
+
|
|
20412
|
+
// src/roundtrip/zip-patch.ts
|
|
20413
|
+
import { deflateRawSync } from "zlib";
|
|
20414
|
+
// Little-endian 32-bit record signatures used when reading and rewriting ZIP archives.
// Each begins with the bytes "PK" (0x50 0x4b) when read from the file.
var EOCD_SIG = 0x06054b50; // end of central directory record
var CD_SIG = 0x02014b50; // central directory file header
var LOCAL_SIG = 0x04034b50; // local file header
var ZIP64_EOCD_LOC_SIG = 0x07064b50; // ZIP64 end of central directory locator
|
|
20418
|
+
/**
 * Returns an independent copy of `buf[start, end)`.
 *
 * The copy has its own backing storage, so writing to it never alters `buf`.
 *
 * @param {Uint8Array} buf - Source bytes.
 * @param {number} start - First byte to copy.
 * @param {number} [end] - One past the last byte; defaults to the end of `buf`.
 * @returns {Uint8Array} Freshly allocated bytes.
 */
function copyBytes(buf, start, end) {
  const span = buf.subarray(start, end);
  const clone = new Uint8Array(span);
  return clone;
}
|
|
20421
|
+
/**
 * Locates the End Of Central Directory (EOCD) record of a ZIP archive and parses
 * every central-directory entry.
 *
 * The EOCD is searched backwards in two passes: first for a record whose comment
 * ends exactly at the end of the buffer, then (tolerating trailing bytes) for a
 * record whose central-directory offset points at a real central-directory header.
 *
 * Entries are bounds-checked against the EOCD position, so a truncated or
 * inconsistent central directory is reported as a `KordocError` rather than as a
 * raw `RangeError` from `DataView`.
 *
 * @param {Uint8Array} buf - Whole archive.
 * @returns {{entries: Array<object>, cdOffset: number, cdSize: number, eocdOffset: number}}
 *   Each entry has `cdStart`, `cdEnd`, `name`, `flags`, `method`, `crc`,
 *   `compSize`, `uncompSize` and `localOffset`.
 * @throws {KordocError} When no EOCD is found, the archive uses ZIP64, or the
 *   central directory is corrupt.
 */
function parseCentralDirectory(buf) {
  const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
  // The EOCD is 22 bytes plus a comment of at most 65535 bytes, so it can only start in this window.
  const minEocd = Math.max(0, buf.length - 22 - 65535);
  let eocdOffset = -1;
  // Pass 1: signature plus a comment length that reaches exactly the end of the buffer.
  for (let i = buf.length - 22; i >= minEocd; i--) {
    if (view.getUint32(i, true) === EOCD_SIG && i + 22 + view.getUint16(i + 20, true) === buf.length) {
      eocdOffset = i;
      break;
    }
  }
  if (eocdOffset < 0) {
    // Pass 2: allow bytes after the EOCD, but require its CD offset to hit a CD header.
    for (let i = buf.length - 22; i >= minEocd; i--) {
      if (view.getUint32(i, true) !== EOCD_SIG) continue;
      if (i + 22 + view.getUint16(i + 20, true) > buf.length) continue;
      const cand = view.getUint32(i + 16, true);
      if (cand < buf.length - 4 && view.getUint32(cand, true) === CD_SIG) {
        eocdOffset = i;
        break;
      }
    }
  }
  if (eocdOffset < 0) throw new KordocError("ZIP EOCD\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
  const totalEntries = view.getUint16(eocdOffset + 10, true);
  const cdSize = view.getUint32(eocdOffset + 12, true);
  const cdOffset = view.getUint32(eocdOffset + 16, true);
  // Saturated fields or a ZIP64 locator directly before the EOCD mean the real values live in ZIP64 records.
  if (cdOffset === 4294967295 || totalEntries === 65535) throw new KordocError("ZIP64\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
  if (eocdOffset >= 20 && view.getUint32(eocdOffset - 20, true) === ZIP64_EOCD_LOC_SIG) {
    throw new KordocError("ZIP64\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
  }
  const decoder = new TextDecoder("utf-8");
  const entries = [];
  let pos = cdOffset;
  for (let i = 0; i < totalEntries; i++) {
    // The fixed 46-byte header must lie entirely before the EOCD; checking first keeps DataView from throwing.
    if (pos + 46 > eocdOffset || view.getUint32(pos, true) !== CD_SIG) {
      throw new KordocError("ZIP Central Directory \uC190\uC0C1");
    }
    const flags = view.getUint16(pos + 8, true);
    const method = view.getUint16(pos + 10, true);
    const crc = view.getUint32(pos + 16, true);
    const compSize = view.getUint32(pos + 20, true);
    const uncompSize = view.getUint32(pos + 24, true);
    const nameLen = view.getUint16(pos + 28, true);
    const extraLen = view.getUint16(pos + 30, true);
    const commentLen = view.getUint16(pos + 32, true);
    const localOffset = view.getUint32(pos + 42, true);
    if (compSize === 4294967295 || uncompSize === 4294967295 || localOffset === 4294967295) {
      throw new KordocError("ZIP64\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
    }
    const cdEnd = pos + 46 + nameLen + extraLen + commentLen;
    // The variable-length tail (name, extra, comment) must not run into the EOCD either.
    if (cdEnd > eocdOffset) throw new KordocError("ZIP Central Directory \uC190\uC0C1");
    const name = decoder.decode(buf.subarray(pos + 46, pos + 46 + nameLen));
    entries.push({ cdStart: pos, cdEnd, name, flags, method, crc, compSize, uncompSize, localOffset });
    pos = cdEnd;
  }
  return { entries, cdOffset, cdSize, eocdOffset };
}
|
|
20474
|
+
// 256-entry lookup table for byte-at-a-time CRC-32, built once at load time from
// the reflected polynomial 0xEDB88320.
var CRC_TABLE = (() => {
  const REFLECTED_POLY = 0xedb88320;
  const lookup = new Uint32Array(256);
  lookup.forEach((_, byte) => {
    let remainder = byte;
    let bitsLeft = 8;
    while (bitsLeft-- > 0) {
      const lowBitSet = (remainder & 1) !== 0;
      remainder >>>= 1;
      if (lowBitSet) remainder = REFLECTED_POLY ^ remainder;
    }
    lookup[byte] = remainder >>> 0;
  });
  return lookup;
})();
|
|
20483
|
+
/**
 * Computes the CRC-32 checksum of `data` using the precomputed `CRC_TABLE`.
 *
 * @param {Uint8Array} data - Bytes to checksum.
 * @returns {number} Unsigned 32-bit checksum.
 */
function crc32(data) {
  let state = 0xffffffff;
  const total = data.length;
  for (let at = 0; at < total; at++) {
    const slot = (state ^ data[at]) & 0xff;
    state = (state >>> 8) ^ CRC_TABLE[slot];
  }
  // Final bit inversion, then coerce back to an unsigned value.
  return ~state >>> 0;
}
|
|
20490
|
+
/**
 * Rebuilds a ZIP archive with the payload of selected entries replaced, copying
 * every other entry byte-for-byte.
 *
 * Local entries are emitted in order of their original local-header offset, then
 * the central directory in its original order with updated offsets and metadata,
 * then the original EOCD (and anything after it) with the new directory size and
 * offset.
 *
 * For a replaced entry:
 *  - a stored entry (method 0) stays stored; any other method is written as raw
 *    deflate and the method field is set to 8 in both headers, so the headers
 *    always describe the bytes that were actually written;
 *  - general-purpose flag bit 3 (data descriptor) is cleared because CRC and
 *    sizes are written straight into the local header;
 *  - the local header's name and extra field are kept.
 *
 * NOTE(review): the encryption flag bits of a replaced entry are left as they
 * were while the new payload is written unencrypted — confirm encrypted
 * archives are rejected upstream.
 *
 * @param {Uint8Array} original - Whole source archive.
 * @param {Map<string, Uint8Array>} replacements - New uncompressed content by entry name.
 * @returns {Uint8Array} The patched archive.
 * @throws {KordocError} When a replacement names a missing entry or a replaced
 *   entry's local header signature is wrong (plus anything `parseCentralDirectory` throws).
 */
function patchZipEntries(original, replacements) {
  const { entries, cdOffset, eocdOffset } = parseCentralDirectory(original);
  const view = new DataView(original.buffer, original.byteOffset, original.byteLength);
  const knownNames = new Set(entries.map((e) => e.name));
  for (const name of replacements.keys()) {
    if (!knownNames.has(name)) throw new KordocError(`ZIP\uC5D0 \uC5C6\uB294 \uC5D4\uD2B8\uB9AC: ${name}`);
  }
  const byLocal = [...entries].sort((a, b) => a.localOffset - b.localOffset);
  const segments = [];
  const newLocalOffset = /* @__PURE__ */ new Map();
  const newMeta = /* @__PURE__ */ new Map();
  let offset = 0;
  for (let i = 0; i < byLocal.length; i++) {
    const e = byLocal[i];
    // An entry's bytes run up to the next local header, or to the central directory for the last one.
    const segEnd = i + 1 < byLocal.length ? byLocal[i + 1].localOffset : cdOffset;
    newLocalOffset.set(e, offset);
    const newData = replacements.get(e.name);
    if (newData === void 0) {
      const seg = original.subarray(e.localOffset, segEnd);
      segments.push(seg);
      offset += seg.length;
      continue;
    }
    if (view.getUint32(e.localOffset, true) !== LOCAL_SIG) throw new KordocError("ZIP \uB85C\uCEEC \uD5E4\uB354 \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
    const nameLen = view.getUint16(e.localOffset + 26, true);
    const extraLen = view.getUint16(e.localOffset + 28, true);
    const headerLen = 30 + nameLen + extraLen;
    const header = copyBytes(original, e.localOffset, e.localOffset + headerLen);
    const hview = new DataView(header.buffer, header.byteOffset, header.byteLength);
    // Only "stored" and "deflate" can be produced here; anything else is re-encoded as deflate.
    const method = e.method === 0 ? 0 : 8;
    const compData = method === 0 ? newData : new Uint8Array(deflateRawSync(newData));
    const crc = crc32(newData);
    const flags = e.flags & ~8;
    hview.setUint16(6, flags, true);
    hview.setUint16(8, method, true);
    hview.setUint32(14, crc, true);
    hview.setUint32(18, compData.length, true);
    hview.setUint32(22, newData.length, true);
    segments.push(header, compData);
    offset += headerLen + compData.length;
    newMeta.set(e, { crc, compSize: compData.length, uncompSize: newData.length, flags, method });
  }
  const newCdOffset = offset;
  for (const e of entries) {
    const cd = copyBytes(original, e.cdStart, e.cdEnd);
    const cview = new DataView(cd.buffer, cd.byteOffset, cd.byteLength);
    cview.setUint32(42, newLocalOffset.get(e), true);
    const meta = newMeta.get(e);
    if (meta) {
      cview.setUint16(8, meta.flags, true);
      cview.setUint16(10, meta.method, true);
      cview.setUint32(16, meta.crc, true);
      cview.setUint32(20, meta.compSize, true);
      cview.setUint32(24, meta.uncompSize, true);
    }
    segments.push(cd);
    offset += cd.length;
  }
  const newCdSize = offset - newCdOffset;
  // Copy the EOCD through the end of the buffer, then point it at the rebuilt directory.
  const eocd = copyBytes(original, eocdOffset);
  const eview = new DataView(eocd.buffer, eocd.byteOffset, eocd.byteLength);
  eview.setUint32(12, newCdSize, true);
  eview.setUint32(16, newCdOffset, true);
  segments.push(eocd);
  offset += eocd.length;
  const result = new Uint8Array(offset);
  let pos = 0;
  for (const seg of segments) {
    result.set(seg, pos);
    pos += seg.length;
  }
  return result;
}
|
|
20560
|
+
|
|
20561
|
+
// src/roundtrip/markdown-units.ts
|
|
20562
|
+
/**
 * Splits markdown text into consecutive units, skipping blank lines.
 *
 * Unit kinds, checked in this order for the first line of each unit:
 *  - "html-table": starts at a line beginning with `<table>` and runs until the
 *    `<table>` / `</table>` counts balance (or input ends); lines are kept verbatim.
 *  - "gfm-table": a run of lines beginning with `|`; lines are kept verbatim.
 *  - "separator": a single line of three or more dashes.
 *  - "image": a single `![image](...)` line.
 *  - "text": a run of trimmed lines up to a blank line, a pipe row, or a `<table>` line.
 *
 * @param {string} md2 - Markdown source.
 * @returns {Array<{kind: string, raw: string, lines: string[]}>} Units in document order.
 */
function splitMarkdownUnits(md2) {
  const lines = md2.split("\n");
  const units = [];
  const startsPipeRow = (text) => text.trimStart().startsWith("|");
  const startsHtmlTable = (text) => text.trim().startsWith("<table>");
  const occurrences = (text, pattern) => (text.match(pattern) || []).length;
  const pushSingleLine = (kind, text) => {
    units.push({ kind, raw: text, lines: [text] });
  };

  let cursor = 0;
  while (cursor < lines.length) {
    const trimmed = lines[cursor].trim();
    if (!trimmed) {
      cursor += 1;
      continue;
    }

    if (trimmed.startsWith("<table>")) {
      const block = [];
      let nesting = 0;
      do {
        const current = lines[cursor];
        cursor += 1;
        block.push(current);
        nesting += occurrences(current, /<table>/g);
        nesting -= occurrences(current, /<\/table>/g);
      } while (nesting > 0 && cursor < lines.length);
      units.push({ kind: "html-table", raw: block.join("\n"), lines: block });
      continue;
    }

    if (startsPipeRow(lines[cursor])) {
      const tableRows = [];
      for (; cursor < lines.length && startsPipeRow(lines[cursor]); cursor += 1) {
        tableRows.push(lines[cursor]);
      }
      units.push({ kind: "gfm-table", raw: tableRows.join("\n"), lines: tableRows });
      continue;
    }

    if (/^-{3,}\s*$/.test(trimmed)) {
      pushSingleLine("separator", trimmed);
      cursor += 1;
      continue;
    }

    if (/^!\[image\]\([^)]*\)\s*$/.test(trimmed)) {
      pushSingleLine("image", trimmed);
      cursor += 1;
      continue;
    }

    const paragraph = [];
    while (cursor < lines.length) {
      const candidate = lines[cursor];
      if (!candidate.trim() || startsPipeRow(candidate) || startsHtmlTable(candidate)) break;
      paragraph.push(candidate.trim());
      cursor += 1;
    }
    units.push({ kind: "text", raw: paragraph.join("\n"), lines: paragraph });
  }
  return units;
}
|
|
20614
|
+
/**
 * Backslash-escapes every tilde in `text`.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with each `~` replaced by `\~`.
 */
function escapeGfm(text) {
  const pieces = text.split("~");
  return pieces.join("\\~");
}
|
|
20617
|
+
// Matches sentences of the form "<object name>입니다" ("This is a <object>."), with an
// optional "둥근 " / "모서리가 둥근 " (rounded) prefix, optional whitespace before "입니다",
// and an optional trailing period. The object names are a fixed list of shape and
// object words (rectangle, circle, arrow, star, equation, table, picture, OLE object, ...).
// Global flag: sanitizeText uses it with String.prototype.replace to delete every match.
// NOTE(review): the pattern is not anchored to word boundaries, so text that merely ends
// in one of the short alternatives (e.g. a single syllable such as 원/선/별/호/표) directly
// before "입니다" is also removed — confirm that is acceptable.
var HWP_SHAPE_ALT_TEXT_RE = /(?:모서리가 둥근 |둥근 )?(?:사각형|직사각형|정사각형|원|타원|삼각형|이등변 삼각형|직각 삼각형|선|직선|곡선|화살표|굵은 화살표|이중 화살표|오각형|육각형|팔각형|별|[4-8]점별|십자|십자형|구름|구름형|마름모|도넛|평행사변형|사다리꼴|부채꼴|호|반원|물결|번개|하트|빗금|블록 화살표|수식|표|그림|개체|그리기\s?개체|묶음\s?개체|글상자|수식\s?개체|OLE\s?개체)\s?입니다\.?/g;
|
|
20618
|
+
/**
 * Normalizes extracted text for rendering and comparison.
 *
 * Steps: map private-use characters via `mapPuaText`, drop code points in
 * U+F0000–U+FFFFD, remove shape alt-text sentences (`HWP_SHAPE_ALT_TEXT_RE`),
 * collapse runs of spaces to one, and trim.
 *
 * Short results (at most 30 characters) that consist mostly of single Hangul
 * characters separated by spaces are then joined back together: with three or
 * more tokens of which at least 70% are a lone Hangul syllable or compatibility
 * jamo, all spaces are removed.
 *
 * The Hangul test uses the ranges U+AC00–U+D7A3 (syllables) and U+3131–U+318E
 * (compatibility jamo). The class previously read `[가-ㄱ-ㆎ]`, whose first range
 * runs from U+AC00 down to U+3131 and is therefore rejected as out of order; the
 * missing upper bound of the syllable block is restored here, written as escapes
 * so the pattern does not depend on the file's encoding.
 *
 * @param {string} text - Raw text.
 * @returns {string} Cleaned text.
 */
function sanitizeText(text) {
  let result = mapPuaText(text).replace(/[\u{F0000}-\u{FFFFD}]/gu, "").replace(HWP_SHAPE_ALT_TEXT_RE, "").replace(/ +/g, " ").trim();
  if (result.length <= 30 && result.includes(" ")) {
    const tokens = result.split(" ");
    const koreanSingleCharCount = tokens.filter((t) => t.length === 1 && /[\uAC00-\uD7A3\u3131-\u318E]/.test(t)).length;
    if (tokens.length >= 3 && koreanSingleCharCount / tokens.length >= 0.7) {
      result = tokens.join("");
    }
  }
  return result;
}
|
|
20629
|
+
/**
 * Produces the comparison form of `text`: sanitized, with every whitespace run
 * (including newlines and tabs) collapsed to a single space, and trimmed.
 *
 * @param {string} text - Raw text.
 * @returns {string} Normalized text for matching.
 */
function normForMatch(text) {
  const cleaned = sanitizeText(text);
  const singleSpaced = cleaned.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
|
20632
|
+
/**
 * Reverses `escapeGfm`: turns every `\~` back into `~`.
 *
 * @param {string} text - Escaped text.
 * @returns {string} Text with tilde escapes removed.
 */
function unescapeGfm(text) {
  const pieces = text.split("\\~");
  return pieces.join("~");
}
|
|
20635
|
+
/**
 * Produces a one-line preview of `text`: whitespace runs collapsed to single
 * spaces, trimmed, and cut to 80 characters (77 plus "...") when longer.
 *
 * @param {string} text - Text to preview.
 * @returns {string} Preview of at most 80 characters.
 */
function summarize(text) {
  const LIMIT = 80;
  const ELLIPSIS = "...";
  const flat = text.replace(/\s+/g, " ").trim();
  if (flat.length <= LIMIT) return flat;
  return flat.slice(0, LIMIT - ELLIPSIS.length) + ELLIPSIS;
}
|
|
20639
|
+
/**
 * Re-creates the rows of text a table renders to as a GFM (pipe) table, keeping
 * for each output cell the grid coordinates of the cell it came from.
 *
 * Returns `null` for tables that do not take the pipe-table path here: no rows,
 * no columns, or a single column.
 *
 * Layout rules reproduced:
 *  - Each anchor cell's text is sanitized, tilde/pipe-escaped, and newlines become `<br>`.
 *  - Slots covered by a row/column span stay empty.
 *  - Rows that are entirely empty are dropped.
 *  - A "label row" (only the first cell has text and no slot in the row is covered
 *    by a span) is held back; if the next kept row has an empty first cell, the
 *    label is moved into it, otherwise the label row is emitted on its own.
 *
 * @param {object} table - Has `cells`, `rows` (row count) and `cols` (column count).
 * @returns {Array<Array<{text: string, gridR: number, gridC: number}>>|null}
 */
function replicateGfmTable(table) {
  const { cells, rows: numRows, cols: numCols } = table;
  // A 1x1 table is covered by the single-column check.
  if (numRows === 0 || numCols === 0 || numCols === 1) return null;

  const slotKey = (r, c) => `${r},${c}`;
  const isBlank = (slot) => slot.text === "";
  const display = Array.from({ length: numRows }, (_, r) =>
    Array.from({ length: numCols }, (_2, c) => ({ text: "", gridR: r, gridC: c }))
  );
  const covered = /* @__PURE__ */ new Set();

  for (let r = 0; r < numRows; r++) {
    for (let c = 0; c < numCols; c++) {
      if (covered.has(slotKey(r, c))) continue;
      const cell = cells[r]?.[c];
      if (!cell) continue;
      const rendered = escapeGfm(sanitizeText(cell.text)).replace(/\|/g, "\\|").replace(/\n/g, "<br>");
      display[r][c] = { text: rendered, gridR: r, gridC: c };
      for (let dr = 0; dr < cell.rowSpan; dr++) {
        for (let dc = 0; dc < cell.colSpan; dc++) {
          const isAnchor = dr === 0 && dc === 0;
          const inside = r + dr < numRows && c + dc < numCols;
          if (!isAnchor && inside) covered.add(slotKey(r + dr, c + dc));
        }
      }
      // Jump past the columns this cell spans.
      c += cell.colSpan - 1;
    }
  }

  const output = [];
  let heldLabel = null;
  for (let r = 0; r < display.length; r++) {
    const row = display[r];
    if (row.every(isBlank)) continue;
    const filledCount = row.filter((slot) => !isBlank(slot)).length;
    const touchesSpan = row.some((_, c) => covered.has(slotKey(r, c)));
    // Exactly one filled slot and it is the first one, so every other slot is empty.
    const isLabelRow = !touchesSpan && filledCount === 1 && !isBlank(row[0]);
    if (isLabelRow) {
      if (heldLabel) output.push(heldLabel);
      heldLabel = row;
      continue;
    }
    if (heldLabel) {
      if (isBlank(row[0])) row[0] = heldLabel[0];
      else output.push(heldLabel);
      heldLabel = null;
    }
    output.push(row);
  }
  if (heldLabel) output.push(heldLabel);
  return output.length > 0 ? output : null;
}
|
|
20687
|
+
/**
 * Parses the lines of a GFM pipe table into rows of trimmed cell strings.
 *
 * Lines not starting with `|` (after trimming) are ignored, cells are split on
 * pipes that are not backslash-escaped, the pieces before the first and after
 * the last pipe are discarded, and delimiter rows (`---`, `:---:` ...) are skipped.
 * Cell text is returned still escaped.
 *
 * @param {Iterable<string>} lines - Table lines.
 * @returns {string[][]} Rows of cell texts.
 */
function parseGfmTable(lines) {
  const isDelimiterCell = (cellText) => /^:?-{3,}:?$/.test(cellText);
  const splitRow = (rowText) =>
    rowText
      .split(/(?<!\\)\|/)
      .slice(1, -1)
      .map((piece) => piece.trim());

  return [...lines]
    .map((line) => line.trim())
    .filter((rowText) => rowText.startsWith("|"))
    .map(splitRow)
    .filter((cellTexts) => cellTexts.length !== 0)
    .filter((cellTexts) => !cellTexts.every(isDelimiterCell));
}
|
|
20699
|
+
/**
 * Converts the text of a GFM table cell back to plain text: `<br>` variants
 * become newlines, then `\|` becomes `|`, then `\~` becomes `~`.
 *
 * @param {string} text - Cell text as written in the pipe table.
 * @returns {string} Unescaped text.
 */
function unescapeGfmCell(text) {
  const withNewlines = text.replace(/<br\s*\/?>/gi, "\n");
  const withPipes = withNewlines.replace(/\\\|/g, "|");
  return withPipes.replace(/\\~/g, "~");
}
|
|
20702
|
+
/**
 * Re-creates the inner HTML a table cell renders to.
 *
 * With a non-empty `blocks` list each block is rendered and the non-empty results
 * are joined with `<br>`:
 *  - a table block becomes its nested `<table>` HTML, preceded by the sanitized
 *    caption and `<br>` when a caption exists;
 *  - an image block that has text becomes an `<img>` tag using that text as `src`;
 *  - any other block becomes its sanitized text with newlines turned into `<br>`.
 * Without blocks, the cell's own sanitized text is used.
 *
 * @param {object} cell - Has `text` and optionally `blocks`.
 * @returns {string} Inner HTML for the cell.
 */
function replicateCellInnerHtml(cell) {
  const breakLines = (value) => value.replace(/\n/g, "<br>");

  if (!cell.blocks?.length) {
    return breakLines(sanitizeText(cell.text));
  }

  const rendered = [];
  for (const block of cell.blocks) {
    let piece;
    if (block.type === "table" && block.table) {
      const caption = block.table.caption ? sanitizeText(block.table.caption) : "";
      piece = (caption ? caption + "<br>" : "") + replicateTableToHtml(block.table);
    } else if (block.type === "image" && block.text) {
      piece = `<img src="${block.text}" alt="image">`;
    } else {
      const plain = sanitizeText(block.text ?? "");
      piece = plain ? breakLines(plain) : "";
    }
    if (piece) rendered.push(piece);
  }
  return rendered.join("<br>");
}
|
|
20716
|
+
/**
 * Re-creates the HTML a table renders to: `<table>`, one `<tr>` line per non-empty
 * row, `</table>`, joined with newlines. Cells carry `colspan` / `rowspan`
 * attributes only when the span is greater than one.
 *
 * @param {object} table - Table passed through to `replicateHtmlTable`.
 * @returns {string} HTML text.
 */
function replicateTableToHtml(table) {
  const renderCell = (tag, cell) => {
    let attrText = "";
    if (cell.colSpan > 1) attrText += ` colspan="${cell.colSpan}"`;
    if (cell.rowSpan > 1) attrText += ` rowspan="${cell.rowSpan}"`;
    return `<${tag}${attrText}>${cell.inner}</${tag}>`;
  };

  const out = ["<table>"];
  for (const row of replicateHtmlTable(table)) {
    const pieces = row.cells.map((cell) => renderCell(row.tag, cell));
    if (pieces.length === 0) continue;
    out.push(`<tr>${pieces.join("")}</tr>`);
  }
  out.push("</table>");
  return out.join("\n");
}
|
|
20733
|
+
/**
 * Re-creates the row/cell structure a table renders to as HTML.
 *
 * Walks the grid row by row, emits every anchor cell that exists and is not
 * covered by an earlier cell's span, and records the grid position it came from.
 * The first grid row uses `th`, all others `td`. Rows without any emitted cell
 * are omitted.
 *
 * @param {object} table - Has `cells`, `rows` (row count) and `cols` (column count).
 * @returns {Array<{tag: string, cells: Array<{inner: string, colSpan: number, rowSpan: number, gridR: number, gridC: number}>}>}
 */
function replicateHtmlTable(table) {
  const { cells, rows: numRows, cols: numCols } = table;
  const slotKey = (r, c) => `${r},${c}`;
  const covered = /* @__PURE__ */ new Set();
  const output = [];

  for (let r = 0; r < numRows; r++) {
    const emitted = [];
    for (let c = 0; c < numCols; c++) {
      if (covered.has(slotKey(r, c))) continue;
      const cell = cells[r]?.[c];
      if (!cell) continue;
      // Mark every slot of the span except the anchor itself, clipped to the grid.
      for (let dr = 0; dr < cell.rowSpan; dr++) {
        for (let dc = 0; dc < cell.colSpan; dc++) {
          const isAnchor = dr === 0 && dc === 0;
          const inside = r + dr < numRows && c + dc < numCols;
          if (!isAnchor && inside) covered.add(slotKey(r + dr, c + dc));
        }
      }
      const inner = replicateCellInnerHtml(cell);
      emitted.push({ inner, colSpan: cell.colSpan, rowSpan: cell.rowSpan, gridR: r, gridC: c });
    }
    if (emitted.length > 0) {
      output.push({ tag: r === 0 ? "th" : "td", cells: emitted });
    }
  }
  return output;
}
|
|
20762
|
+
/**
 * Parses the outermost `<table>` in `raw` into rows of cells, treating anything
 * inside nested tables as opaque cell content.
 *
 * Only `table`, `tr`, `td` and `th` tags are recognised. Row and cell tags are
 * honoured at nesting depth 1 only. The first completed row is tagged "th", later
 * rows "td". `colspan` / `rowspan` are read from double-quoted numeric attributes
 * and default to 1.
 *
 * @param {string} raw - HTML text of the table.
 * @returns {Array<{tag: string, cells: Array<{inner: string, colSpan: number, rowSpan: number}>}>|null}
 *   Parsed rows, or `null` when `<table>` tags are unbalanced.
 */
function parseHtmlTable(raw) {
  const tagPattern = /<(\/?)(table|tr|td|th)((?:"[^"]*"|'[^']*'|[^>"'])*?)>/gi;
  const readSpan = (found) => {
    const value = parseInt(found?.[1] || "1", 10);
    return isNaN(value) ? 1 : value;
  };

  const rows = [];
  let nesting = 0;
  let openRow = null;
  let openCell = null; // { from, colSpan, rowSpan } while a depth-1 cell is open

  for (const hit of raw.matchAll(tagPattern)) {
    const closing = hit[1] === "/";
    const tagName = hit[2].toLowerCase();
    const attrText = hit[3] || "";

    if (tagName === "table") {
      nesting += closing ? -1 : 1;
      if (nesting < 0) return null;
      continue;
    }
    if (nesting !== 1) continue;

    if (tagName === "tr") {
      if (!closing) {
        openRow = [];
      } else if (openRow) {
        rows.push({ tag: rows.length === 0 ? "th" : "td", cells: openRow });
        openRow = null;
      }
      continue;
    }

    // td / th
    if (!closing) {
      openCell = {
        from: hit.index + hit[0].length,
        colSpan: readSpan(attrText.match(/colspan\s*=\s*"(\d+)"/i)),
        rowSpan: readSpan(attrText.match(/rowspan\s*=\s*"(\d+)"/i))
      };
    } else if (openCell && openRow) {
      openRow.push({ inner: raw.slice(openCell.from, hit.index), colSpan: openCell.colSpan, rowSpan: openCell.rowSpan });
      openCell = null;
    }
  }

  return nesting !== 0 ? null : rows;
}
|
|
20802
|
+
/**
 * Reduces a cell's inner HTML to its text lines.
 *
 * Nested tables and `<img>` tags are stripped (and their presence reported),
 * then the remainder is split on `<br>` variants, trimmed, and empty pieces dropped.
 *
 * @param {string} inner - Inner HTML of one cell.
 * @returns {{lines: string[], hadNonText: boolean}} Text lines and whether a
 *   nested table or image was present.
 */
function htmlCellInnerToLines(inner) {
  const containsTable = /<table[\s>]/i.test(inner);
  const withoutTables = containsTable ? removeNestedTables(inner) : inner;

  const containsImage = /<img\s/i.test(withoutTables);
  const textOnly = containsImage
    ? withoutTables.replace(/<img\s(?:"[^"]*"|'[^']*'|[^>"'])*?>/gi, "")
    : withoutTables;

  const lines = [];
  for (const piece of textOnly.split(/<br\s*\/?>/gi)) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) lines.push(trimmed);
  }
  return { lines, hadNonText: containsTable || containsImage };
}
|
|
20816
|
+
/**
 * Returns the source text of every outermost `<table>...</table>` in `html`,
 * in document order. Tables nested inside another table are part of their
 * parent's text and are not listed separately. A stray closing tag outside any
 * table is ignored; an unclosed table yields nothing.
 *
 * @param {string} html - HTML fragment.
 * @returns {string[]} Outermost table fragments.
 */
function extractTopLevelTables(html) {
  const found = [];
  let nesting = 0;
  let openedAt = -1;

  for (const hit of html.matchAll(/<(\/?)table(?:[\s>]|>)/gi)) {
    const isOpening = hit[1] !== "/";
    if (isOpening) {
      if (nesting === 0) openedAt = hit.index;
      nesting += 1;
      continue;
    }
    nesting -= 1;
    if (nesting < 0) {
      // Closing tag with nothing open: reset and move on.
      nesting = 0;
    } else if (nesting === 0 && openedAt >= 0) {
      found.push(html.slice(openedAt, hit.index + hit[0].length));
      openedAt = -1;
    }
  }
  return found;
}
|
|
20837
|
+
/**
 * Returns `html` with every outermost `<table>...</table>` (including anything
 * nested inside it) removed. A stray closing tag outside any table is left in
 * place. If a table is still open at the end of the input, everything from its
 * opening tag onward is dropped.
 *
 * @param {string} html - HTML fragment.
 * @returns {string} The fragment without tables.
 */
function removeNestedTables(html) {
  const kept = [];
  let nesting = 0;
  let resumeAt = 0;

  for (const hit of html.matchAll(/<(\/?)table(?:[\s>]|>)/gi)) {
    const isOpening = hit[1] !== "/";
    if (isOpening) {
      // Entering an outermost table: keep the text since the last table ended.
      if (nesting === 0) kept.push(html.slice(resumeAt, hit.index));
      nesting += 1;
      continue;
    }
    nesting -= 1;
    if (nesting === 0) {
      resumeAt = hit.index + hit[0].length;
    } else if (nesting < 0) {
      nesting = 0;
    }
  }

  if (nesting === 0) kept.push(html.slice(resumeAt));
  return kept.join("");
}
|
|
20856
|
+
|
|
20857
|
+
// src/roundtrip/table-patch.ts
|
|
20858
|
+
/**
 * Applies the cell edits found between the original and edited text of a GFM
 * (pipe) table.
 *
 * The table is first re-rendered from the document model; edits are only applied
 * when that rendering matches the original markdown cell-for-cell, because the
 * rendering supplies the grid coordinate of each markdown cell. Row-count changes
 * abort the whole table; a column-count change skips that row; a change to image
 * tokens skips that cell. When text is applied to a cell that also holds image
 * tokens, an informational note is appended to `ctx.skipped`.
 *
 * @param {object} table - Document-model table.
 * @param {object} scanTable - Source-mapped table used by `applyCellEdit`.
 * @param {{lines: string[]}} orig - Original markdown unit.
 * @param {{lines: string[]}} edited - Edited markdown unit.
 * @param {object} ctx - Patch context; `ctx.skipped` collects notes.
 * @param {function(string): number} skip - Records a skip reason; its return value is passed through.
 * @returns {number} Sum of the values returned by `applyCellEdit`, or the result of `skip`.
 */
function patchGfmTable(table, scanTable, orig, edited, ctx, skip) {
  const replica = replicateGfmTable(table);
  if (!replica) return skip("\uD45C \uB80C\uB354 \uACBD\uB85C \uC2DD\uBCC4 \uC2E4\uD328");

  const origRows = parseGfmTable(orig.lines);
  const editedRows = parseGfmTable(edited.lines);

  const replicaMatchesOriginal =
    replica.length === origRows.length &&
    replica.every(
      (row, r) => row.length === origRows[r].length && row.every((slot, j) => slot.text === origRows[r][j])
    );
  if (!replicaMatchesOriginal) {
    return skip("\uD45C \uC88C\uD45C \uC7AC\uD604 \uBD88\uC77C\uCE58 \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");
  }
  if (editedRows.length !== origRows.length) return skip("\uD45C \uD589 \uCD94\uAC00/\uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0 (\uD45C \uAD6C\uC870 \uBCC0\uACBD)");

  // Plain text lines of one markdown cell, with image tokens removed.
  const toTextLines = (cellText) =>
    unescapeGfmCell(stripCellTokens(cellText))
      .split("\n")
      .map((piece) => piece.trim())
      .filter(Boolean);

  let applied = 0;
  origRows.forEach((beforeRow, r) => {
    const afterRow = editedRows[r];
    if (afterRow.length !== beforeRow.length) {
      skip(`\uD45C ${r + 1}\uD589 \uC5F4 \uC218 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0`);
      return;
    }
    beforeRow.forEach((beforeText, c) => {
      const afterText = afterRow[c];
      if (beforeText === afterText) return;
      const { gridR, gridC } = replica[r][c];
      const beforeTokens = extractCellTokens(beforeText);
      const afterTokens = extractCellTokens(afterText);
      if (beforeTokens !== afterTokens) {
        skip("\uC140 \uB0B4 \uC774\uBBF8\uC9C0 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");
        return;
      }
      const newLines = toTextLines(afterText);
      const oldLines = toTextLines(beforeText);
      const count = applyCellEdit(table, scanTable, gridR, gridC, newLines, ctx, beforeText, afterText, oldLines.length);
      if (count > 0 && beforeTokens) {
        ctx.skipped.push({
          reason: "\uC140 \uB0B4 \uC774\uBBF8\uC9C0\xB7\uD14D\uC2A4\uD2B8 \uD63C\uC7AC \u2014 \uD14D\uC2A4\uD2B8\uB9CC \uC801\uC6A9 (\uC774\uBBF8\uC9C0 \uC778\uC811 \uBC30\uCE58\uB294 <br> \uBD84\uB9AC\uB85C \uC7AC\uD604\uB428)",
          before: summarize(beforeText),
          after: summarize(afterText)
        });
      }
      applied += count;
    });
  });
  return applied;
}
|
|
20897
|
+
/**
 * Applies the edits between the original and edited HTML of a top-level table.
 * Thin entry point that starts `patchHtmlTableRaw` at nesting depth 0.
 *
 * @param {object} table - Document-model table.
 * @param {object} scanTable - Source-mapped table.
 * @param {{raw: string}} orig - Original markdown unit.
 * @param {{raw: string}} edited - Edited markdown unit.
 * @param {object} ctx - Patch context.
 * @param {function(string): number} skip - Records a skip reason.
 * @returns {number} Whatever `patchHtmlTableRaw` returns.
 */
function patchHtmlTable(table, scanTable, orig, edited, ctx, skip) {
  const ROOT_DEPTH = 0;
  const before = orig.raw;
  const after = edited.raw;
  return patchHtmlTableRaw(table, scanTable, before, after, ctx, skip, ROOT_DEPTH);
}
|
|
20900
|
+
/**
 * Applies the cell edits found between the original and edited HTML of one table
 * (top-level or nested).
 *
 * Safety checks before anything is applied:
 *  - nesting depth above 8 is refused;
 *  - the table re-rendered from the document model must equal `origRaw` exactly;
 *  - parsing `origRaw` must give the same cells (count and inner HTML) as the
 *    model rendering, otherwise cell boundaries are considered ambiguous;
 *  - the edited HTML must parse and keep the same number of rows.
 *
 * Per row, a changed cell count skips the row. Per cell, a changed span skips the
 * cell; for cells containing images or nested tables, changed images or a changed
 * number of nested tables skip the cell, and changed nested tables are patched
 * recursively. Finally, changed text lines are applied through `applyCellEdit`.
 *
 * @param {object} table - Document-model table.
 * @param {object} scanTable - Source-mapped table.
 * @param {string} origRaw - Original HTML of the table.
 * @param {string} editedRaw - Edited HTML of the table.
 * @param {object} ctx - Patch context.
 * @param {function(string): number} skip - Records a skip reason; its return value is passed through.
 * @param {number} depth - Current nesting depth (0 for a top-level table).
 * @returns {number} Sum of the values returned by the applied edits, or the result of `skip`.
 */
function patchHtmlTableRaw(table, scanTable, origRaw, editedRaw, ctx, skip, depth) {
  if (depth > 8) return skip("\uC911\uCCA9\uD45C \uAE4A\uC774 \uCD08\uACFC");
  if (replicateTableToHtml(table) !== origRaw) return skip("\uD45C \uC88C\uD45C \uC7AC\uD604 \uBD88\uC77C\uCE58 \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");

  const replica = replicateHtmlTable(table);
  const origRows = parseHtmlTable(origRaw);
  const boundariesAgree =
    Boolean(origRows) &&
    origRows.length === replica.length &&
    origRows.every(
      (row, i) =>
        row.cells.length === replica[i].cells.length &&
        row.cells.every((cell, j) => cell.inner === replica[i].cells[j].inner)
    );
  if (!boundariesAgree) {
    return skip("\uC140 \uACBD\uACC4 \uBAA8\uD638 (\uB9AC\uD130\uB7F4 \uD0DC\uADF8 \uC758\uC2EC) \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");
  }

  const editedRows = parseHtmlTable(editedRaw);
  if (!editedRows) return skip("\uD3B8\uC9D1\uB41C HTML \uD45C \uD30C\uC2F1 \uC2E4\uD328");
  if (editedRows.length !== replica.length) return skip("\uD45C \uD589 \uCD94\uAC00/\uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0 (\uD45C \uAD6C\uC870 \uBCC0\uACBD)");

  let applied = 0;
  replica.forEach((replicaRow, r) => {
    const editedCells = editedRows[r].cells;
    if (editedCells.length !== replicaRow.cells.length) {
      skip(`\uD45C ${r + 1}\uD589 \uC140 \uC218 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0`);
      return;
    }
    replicaRow.cells.forEach((before, c) => {
      const after = editedCells[c];
      if (before.colSpan !== after.colSpan || before.rowSpan !== after.rowSpan) {
        skip(`\uC140 \uBCD1\uD569(colspan/rowspan) \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0`);
        return;
      }
      if (before.inner === after.inner) return;

      const beforeContent = htmlCellInnerToLines(before.inner);
      const afterContent = htmlCellInnerToLines(after.inner);

      if (beforeContent.hadNonText || afterContent.hadNonText) {
        if (extractImgTags(before.inner) !== extractImgTags(after.inner)) {
          skip("\uC140 \uB0B4 \uC774\uBBF8\uC9C0 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");
          return;
        }
        const beforeTables = extractTopLevelTables(before.inner);
        const afterTables = extractTopLevelTables(after.inner);
        if (beforeTables.length !== afterTables.length) {
          skip("\uC140 \uB0B4 \uC911\uCCA9\uD45C \uCD94\uAC00/\uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0");
          return;
        }
        if (beforeTables.join("\n") !== afterTables.join("\n")) {
          applied += patchNestedTables(table, scanTable, before, beforeTables, afterTables, ctx, skip, depth);
        }
      }

      if (beforeContent.lines.join("\n") !== afterContent.lines.join("\n")) {
        const newLines = afterContent.lines.map((l) => unescapeGfm(l));
        applied += applyCellEdit(table, scanTable, before.gridR, before.gridC, newLines, ctx, before.inner, after.inner, beforeContent.lines.length);
      }
    });
  });
  return applied;
}
|
|
20950
|
+
/**
 * Patches the nested tables inside one cell, pairing each HTML fragment with the
 * document-model table and source-mapped table at the same position.
 *
 * The pairing is positional, so it is only trusted when the cell is found in the
 * source map and both the model and the source map hold exactly as many nested
 * tables as the original HTML shows. Unchanged fragments are left alone; changed
 * ones are handed to `patchHtmlTableRaw` one level deeper.
 *
 * @param {object} table - Document-model table that owns the cell.
 * @param {object} scanTable - Source-mapped table that owns the cell.
 * @param {{gridR: number, gridC: number}} oc - Grid position of the cell.
 * @param {string[]} origTables - Original nested-table HTML fragments.
 * @param {string[]} editedTables - Edited nested-table HTML fragments.
 * @param {object} ctx - Patch context.
 * @param {function(string): number} skip - Records a skip reason; its return value is passed through.
 * @param {number} depth - Nesting depth of the owning table.
 * @returns {number} Sum of the values returned by `patchHtmlTableRaw`, or the result of `skip`.
 */
function patchNestedTables(table, scanTable, oc, origTables, editedTables, ctx, skip, depth) {
  const { gridR, gridC } = oc;
  const irCell = table.cells[gridR]?.[gridC];
  const scanCell = scanTable.cellByAnchor.get(`${gridR},${gridC}`);

  const nestedIRs = [];
  for (const block of irCell?.blocks ?? []) {
    if (block.type === "table" && block.table) nestedIRs.push(block.table);
  }

  const expected = origTables.length;
  const mappable = Boolean(scanCell) && nestedIRs.length === expected && scanCell.tables.length === expected;
  if (!mappable) {
    return skip("\uC911\uCCA9\uD45C \uC18C\uC2A4\uB9F5 \uB9E4\uD551 \uC2E4\uD328");
  }

  return origTables.reduce((total, before, k) => {
    const after = editedTables[k];
    if (before === after) return total;
    return total + patchHtmlTableRaw(nestedIRs[k], scanCell.tables[k], before, after, ctx, skip, depth + 1);
  }, 0);
}
|
|
20964
|
+
/**
 * Collects every `<img ...>` tag in `inner`, in order, joined by single spaces.
 * Used as a fingerprint to detect whether a cell's images changed.
 *
 * @param {string} inner - Inner HTML of a cell.
 * @returns {string} Space-joined image tags, or "" when there are none.
 */
function extractImgTags(inner) {
  const imgPattern = /<img\s(?:"[^"]*"|'[^']*'|[^>"'])*?>/gi;
  const tags = Array.from(inner.matchAll(imgPattern), (hit) => hit[0]);
  return tags.join(" ");
}
|
|
20967
|
+
// Matches the image placeholders that can appear inside table-cell text: a markdown
// image written as `![image](...)`, or a bracketed marker of the form `[이미지: ...]`
// ("이미지" is Korean for "image"). Global flag: extractCellTokens collects every
// match and stripCellTokens removes every match.
var CELL_TOKEN_RE = /!\[image\]\([^)]*\)|\[이미지: [^\]]*\]/g;
|
|
20968
|
+
/**
 * Collects every image placeholder (`CELL_TOKEN_RE`) found in `text`, in order,
 * joined by single spaces. Used as a fingerprint to detect image changes in a cell.
 *
 * @param {string} text - Cell text.
 * @returns {string} Space-joined placeholders, or "" when there are none.
 */
function extractCellTokens(text) {
  const tokens = text.match(CELL_TOKEN_RE);
  if (tokens === null) return "";
  return tokens.join(" ");
}
|
|
20971
|
+
/**
 * Removes every image placeholder (`CELL_TOKEN_RE`) from `text`, leaving the
 * surrounding characters untouched.
 *
 * @param {string} text - Cell text.
 * @returns {string} Text without image placeholders.
 */
function stripCellTokens(text) {
  // Splitting on the placeholder pattern and re-joining drops exactly the matched spans.
  const remaining = text.split(CELL_TOKEN_RE);
  return remaining.join("");
}
|
|
20974
|
+
/**
 * Applies edits to a table that was rendered as plain text lines rather than as
 * a pipe or HTML table.
 *
 * Two layouts are handled:
 *  - 1x1 table: the single cell's lines are rendered one per markdown line, with
 *    lines that start like "1. " wrapped in `**...**`. The edited lines are
 *    unwrapped and unescaped, image placeholders are removed, and the whole set
 *    is applied to cell (0, 0).
 *  - single column with two or more rows: each non-empty row renders as one line
 *    (newlines inside a cell become spaces); changed lines are applied row by row.
 * In both layouts the rendering re-created from the document model must equal the
 * original markdown, and image placeholders must be unchanged. Any other shape is
 * refused.
 *
 * @param {object} table - Document-model table (`rows`, `cols`, `cells`).
 * @param {object} scanTable - Source-mapped table.
 * @param {{raw: string, lines: string[]}} orig - Original markdown unit.
 * @param {{raw: string, lines: string[]}} edited - Edited markdown unit.
 * @param {object} ctx - Patch context.
 * @param {function(string): number} skip - Records a skip reason; its return value is passed through.
 * @returns {number} Sum of the values returned by `applyCellEdit`, or the result of `skip`.
 */
function patchTextChunkTable(table, scanTable, orig, edited, ctx, skip) {
  const originalText = orig.lines.join("\n");

  if (table.rows === 1 && table.cols === 1) {
    const renderLine = (line) => {
      const trimmed = line.trim();
      if (!trimmed) return "";
      const escapedLine = escapeGfm(trimmed);
      return /^\d+\.\s/.test(trimmed) ? `**${escapedLine}**` : escapedLine;
    };
    const replicaLines = sanitizeText(table.cells[0][0].text).split(/\n/).map(renderLine).filter(Boolean);
    if (replicaLines.join("\n") !== originalText) return skip("\uD45C \uC88C\uD45C \uC7AC\uD604 \uBD88\uC77C\uCE58 \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");
    if (extractCellTokens(orig.raw) !== extractCellTokens(edited.raw)) return skip("\uC140 \uB0B4 \uC774\uBBF8\uC9C0 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");

    const newLines = [];
    for (const editedLine of edited.lines) {
      const bold = editedLine.match(/^\*\*([\s\S]*)\*\*$/);
      // Only strip the bold markers when they wrap a numbered line, mirroring the rendering above.
      const unwrap = bold && /^\d+\.\s/.test(unescapeGfm(bold[1]));
      const plain = stripCellTokens(unescapeGfm(unwrap ? bold[1] : editedLine)).trim();
      if (plain) newLines.push(plain);
    }
    return applyCellEdit(table, scanTable, 0, 0, newLines, ctx, orig.raw, edited.raw, orig.lines.length);
  }

  if (table.cols === 1 && table.rows >= 2) {
    const replica = [];
    for (let r = 0; r < table.rows; r++) {
      const line = escapeGfm(sanitizeText(table.cells[r][0].text)).replace(/\n/g, " ");
      if (line) replica.push({ line, gridR: r });
    }
    const replicaText = replica.map((item) => item.line).join("\n");
    if (replicaText !== originalText) return skip("\uD45C \uC88C\uD45C \uC7AC\uD604 \uBD88\uC77C\uCE58 \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");
    if (edited.lines.length !== replica.length) return skip("\uD45C \uD589 \uCD94\uAC00/\uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0 (\uD45C \uAD6C\uC870 \uBCC0\uACBD)");

    let applied = 0;
    replica.forEach(({ line, gridR }, i) => {
      const editedLine = edited.lines[i];
      if (line === editedLine) return;
      if (extractCellTokens(line) !== extractCellTokens(editedLine)) {
        skip("\uC140 \uB0B4 \uC774\uBBF8\uC9C0 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");
        return;
      }
      const plain = stripCellTokens(unescapeGfm(editedLine)).trim();
      const newLines = plain ? [plain] : [];
      applied += applyCellEdit(table, scanTable, gridR, 0, newLines, ctx, line, editedLine, 1);
    });
    return applied;
  }

  return skip("\uD45C \uB80C\uB354 \uACBD\uB85C \uC2DD\uBCC4 \uC2E4\uD328");
}
|
|
21014
|
+
/**
 * Queues the splices needed to replace the text of one table cell.
 *
 * The cell is looked up in `scanTable.cellByAnchor` under the key
 * "<gridR>,<gridC>". Before anything is queued, the scanned paragraph text is
 * cross-checked against the parsed cell (`table.cells[gridR][gridC]`), and the
 * edit is skipped when the two disagree. Splices are collected locally and
 * pushed to `ctx.sectionSplices` only at the end, so a skip part-way through
 * leaves no partial edit behind.
 *
 * @param table         Parsed table; reads `cells[gridR][gridC]` (`text`, `blocks`).
 * @param scanTable     Source-map table; reads `cellByAnchor`.
 * @param gridR         Row index of the cell.
 * @param gridC         Column index of the cell.
 * @param newLines      New text lines for the cell (strings).
 * @param ctx           Patch context; reads `scans`, appends to `skipped` and `sectionSplices`.
 * @param before        Original text, used only for skip reports.
 * @param after         Edited text, used only for skip reports.
 * @param origLineCount Number of lines the original rendering had; when given and the
 *                      cell has non-empty paragraphs, it must equal their count.
 * @returns 1 when splices were queued, otherwise 0.
 */
function applyCellEdit(table, scanTable, gridR, gridC, newLines, ctx, before, after, origLineCount) {
  const skip = (reason) => {
    ctx.skipped.push({ reason, before: summarize(before), after: summarize(after) });
    return 0;
  };
  const noTextNode = "\uC140 \uBB38\uB2E8\uC5D0 \uD14D\uC2A4\uD2B8 \uB178\uB4DC\uB97C \uB9CC\uB4E4 \uC218 \uC5C6\uC74C";

  const cell = scanTable.cellByAnchor.get(`${gridR},${gridC}`);
  if (!cell) {
    return skip("\uC140 \uC88C\uD45C \uB9E4\uD551 \uC2E4\uD328 (\uBCD1\uD569 \uC601\uC5ED\uC758 \uBE48 \uCE78\uC774\uAC70\uB098 \uC88C\uD45C \uBD88\uC77C\uCE58)");
  }

  // Cross-check the scanned paragraphs against the parsed cell content.
  const irCell = table.cells[gridR]?.[gridC];
  const scanTexts = [];
  for (const { text } of cell.paragraphs) {
    if (normForMatch(text)) scanTexts.push(text);
  }
  const scanJoined = scanTexts.join("\n");
  if (irCell && normForMatch(scanJoined) !== normForMatch(stripCellTokens(irCell.text))) {
    const bothBlank = normForMatch(irCell.text) === "" && normForMatch(scanJoined) === "";
    if (!bothBlank) {
      // Second chance: compare against the cell's paragraph/heading blocks only.
      const flatJoined = (irCell.blocks ?? [])
        .filter((b) => b.type === "paragraph" || b.type === "heading")
        .map((b) => b.text ?? "")
        .join("\n");
      if (normForMatch(scanJoined) !== normForMatch(flatJoined)) {
        return skip("\uC140 \uCF58\uD150\uCE20 \uAD6C\uC870 \uBCF5\uC7A1 (\uC911\uCCA9\uD45C/\uAE00\uC0C1\uC790) \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");
      }
    }
  }

  const nonEmpty = cell.paragraphs.filter((p) => normForMatch(p.text) !== "");
  const lineCountGiven = origLineCount !== void 0;
  if (lineCountGiven && nonEmpty.length > 0 && origLineCount !== nonEmpty.length) {
    return skip("\uC140 \uC904 \uACBD\uACC4 \uB9E4\uD551 \uBAA8\uD638 (\uB9AC\uD130\uB7F4 <br>/\uBB38\uB2E8 \uB0B4 \uC904\uBC14\uAFC8) \u2014 \uBBF8\uC9C0\uC6D0");
  }

  // Refuse text that sanitizeText would alter.
  const unstable = newLines.find((l) => sanitizeText(l) !== l);
  if (unstable !== void 0) {
    return skip("\uACF5\uBC31 \uC815\uADDC\uD654 \uBD88\uC548\uC815 \uD14D\uC2A4\uD2B8 \u2014 \uD328\uCE58 \uC2DC \uC6D0\uBB38 \uBCF4\uC874 \uBD88\uAC00\uB85C \uBBF8\uC9C0\uC6D0");
  }

  const splices = [];
  let sectionIndex = -1;

  if (nonEmpty.length === 0) {
    // Empty cell: put all new text into its first paragraph.
    if (newLines.length === 0) return 0;
    const target = cell.paragraphs[0];
    if (!target) {
      return skip("\uBE48 \uC140\uC5D0 \uBB38\uB2E8\uC774 \uC5C6\uC5B4 \uD14D\uC2A4\uD2B8 \uC0BD\uC785 \uBD88\uAC00");
    }
    const built = buildParagraphSplices(target, newLines.join(" "), ctx.scans[target.sectionIndex]?.xml);
    if (built === null) return skip(noTextNode);
    splices.push(...built);
    sectionIndex = target.sectionIndex;
    if (newLines.length > 1) {
      ctx.skipped.push({ reason: "\uC140 \uB0B4 \uC904 \uCD94\uAC00\uB294 \uBB38\uB2E8 \uC0DD\uC131 \uBBF8\uC9C0\uC6D0 \u2014 \uD55C \uBB38\uB2E8\uC73C\uB85C \uBCD1\uD569 \uC801\uC6A9", after: summarize(after) });
    }
  } else {
    // One new line per existing non-empty paragraph. Surplus lines are merged
    // into the last paragraph; missing lines blank the remaining paragraphs.
    const overflow = newLines.length > nonEmpty.length;
    const lastIdx = nonEmpty.length - 1;
    const assigned = nonEmpty.map((_, i) => {
      if (i >= newLines.length) return "";
      return overflow && i === lastIdx ? newLines.slice(i).join(" ") : newLines[i];
    });
    if (overflow) {
      ctx.skipped.push({ reason: "\uC140 \uB0B4 \uC904 \uCD94\uAC00\uB294 \uBB38\uB2E8 \uC0DD\uC131 \uBBF8\uC9C0\uC6D0 \u2014 \uB9C8\uC9C0\uB9C9 \uBB38\uB2E8\uC5D0 \uBCD1\uD569 \uC801\uC6A9", after: summarize(after) });
    }
    for (let i = 0; i < nonEmpty.length; i += 1) {
      const para = nonEmpty[i];
      const text = assigned[i];
      const unchanged = text === para.text || normForMatch(text) === normForMatch(para.text);
      if (unchanged) continue;
      const built = buildParagraphSplices(para, text, ctx.scans[para.sectionIndex]?.xml);
      if (built === null) return skip(noTextNode);
      splices.push(...built);
      sectionIndex = para.sectionIndex;
    }
  }

  if (splices.length === 0) return 0;
  ctx.sectionSplices[sectionIndex].push(...splices);
  return 1;
}
|
|
21075
|
+
|
|
21076
|
+
// src/roundtrip/patcher.ts
|
|
21077
|
+
/**
 * Applies the edits found in `editedMarkdown` to an HWPX archive.
 *
 * Steps: parse the original, scan every section XML, align the original
 * Markdown units with the edited ones, collect text splices per section,
 * rebuild the changed ZIP entries and, unless `options.verify === false`,
 * re-parse the result and diff it against the edited Markdown.
 *
 * Failures are reported through the returned object rather than by rejecting.
 * This includes errors raised while locating, reading or scanning the section
 * XML entries.
 *
 * @param original       Uint8Array holding the original HWPX (ZIP) bytes.
 * @param editedMarkdown Edited Markdown text.
 * @param options        Optional settings; `verify: false` disables the re-parse check.
 * @returns `{ success: true, data, applied, skipped, verification }` on success,
 *          `{ success: false, applied, skipped, error }` on failure.
 */
async function patchHwpx(original, editedMarkdown, options) {
  const skipped = [];
  let applied = 0;
  const describe = (err) => err instanceof Error ? err.message : String(err);
  const fail = (error, appliedCount = 0) => ({ success: false, applied: appliedCount, skipped, error });

  let origBlocks;
  try {
    const parsed = await parseHwpxDocument(u8ToArrayBuffer(original));
    origBlocks = parsed.blocks;
  } catch (err) {
    return fail(`\uC6D0\uBCF8 HWPX \uD30C\uC2F1 \uC2E4\uD328: ${describe(err)}`);
  }

  let zip;
  try {
    zip = await JSZip7.loadAsync(original);
  } catch {
    return fail("ZIP \uB85C\uB4DC \uC2E4\uD328");
  }

  // Entry contents are read lazily by the ZIP library, so reading the manifest
  // or a section can still fail after loadAsync succeeded. Keep those reads,
  // and the XML scan, inside the same result-object error contract as the
  // other steps.
  let sectionPaths;
  const scans = [];
  try {
    sectionPaths = await resolveSectionEntryNames(zip);
    for (let i = 0; i < sectionPaths.length; i++) {
      const xml = await zip.file(sectionPaths[i]).async("text");
      scans.push(scanSectionXml(xml, i));
    }
  } catch (err) {
    return fail(`HWPX \uC139\uC158 \uB85C\uB4DC \uC2E4\uD328: ${describe(err)}`);
  }
  if (sectionPaths.length === 0) {
    return fail("HWPX \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
  }

  const origUnits = buildOrigUnits(origBlocks);
  const editedUnits = splitMarkdownUnits(editedMarkdown);
  const pairs = alignUnits(origUnits.map((u) => u.raw), editedUnits.map((u) => u.raw));
  const paraMap = resolveParagraphMappings(origBlocks, scans);
  const scanTables = scans.flatMap((s) => s.tables.filter((t) => t.rows.length > 0));
  const obTableOrdinals = buildTableOrdinals(origBlocks);
  // One splice list per section, indexed like `scans`.
  const sectionSplices = scans.map(() => []);

  for (const [oi, ei] of pairs) {
    if (oi !== null && ei !== null) {
      const orig = origUnits[oi];
      const edited = editedUnits[ei];
      if (orig.raw === edited.raw) continue;
      applied += handleModifiedUnit(orig, edited, {
        origBlocks,
        paraMap,
        scans,
        scanTables,
        obTableOrdinals,
        sectionSplices,
        skipped
      });
    } else if (oi !== null) {
      // A unit present only in the original: deletion is reported, not applied.
      skipped.push({ reason: "\uBE14\uB85D \uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0 (v1) \u2014 \uC6D0\uBCF8 \uC720\uC9C0", before: summarize(origUnits[oi].raw) });
    } else if (ei !== null) {
      // A unit present only in the edit: insertion is reported, not applied.
      skipped.push({ reason: "\uBE14\uB85D \uCD94\uAC00\uB294 \uBBF8\uC9C0\uC6D0 (v1)", after: summarize(editedUnits[ei].raw) });
    }
  }

  const replacements = /* @__PURE__ */ new Map();
  const encoder = new TextEncoder();
  try {
    for (let i = 0; i < scans.length; i++) {
      if (sectionSplices[i].length === 0) continue;
      const newXml = applySplices(scans[i].xml, sectionSplices[i]);
      replacements.set(sectionPaths[i], encoder.encode(newXml));
    }
  } catch (err) {
    return fail(`\uC18C\uC2A4\uB9F5 splice \uC2E4\uD328: ${describe(err)}`);
  }

  let data;
  if (replacements.size === 0) {
    // Nothing changed: hand back a copy of the input bytes.
    data = new Uint8Array(original);
  } else {
    try {
      data = patchZipEntries(original, replacements);
    } catch (err) {
      return fail(`ZIP \uC7AC\uC870\uB9BD \uC2E4\uD328: ${describe(err)}`);
    }
  }

  let verification;
  if (options?.verify !== false) {
    try {
      const reparsed = await parseHwpxDocument(u8ToArrayBuffer(data));
      verification = diffUnitLists(splitMarkdownUnits(reparsed.markdown), editedUnits);
    } catch (err) {
      // This failure path reports the real applied count, unlike the earlier ones.
      return fail(`\uD328\uCE58\uBCF8 \uC7AC\uD30C\uC2F1 \uC2E4\uD328 \u2014 \uD328\uCE58 \uC911\uB2E8: ${describe(err)}`, applied);
    }
  }
  return { success: true, data, applied, skipped, verification };
}
|
|
21161
|
+
/**
 * Renders the parsed blocks to Markdown and splits the result into units,
 * tagging each unit with the index of the block it came from.
 *
 * A paragraph whose sanitized text starts with "[별표 <digits>" is rendered
 * together with the following paragraph when that paragraph's text ends in
 * "관련" or "관련)"; both blocks are then consumed at once.
 *
 * Each unit receives:
 *  - `blockIdx`: index of the (first) source block;
 *  - `fragment`: true when two blocks were merged, or when a paragraph/heading
 *    produced more than one unit; otherwise undefined;
 *  - `role: "caption"`: on the first unit of a captioned table that produced
 *    several units, when that unit is text starting with `**`.
 *
 * @param blocks Parsed document blocks.
 * @returns Array of units in document order.
 */
function buildOrigUnits(blocks) {
  const units = [];
  let idx = 0;
  while (idx < blocks.length) {
    const block = blocks[idx];
    let consume = 1;
    if (block.type === "paragraph" && block.text && /^\[별표\s*\d+/.test(sanitizeText(block.text))) {
      const follower = blocks[idx + 1];
      if (follower?.type === "paragraph" && follower.text && /관련\)?$/.test(follower.text)) {
        consume = 2;
      }
    }
    const chunk = blocksToMarkdown(blocks.slice(idx, idx + consume));
    if (chunk) {
      const subUnits = splitMarkdownUnits(chunk);
      const isTextBlock = block.type === "paragraph" || block.type === "heading";
      const isFragment = consume === 2 || isTextBlock && subUnits.length > 1;
      subUnits.forEach((sub, s) => {
        const unit = { ...sub, blockIdx: idx, fragment: isFragment || void 0 };
        const isCaption = block.type === "table" && block.table?.caption && s === 0 && subUnits.length > 1 && unit.kind === "text" && unit.raw.startsWith("**");
        if (isCaption) {
          unit.role = "caption";
        }
        units.push(unit);
      });
    }
    idx += consume;
  }
  return units;
}
|
|
21189
|
+
/**
 * Numbers the table blocks of a document in order of appearance.
 *
 * Only blocks with `type === "table"` and a truthy `table` property are counted.
 *
 * @param blocks Parsed document blocks.
 * @returns Map from block index to zero-based table ordinal.
 */
function buildTableOrdinals(blocks) {
  const ordinals = /* @__PURE__ */ new Map();
  for (const [idx, block] of blocks.entries()) {
    const isTable = block.type === "table" && block.table;
    if (isTable) {
      // Keys are unique, so the current size is the next ordinal.
      ordinals.set(idx, ordinals.size);
    }
  }
  return ordinals;
}
|
|
21197
|
+
/**
 * Aligns two lists of unit strings and returns index pairs in order.
 *
 * Each pair is `[indexInA, indexInB]`; `null` on one side means the unit has
 * no counterpart (removed from `a` or added in `b`).
 *
 * Normal path: exact matches are found with a longest-common-subsequence
 * table. The gaps between matches are then paired up: positionally when both
 * sides of a gap have the same length, otherwise by `normalizedSimilarity`
 * with a 0.4 threshold and a look-ahead for a better partner on the longer
 * side.
 *
 * Large inputs (`a.length * b.length > 4e6`) skip the table. The common prefix
 * and suffix are paired, and the middle is paired positionally when both
 * middles have the same length, or emitted as all-removed followed by
 * all-added otherwise.
 *
 * @param {string[]} a Original unit strings.
 * @param {string[]} b Edited unit strings.
 * @returns {Array<[number|null, number|null]>} Aligned index pairs.
 */
function alignUnits(a, b) {
  const m = a.length;
  const n = b.length;

  if (m * n > 4e6) {
    let pre = 0;
    while (pre < m && pre < n && a[pre] === b[pre]) pre++;
    let suf = 0;
    while (suf < m - pre && suf < n - pre && a[m - 1 - suf] === b[n - 1 - suf]) suf++;

    const coarse = [];
    for (let k = 0; k < pre; k++) coarse.push([k, k]);
    const aMid = m - pre - suf;
    const bMid = n - pre - suf;
    if (aMid === bMid) {
      for (let k = 0; k < aMid; k++) coarse.push([pre + k, pre + k]);
    } else {
      for (let k = 0; k < aMid; k++) coarse.push([pre + k, null]);
      for (let k = 0; k < bMid; k++) coarse.push([null, pre + k]);
    }
    for (let k = suf; k > 0; k--) coarse.push([m - k, n - k]);
    return coarse;
  }

  // Longest-common-subsequence table over exact string equality.
  const dp = Array.from({ length: m + 1 }, () => new Int32Array(n + 1));
  for (let row = 1; row <= m; row++) {
    const cur = dp[row];
    const prev = dp[row - 1];
    for (let col = 1; col <= n; col++) {
      if (a[row - 1] === b[col - 1]) {
        cur[col] = prev[col - 1] + 1;
      } else {
        cur[col] = Math.max(prev[col], cur[col - 1]);
      }
    }
  }

  // Walk the table backwards to recover the matched pairs.
  const matches = [];
  let i = m;
  let j = n;
  while (i > 0 && j > 0) {
    const isMatch = a[i - 1] === b[j - 1] && dp[i][j] === dp[i - 1][j - 1] + 1;
    if (isMatch) {
      matches.push([i - 1, j - 1]);
      i--;
      j--;
    } else if (dp[i - 1][j] >= dp[i][j - 1]) {
      i--;
    } else {
      j--;
    }
  }
  matches.reverse();

  const result = [];
  let ai = 0;
  let bi = 0;
  // Emits pairs for the unmatched stretch a[ai..aEnd) / b[bi..bEnd).
  const flushGap = (aEnd, bEnd) => {
    if (aEnd - ai === bEnd - bi) {
      while (ai < aEnd) result.push([ai++, bi++]);
      return;
    }
    while (ai < aEnd && bi < bEnd) {
      const aLeft = aEnd - ai;
      const bLeft = bEnd - bi;
      const sim = normalizedSimilarity(a[ai], b[bi]);
      if (!(sim >= 0.4)) {
        // Too dissimilar: drop one unit from the longer side.
        if (aLeft >= bLeft) result.push([ai++, null]);
        else result.push([null, bi++]);
        continue;
      }
      // Similar enough, unless a later unit on the longer side fits better.
      if (aLeft > bLeft && bestSimInRange(a, ai + 1, ai + aLeft - bLeft, b[bi]) > sim) {
        result.push([ai++, null]);
      } else if (bLeft > aLeft && bestSimInRange(b, bi + 1, bi + bLeft - aLeft, a[ai]) > sim) {
        result.push([null, bi++]);
      } else {
        result.push([ai++, bi++]);
      }
    }
    while (ai < aEnd) result.push([ai++, null]);
    while (bi < bEnd) result.push([null, bi++]);
  };

  for (const [matchA, matchB] of matches) {
    flushGap(matchA, matchB);
    result.push([ai++, bi++]);
  }
  flushGap(m, n);
  return result;
}
|
|
21265
|
+
/**
 * Finds the highest `normalizedSimilarity` between `target` and any element
 * of `arr` with an index in the inclusive range [from, to].
 *
 * Indices beyond the end of `arr` are ignored.
 *
 * @param {string[]} arr    Candidate strings.
 * @param {number}   from   First index to consider.
 * @param {number}   to     Last index to consider (inclusive).
 * @param {string}   target String to compare against.
 * @returns {number} The best similarity found, or 0 when the range is empty.
 */
function bestSimInRange(arr, from, to, target) {
  let best = 0;
  let idx = from;
  while (idx <= to && idx < arr.length) {
    const score = normalizedSimilarity(arr[idx], target);
    if (score > best) {
      best = score;
    }
    idx += 1;
  }
  return best;
}
|
|
21273
|
+
function resolveParagraphMappings(blocks, scans) {
|
|
21274
|
+
const buckets = /* @__PURE__ */ new Map();
|
|
21275
|
+
for (const scan of scans) {
|
|
21276
|
+
for (const para of scan.bodyParagraphs) {
|
|
21277
|
+
const key = normForMatch(para.text);
|
|
21278
|
+
if (!key) continue;
|
|
21279
|
+
let list = buckets.get(key);
|
|
21280
|
+
if (!list) buckets.set(key, list = []);
|
|
21281
|
+
list.push(para);
|
|
21282
|
+
}
|
|
21283
|
+
}
|
|
21284
|
+
const headerNorms = new Set(scans.flatMap((s) => s.headerTexts.map(normForMatch)).filter(Boolean));
|
|
21285
|
+
const footerNorms = new Set(scans.flatMap((s) => s.footerTexts.map(normForMatch)).filter(Boolean));
|
|
21286
|
+
const pageText = /* @__PURE__ */ new Set();
|
|
21287
|
+
for (let i = 0; i < blocks.length; i++) {
|
|
21288
|
+
const b = blocks[i];
|
|
21289
|
+
if (b.type !== "paragraph" && b.type !== "heading" || !b.text || !headerNorms.has(normForMatch(b.text))) break;
|
|
21290
|
+
pageText.add(i);
|
|
21291
|
+
}
|
|
21292
|
+
for (let i = blocks.length - 1; i >= 0; i--) {
|
|
21293
|
+
const b = blocks[i];
|
|
21294
|
+
if (b.type !== "paragraph" && b.type !== "heading" || !b.text || !footerNorms.has(normForMatch(b.text))) break;
|
|
21295
|
+
pageText.add(i);
|
|
21296
|
+
}
|
|
21297
|
+
const counters = /* @__PURE__ */ new Map();
|
|
21298
|
+
const result = /* @__PURE__ */ new Map();
|
|
21299
|
+
for (let i = 0; i < blocks.length; i++) {
|
|
21300
|
+
const b = blocks[i];
|
|
21301
|
+
if (b.type !== "paragraph" && b.type !== "heading" || !b.text) continue;
|
|
21302
|
+
if (pageText.has(i)) {
|
|
21303
|
+
result.set(i, {});
|
|
21304
|
+
continue;
|
|
21305
|
+
}
|
|
21306
|
+
let key = normForMatch(b.text);
|
|
21307
|
+
let prefixStripped = false;
|
|
21308
|
+
if (!buckets.has(key)) {
|
|
21309
|
+
const sp = b.text.indexOf(" ");
|
|
21310
|
+
if (sp > 0) {
|
|
21311
|
+
const alt = normForMatch(b.text.slice(sp + 1));
|
|
21312
|
+
if (alt && buckets.has(alt)) {
|
|
21313
|
+
key = alt;
|
|
21314
|
+
prefixStripped = true;
|
|
21315
|
+
}
|
|
21316
|
+
}
|
|
21317
|
+
}
|
|
21318
|
+
const list = buckets.get(key);
|
|
21319
|
+
if (!list) {
|
|
21320
|
+
result.set(i, {});
|
|
21321
|
+
continue;
|
|
21322
|
+
}
|
|
21323
|
+
const occ = counters.get(key) ?? 0;
|
|
21324
|
+
counters.set(key, occ + 1);
|
|
21325
|
+
result.set(i, occ < list.length ? { para: list[occ], prefixStripped } : {});
|
|
21326
|
+
}
|
|
21327
|
+
return result;
|
|
21328
|
+
}
|
|
21329
|
+
/**
 * Routes one modified unit to the patcher that matches its source block.
 *
 * Guard order: caption units, separator/image units, units without a source
 * block and fragment units are all skipped. Table blocks go to the GFM, HTML
 * or text-chunk table patcher depending on `orig.kind`. Paragraph/heading
 * blocks go to `patchParagraphUnit` when both units are of kind "text".
 * Everything else is skipped.
 *
 * @param orig   Original unit; reads `blockIdx`, `role`, `kind`, `fragment`, `raw`.
 * @param edited Edited unit; reads `kind`, `raw`.
 * @param ctx    Patch context; reads `origBlocks`, `obTableOrdinals`, `scanTables`;
 *               appends to `skipped`.
 * @returns Whatever the selected patcher returns, or 0 when the unit is skipped.
 */
function handleModifiedUnit(orig, edited, ctx) {
  const block = ctx.origBlocks[orig.blockIdx];
  const skip = (reason) => {
    ctx.skipped.push({ reason, before: summarize(orig.raw), after: summarize(edited.raw) });
    return 0;
  };

  if (orig.role === "caption") {
    return skip("\uD45C \uCEA1\uC158 \uC218\uC815\uC740 \uBBF8\uC9C0\uC6D0 (v1)");
  }
  if (orig.kind === "separator" || orig.kind === "image") {
    return skip("\uC774\uBBF8\uC9C0/\uAD6C\uBD84\uC120 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");
  }
  if (!block) {
    return skip("\uBE14\uB85D \uB9E4\uD551 \uC2E4\uD328");
  }
  if (orig.fragment) {
    return skip("\uBB38\uB2E8 \uBD84\uC808(\uAC15\uC81C \uC904\uBC14\uAFC8/\uBCD1\uD569 \uC720\uB2DB) \u2014 \uBD80\uBD84 \uC218\uC815\uC740 \uBBF8\uC9C0\uC6D0 (v1)");
  }

  if (block.type === "table" && block.table) {
    if (orig.kind !== edited.kind) {
      return skip("\uD45C \u2194 \uBE44\uD45C \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0 (\uD45C \uAD6C\uC870 \uBCC0\uACBD)");
    }
    // Tables are paired by ordinal, which is only valid when both sides count the same number.
    if (ctx.obTableOrdinals.size !== ctx.scanTables.length) {
      return skip("\uD45C \uAC1C\uC218 \uBD88\uC77C\uCE58 \u2014 \uC18C\uC2A4\uB9F5 \uC2E0\uB8B0 \uBD88\uAC00");
    }
    const ordinal = ctx.obTableOrdinals.get(orig.blockIdx);
    const scanTable = ordinal === void 0 ? void 0 : ctx.scanTables[ordinal];
    if (!scanTable) {
      return skip("\uD45C \uC18C\uC2A4\uB9F5 \uB9E4\uD551 \uC2E4\uD328");
    }
    switch (orig.kind) {
      case "gfm-table":
        return patchGfmTable(block.table, scanTable, orig, edited, ctx, skip);
      case "html-table":
        return patchHtmlTable(block.table, scanTable, orig, edited, ctx, skip);
      default:
        return patchTextChunkTable(block.table, scanTable, orig, edited, ctx, skip);
    }
  }

  const isTextBlock = block.type === "paragraph" || block.type === "heading";
  if (isTextBlock && orig.kind === "text" && edited.kind === "text") {
    return patchParagraphUnit(block, orig, edited, ctx, skip);
  }
  return skip("\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uBE14\uB85D \uC720\uD615 \uBCC0\uACBD");
}
|
|
21354
|
+
/**
 * Queues the splices that replace the text of one body paragraph or heading.
 *
 * The edited Markdown is reduced to plain text via `textUnitToPlain`, and two
 * adjustments follow:
 *  - when the block has `footnoteText`, a trailing "(주: ...)" note is cut off
 *    the new text; a changed or removed note is reported in `ctx.skipped`,
 *    but the body text is still applied;
 *  - when the mapping was found with the first word stripped, the first word
 *    of the new text is removed too, provided it equals the original first
 *    word or looks like a list marker; otherwise a note is reported and the
 *    text is applied with the word kept.
 *
 * @param block  Source block; reads `text`, `footnoteText` (and more via `textUnitToPlain`).
 * @param orig   Original unit; reads `blockIdx`, `raw`.
 * @param edited Edited unit; reads `raw`.
 * @param ctx    Patch context; reads `paraMap`, `scans`; appends to `skipped`
 *               and `sectionSplices`.
 * @param skip   Callback that records a skip reason; its result is returned as-is.
 * @returns 1 when splices were queued, otherwise the value returned by `skip`.
 */
function patchParagraphUnit(block, orig, edited, ctx, skip) {
  const mapping = ctx.paraMap.get(orig.blockIdx);
  if (!mapping?.para) {
    return skip("\uBB38\uB2E8 \uC18C\uC2A4\uB9F5 \uB9E4\uD551 \uC2E4\uD328 (\uBA38\uB9AC\uB9D0/\uAE00\uC0C1\uC790/\uCEA1\uC158 \uC601\uC5ED\uC774\uAC70\uB098 \uD14D\uC2A4\uD2B8 \uBD88\uC77C\uCE58)");
  }
  const { para, prefixStripped } = mapping;

  const hasForcedBreak = block.text && block.text.includes("\n");
  if (hasForcedBreak) {
    return skip("\uBB38\uB2E8 \uB0B4 \uAC15\uC81C \uC904\uBC14\uAFC8 \uD3EC\uD568 \u2014 \uC218\uC815 \uC2DC \uC904\uBC14\uAFC8 \uBCF4\uC874 \uBD88\uAC00\uB85C \uBBF8\uC9C0\uC6D0 (v1)");
  }

  const origPlain = textUnitToPlain(orig.raw, block);
  let newPlain = textUnitToPlain(edited.raw, block);

  if (block.footnoteText) {
    const noteMatch = newPlain.match(/\s*\(주: ([\s\S]*)\)$/);
    if (!noteMatch) {
      ctx.skipped.push({ reason: "\uAC01\uC8FC \uD45C\uAE30 \uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0 \u2014 \uAC01\uC8FC \uC720\uC9C0, \uBCF8\uBB38\uB9CC \uC801\uC6A9", before: `(\uC8FC: ${block.footnoteText})` });
    } else {
      newPlain = newPlain.slice(0, noteMatch.index).trimEnd();
      const noteChanged = normForMatch(noteMatch[1]) !== normForMatch(block.footnoteText);
      if (noteChanged) {
        ctx.skipped.push({ reason: "\uAC01\uC8FC \uD14D\uC2A4\uD2B8 \uC218\uC815\uC740 \uBBF8\uC9C0\uC6D0 \u2014 \uBCF8\uBB38\uB9CC \uC801\uC6A9", before: block.footnoteText, after: noteMatch[1] });
      }
    }
  }

  if (prefixStripped) {
    const [origPrefix] = block.text.split(" ", 1);
    const sp = newPlain.indexOf(" ");
    const newFirst = sp > 0 ? newPlain.slice(0, sp) : newPlain;
    const looksLikeMarker = /^(?:[0-90-9a-zA-Z가-힣]{1,6}[.)\]:]|[([][0-90-9a-zA-Z가-힣]{1,6}[)\]][.:]?|[ⅰ-ⅹⅠ-Ⅹ①-⑮][.)\]:]?)$/u;
    if (newFirst === origPrefix || looksLikeMarker.test(newFirst)) {
      newPlain = sp > 0 ? newPlain.slice(sp + 1) : "";
    } else {
      ctx.skipped.push({ reason: "\uC790\uB3D9\uBC88\uD638 \uC811\uB450 \uC2DD\uBCC4 \uC2E4\uD328 \u2014 \uBC88\uD638 \uD3EC\uD568 \uD14D\uC2A4\uD2B8\uB85C \uC801\uC6A9 (\uBDF0\uC5B4\uC5D0\uC11C \uC911\uBCF5 \uD45C\uC2DC \uAC00\uB2A5)", after: summarize(newPlain) });
    }
  }

  if (newPlain === origPlain) {
    return skip("\uD14D\uC2A4\uD2B8 \uC678 \uBCC0\uACBD(\uD5E4\uB529 \uB808\uBCA8/\uC11C\uC2DD)\uB9CC \uAC10\uC9C0 \u2014 \uC2A4\uD0C0\uC77C \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");
  }
  // Refuse text that sanitizeText would alter.
  if (sanitizeText(newPlain) !== newPlain) {
    return skip("\uACF5\uBC31 \uC815\uADDC\uD654 \uBD88\uC548\uC815 \uD14D\uC2A4\uD2B8 \u2014 \uD328\uCE58 \uC2DC \uC6D0\uBB38 \uBCF4\uC874 \uBD88\uAC00\uB85C \uBBF8\uC9C0\uC6D0");
  }

  const splices = buildParagraphSplices(para, newPlain, ctx.scans[para.sectionIndex]?.xml);
  if (splices === null) {
    return skip("\uBB38\uB2E8\uC5D0 \uD14D\uC2A4\uD2B8 \uB178\uB4DC\uB97C \uB9CC\uB4E4 \uC218 \uC5C6\uC74C");
  }
  ctx.sectionSplices[para.sectionIndex].push(...splices);
  return 1;
}
|
|
21392
|
+
/**
 * Converts the Markdown of a text unit back to the plain text of its block.
 *
 * Steps, in order:
 *  1. trim every line, drop empty ones, join the rest with single spaces;
 *  2. strip a leading `#`..`######` marker when the block is a heading or its
 *     sanitized text starts with "[별표 <digits>";
 *  3. when the block has `href` and the whole text is one Markdown link,
 *     keep only the link label;
 *  4. strip one surrounding pair of `*` when the text is `*...*` (not starting
 *     with `**`) and the block's sanitized text has the form "(...조...관련)";
 *  5. undo GFM escaping.
 *
 * @param {string} raw Markdown of the unit.
 * @param block        Source block; reads `type`, `text`, `href`.
 * @returns {string} Plain text.
 */
function textUnitToPlain(raw, block) {
  const pieces = [];
  for (const line of raw.split("\n")) {
    const trimmed = line.trim();
    if (trimmed) pieces.push(trimmed);
  }
  let text = pieces.join(" ");

  const rendersAsHeading = block.type === "heading" || block.text && /^\[별표\s*\d+/.test(sanitizeText(block.text));
  if (rendersAsHeading) {
    text = text.replace(/^#{1,6}\s+/, "");
  }

  if (block.href) {
    const linkMatch = text.match(/^\[([\s\S]*)\]\([^)]*\)$/);
    if (linkMatch) {
      [, text] = linkMatch;
    }
  }

  const rendersAsEmphasis = /^\*[^*][\s\S]*\*$/.test(text) && block.text && /^\([^)]*조[^)]*관련\)$/.test(sanitizeText(block.text));
  if (rendersAsEmphasis) {
    text = text.slice(1, -1);
  }

  return unescapeGfm(text);
}
|
|
21406
|
+
/**
 * Compares two unit lists and summarizes the differences.
 *
 * Units are aligned on their `raw` text with `alignUnits`. Aligned pairs with
 * identical `raw` count as unchanged and are left out of `diffs`.
 *
 * @param a Unit list treated as "before".
 * @param b Unit list treated as "after".
 * @returns `{ stats, diffs }`: `stats` holds `added`, `removed`, `modified` and
 *          `unchanged` counts; `diffs` lists every non-unchanged entry in
 *          alignment order.
 */
function diffUnitLists(a, b) {
  const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
  const diffs = [];
  const rawOf = (u) => u.raw;

  for (const [ai, bi] of alignUnits(a.map(rawOf), b.map(rawOf))) {
    if (ai === null && bi === null) continue;
    if (ai === null) {
      stats.added++;
      diffs.push({ type: "added", after: unitToBlock(b[bi]) });
      continue;
    }
    if (bi === null) {
      stats.removed++;
      diffs.push({ type: "removed", before: unitToBlock(a[ai]) });
      continue;
    }
    const before = a[ai];
    const after = b[bi];
    if (before.raw === after.raw) {
      stats.unchanged++;
      continue;
    }
    stats.modified++;
    diffs.push({
      type: "modified",
      before: unitToBlock(before),
      after: unitToBlock(after),
      similarity: normalizedSimilarity(before.raw, after.raw)
    });
  }
  return { stats, diffs };
}
|
|
21428
|
+
/**
 * Wraps a unit's raw Markdown in a paragraph-shaped block object.
 *
 * @param u Unit; reads `raw`.
 * @returns `{ type: "paragraph", text }` with `text` set to `u.raw`.
 */
function unitToBlock(u) {
  const { raw } = u;
  return { type: "paragraph", text: raw };
}
|
|
21431
|
+
/**
 * Copies the bytes covered by a typed-array view into a buffer of their own.
 *
 * Only the view's window (`byteOffset` .. `byteOffset + byteLength`) is
 * copied, not the whole underlying buffer.
 *
 * @param u8 Typed-array view; reads `buffer`, `byteOffset`, `byteLength`.
 * @returns The result of `buffer.slice` over the view's window.
 */
function u8ToArrayBuffer(u8) {
  const { buffer, byteOffset, byteLength } = u8;
  const end = byteOffset + byteLength;
  return buffer.slice(byteOffset, end);
}
|
|
21434
|
+
/**
 * Determines the ZIP entry names of the section XML files, in order.
 *
 * The package manifest is consulted first ("Contents/content.hpf", then
 * "content.hpf"); paths it lists are kept only when the entry exists in the
 * archive. If no manifest yields a path, every entry whose name ends in
 * "section<digits>.xml" or "Section<digits>.xml" is returned, sorted by name.
 *
 * NOTE(review): the fallback sort is a plain string sort, so "section10.xml"
 * comes before "section2.xml". Confirm this matches the section order used by
 * the parser before changing it.
 *
 * @param zip Loaded archive; uses `file(name)` and `files`.
 * @returns {Promise<string[]>} Section entry names (possibly empty).
 */
async function resolveSectionEntryNames(zip) {
  const manifestCandidates = ["Contents/content.hpf", "content.hpf"];
  for (const candidate of manifestCandidates) {
    const entry = zip.file(candidate);
    if (entry) {
      const manifestXml = await entry.async("text");
      const present = sectionPathsFromManifest(manifestXml).filter((p) => zip.file(p) !== null);
      if (present.length > 0) {
        return present;
      }
    }
  }
  const byName = Object.keys(zip.files).filter((n) => /[Ss]ection\d+\.xml$/.test(n));
  byName.sort();
  return byName;
}
|
|
21444
|
+
/**
 * Extracts section file paths from the XML of a package manifest.
 *
 * `<opf:item>` elements are collected into an id -> href table when their id
 * looks like a section id (starts with "s"/"S" or contains "section") or
 * their media-type contains "xml". For section-like ids, an href that starts
 * with neither "/" nor "Contents/" is prefixed with "Contents/".
 *
 * The result follows the `<opf:itemref idref="...">` order when at least one
 * itemref resolves through that table. Otherwise it is the hrefs of the
 * section-like ids, sorted by id with `localeCompare`.
 *
 * @param {string} xml Manifest XML text.
 * @returns {string[]} Section paths (possibly empty).
 */
function sectionPathsFromManifest(xml) {
  const looksLikeSection = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
  // Reads a single- or double-quoted attribute value; "" when the attribute is absent.
  const readAttr = (tag, name) => {
    const pattern = new RegExp(`(?:^|\\s)${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`);
    const hit = tag.match(pattern);
    if (!hit) return "";
    return hit[1] ?? hit[2];
  };

  const hrefById = /* @__PURE__ */ new Map();
  for (const [, attrs] of xml.matchAll(/<opf:item(\s(?:"[^"]*"|'[^']*'|[^>"'])*?)\/?>/g)) {
    const id = readAttr(attrs, "id");
    const mediaType = readAttr(attrs, "media-type");
    const sectionLike = looksLikeSection(id);
    if (!sectionLike && !mediaType.includes("xml")) continue;
    if (!id) continue;
    const rawHref = readAttr(attrs, "href");
    const needsPrefix = sectionLike && !rawHref.startsWith("/") && !rawHref.startsWith("Contents/");
    hrefById.set(id, needsPrefix ? "Contents/" + rawHref : rawHref);
  }

  const spine = [];
  for (const [, attrs] of xml.matchAll(/<opf:itemref(\s(?:"[^"]*"|'[^']*'|[^>"'])*?)\/?>/g)) {
    const href = hrefById.get(readAttr(attrs, "idref"));
    if (href) spine.push(href);
  }
  if (spine.length > 0) {
    return spine;
  }

  return [...hrefById]
    .filter(([id]) => looksLikeSection(id))
    .sort(([idA], [idB]) => idA.localeCompare(idB))
    .map(([, href]) => href);
}
|
|
21467
|
+
|
|
19044
21468
|
// src/print/renderer.ts
|
|
19045
21469
|
import { existsSync } from "fs";
|
|
19046
21470
|
import MarkdownIt from "markdown-it";
|
|
@@ -19281,7 +21705,7 @@ async function parseHwp(buffer, options) {
|
|
|
19281
21705
|
async function parsePdf(buffer, options) {
|
|
19282
21706
|
let parsePdfDocument;
|
|
19283
21707
|
try {
|
|
19284
|
-
const mod = await import("./parser-
|
|
21708
|
+
const mod = await import("./parser-4IVYHKSL.js");
|
|
19285
21709
|
parsePdfDocument = mod.parsePdfDocument;
|
|
19286
21710
|
} catch {
|
|
19287
21711
|
return {
|
|
@@ -19399,6 +21823,7 @@ export {
|
|
|
19399
21823
|
parsePdf,
|
|
19400
21824
|
parseXls,
|
|
19401
21825
|
parseXlsx,
|
|
21826
|
+
patchHwpx,
|
|
19402
21827
|
renderHtml
|
|
19403
21828
|
};
|
|
19404
21829
|
//# sourceMappingURL=index.js.map
|