kordoc 2.9.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +66 -8
  2. package/dist/-K5SLEFZD.js +71 -0
  3. package/dist/-K5SLEFZD.js.map +1 -0
  4. package/dist/{chunk-M24KMDAR.js → chunk-326STEDU.js} +6684 -4061
  5. package/dist/chunk-326STEDU.js.map +1 -0
  6. package/dist/{chunk-QB7CS534.cjs → chunk-3WRJQQIO.cjs} +185 -16
  7. package/dist/chunk-3WRJQQIO.cjs.map +1 -0
  8. package/dist/chunk-MUOQXDZ4.cjs.map +1 -1
  9. package/dist/{chunk-RXZLTACX.js → chunk-NHXKJWR7.js} +182 -13
  10. package/dist/chunk-NHXKJWR7.js.map +1 -0
  11. package/dist/{chunk-SJ5TPMBT.js → chunk-SA2PERJ5.js} +182 -13
  12. package/dist/chunk-SA2PERJ5.js.map +1 -0
  13. package/dist/cli.js +42 -3
  14. package/dist/cli.js.map +1 -1
  15. package/dist/formula-XGG6ZP42.cjs.map +1 -1
  16. package/dist/index.cjs +3247 -822
  17. package/dist/index.cjs.map +1 -1
  18. package/dist/index.d.cts +61 -2
  19. package/dist/index.d.ts +61 -2
  20. package/dist/index.js +3025 -600
  21. package/dist/index.js.map +1 -1
  22. package/dist/mcp.js +3 -3
  23. package/dist/page-range-3C7UGGEK.cjs.map +1 -1
  24. package/dist/{parser-OMPBVEFU.js → parser-4IVYHKSL.js} +677 -85
  25. package/dist/parser-4IVYHKSL.js.map +1 -0
  26. package/dist/{parser-EL5YETUA.cjs → parser-5KHU732L.cjs} +689 -97
  27. package/dist/parser-5KHU732L.cjs.map +1 -0
  28. package/dist/{parser-XBYGROQB.js → parser-AU2NLC44.js} +677 -85
  29. package/dist/parser-AU2NLC44.js.map +1 -0
  30. package/dist/provider-SNONEZNW.cjs.map +1 -1
  31. package/dist/{watch-ULLLK7ID.js → watch-5DDN4BUI.js} +3 -3
  32. package/package.json +1 -1
  33. package/dist/chunk-M24KMDAR.js.map +0 -1
  34. package/dist/chunk-QB7CS534.cjs.map +0 -1
  35. package/dist/chunk-RXZLTACX.js.map +0 -1
  36. package/dist/chunk-SJ5TPMBT.js.map +0 -1
  37. package/dist/parser-EL5YETUA.cjs.map +0 -1
  38. package/dist/parser-OMPBVEFU.js.map +0 -1
  39. package/dist/parser-XBYGROQB.js.map +0 -1
  40. /package/dist/{watch-ULLLK7ID.js.map → watch-5DDN4BUI.js.map} +0 -0
package/dist/index.js CHANGED
@@ -12,11 +12,12 @@ import {
12
12
  convertTableToText,
13
13
  flattenLayoutTables,
14
14
  isPathTraversal,
15
+ mapPuaText,
15
16
  precheckZipSize,
16
17
  sanitizeHref,
17
18
  stripDtd,
18
19
  toArrayBuffer
19
- } from "./chunk-RXZLTACX.js";
20
+ } from "./chunk-NHXKJWR7.js";
20
21
  import {
21
22
  parsePageRange
22
23
  } from "./chunk-SBVRCJFH.js";
@@ -818,6 +819,9 @@ function clampSpan(val, max) {
818
819
  return Math.max(1, Math.min(val, max));
819
820
  }
820
821
  var MAX_XML_DEPTH = 200;
822
+ function createSectionShared() {
823
+ return { numState: /* @__PURE__ */ new Map(), pageText: { headers: [], footers: [] }, track: { deleteDepth: 0, warned: false } };
824
+ }
821
825
  function createXmlParser(warnings) {
822
826
  return new DOMParser({
823
827
  onError(level, msg) {
@@ -829,7 +833,10 @@ function createXmlParser(warnings) {
829
833
  async function extractHwpxStyles(zip, decompressed) {
830
834
  const result = {
831
835
  charProperties: /* @__PURE__ */ new Map(),
832
- styles: /* @__PURE__ */ new Map()
836
+ styles: /* @__PURE__ */ new Map(),
837
+ numberings: /* @__PURE__ */ new Map(),
838
+ bullets: /* @__PURE__ */ new Map(),
839
+ paraHeadings: /* @__PURE__ */ new Map()
833
840
  };
834
841
  const headerPaths = ["Contents/header.xml", "header.xml", "Contents/head.xml", "head.xml"];
835
842
  for (const hp of headerPaths) {
@@ -847,6 +854,10 @@ async function extractHwpxStyles(zip, decompressed) {
847
854
  if (!doc.documentElement) continue;
848
855
  parseCharProperties(doc, result.charProperties);
849
856
  parseStyleElements(doc, result.styles);
857
+ const domDoc = doc;
858
+ parseNumberings(domDoc, result.numberings);
859
+ parseBullets(domDoc, result.bullets);
860
+ parseParaHeadings(domDoc, result.paraHeadings);
850
861
  break;
851
862
  } catch {
852
863
  continue;
@@ -904,6 +915,162 @@ function parseStyleElements(doc, map) {
904
915
  }
905
916
  }
906
917
  }
918
+ function parseNumberings(doc, map) {
919
+ const tagNames = ["hh:numbering", "numbering"];
920
+ for (const tagName of tagNames) {
921
+ const elements = doc.getElementsByTagName(tagName);
922
+ for (let i = 0; i < elements.length; i++) {
923
+ const el = elements[i];
924
+ const id = el.getAttribute("id") || "";
925
+ if (!id) continue;
926
+ const def = { heads: /* @__PURE__ */ new Map() };
927
+ const children = el.childNodes;
928
+ for (let j = 0; j < children.length; j++) {
929
+ const ch = children[j];
930
+ if (ch.nodeType !== 1) continue;
931
+ const tag = (ch.tagName || ch.localName || "").replace(/^[^:]+:/, "");
932
+ if (tag !== "paraHead") continue;
933
+ const level = parseInt(ch.getAttribute("level") || "", 10);
934
+ if (isNaN(level) || level < 1 || level > 10) continue;
935
+ const start = parseInt(ch.getAttribute("start") || "1", 10);
936
+ def.heads.set(level, {
937
+ numFormat: ch.getAttribute("numFormat") || "DIGIT",
938
+ text: ch.textContent || "",
939
+ start: isNaN(start) ? 1 : start
940
+ });
941
+ }
942
+ if (def.heads.size > 0) map.set(id, def);
943
+ }
944
+ if (map.size > 0) break;
945
+ }
946
+ }
947
+ function parseBullets(doc, map) {
948
+ const tagNames = ["hh:bullet", "bullet"];
949
+ for (const tagName of tagNames) {
950
+ const elements = doc.getElementsByTagName(tagName);
951
+ for (let i = 0; i < elements.length; i++) {
952
+ const el = elements[i];
953
+ const id = el.getAttribute("id") || "";
954
+ const char = el.getAttribute("char") || "";
955
+ if (id && char) map.set(id, char);
956
+ }
957
+ if (map.size > 0) break;
958
+ }
959
+ }
960
+ function parseParaHeadings(doc, map) {
961
+ const tagNames = ["hh:paraPr", "paraPr"];
962
+ for (const tagName of tagNames) {
963
+ const elements = doc.getElementsByTagName(tagName);
964
+ for (let i = 0; i < elements.length; i++) {
965
+ const el = elements[i];
966
+ const id = el.getAttribute("id") || "";
967
+ if (!id) continue;
968
+ const heading = findChildByLocalName(el, "heading");
969
+ if (!heading) continue;
970
+ const type = heading.getAttribute("type") || "NONE";
971
+ if (type !== "NUMBER" && type !== "BULLET" && type !== "OUTLINE") continue;
972
+ const level = parseInt(heading.getAttribute("level") || "0", 10);
973
+ map.set(id, {
974
+ type,
975
+ idRef: heading.getAttribute("idRef") || "0",
976
+ level: isNaN(level) ? 0 : Math.max(0, Math.min(level, 9))
977
+ });
978
+ }
979
+ if (map.size > 0) break;
980
+ }
981
+ }
982
+ var HANGUL_SYLLABLE_SEQ = "\uAC00\uB098\uB2E4\uB77C\uB9C8\uBC14\uC0AC\uC544\uC790\uCC28\uCE74\uD0C0\uD30C\uD558";
983
+ var HANGUL_JAMO_SEQ = "\u3131\u3134\u3137\u3139\u3141\u3142\u3145\u3147\u3148\u314A\u314B\u314C\u314D\u314E";
984
+ function toRoman(n) {
985
+ if (n <= 0 || n >= 4e3) return String(n);
986
+ const table = [
987
+ [1e3, "M"],
988
+ [900, "CM"],
989
+ [500, "D"],
990
+ [400, "CD"],
991
+ [100, "C"],
992
+ [90, "XC"],
993
+ [50, "L"],
994
+ [40, "XL"],
995
+ [10, "X"],
996
+ [9, "IX"],
997
+ [5, "V"],
998
+ [4, "IV"],
999
+ [1, "I"]
1000
+ ];
1001
+ let out = "";
1002
+ for (const [v, s] of table) {
1003
+ while (n >= v) {
1004
+ out += s;
1005
+ n -= v;
1006
+ }
1007
+ }
1008
+ return out;
1009
+ }
1010
+ function formatHeadNumber(n, numFormat) {
1011
+ if (n <= 0) n = 1;
1012
+ switch (numFormat) {
1013
+ case "DIGIT":
1014
+ return String(n);
1015
+ case "CIRCLED_DIGIT":
1016
+ return n <= 20 ? String.fromCodePoint(9312 + n - 1) : `(${n})`;
1017
+ case "HANGUL_SYLLABLE":
1018
+ return HANGUL_SYLLABLE_SEQ[(n - 1) % HANGUL_SYLLABLE_SEQ.length];
1019
+ case "CIRCLED_HANGUL_SYLLABLE":
1020
+ return n <= 14 ? String.fromCodePoint(12910 + n - 1) : HANGUL_SYLLABLE_SEQ[(n - 1) % 14];
1021
+ case "HANGUL_JAMO":
1022
+ return HANGUL_JAMO_SEQ[(n - 1) % HANGUL_JAMO_SEQ.length];
1023
+ case "CIRCLED_HANGUL_JAMO":
1024
+ return n <= 14 ? String.fromCodePoint(12896 + n - 1) : HANGUL_JAMO_SEQ[(n - 1) % 14];
1025
+ case "LATIN_CAPITAL":
1026
+ return String.fromCharCode(65 + (n - 1) % 26);
1027
+ case "LATIN_SMALL":
1028
+ return String.fromCharCode(97 + (n - 1) % 26);
1029
+ case "CIRCLED_LATIN_CAPITAL":
1030
+ return n <= 26 ? String.fromCodePoint(9398 + n - 1) : String.fromCharCode(65 + (n - 1) % 26);
1031
+ case "CIRCLED_LATIN_SMALL":
1032
+ return n <= 26 ? String.fromCodePoint(9424 + n - 1) : String.fromCharCode(97 + (n - 1) % 26);
1033
+ case "ROMAN_CAPITAL":
1034
+ return toRoman(n);
1035
+ case "ROMAN_SMALL":
1036
+ return toRoman(n).toLowerCase();
1037
+ default:
1038
+ return String(n);
1039
+ }
1040
+ }
1041
+ function resolveParaHeading(paraEl, ctx) {
1042
+ const sm = ctx.styleMap;
1043
+ if (!sm) return null;
1044
+ const prId = paraEl.getAttribute("paraPrIDRef");
1045
+ if (!prId) return null;
1046
+ const ref = sm.paraHeadings.get(prId);
1047
+ if (!ref) return null;
1048
+ if (ref.type === "BULLET") {
1049
+ const char = sm.bullets.get(ref.idRef);
1050
+ return char ? { prefix: char } : null;
1051
+ }
1052
+ const numId = ref.type === "OUTLINE" ? ctx.outlineNumId || "1" : ref.idRef;
1053
+ const level = Math.min(ref.level + 1, 10);
1054
+ const headingLevel = ref.type === "OUTLINE" ? Math.min(ref.level + 1, 6) : void 0;
1055
+ const numDef = sm.numberings.get(numId);
1056
+ if (!numDef) return headingLevel ? { headingLevel } : null;
1057
+ let counters = ctx.shared.numState.get(numId);
1058
+ if (!counters) {
1059
+ counters = new Array(11).fill(0);
1060
+ ctx.shared.numState.set(numId, counters);
1061
+ }
1062
+ const head = numDef.heads.get(level);
1063
+ counters[level] = counters[level] === 0 ? head?.start ?? 1 : counters[level] + 1;
1064
+ for (let l = level + 1; l <= 10; l++) counters[l] = 0;
1065
+ const fmtText = head?.text?.trim() || `^${level}.`;
1066
+ const prefix = fmtText.replace(/\^(10|[1-9])/g, (_, d) => {
1067
+ const lv = parseInt(d, 10);
1068
+ const refHead = numDef.heads.get(lv);
1069
+ const n = counters[lv] || refHead?.start || 1;
1070
+ return formatHeadNumber(n, refHead?.numFormat || "DIGIT");
1071
+ });
1072
+ return { prefix, headingLevel };
1073
+ }
907
1074
  async function parseHwpxDocument(buffer, options) {
908
1075
  precheckZipSize(buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
909
1076
  let zip;
@@ -940,7 +1107,7 @@ async function parseHwpxDocument(buffer, options) {
940
1107
  const pageFilter = options?.pages ? parsePageRange(options.pages, sectionPaths.length) : null;
941
1108
  const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
942
1109
  const blocks = [];
943
- const nestedTableCounter = { count: 0 };
1110
+ const shared = createSectionShared();
944
1111
  let parsedSections = 0;
945
1112
  for (let si = 0; si < sectionPaths.length; si++) {
946
1113
  if (pageFilter && !pageFilter.has(si + 1)) continue;
@@ -950,7 +1117,7 @@ async function parseHwpxDocument(buffer, options) {
950
1117
  const xml = await file.async("text");
951
1118
  decompressed.total += xml.length * 2;
952
1119
  if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new KordocError("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
953
- blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, nestedTableCounter));
1120
+ blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, shared));
954
1121
  parsedSections++;
955
1122
  options?.onProgress?.(parsedSections, totalTarget);
956
1123
  } catch (secErr) {
@@ -958,12 +1125,22 @@ async function parseHwpxDocument(buffer, options) {
958
1125
  warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
959
1126
  }
960
1127
  }
1128
+ applyPageText(blocks, shared);
961
1129
  const images = await extractImagesFromZip(zip, blocks, decompressed, warnings);
962
1130
  detectHwpxHeadings(blocks, styleMap);
963
1131
  const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
964
1132
  const markdown = blocksToMarkdown(blocks);
965
1133
  return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
966
1134
  }
1135
+ function applyPageText(blocks, shared) {
1136
+ const { headers, footers } = shared.pageText;
1137
+ if (headers.length > 0) {
1138
+ blocks.unshift(...headers.map((t) => ({ type: "paragraph", text: t, pageNumber: 1 })));
1139
+ }
1140
+ if (footers.length > 0) {
1141
+ blocks.push(...footers.map((t) => ({ type: "paragraph", text: t })));
1142
+ }
1143
+ }
967
1144
  function imageExtToMime(ext) {
968
1145
  switch (ext.toLowerCase()) {
969
1146
  case "jpg":
@@ -999,10 +1176,26 @@ function mimeToExt(mime) {
999
1176
  if (mime.includes("svg")) return "svg";
1000
1177
  return "bin";
1001
1178
  }
1179
+ function collectImageBlocks(blocks, out, ownerCell, depth = 0) {
1180
+ if (depth > MAX_XML_DEPTH) return;
1181
+ for (const block of blocks) {
1182
+ if (block.type === "image") {
1183
+ out.push({ block, ownerCell });
1184
+ } else if (block.type === "table" && block.table) {
1185
+ for (const row of block.table.cells) {
1186
+ for (const cell of row) {
1187
+ if (cell.blocks?.length) collectImageBlocks(cell.blocks, out, cell, depth + 1);
1188
+ }
1189
+ }
1190
+ }
1191
+ }
1192
+ }
1002
1193
  async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
1003
1194
  const images = [];
1004
1195
  let imageIndex = 0;
1005
- for (const block of blocks) {
1196
+ const imageBlocks = [];
1197
+ collectImageBlocks(blocks, imageBlocks);
1198
+ for (const { block, ownerCell } of imageBlocks) {
1006
1199
  if (block.type !== "image" || !block.text) continue;
1007
1200
  const ref = block.text;
1008
1201
  const candidates = [
@@ -1040,6 +1233,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
1040
1233
  images.push({ filename, data, mimeType });
1041
1234
  block.text = filename;
1042
1235
  block.imageData = { data, mimeType, filename: ref };
1236
+ if (ownerCell) ownerCell.text = ownerCell.text.replace(`![image](${ref})`, `![image](${filename})`);
1043
1237
  found = true;
1044
1238
  break;
1045
1239
  } catch (err) {
@@ -1050,6 +1244,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
1050
1244
  warnings?.push({ page: block.pageNumber, message: `\uC774\uBBF8\uC9C0 \uD30C\uC77C \uC5C6\uC74C: ${ref}`, code: "SKIPPED_IMAGE" });
1051
1245
  block.type = "paragraph";
1052
1246
  block.text = `[\uC774\uBBF8\uC9C0: ${ref}]`;
1247
+ if (ownerCell) ownerCell.text = ownerCell.text.replace(`![image](${ref})`, `[\uC774\uBBF8\uC9C0: ${ref}]`);
1053
1248
  }
1054
1249
  }
1055
1250
  return images;
@@ -1106,7 +1301,7 @@ function extractFromBrokenZip(buffer) {
1106
1301
  let totalDecompressed = 0;
1107
1302
  let entryCount = 0;
1108
1303
  let sectionNum = 0;
1109
- const nestedTableCounter = { count: 0 };
1304
+ const shared = createSectionShared();
1110
1305
  while (pos < data.length - 30) {
1111
1306
  if (data[pos] !== 80 || data[pos + 1] !== 75 || data[pos + 2] !== 3 || data[pos + 3] !== 4) {
1112
1307
  pos++;
@@ -1153,12 +1348,13 @@ function extractFromBrokenZip(buffer) {
1153
1348
  totalDecompressed += content.length * 2;
1154
1349
  if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new KordocError("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
1155
1350
  sectionNum++;
1156
- blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, nestedTableCounter));
1351
+ blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, shared));
1157
1352
  } catch {
1158
1353
  continue;
1159
1354
  }
1160
1355
  }
1161
1356
  if (blocks.length === 0) throw new KordocError("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
1357
+ applyPageText(blocks, shared);
1162
1358
  const markdown = blocksToMarkdown(blocks);
1163
1359
  return { markdown, blocks, warnings: warnings.length > 0 ? warnings : void 0 };
1164
1360
  }
@@ -1203,6 +1399,7 @@ function parseSectionPathsFromManifest(xml) {
1203
1399
  return Array.from(idToHref.entries()).filter(([id]) => isSectionId(id)).sort((a, b) => a[0].localeCompare(b[0])).map(([, href]) => href);
1204
1400
  }
1205
1401
  function detectHwpxHeadings(blocks, styleMap) {
1402
+ if (blocks.some((b) => b.type === "heading")) return;
1206
1403
  let baseFontSize = 0;
1207
1404
  const sizeFreq = /* @__PURE__ */ new Map();
1208
1405
  for (const b of blocks) {
@@ -1238,39 +1435,73 @@ function detectHwpxHeadings(blocks, styleMap) {
1238
1435
  }
1239
1436
  }
1240
1437
  }
1241
- function makeNestedTableMarker(counter, rows) {
1242
- counter.count++;
1243
- const firstRow = rows[0] ?? [];
1244
- const hint = firstRow.map((c) => c.text.trim().replace(/\n/g, " ")).filter(Boolean).join(" | ");
1245
- const hintChars = [...hint];
1246
- const truncated = hintChars.length > 60 ? hintChars.slice(0, 60).join("") + "\u2026" : hint;
1247
- return truncated ? `[\uC911\uCCA9 \uD14C\uC774\uBE14 #${counter.count}: ${truncated}]` : `[\uC911\uCCA9 \uD14C\uC774\uBE14 #${counter.count}]`;
1248
- }
1249
- function handleNestedTable(newTable, tableStack, blocks, ctx) {
1250
- const parentTable = tableStack.pop();
1251
- let nestedCols = 0;
1252
- for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
1253
- if (newTable.rows.length >= 3 && nestedCols >= 2) {
1254
- blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: ctx.sectionNum });
1255
- if (parentTable.cell) {
1256
- const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
1257
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker;
1438
+ function buildTableWithCellMeta(state) {
1439
+ const table = buildTable(state.rows);
1440
+ if (state.caption) table.caption = state.caption;
1441
+ const claimed = /* @__PURE__ */ new Set();
1442
+ for (const row of state.rows) {
1443
+ for (const src of row) {
1444
+ const needsBlocks = src.hasStructure && src.blocks && src.blocks.length > 0;
1445
+ if (!needsBlocks && !src.isHeader) continue;
1446
+ let target;
1447
+ const trimmed = src.text.trim();
1448
+ if (src.rowAddr !== void 0 && src.colAddr !== void 0) {
1449
+ const cand = table.cells[src.rowAddr]?.[src.colAddr];
1450
+ if (cand && cand.text === trimmed && !claimed.has(cand)) target = cand;
1451
+ }
1452
+ if (!target) {
1453
+ outer: for (const irRow of table.cells) {
1454
+ for (const cand of irRow) {
1455
+ if (!claimed.has(cand) && cand.text === trimmed && cand.colSpan === src.colSpan && cand.rowSpan === src.rowSpan) {
1456
+ target = cand;
1457
+ break outer;
1458
+ }
1459
+ }
1460
+ }
1461
+ }
1462
+ if (!target) continue;
1463
+ claimed.add(target);
1464
+ if (needsBlocks) target.blocks = src.blocks;
1465
+ if (src.isHeader) target.isHeader = true;
1258
1466
  }
1467
+ }
1468
+ return table;
1469
+ }
1470
+ function completeTable(newTable, tableStack, blocks, ctx) {
1471
+ const parentTable = tableStack.length > 0 ? tableStack.pop() : null;
1472
+ if (newTable.rows.length === 0) {
1473
+ if (newTable.caption) blocks.push({ type: "paragraph", text: newTable.caption, pageNumber: ctx.sectionNum });
1474
+ return parentTable;
1475
+ }
1476
+ const ir = buildTableWithCellMeta(newTable);
1477
+ const block = { type: "table", table: ir, pageNumber: ctx.sectionNum };
1478
+ if (parentTable?.cell) {
1479
+ const cell = parentTable.cell;
1480
+ (cell.blocks ??= []).push(block);
1481
+ cell.hasStructure = true;
1482
+ let flat = convertTableToText(newTable.rows);
1483
+ if (newTable.caption) flat = newTable.caption + (flat ? "\n" + flat : "");
1484
+ if (flat) cell.text += (cell.text ? "\n" : "") + flat;
1259
1485
  } else {
1260
- const nestedText = convertTableToText(newTable.rows);
1261
- if (parentTable.cell) {
1262
- const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
1263
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker + "\n" + nestedText;
1264
- }
1486
+ blocks.push(block);
1265
1487
  }
1266
1488
  return parentTable;
1267
1489
  }
1268
- function parseSectionXml(xml, styleMap, warnings, sectionNum, counter) {
1490
+ function parseSectionXml(xml, styleMap, warnings, sectionNum, shared) {
1269
1491
  const parser = createXmlParser(warnings);
1270
1492
  const doc = parser.parseFromString(stripDtd(xml), "text/xml");
1271
1493
  if (!doc.documentElement) return [];
1494
+ const ctx = { styleMap, warnings, sectionNum, shared: shared ?? createSectionShared() };
1495
+ ctx.shared.track.deleteDepth = 0;
1496
+ for (const tagName of ["hp:secPr", "secPr"]) {
1497
+ const els = doc.getElementsByTagName(tagName);
1498
+ if (els.length > 0) {
1499
+ const v = els[0].getAttribute("outlineShapeIDRef");
1500
+ if (v) ctx.outlineNumId = v;
1501
+ break;
1502
+ }
1503
+ }
1272
1504
  const blocks = [];
1273
- const ctx = { styleMap, warnings, sectionNum, counter };
1274
1505
  walkSection(doc.documentElement, blocks, null, [], ctx);
1275
1506
  return blocks;
1276
1507
  }
@@ -1306,18 +1537,16 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
1306
1537
  if (tableCtx) tableStack.push(tableCtx);
1307
1538
  const newTable = { rows: [], currentRow: [], cell: null };
1308
1539
  walkSection(el, blocks, newTable, tableStack, ctx, depth + 1);
1309
- if (newTable.rows.length > 0) {
1310
- if (tableStack.length > 0) {
1311
- tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
1312
- } else {
1313
- blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: ctx.sectionNum });
1314
- tableCtx = null;
1315
- }
1316
- } else {
1317
- tableCtx = tableStack.length > 0 ? tableStack.pop() : null;
1318
- }
1540
+ tableCtx = completeTable(newTable, tableStack, blocks, ctx);
1319
1541
  break;
1320
1542
  }
1543
+ // 표/도표 캡션 — IRTable.caption으로 보존 (v3.0, 기존 무음 드롭 수정)
1544
+ case "caption":
1545
+ if (tableCtx) {
1546
+ const capText = collectSubListText(el, ctx);
1547
+ if (capText) tableCtx.caption = (tableCtx.caption ? tableCtx.caption + "\n" : "") + capText;
1548
+ }
1549
+ break;
1321
1550
  case "tr":
1322
1551
  if (tableCtx) {
1323
1552
  tableCtx.currentRow = [];
@@ -1329,6 +1558,7 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
1329
1558
  case "tc":
1330
1559
  if (tableCtx) {
1331
1560
  tableCtx.cell = { text: "", colSpan: 1, rowSpan: 1 };
1561
+ if (el.getAttribute("header") === "1" || el.getAttribute("header") === "true") tableCtx.cell.isHeader = true;
1332
1562
  walkSection(el, blocks, tableCtx, tableStack, ctx, depth + 1);
1333
1563
  if (tableCtx.cell) {
1334
1564
  tableCtx.currentRow.push(tableCtx.cell);
@@ -1355,30 +1585,52 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
1355
1585
  }
1356
1586
  break;
1357
1587
  case "p": {
1358
- const { text, href, footnote, style } = extractParagraphInfo(el, ctx.styleMap);
1588
+ const { text: rawText, href, footnote, style } = extractParagraphInfo(el, ctx.styleMap, ctx);
1589
+ let text = rawText;
1590
+ let headingLevel;
1591
+ if (text) {
1592
+ const ph = resolveParaHeading(el, ctx);
1593
+ if (ph?.prefix) text = ph.prefix + " " + text;
1594
+ headingLevel = ph?.headingLevel;
1595
+ }
1359
1596
  if (text) {
1360
1597
  if (tableCtx?.cell) {
1361
- tableCtx.cell.text += (tableCtx.cell.text ? "\n" : "") + text;
1598
+ const cell = tableCtx.cell;
1599
+ if (footnote) text += ` (\uC8FC: ${footnote})`;
1600
+ cell.text += (cell.text ? "\n" : "") + text;
1601
+ (cell.blocks ??= []).push({ type: "paragraph", text, pageNumber: ctx.sectionNum });
1362
1602
  } else if (!tableCtx) {
1363
- const block = { type: "paragraph", text, pageNumber: ctx.sectionNum };
1603
+ const block = { type: headingLevel ? "heading" : "paragraph", text, pageNumber: ctx.sectionNum };
1604
+ if (headingLevel) block.level = headingLevel;
1364
1605
  if (style) block.style = style;
1365
1606
  if (href) block.href = href;
1366
1607
  if (footnote) block.footnoteText = footnote;
1367
1608
  blocks.push(block);
1609
+ } else {
1610
+ blocks.push({ type: "paragraph", text, pageNumber: ctx.sectionNum });
1368
1611
  }
1369
1612
  }
1370
1613
  tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, ctx, depth + 1);
1371
1614
  break;
1372
1615
  }
1373
- // 이미지/그림경로 추출 또는 경고
1616
+ // 이미지/그림/글상자이미지·텍스트·캡션 병행 추출
1374
1617
  case "pic":
1375
1618
  case "shape":
1376
1619
  case "drawingObject": {
1377
- const imgRef = extractImageRef(el);
1378
- if (imgRef) {
1379
- blocks.push({ type: "image", text: imgRef, pageNumber: ctx.sectionNum });
1380
- } else if (ctx.warnings && ctx.sectionNum) {
1381
- ctx.warnings.push({ page: ctx.sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
1620
+ if (tableCtx?.cell) {
1621
+ const sink = [];
1622
+ handleShape(el, sink, ctx);
1623
+ mergeBlocksIntoCell(tableCtx.cell, sink);
1624
+ } else {
1625
+ handleShape(el, blocks, ctx);
1626
+ }
1627
+ break;
1628
+ }
1629
+ // 메모 — 본문 혼입 차단 (v3.0)
1630
+ case "memogroup":
1631
+ case "memo": {
1632
+ if (ctx.warnings && extractTextFromNode(el)) {
1633
+ ctx.warnings.push({ page: ctx.sectionNum, message: "\uBA54\uBAA8 \uD14D\uC2A4\uD2B8 \uBCF8\uBB38 \uC81C\uC678: memogroup", code: "HIDDEN_TEXT_FILTERED" });
1382
1634
  }
1383
1635
  break;
1384
1636
  }
@@ -1388,6 +1640,73 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
1388
1640
  }
1389
1641
  }
1390
1642
  }
1643
+ function handleShape(el, sink, ctx) {
1644
+ const imgRef = extractImageRef(el);
1645
+ const drawTextChild = findDescendant(el, "drawText");
1646
+ if (imgRef) {
1647
+ const block = { type: "image", text: imgRef, pageNumber: ctx.sectionNum };
1648
+ const alt = userShapeComment(el);
1649
+ if (alt) block.footnoteText = alt;
1650
+ sink.push(block);
1651
+ }
1652
+ if (drawTextChild) {
1653
+ extractDrawTextBlocks(drawTextChild, sink, ctx);
1654
+ }
1655
+ const capEl = findChildByLocalName(el, "caption");
1656
+ if (capEl) {
1657
+ const capText = collectSubListText(capEl, ctx);
1658
+ if (capText) sink.push({ type: "paragraph", text: capText, pageNumber: ctx.sectionNum });
1659
+ }
1660
+ if (!imgRef && !drawTextChild && ctx.warnings && ctx.sectionNum) {
1661
+ const localTag = (el.tagName || el.localName || "").replace(/^[^:]+:/, "");
1662
+ ctx.warnings.push({ page: ctx.sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
1663
+ }
1664
+ }
1665
+ function userShapeComment(el) {
1666
+ const commentEl = findChildByLocalName(el, "shapeComment");
1667
+ if (!commentEl) return void 0;
1668
+ const text = extractTextFromNode(commentEl);
1669
+ if (!text) return void 0;
1670
+ if (/^그림입니다/.test(text)) return void 0;
1671
+ if (/^(?:모서리가 둥근 |둥근 )?[^\n]{1,20}입니다\.?$/.test(text)) return void 0;
1672
+ return text;
1673
+ }
1674
+ function mergeBlocksIntoCell(cell, sink) {
1675
+ for (const b of sink) {
1676
+ if ((b.type === "paragraph" || b.type === "heading") && b.text) {
1677
+ cell.text += (cell.text ? "\n" : "") + b.text;
1678
+ (cell.blocks ??= []).push(b);
1679
+ } else if (b.type === "image" || b.type === "table") {
1680
+ if (b.type === "image" && b.text) {
1681
+ cell.text += (cell.text ? "\n" : "") + `![image](${b.text})`;
1682
+ }
1683
+ ;
1684
+ (cell.blocks ??= []).push(b);
1685
+ cell.hasStructure = true;
1686
+ }
1687
+ }
1688
+ }
1689
+ function collectSubListText(el, ctx, depth = 0) {
1690
+ if (depth > 10) return "";
1691
+ const parts = [];
1692
+ const children = el.childNodes;
1693
+ if (!children) return "";
1694
+ for (let i = 0; i < children.length; i++) {
1695
+ const ch = children[i];
1696
+ if (ch.nodeType !== 1) continue;
1697
+ const tag = (ch.tagName || ch.localName || "").replace(/^[^:]+:/, "");
1698
+ if (tag === "p" || tag === "para") {
1699
+ const t = extractParagraphInfo(ch, ctx.styleMap, ctx).text;
1700
+ if (t) parts.push(t);
1701
+ } else if (tag === "tbl") {
1702
+ continue;
1703
+ } else {
1704
+ const t = collectSubListText(ch, ctx, depth + 1);
1705
+ if (t) parts.push(t);
1706
+ }
1707
+ }
1708
+ return parts.join("\n").trim();
1709
+ }
1391
1710
  function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
1392
1711
  if (depth > MAX_XML_DEPTH) return tableCtx;
1393
1712
  const children = node.childNodes;
@@ -1405,34 +1724,25 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth =
1405
1724
  if (tableCtx) tableStack.push(tableCtx);
1406
1725
  const newTable = { rows: [], currentRow: [], cell: null };
1407
1726
  walkSection(el, blocks, newTable, tableStack, ctx, d + 1);
1408
- if (newTable.rows.length > 0) {
1409
- if (tableStack.length > 0) {
1410
- tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
1411
- } else {
1412
- blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: ctx.sectionNum });
1413
- tableCtx = null;
1414
- }
1415
- } else {
1416
- tableCtx = tableStack.length > 0 ? tableStack.pop() : null;
1417
- }
1727
+ tableCtx = completeTable(newTable, tableStack, blocks, ctx);
1418
1728
  } else if (localTag === "pic" || localTag === "shape" || localTag === "drawingObject") {
1419
- const drawTextChild = findDescendant(el, "drawText");
1420
- if (drawTextChild) {
1421
- extractDrawTextBlocks(drawTextChild, blocks, ctx.styleMap, ctx.sectionNum);
1729
+ if (tableCtx?.cell) {
1730
+ const sink = [];
1731
+ handleShape(el, sink, ctx);
1732
+ mergeBlocksIntoCell(tableCtx.cell, sink);
1422
1733
  } else {
1423
- const imgRef = extractImageRef(el);
1424
- if (imgRef) {
1425
- blocks.push({ type: "image", text: imgRef, pageNumber: ctx.sectionNum });
1426
- } else if (ctx.warnings && ctx.sectionNum) {
1427
- ctx.warnings.push({ page: ctx.sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
1428
- }
1734
+ handleShape(el, blocks, ctx);
1429
1735
  }
1430
1736
  } else if (localTag === "drawText") {
1431
- extractDrawTextBlocks(el, blocks, ctx.styleMap, ctx.sectionNum);
1737
+ if (tableCtx?.cell) {
1738
+ const sink = [];
1739
+ extractDrawTextBlocks(el, sink, ctx);
1740
+ mergeBlocksIntoCell(tableCtx.cell, sink);
1741
+ } else {
1742
+ extractDrawTextBlocks(el, blocks, ctx);
1743
+ }
1432
1744
  } else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
1433
1745
  walkChildren(el, d + 1);
1434
- } else if (localTag === "run") {
1435
- tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, ctx, depth + 1);
1436
1746
  }
1437
1747
  }
1438
1748
  };
@@ -1453,7 +1763,7 @@ function findDescendant(node, targetTag, depth = 0) {
1453
1763
  }
1454
1764
  return null;
1455
1765
  }
1456
- function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
1766
+ function extractDrawTextBlocks(drawTextNode, blocks, ctx) {
1457
1767
  const children = drawTextNode.childNodes;
1458
1768
  if (!children) return;
1459
1769
  for (let i = 0; i < children.length; i++) {
@@ -1462,29 +1772,136 @@ function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
1462
1772
  const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1463
1773
  if (tag === "subList" || tag === "p" || tag === "para") {
1464
1774
  if (tag === "subList") {
1465
- extractDrawTextBlocks(child, blocks, styleMap, sectionNum);
1775
+ extractDrawTextBlocks(child, blocks, ctx);
1466
1776
  } else {
1467
- const info = extractParagraphInfo(child, styleMap);
1468
- const text = info.text.trim();
1777
+ const info = extractParagraphInfo(child, ctx.styleMap, ctx);
1778
+ let text = info.text.trim();
1469
1779
  if (text) {
1470
- blocks.push({ type: "paragraph", text, style: info.style ?? void 0, pageNumber: sectionNum });
1780
+ const ph = resolveParaHeading(child, ctx);
1781
+ if (ph?.prefix) text = ph.prefix + " " + text;
1782
+ const block = { type: "paragraph", text, style: info.style ?? void 0, pageNumber: ctx.sectionNum };
1783
+ if (info.href) block.href = info.href;
1784
+ if (info.footnote) block.footnoteText = info.footnote;
1785
+ blocks.push(block);
1471
1786
  }
1787
+ walkParagraphChildren(child, blocks, null, [], ctx);
1472
1788
  }
1473
1789
  }
1474
1790
  }
1475
1791
  }
1476
- function extractParagraphInfo(para, styleMap) {
1792
/**
 * Extract the link target from a field-begin element.
 *
 * Only elements whose `type` attribute equals "HYPERLINK" (case-insensitive)
 * are considered. The URL is read from the first `stringParam` child of the
 * `parameters` element whose `name` attribute is "Path", has non-blank text,
 * and survives `sanitizeHref`.
 *
 * @param {Element} fieldBegin - field-begin DOM element.
 * @returns {string|undefined} sanitized URL, or undefined when none is found.
 */
function extractHyperlinkHref(fieldBegin) {
  const fieldType = fieldBegin.getAttribute("type") || "";
  if (fieldType.toUpperCase() !== "HYPERLINK") return void 0;
  const paramNodes = findChildByLocalName(fieldBegin, "parameters")?.childNodes;
  if (!paramNodes) return void 0;
  for (let idx = 0; idx < paramNodes.length; idx++) {
    const node = paramNodes[idx];
    if (node.nodeType !== 1) continue;
    // Compare on the local name: strip any namespace prefix.
    const localName = (node.tagName || node.localName || "").replace(/^[^:]+:/, "");
    const isPathParam = localName === "stringParam" && node.getAttribute("name") === "Path";
    if (!isPathParam) continue;
    const rawUrl = (node.textContent || "").trim();
    if (!rawUrl) continue;
    // Drop one leading scheme when the value starts with a doubled "http(s)://http(s)://".
    const safe = sanitizeHref(rawUrl.replace(/^https?:\/\/(?=https?:\/\/)/i, ""));
    if (safe) return safe;
  }
  return void 0;
}
1811
/**
 * Report whether the parser is currently inside a tracked-change deletion.
 *
 * @param {object} [ctx] - parse context; reads `ctx.shared.track.deleteDepth`.
 * @returns {boolean} true when a context is given and its delete depth is above zero.
 */
function isInDeletedRange(ctx) {
  if (ctx == null) return false;
  const depth = ctx.shared.track.deleteDepth;
  return depth != null && depth > 0;
}
1814
+ function extractParagraphInfo(para, styleMap, ctx) {
1477
1815
  let text = "";
1478
1816
  let href;
1479
1817
  let footnote;
1480
1818
  let charPrId;
1819
+ const handleCtrl = (ctrlEl) => {
1820
+ const kids2 = ctrlEl.childNodes;
1821
+ if (!kids2) return;
1822
+ for (let j = 0; j < kids2.length; j++) {
1823
+ const k = kids2[j];
1824
+ if (k.nodeType !== 1) continue;
1825
+ const ktag = (k.tagName || k.localName || "").replace(/^[^:]+:/, "");
1826
+ switch (ktag) {
1827
+ // 머리말/꼬리말 — 문서당 1회 수집, 본문 앞/뒤 배치
1828
+ case "header":
1829
+ case "footer": {
1830
+ if (!ctx) break;
1831
+ const t = collectSubListText(k, ctx);
1832
+ if (t) {
1833
+ const bucket = ktag === "header" ? ctx.shared.pageText.headers : ctx.shared.pageText.footers;
1834
+ if (!bucket.includes(t)) bucket.push(t);
1835
+ }
1836
+ break;
1837
+ }
1838
+ // 각주/미주 — 해당 문단의 footnote로 인라인 보존
1839
+ case "footNote":
1840
+ case "endNote": {
1841
+ const noteText = extractTextFromNode(k);
1842
+ if (noteText) footnote = (footnote ? footnote + "; " : "") + noteText;
1843
+ break;
1844
+ }
1845
+ // 하이퍼링크 — fieldBegin type=HYPERLINK의 Path 파라미터
1846
+ case "fieldBegin": {
1847
+ const url = extractHyperlinkHref(k);
1848
+ if (url && !href) href = url;
1849
+ break;
1850
+ }
1851
+ case "fieldEnd":
1852
+ break;
1853
+ // 변경추적 — 삭제 구간(deleteBegin~End)의 텍스트는 출력 제외 (최종본 상태 재현)
1854
+ case "deleteBegin":
1855
+ if (ctx) ctx.shared.track.deleteDepth++;
1856
+ break;
1857
+ case "deleteEnd":
1858
+ if (ctx && ctx.shared.track.deleteDepth > 0) ctx.shared.track.deleteDepth--;
1859
+ break;
1860
+ case "insertBegin":
1861
+ case "insertEnd":
1862
+ break;
1863
+ // 삽입분은 최종본에 포함
1864
+ // 숨은 설명 — 본문 혼입 차단
1865
+ case "hiddenComment": {
1866
+ if (ctx?.warnings && extractTextFromNode(k)) {
1867
+ ctx.warnings.push({ page: ctx.sectionNum, message: "\uC228\uC740 \uC124\uBA85 \uD14D\uC2A4\uD2B8 \uC81C\uC678: hiddenComment", code: "HIDDEN_TEXT_FILTERED" });
1868
+ }
1869
+ break;
1870
+ }
1871
+ // 콘텐츠 없는 제어 요소 — 스킵
1872
+ case "bookmark":
1873
+ case "pageNum":
1874
+ case "pageNumCtrl":
1875
+ case "pageHiding":
1876
+ case "newNum":
1877
+ case "autoNum":
1878
+ case "indexmark":
1879
+ case "colPr":
1880
+ break;
1881
+ // 미지원 요소 — 텍스트를 가졌으면 무음 손실 대신 경고
1882
+ default: {
1883
+ if (ctx?.warnings && extractTextFromNode(k)) {
1884
+ ctx.warnings.push({ page: ctx.sectionNum, message: `\uBBF8\uC9C0\uC6D0 \uC81C\uC5B4 \uC694\uC18C\uC758 \uD14D\uC2A4\uD2B8 \uC190\uC2E4: ${ktag}`, code: "UNSUPPORTED_ELEMENT" });
1885
+ }
1886
+ }
1887
+ }
1888
+ }
1889
+ };
1481
1890
  const walk = (node) => {
1482
1891
  const children = node.childNodes;
1483
1892
  if (!children) return;
1484
1893
  for (let i = 0; i < children.length; i++) {
1485
1894
  const child = children[i];
1486
1895
  if (child.nodeType === 3) {
1487
- text += child.textContent || "";
1896
+ const t = child.textContent || "";
1897
+ if (isInDeletedRange(ctx)) {
1898
+ if (t && ctx && !ctx.shared.track.warned) {
1899
+ ctx.shared.track.warned = true;
1900
+ ctx.warnings?.push({ page: ctx.sectionNum, message: "\uBCC0\uACBD\uCD94\uC801 \uC0AD\uC81C \uD14D\uC2A4\uD2B8 \uCD9C\uB825 \uC81C\uC678", code: "HIDDEN_TEXT_FILTERED" });
1901
+ }
1902
+ } else {
1903
+ text += t;
1904
+ }
1488
1905
  continue;
1489
1906
  }
1490
1907
  if (child.nodeType !== 1) continue;
@@ -1506,6 +1923,10 @@ function extractParagraphInfo(para, styleMap) {
1506
1923
  case "br":
1507
1924
  if ((child.getAttribute("type") || "line") === "line") text += "\n";
1508
1925
  break;
1926
+ case "lineBreak":
1927
+ text += "\n";
1928
+ break;
1929
+ // 강제 줄바꿈 — ref 추출기·소스맵 스캐너와 동일 모델
1509
1930
  case "fwSpace":
1510
1931
  case "hwSpace":
1511
1932
  text += " ";
@@ -1532,9 +1953,26 @@ function extractParagraphInfo(para, styleMap) {
1532
1953
  if (noteText) footnote = (footnote ? footnote + "; " : "") + noteText;
1533
1954
  break;
1534
1955
  }
1535
- // 제어 요소 — 필드, 컨트롤, 매개변수 등 스킵
1956
+ // 제어 요소 — 선별 순회 (머리말/꼬리말/각주/하이퍼링크/변경추적, v3.0)
1536
1957
  case "ctrl":
1537
- case "fieldBegin":
1958
+ handleCtrl(child);
1959
+ break;
1960
+ // run 직계 fieldBegin (비표준 경로) — 하이퍼링크 URL만 추출
1961
+ case "fieldBegin": {
1962
+ const url = extractHyperlinkHref(child);
1963
+ if (url && !href) href = url;
1964
+ break;
1965
+ }
1966
+ // run 직계 변경추적 마커 (비표준 경로)
1967
+ case "deleteBegin":
1968
+ if (ctx) ctx.shared.track.deleteDepth++;
1969
+ break;
1970
+ case "deleteEnd":
1971
+ if (ctx && ctx.shared.track.deleteDepth > 0) ctx.shared.track.deleteDepth--;
1972
+ break;
1973
+ case "insertBegin":
1974
+ case "insertEnd":
1975
+ break;
1538
1976
  case "fieldEnd":
1539
1977
  case "parameters":
1540
1978
  case "stringParam":
@@ -1548,7 +1986,7 @@ function extractParagraphInfo(para, styleMap) {
1548
1986
  case "linesegarray":
1549
1987
  case "lineseg":
1550
1988
  // 레이아웃 정보
1551
- // 도형/이미지 요소 — 대체텍스트("사각형입니다." 등) 누출 방지
1989
+ // 도형/이미지 요소 — 대체텍스트("사각형입니다." 등) 누출 방지 (walkParagraphChildren에서 처리)
1552
1990
  case "pic":
1553
1991
  case "shape":
1554
1992
  case "drawingObject":
@@ -1635,8 +2073,14 @@ var TAG_CHAR_SHAPE = 68;
1635
2073
  var TAG_CTRL_HEADER = 71;
1636
2074
  var TAG_LIST_HEADER = 72;
1637
2075
  var TAG_TABLE = 77;
2076
+ var TAG_SHAPE_COMPONENT = 76;
2077
+ var TAG_SHAPE_COMPONENT_PICTURE = 85;
2078
+ var TAG_SHAPE_COMPONENT_CONTAINER = 86;
1638
2079
  var TAG_EQEDIT = 88;
2080
+ var TAG_BIN_DATA = 18;
1639
2081
  var TAG_DOC_CHAR_SHAPE = 21;
2082
+ var TAG_NUMBERING = 23;
2083
+ var TAG_BULLET = 24;
1640
2084
  var TAG_DOC_PARA_SHAPE = 25;
1641
2085
  var TAG_DOC_STYLE = 26;
1642
2086
  var CHAR_LINE = 0;
@@ -1692,15 +2136,76 @@ function parseFileHeader(data) {
1692
2136
  flags: data.readUInt32LE(36)
1693
2137
  };
1694
2138
  }
2139
/**
 * Read a length-prefixed UTF-16LE string from a buffer.
 *
 * Layout read here: a 16-bit little-endian count of UTF-16 code units,
 * followed by that many 2-byte units.
 *
 * @param {Buffer} data - record payload.
 * @param {number} offset - byte offset of the length prefix.
 * @returns {{value: string, next: number}} decoded text and the offset just
 *   past what was consumed. When the prefix itself does not fit, `next` is
 *   `data.length`; when the string is empty or truncated, `value` is "" and
 *   `next` points just past the prefix.
 */
function readHwpString(data, offset) {
  const total = data.length;
  if (offset + 2 > total) return { value: "", next: total };
  const unitCount = data.readUInt16LE(offset);
  const bodyStart = offset + 2;
  const bodyEnd = bodyStart + unitCount * 2;
  const readable = unitCount !== 0 && bodyEnd <= total;
  if (!readable) return { value: "", next: bodyStart };
  const value = data.toString("utf16le", bodyStart, bodyEnd);
  return { value, next: bodyEnd };
}
1695
2147
  function parseDocInfo(records) {
1696
2148
  const charShapes = [];
1697
2149
  const paraShapes = [];
1698
2150
  const styles = [];
2151
+ const binData = [];
2152
+ const numberings = [];
2153
+ const bullets = [];
1699
2154
  for (const rec of records) {
1700
2155
  if (rec.tagId === TAG_DOC_PARA_SHAPE && rec.data.length >= 4) {
1701
- const flags = rec.data.readUInt32LE(0);
1702
- const outlineLevel = flags >> 25 & 7;
1703
- paraShapes.push({ outlineLevel });
2156
+ const attr1 = rec.data.readUInt32LE(0);
2157
+ const headType = attr1 >>> 23 & 3;
2158
+ const paraLevel = attr1 >>> 25 & 7;
2159
+ const numberingId = rec.data.length >= 32 ? rec.data.readUInt16LE(30) : 0;
2160
+ paraShapes.push({ headType, paraLevel, numberingId });
2161
+ }
2162
+ if (rec.tagId === TAG_BIN_DATA && rec.data.length >= 2) {
2163
+ const attr = rec.data.readUInt16LE(0);
2164
+ const typeBits = attr & 15;
2165
+ if (typeBits === 0) {
2166
+ binData.push({ kind: "link", storageId: 0, extension: "" });
2167
+ } else {
2168
+ const storageId = rec.data.length >= 4 ? rec.data.readUInt16LE(2) : 0;
2169
+ const { value: extension } = readHwpString(rec.data, 4);
2170
+ binData.push({ kind: typeBits === 2 ? "storage" : "embed", storageId, extension });
2171
+ }
2172
+ }
2173
+ if (rec.tagId === TAG_NUMBERING && rec.data.length >= 14) {
2174
+ const levelFormats = [];
2175
+ const numberFormats = [];
2176
+ const startNumbers = [1, 1, 1, 1, 1, 1, 1];
2177
+ let offset = 0;
2178
+ for (let level = 0; level < 7; level++) {
2179
+ if (offset + 12 > rec.data.length) {
2180
+ levelFormats.push("");
2181
+ numberFormats.push(0);
2182
+ continue;
2183
+ }
2184
+ const attr = rec.data.readUInt32LE(offset);
2185
+ numberFormats.push(attr >>> 5 & 15);
2186
+ offset += 12;
2187
+ const { value, next } = readHwpString(rec.data, offset);
2188
+ levelFormats.push(value);
2189
+ offset = next;
2190
+ }
2191
+ let baseStart = 1;
2192
+ if (offset + 2 <= rec.data.length) {
2193
+ baseStart = rec.data.readUInt16LE(offset) || 1;
2194
+ offset += 2;
2195
+ }
2196
+ for (let level = 0; level < 7; level++) {
2197
+ if (offset + 4 <= rec.data.length) {
2198
+ startNumbers[level] = rec.data.readUInt32LE(offset) || 1;
2199
+ offset += 4;
2200
+ } else {
2201
+ startNumbers[level] = baseStart;
2202
+ }
2203
+ }
2204
+ numberings.push({ levelFormats, numberFormats, startNumbers });
2205
+ }
2206
+ if (rec.tagId === TAG_BULLET && rec.data.length >= 14) {
2207
+ const code = rec.data.readUInt16LE(12);
2208
+ bullets.push({ char: code > 0 ? String.fromCharCode(code) : "\u2022" });
1704
2209
  }
1705
2210
  if (rec.tagId === TAG_DOC_CHAR_SHAPE && rec.data.length >= 18) {
1706
2211
  if (rec.data.length >= 50) {
@@ -1731,7 +2236,7 @@ function parseDocInfo(records) {
1731
2236
  }
1732
2237
  const type = offset < rec.data.length ? rec.data.readUInt8(offset) : 0;
1733
2238
  offset += 1;
1734
- offset += 2;
2239
+ offset += 1;
1735
2240
  offset += 2;
1736
2241
  const paraShapeId = offset + 2 <= rec.data.length ? rec.data.readUInt16LE(offset) : 0;
1737
2242
  offset += 2;
@@ -1741,11 +2246,25 @@ function parseDocInfo(records) {
1741
2246
  }
1742
2247
  }
1743
2248
  }
1744
- return { charShapes, paraShapes, styles };
2249
+ return { charShapes, paraShapes, styles, binData, numberings, bullets };
2250
+ }
2251
/**
 * Build a fresh accumulator for paragraph text extraction.
 *
 * @returns {{text: string, ctrlIdx: number, fieldStack: Array, fieldRanges: Array}}
 *   empty text, a zero control index, and new (unshared) field arrays.
 */
function createParaTextState() {
  const state = {
    text: "",
    ctrlIdx: 0,
    fieldStack: [],
    fieldRanges: [],
  };
  return state;
}
1746
- function extractTextWithControls(data, resolveControl) {
2254
/**
 * Tell whether a control character code falls in one of the "extended"
 * ranges: 1-3, 11-12, 14-18 or 21-23.
 *
 * @param {number} ch - 16-bit character code read from paragraph text.
 * @returns {boolean} true when `ch` lies in any of those ranges.
 */
function isExtendedOnlyCtrlChar(ch) {
  if (ch >= 1 && ch <= 3) return true;
  if (ch >= 11 && ch <= 12) return true;
  if (ch >= 14 && ch <= 18) return true;
  return ch >= 21 && ch <= 23;
}
2257
+ function appendParaText(state, data, resolveControl) {
1747
2258
  let result = "";
1748
2259
  let i = 0;
2260
+ const base = state.text.length;
2261
+ const resolveAt = (byteOffset, extended) => {
2262
+ const ctrlId = data.readUInt32LE(byteOffset);
2263
+ const idx = extended ? state.ctrlIdx : -1;
2264
+ const replacement = resolveControl?.(idx, ctrlId);
2265
+ if (replacement) result += replacement;
2266
+ if (extended) state.ctrlIdx++;
2267
+ };
1749
2268
  while (i + 1 < data.length) {
1750
2269
  const ch = data.readUInt16LE(i);
1751
2270
  i += 2;
@@ -1756,9 +2275,7 @@ function extractTextWithControls(data, resolveControl) {
1756
2275
  break;
1757
2276
  case CHAR_SECTION_BREAK: {
1758
2277
  if (i + 16 <= data.length && data.readUInt16LE(i) === 11) {
1759
- const ctrlId = data.subarray(i + 2, i + 6).toString("ascii");
1760
- const replacement = resolveControl?.(ctrlId);
1761
- if (replacement) result += replacement;
2278
+ resolveAt(i + 2, true);
1762
2279
  i += 16;
1763
2280
  break;
1764
2281
  }
@@ -1790,12 +2307,18 @@ function extractTextWithControls(data, resolveControl) {
1790
2307
  break;
1791
2308
  default:
1792
2309
  if (ch >= 1 && ch <= 31) {
1793
- const isExtended = ch >= 1 && ch <= 3 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= 18 || ch >= 21 && ch <= 23;
2310
+ const isExtended = isExtendedOnlyCtrlChar(ch);
1794
2311
  const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
1795
2312
  if ((isExtended || isInline) && i + 14 <= data.length) {
1796
- const ctrlId = data.subarray(i, i + 4).toString("ascii");
1797
- const replacement = resolveControl?.(ctrlId);
1798
- if (replacement) result += replacement;
2313
+ if (ch === 3) {
2314
+ state.fieldStack.push({ start: base + result.length, ctrlIdx: state.ctrlIdx });
2315
+ } else if (ch === 4) {
2316
+ const open = state.fieldStack.pop();
2317
+ if (open) {
2318
+ state.fieldRanges.push({ start: open.start, end: base + result.length, ctrlIdx: open.ctrlIdx });
2319
+ }
2320
+ }
2321
+ resolveAt(i, isExtended);
1799
2322
  i += 14;
1800
2323
  }
1801
2324
  } else if (ch >= 32) {
@@ -1813,7 +2336,7 @@ function extractTextWithControls(data, resolveControl) {
1813
2336
  break;
1814
2337
  }
1815
2338
  }
1816
- return result;
2339
+ state.text += result;
1817
2340
  }
1818
2341
  function extractEquationText(data) {
1819
2342
  if (data.length < 6) return null;
@@ -1825,100 +2348,407 @@ function extractEquationText(data) {
1825
2348
  return equation || null;
1826
2349
  }
1827
2350
 
1828
- // src/hwp5/aes.ts
1829
- var S_BOX = new Uint8Array([
1830
- 99,
1831
- 124,
1832
- 119,
1833
- 123,
1834
- 242,
1835
- 107,
1836
- 111,
1837
- 197,
1838
- 48,
1839
- 1,
1840
- 103,
1841
- 43,
1842
- 254,
1843
- 215,
1844
- 171,
1845
- 118,
1846
- 202,
1847
- 130,
1848
- 201,
1849
- 125,
1850
- 250,
1851
- 89,
1852
- 71,
1853
- 240,
1854
- 173,
1855
- 212,
1856
- 162,
1857
- 175,
1858
- 156,
1859
- 164,
1860
- 114,
1861
- 192,
1862
- 183,
1863
- 253,
1864
- 147,
1865
- 38,
1866
- 54,
1867
- 63,
1868
- 247,
1869
- 204,
1870
- 52,
1871
- 165,
1872
- 229,
1873
- 241,
1874
- 113,
1875
- 216,
1876
- 49,
1877
- 21,
1878
- 4,
1879
- 199,
1880
- 35,
1881
- 195,
1882
- 24,
1883
- 150,
1884
- 5,
1885
- 154,
1886
- 7,
1887
- 18,
1888
- 128,
1889
- 226,
1890
- 235,
1891
- 39,
1892
- 178,
1893
- 117,
1894
- 9,
1895
- 131,
1896
- 44,
1897
- 26,
1898
- 27,
1899
- 110,
1900
- 90,
1901
- 160,
1902
- 82,
1903
- 59,
1904
- 214,
1905
- 179,
1906
- 41,
1907
- 227,
1908
- 47,
1909
- 132,
1910
- 83,
1911
- 209,
1912
- 0,
1913
- 237,
1914
- 32,
1915
- 252,
1916
- 177,
1917
- 91,
1918
- 106,
1919
- 203,
1920
- 190,
1921
- 57,
2351
+ // src/hwp5/numbering.ts
2352
/**
 * Running counters for numbered paragraphs, tracked per numbering definition.
 *
 * Seven levels (0-6) are kept. When the active numbering id changes, the
 * counters of the previous id are stored (unless that id is 0) and the
 * counters of the new id are restored if it was seen before.
 */
var NumberingState = class {
  currentId = 0;
  counters = [0, 0, 0, 0, 0, 0, 0];
  history = new Map();

  /**
   * Process a numbered paragraph: update the counters, then return a
   * snapshot of the per-level counters.
   *
   * @param {number} numberingId - id of the numbering definition in use.
   * @param {number} level - paragraph level; clamped to 0..6.
   * @returns {number[]} copy of the seven counters after the update.
   */
  advance(numberingId, level) {
    const lv = Math.min(Math.max(level, 0), 6);
    if (numberingId !== this.currentId) {
      this.switchTo(numberingId, lv);
    }
    this.counters[lv] += 1;
    // Any deeper level restarts below the level that just advanced.
    for (let deeper = lv + 1; deeper < 7; deeper++) {
      this.counters[deeper] = 0;
    }
    return this.counters.slice();
  }

  /** Store the outgoing counters and load (or derive) those of `numberingId`. */
  switchTo(numberingId, lv) {
    if (this.currentId !== 0) {
      this.history.set(this.currentId, this.counters.slice());
    }
    const remembered = this.history.get(numberingId);
    if (remembered) {
      this.counters = remembered.slice();
    } else {
      // First use of this id: keep the counters above `lv`, zero the rest.
      const previous = this.counters;
      this.counters = previous.map((value, i) => (i < lv ? value : 0));
    }
    this.currentId = numberingId;
  }
};
2376
/**
 * Map a numbering "head" format code to the format name understood by
 * `formatNumber`.
 *
 * @param {number} code - numeric format code.
 * @returns {string} format name; "digit" for any code without a mapping
 *   (including 0, 6, 7 and non-number inputs).
 */
function headFormatToNumFmt(code) {
  // Index = format code; null marks codes that fall back to "digit".
  const namesByCode = [
    null,
    "circled",
    "romanUpper",
    "romanLower",
    "latinUpper",
    "latinLower",
    null,
    null,
    "ganada",
    "circledGanada",
    "jamo",
    "circledJamo",
    "hangulNum",
    "hanjaNum",
  ];
  const name = typeof code === "number" ? namesByCode[code] : null;
  return name ?? "digit";
}
2404
/**
 * Map a "shape" number-format code to the format name understood by
 * `formatNumber`. Codes 1-8 have a mapping; everything else is "digit".
 *
 * @param {number} code - numeric format code.
 * @returns {string} format name.
 */
function shapeFormatToNumFmt(code) {
  // Index = format code; index 0 is unused and falls back to "digit".
  const namesByCode = [
    null,
    "circled",
    "romanUpper",
    "romanLower",
    "latinUpper",
    "latinLower",
    "ganada",
    "hangulNum",
    "hanjaNum",
  ];
  const name = typeof code === "number" ? namesByCode[code] : null;
  return name ?? "digit";
}
2426
// Glyph tables for list numbering. Each string is indexed by (number - 1).
// Circled numbers, U+2460..U+2473 (20 glyphs).
var CIRCLED_DIGITS = "\u2460\u2461\u2462\u2463\u2464\u2465\u2466\u2467\u2468\u2469\u246A\u246B\u246C\u246D\u246E\u246F\u2470\u2471\u2472\u2473";
// Hangul syllables used as ordinals (14 glyphs).
var GANADA = "\uAC00\uB098\uB2E4\uB77C\uB9C8\uBC14\uC0AC\uC544\uC790\uCC28\uCE74\uD0C0\uD30C\uD558";
// Circled Hangul syllables, U+326E..U+327B (14 glyphs).
var CIRCLED_GANADA = "\u326E\u326F\u3270\u3271\u3272\u3273\u3274\u3275\u3276\u3277\u3278\u3279\u327A\u327B";
// Hangul compatibility consonants (14 glyphs).
var JAMO = "\u3131\u3134\u3137\u3139\u3141\u3142\u3145\u3147\u3148\u314A\u314B\u314C\u314D\u314E";
// Circled Hangul consonants, U+3260..U+326D (14 glyphs).
var CIRCLED_JAMO = "\u3260\u3261\u3262\u3263\u3264\u3265\u3266\u3267\u3268\u3269\u326A\u326B\u326C\u326D";
2431
/**
 * Look up the glyph for a 1-based number in a glyph table.
 *
 * @param {number} n - 1-based position.
 * @param {string} table - glyph table, one glyph per index.
 * @returns {string} `table[n - 1]` when `n` is within 1..table.length,
 *   otherwise the decimal text of `n`.
 */
function fromTable(n, table) {
  const inRange = n >= 1 && n <= table.length;
  if (!inRange) return String(n);
  return table[n - 1];
}
2434
/**
 * Format a number as a Roman numeral.
 *
 * @param {number} n - value to format.
 * @param {boolean} upper - true for "XIV", false for "xiv".
 * @returns {string} the numeral, or the decimal text of `n` when `n` is
 *   not within 1..3999.
 */
function formatRoman(n, upper) {
  if (n <= 0 || n > 3999) return String(n);
  // Largest value first, including the subtractive pairs.
  const numerals = [
    [1000, "M"], [900, "CM"], [500, "D"], [400, "CD"],
    [100, "C"], [90, "XC"], [50, "L"], [40, "XL"],
    [10, "X"], [9, "IX"], [5, "V"], [4, "IV"], [1, "I"],
  ];
  let remaining = n;
  let out = "";
  for (const [value, glyph] of numerals) {
    const symbol = upper ? glyph : glyph.toLowerCase();
    while (remaining >= value) {
      out += symbol;
      remaining -= value;
    }
  }
  return out;
}
2448
/**
 * Format a positive number as letters: 1 -> A, 26 -> Z, 27 -> AA, ...
 *
 * @param {number} n - value to format.
 * @param {boolean} upper - true for "A".."Z", false for "a".."z".
 * @returns {string} the letter sequence, or "" when `n` is zero or negative.
 */
function formatLatin(n, upper) {
  if (n <= 0) return "";
  const baseCode = upper ? 65 : 97;
  const letters = [];
  // There is no zero digit, so each step works on (rest - 1).
  for (let rest = n; rest > 0; rest = Math.floor((rest - 1) / 26)) {
    letters.unshift(String.fromCharCode(baseCode + (rest - 1) % 26));
  }
  return letters.join("");
}
2459
/**
 * Spell out a non-negative integer with positional digit and unit glyphs
 * (e.g. 21 -> digits[2] + units[1] + digits[1]).
 *
 * A digit of 1 in any position above the ones place is written as the unit
 * alone (10 -> units[1], not digits[1] + units[1]). Zero digits are skipped.
 *
 * The supported range is derived from the unit table: values needing more
 * positions than `units` provides fall back to plain decimal text instead of
 * reading past the table. Negative and non-integer values fall back the same
 * way.
 *
 * @param {number} n - value to format.
 * @param {string[]} digits - glyph per digit, indexed 0..9 (index 0 unused).
 * @param {string[]} units - glyph per decimal position, index 0 = ones.
 * @param {string} zero - text returned when `n` is exactly 0.
 * @returns {string} spelled-out number, or `String(n)` when out of range.
 */
function formatEastAsianNumber(n, digits, units, zero) {
  if (n === 0) return zero;
  const limit = 10 ** units.length;
  if (!Number.isInteger(n) || n < 0 || n >= limit) return String(n);
  let result = "";
  let unit = 0;
  for (let rest = n; rest > 0; rest = Math.floor(rest / 10), unit++) {
    const d = rest % 10;
    if (d === 0) continue;
    const digitStr = d === 1 && unit > 0 ? "" : digits[d];
    result = digitStr + units[unit] + result;
  }
  return result;
}
2476
// Digit and unit glyphs passed to formatEastAsianNumber.
// Digit tables are indexed by digit value (index 0 is an empty placeholder);
// unit tables are indexed by decimal position (index 0 = ones, no glyph).
var HANGUL_DIGITS = ["", "\uC77C", "\uC774", "\uC0BC", "\uC0AC", "\uC624", "\uC721", "\uCE60", "\uD314", "\uAD6C"];
var HANGUL_UNITS = ["", "\uC2ED", "\uBC31", "\uCC9C", "\uB9CC"];
var HANJA_DIGITS = ["", "\u4E00", "\u4E8C", "\u4E09", "\u56DB", "\u4E94", "\u516D", "\u4E03", "\u516B", "\u4E5D"];
var HANJA_UNITS = ["", "\u5341", "\u767E", "\u5343", "\u842C"];
2480
/**
 * Render a counter value in the given numbering format.
 *
 * @param {number} n - counter value.
 * @param {string} fmt - format name as produced by `headFormatToNumFmt` /
 *   `shapeFormatToNumFmt`.
 * @returns {string} formatted number; unknown formats yield decimal text.
 */
function formatNumber(n, fmt) {
  if (fmt === "romanUpper" || fmt === "romanLower") {
    return formatRoman(n, fmt === "romanUpper");
  }
  if (fmt === "latinUpper" || fmt === "latinLower") {
    // formatLatin returns "" for n <= 0; fall back to decimal text then.
    return formatLatin(n, fmt === "latinUpper") || String(n);
  }
  if (fmt === "hangulNum") {
    return formatEastAsianNumber(n, HANGUL_DIGITS, HANGUL_UNITS, "\uC601");
  }
  if (fmt === "hanjaNum") {
    return formatEastAsianNumber(n, HANJA_DIGITS, HANJA_UNITS, "\u96F6");
  }
  // Remaining named formats are plain glyph-table lookups.
  const glyphs =
    fmt === "circled" ? CIRCLED_DIGITS
    : fmt === "ganada" ? GANADA
    : fmt === "circledGanada" ? CIRCLED_GANADA
    : fmt === "jamo" ? JAMO
    : fmt === "circledJamo" ? CIRCLED_JAMO
    : null;
  return glyphs === null ? String(n) : fromTable(n, glyphs);
}
2508
/**
 * Expand a numbering format string by substituting each `^N` token
 * (N = 1..7) with the formatted counter of level N.
 *
 * For a level whose counter is still 0 the level's start number is used;
 * otherwise the value is `start - 1 + counter`. All other characters are
 * copied through unchanged.
 *
 * @param {string} formatStr - format text containing `^1`..`^7` tokens.
 * @param {number[]} counters - per-level counters (index 0 = level 1).
 * @param {{startNumbers: number[], numberFormats: number[]}} numbering -
 *   per-level start numbers and head format codes.
 * @returns {string} expanded text.
 */
function expandNumberingFormat(formatStr, counters, numbering) {
  return formatStr.replace(/\^([1-7])/g, (_token, levelDigit) => {
    const idx = Number(levelDigit) - 1;
    const counterVal = counters[idx] ?? 0;
    const start = numbering.startNumbers[idx] ?? 1;
    const num = counterVal > 0 ? start - 1 + counterVal : start;
    const fmt = headFormatToNumFmt(numbering.numberFormats[idx] ?? 0);
    return formatNumber(num, fmt);
  });
}
2528
+
2529
+ // src/hwp5/images.ts
2530
/**
 * Sniff an image MIME type from the leading bytes of a payload.
 *
 * Recognized signatures: PNG, JPEG, GIF, BMP, placeable WMF and EMF.
 * EMF is only reported when, in addition to the leading record type
 * `01 00 00 00`, the " EMF" record signature is present at byte offset 40;
 * the four leading bytes alone match too much unrelated data.
 *
 * @param {Uint8Array|Buffer|null|undefined} data - raw payload.
 * @returns {string|null} MIME type, or null when the payload is missing,
 *   shorter than 4 bytes, or not recognized.
 */
function detectImageMime(data) {
  if (!data || data.length < 4) return null;
  const hasBytesAt = (offset, ...bytes) => bytes.every((byte, i) => data[offset + i] === byte);
  if (hasBytesAt(0, 0x89, 0x50, 0x4e, 0x47)) return "image/png";
  if (hasBytesAt(0, 0xff, 0xd8, 0xff)) return "image/jpeg";
  if (hasBytesAt(0, 0x47, 0x49, 0x46)) return "image/gif";
  if (hasBytesAt(0, 0x42, 0x4d)) return "image/bmp";
  if (hasBytesAt(0, 0xd7, 0xcd, 0xc6, 0x9a)) return "image/wmf";
  // EMR_HEADER: record type 1 at offset 0 and signature " EMF" at offset 40.
  if (hasBytesAt(0, 1, 0, 0, 0) && data.length >= 44 && hasBytesAt(40, 0x20, 0x45, 0x4d, 0x46)) {
    return "image/emf";
  }
  return null;
}
2540
/**
 * Return a BinData payload in directly usable form.
 *
 * If the bytes already carry a recognized image signature they are returned
 * as is. Otherwise decompression is attempted; a non-empty result replaces
 * the input. Any decompression failure is deliberately ignored and the
 * original bytes are returned.
 *
 * @param {Buffer} data - raw stream content.
 * @returns {Buffer} image bytes, inflated bytes, or the unchanged input.
 */
function normalizeBinPayload(data) {
  if (detectImageMime(data) !== null) return data;
  let inflated = null;
  try {
    inflated = decompressStream(data);
  } catch {
    // Best effort: the payload was not a compressed stream; keep the raw bytes.
  }
  const usable = inflated != null && inflated.length > 0;
  return usable ? inflated : data;
}
2549
// Matches a storage entry named "BIN" + 4-8 hex digits, optionally followed by
// an extension, at the start of the name or right after a "/". Group 1 is the
// hex id.
var BIN_ENTRY_RE = /(?:^|\/)BIN([0-9A-Fa-f]{4,8})(?:\.[^./\\]*)?$/;
2550
/**
 * Collect every block of type "image" from a block tree, in document order.
 *
 * Descends into the blocks of table cells (`block.table.cells[row][col].blocks`)
 * and into `block.children`.
 *
 * @param {object[]} blocks - blocks to scan.
 * @param {object[]} out - array that receives the image blocks (mutated).
 * @returns {void}
 */
function collectImageBlocks2(blocks, out) {
  for (const block of blocks) {
    if (block.type === "image") out.push(block);
    if (block.table) {
      for (const row of block.table.cells) {
        for (const { blocks: nested } of row) {
          if (nested) collectImageBlocks2(nested, out);
        }
      }
    }
    if (block.children) collectImageBlocks2(block.children, out);
  }
}
2563
/**
 * Invoke a callback for every table cell in a block tree.
 *
 * Each cell is visited before the cells nested inside its own `blocks`.
 * Tables reached through `block.children` are visited as well.
 *
 * @param {object[]} blocks - blocks to scan.
 * @param {(cell: object) => void} fn - called once per cell.
 * @returns {void}
 */
function forEachTableCell(blocks, fn) {
  for (const block of blocks) {
    if (block.table) {
      for (const row of block.table.cells) {
        for (const cell of row) {
          fn(cell);
          const nested = cell.blocks;
          if (nested) forEachTableCell(nested, fn);
        }
      }
    }
    if (block.children) forEachTableCell(block.children, fn);
  }
}
2576
// Placeholder written into cell text for an image: ![image](hwp5bin:<decimal id>).
var CELL_IMAGE_SENTINEL_RE = /!\[image\]\(hwp5bin:(\d+)\)/g;

/**
 * Replace image placeholders in table cell text with final markdown.
 *
 * Each `![image](hwp5bin:N)` becomes `![image](<filename>)` when `renamed`
 * has an entry for N, and the literal fallback text otherwise. Cell objects
 * are updated in place.
 *
 * @param {object[]} blocks - block tree whose table cells are rewritten.
 * @param {Map<number, string>} renamed - storage id -> output filename.
 * @returns {void}
 */
function resolveCellImageSentinels(blocks, renamed) {
  const substitute = (_match, idStr) => {
    const filename = renamed.get(Number(idStr));
    if (!filename) return "[\uC774\uBBF8\uC9C0]";
    return `![image](${filename})`;
  };
  forEachTableCell(blocks, (cell) => {
    // Cheap substring test first; most cells carry no placeholder.
    if (cell.text.includes("hwp5bin:")) {
      cell.text = cell.text.replace(CELL_IMAGE_SENTINEL_RE, substitute);
    }
  });
}
2586
/**
 * Bind image blocks to their BinData payloads and produce the image list.
 *
 * For every image block whose text parses as a storage id:
 *  - no payload for that id: a SKIPPED_IMAGE warning is pushed and the block
 *    is turned into a placeholder paragraph;
 *  - payload with unknown signature: same, naming the stream;
 *  - otherwise the block text becomes a generated filename
 *    (`image_NNN.<ext>`) and `imageData` is attached.
 * Afterwards the image placeholders in table cell text are resolved against
 * the generated filenames.
 *
 * Cell placeholders are resolved even when the tree holds no image blocks,
 * so raw `hwp5bin:` pseudo-links are never left in the output on that path.
 *
 * @param {Map<number, {data: Uint8Array, name: string}>} binDataMap -
 *   storage id -> payload and stream name.
 * @param {object[]} blocks - document block tree (mutated in place).
 * @param {object[]} warnings - warning sink (mutated in place).
 * @returns {{filename: string, data: Uint8Array, mimeType: string}[]}
 *   extracted images in block order.
 */
function resolveImageBlocks(binDataMap, blocks, warnings) {
  const imageBlocks = [];
  collectImageBlocks2(blocks, imageBlocks);
  const images = [];
  const renamed = new Map();
  let imageIndex = 0;
  for (const block of imageBlocks) {
    if (!block.text) continue;
    const storageId = parseInt(block.text, 10);
    if (Number.isNaN(storageId)) continue;
    const bin = binDataMap.get(storageId);
    if (!bin) {
      warnings.push({ page: block.pageNumber, message: `BinData ${storageId} \uC5C6\uC74C`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: BinData ${storageId}]`;
      continue;
    }
    const mime = detectImageMime(bin.data);
    if (!mime) {
      warnings.push({ page: block.pageNumber, message: `BinData ${storageId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: ${bin.name}]`;
      continue;
    }
    imageIndex++;
    // Formats without a dedicated extension here (wmf, emf) are written as ".bin".
    const known = ["jpeg", "png", "gif", "bmp"].find((kind) => mime.includes(kind));
    const ext = known === "jpeg" ? "jpg" : known ?? "bin";
    const filename = `image_${String(imageIndex).padStart(3, "0")}.${ext}`;
    images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
    renamed.set(storageId, filename);
    block.text = filename;
    block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
  }
  // Always runs, including when no image block was found.
  resolveCellImageSentinels(blocks, renamed);
  return images;
}
2622
/**
 * Extract images using a file index of storage entries.
 *
 * Entries whose name matches `BIN_ENTRY_RE` and that have content are
 * registered under the hex id parsed from their name; payloads go through
 * `normalizeBinPayload`. When no such entry exists, cell image placeholders
 * are resolved against an empty map and no images are returned.
 *
 * @param {Array<{name?: string, content?: *}>|null|undefined} fileIndex -
 *   storage entries to scan.
 * @param {object[]} blocks - document block tree (mutated in place).
 * @param {object[]} warnings - warning sink.
 * @returns {object[]} extracted images.
 */
function extractHwp5Images(fileIndex, blocks, warnings) {
  const binDataMap = new Map();
  for (const entry of fileIndex || []) {
    if (!entry?.name || !entry.content) continue;
    const hexId = entry.name.match(BIN_ENTRY_RE)?.[1];
    if (hexId === undefined) continue;
    const payload = normalizeBinPayload(Buffer.from(entry.content));
    binDataMap.set(parseInt(hexId, 16), { data: payload, name: entry.name });
  }
  if (binDataMap.size > 0) {
    return resolveImageBlocks(binDataMap, blocks, warnings);
  }
  resolveCellImageSentinels(blocks, new Map());
  return [];
}
2640
/**
 * Extract images through the lenient container reader.
 *
 * Entries named "BIN" + 4-8 hex digits (followed by "." or end of name) are
 * looked up with `lcfb.findStream`; streams that cannot be found are skipped.
 * When nothing is registered, cell image placeholders are resolved against
 * an empty map and no images are returned.
 *
 * @param {{entries: Function, findStream: Function}} lcfb - lenient reader.
 * @param {object[]} blocks - document block tree (mutated in place).
 * @param {object[]} warnings - warning sink.
 * @returns {object[]} extracted images.
 */
function extractHwp5ImagesLenient(lcfb, blocks, warnings) {
  const binRe = /^BIN([0-9A-Fa-f]{4,8})(?:\.|$)/;
  const binDataMap = new Map();
  for (const { name } of lcfb.entries()) {
    const hexId = name.match(binRe)?.[1];
    if (hexId === undefined) continue;
    const storageId = parseInt(hexId, 16);
    const raw = lcfb.findStream(name);
    if (!raw) continue;
    binDataMap.set(storageId, { data: normalizeBinPayload(raw), name });
  }
  if (binDataMap.size > 0) {
    return resolveImageBlocks(binDataMap, blocks, warnings);
  }
  resolveCellImageSentinels(blocks, new Map());
  return [];
}
2657
+
2658
+ // src/hwp5/aes.ts
2659
+ var S_BOX = new Uint8Array([
2660
+ 99,
2661
+ 124,
2662
+ 119,
2663
+ 123,
2664
+ 242,
2665
+ 107,
2666
+ 111,
2667
+ 197,
2668
+ 48,
2669
+ 1,
2670
+ 103,
2671
+ 43,
2672
+ 254,
2673
+ 215,
2674
+ 171,
2675
+ 118,
2676
+ 202,
2677
+ 130,
2678
+ 201,
2679
+ 125,
2680
+ 250,
2681
+ 89,
2682
+ 71,
2683
+ 240,
2684
+ 173,
2685
+ 212,
2686
+ 162,
2687
+ 175,
2688
+ 156,
2689
+ 164,
2690
+ 114,
2691
+ 192,
2692
+ 183,
2693
+ 253,
2694
+ 147,
2695
+ 38,
2696
+ 54,
2697
+ 63,
2698
+ 247,
2699
+ 204,
2700
+ 52,
2701
+ 165,
2702
+ 229,
2703
+ 241,
2704
+ 113,
2705
+ 216,
2706
+ 49,
2707
+ 21,
2708
+ 4,
2709
+ 199,
2710
+ 35,
2711
+ 195,
2712
+ 24,
2713
+ 150,
2714
+ 5,
2715
+ 154,
2716
+ 7,
2717
+ 18,
2718
+ 128,
2719
+ 226,
2720
+ 235,
2721
+ 39,
2722
+ 178,
2723
+ 117,
2724
+ 9,
2725
+ 131,
2726
+ 44,
2727
+ 26,
2728
+ 27,
2729
+ 110,
2730
+ 90,
2731
+ 160,
2732
+ 82,
2733
+ 59,
2734
+ 214,
2735
+ 179,
2736
+ 41,
2737
+ 227,
2738
+ 47,
2739
+ 132,
2740
+ 83,
2741
+ 209,
2742
+ 0,
2743
+ 237,
2744
+ 32,
2745
+ 252,
2746
+ 177,
2747
+ 91,
2748
+ 106,
2749
+ 203,
2750
+ 190,
2751
+ 57,
1922
2752
  74,
1923
2753
  76,
1924
2754
  88,
@@ -2751,6 +3581,66 @@ var require2 = createRequire(import.meta.url);
2751
3581
  var CFB = require2("cfb");
2752
3582
  var MAX_SECTIONS = 100;
2753
3583
  var MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024;
3584
+ var MAX_NEST_DEPTH = 8;
3585
/**
 * Pack the first four characters of a string into an unsigned 32-bit
 * integer, first character in the most significant byte.
 *
 * @param {string} s - four-character control id such as "tbl ".
 * @returns {number} unsigned 32-bit value.
 */
function cid(s) {
  let packed = 0;
  for (let i = 0; i < 4; i++) {
    packed = packed << 8 | s.charCodeAt(i);
  }
  return packed >>> 0;
}
3588
// Four-character control ids, packed into 32-bit integers by cid().
var CTRL_TBL = cid("tbl ");
var CTRL_GSO = cid("gso ");
var CTRL_EQED = cid("eqed");
var CTRL_HEAD = cid("head");
var CTRL_FOOT = cid("foot");
var CTRL_FN = cid("fn ");
var CTRL_EN = cid("en ");
var CTRL_ATNO = cid("atno");
var CTRL_NWNO = cid("nwno");
var CTRL_PGNP = cid("pgnp");
var CTRL_PGHD = cid("pghd");
var CTRL_IDXM = cid("idxm");
var CTRL_BOKM = cid("bokm");
var CTRL_TCPS = cid("tcps");
var CTRL_TDUT = cid("tdut");
var CTRL_TCMT = cid("tcmt");
var CTRL_SECD = cid("secd");
var CTRL_COLD = cid("cold");
var CTRL_FORM = cid("form");
var CTRL_OLE = cid("ole ");

// Field control ids; their first character is "%".
var FIELD_HLK = cid("%hlk");
var FIELD_CLK = cid("%clk");

// Every non-field control id listed above. The two FIELD_* ids are not members.
var KNOWN_CTRL_IDS = new Set([
  CTRL_TBL,
  CTRL_GSO,
  CTRL_EQED,
  CTRL_HEAD,
  CTRL_FOOT,
  CTRL_FN,
  CTRL_EN,
  CTRL_ATNO,
  CTRL_NWNO,
  CTRL_PGNP,
  CTRL_PGHD,
  CTRL_IDXM,
  CTRL_BOKM,
  CTRL_TCPS,
  CTRL_TDUT,
  CTRL_TCMT,
  CTRL_SECD,
  CTRL_COLD,
  CTRL_FORM,
  CTRL_OLE
]);
3632
/**
 * Tell whether a packed control id denotes a field control, i.e. whether its
 * most significant byte is 37 (the character code of "%").
 *
 * @param {number} id - packed 32-bit control id.
 * @returns {boolean} true when the top byte equals 37.
 */
function isFieldCtrlId(id) {
  const topByte = id >>> 24;
  return topByte === 37;
}
3635
/**
 * Reverse the byte order of a 32-bit integer.
 *
 * @param {number} id - 32-bit value.
 * @returns {number} unsigned 32-bit value with the four bytes reversed.
 */
function swap32(id) {
  const byte0 = id & 255;
  const byte1 = id >>> 8 & 255;
  const byte2 = id >>> 16 & 255;
  const byte3 = id >>> 24 & 255;
  return (byte0 << 24 | byte1 << 16 | byte2 << 8 | byte3) >>> 0;
}
3638
/**
 * Normalize a raw control id to the byte order used by the CTRL_* constants.
 *
 * The raw value is kept when it is already a known control id or a field id.
 * Otherwise its byte-swapped form is returned if that form is recognized.
 * Unrecognized values are returned unchanged.
 *
 * @param {number} raw - control id as read from the record.
 * @returns {number} recognized id in canonical byte order, or `raw`.
 */
function normalizeCtrlId(raw) {
  const isRecognized = (id) => KNOWN_CTRL_IDS.has(id) || isFieldCtrlId(id);
  if (isRecognized(raw)) return raw;
  const swapped = swap32(raw);
  return isRecognized(swapped) ? swapped : raw;
}
2754
3644
  function parseHwp5Document(buffer, options) {
2755
3645
  let cfb = null;
2756
3646
  let lenientCfb = null;
@@ -2790,8 +3680,8 @@ function parseHwp5Document(buffer, options) {
2790
3680
  metadata.pageCount = sections.length;
2791
3681
  const pageFilter = options?.pages ? parsePageRange(options.pages, sections.length) : null;
2792
3682
  const totalTarget = pageFilter ? pageFilter.size : sections.length;
2793
- const blocks = [];
2794
- const nestedTableCounter = { count: 0 };
3683
+ const bodyBlocks = [];
3684
+ const doc = createHwp5DocState();
2795
3685
  let totalDecompressed = 0;
2796
3686
  let parsedSections = 0;
2797
3687
  for (let si = 0; si < sections.length; si++) {
@@ -2802,8 +3692,8 @@ function parseHwp5Document(buffer, options) {
2802
3692
  totalDecompressed += data.length;
2803
3693
  if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new KordocError("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
2804
3694
  const records = readRecords(data);
2805
- const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, nestedTableCounter);
2806
- blocks.push(...sectionBlocks);
3695
+ const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, doc);
3696
+ bodyBlocks.push(...sectionBlocks);
2807
3697
  parsedSections++;
2808
3698
  options?.onProgress?.(parsedSections, totalTarget);
2809
3699
  } catch (secErr) {
@@ -2811,7 +3701,8 @@ function parseHwp5Document(buffer, options) {
2811
3701
  warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
2812
3702
  }
2813
3703
  }
2814
- const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
3704
+ const blocks = [...doc.headerBlocks, ...bodyBlocks, ...doc.footerBlocks];
3705
+ const images = cfb ? extractHwp5Images(cfb.FileIndex, blocks, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, warnings);
2815
3706
  const flatBlocks = flattenLayoutTables(blocks);
2816
3707
  if (docInfo) {
2817
3708
  detectHwp5Headings(flatBlocks, docInfo);
@@ -2842,28 +3733,28 @@ function parseDocInfoFromStream(raw, compressed) {
2842
3733
  }
2843
3734
  function detectHwp5Headings(blocks, docInfo) {
2844
3735
  let baseFontSize = 0;
2845
- for (const style of docInfo.styles) {
2846
- const name = (style.nameKo || style.name).toLowerCase();
2847
- if (name.includes("\uBC14\uD0D5") || name.includes("\uBCF8\uBB38") || name === "normal" || name === "body") {
2848
- const cs = docInfo.charShapes[style.charShapeId];
2849
- if (cs?.fontSize > 0) {
2850
- baseFontSize = cs.fontSize / 10;
2851
- break;
2852
- }
3736
+ const sizeFreq = /* @__PURE__ */ new Map();
3737
+ for (const b of blocks) {
3738
+ if (b.style?.fontSize && b.text) {
3739
+ sizeFreq.set(b.style.fontSize, (sizeFreq.get(b.style.fontSize) || 0) + b.text.length);
2853
3740
  }
2854
3741
  }
2855
- if (baseFontSize === 0) {
2856
- const sizeFreq = /* @__PURE__ */ new Map();
2857
- for (const b of blocks) {
2858
- if (b.style?.fontSize) {
2859
- sizeFreq.set(b.style.fontSize, (sizeFreq.get(b.style.fontSize) || 0) + 1);
2860
- }
3742
+ let maxWeight = 0;
3743
+ for (const [size, weight] of sizeFreq) {
3744
+ if (weight > maxWeight) {
3745
+ maxWeight = weight;
3746
+ baseFontSize = size;
2861
3747
  }
2862
- let maxCount = 0;
2863
- for (const [size, count] of sizeFreq) {
2864
- if (count > maxCount) {
2865
- maxCount = count;
2866
- baseFontSize = size;
3748
+ }
3749
+ if (baseFontSize === 0) {
3750
+ for (const style of docInfo.styles) {
3751
+ const name = (style.nameKo || style.name).toLowerCase();
3752
+ if (name.includes("\uBC14\uD0D5") || name.includes("\uBCF8\uBB38") || name === "normal" || name === "body") {
3753
+ const cs = docInfo.charShapes[style.charShapeId];
3754
+ if (cs?.fontSize > 0) {
3755
+ baseFontSize = cs.fontSize / 10;
3756
+ break;
3757
+ }
2867
3758
  }
2868
3759
  }
2869
3760
  }
@@ -3001,414 +3892,397 @@ function findViewTextSectionsLenient(lcfb, compressed) {
3001
3892
  }
3002
3893
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
3003
3894
  }
3004
- var TAG_SHAPE_COMPONENT = 74;
3005
- var CTRL_ID_EQEDIT = "deqe";
3006
- function extractBinDataId(records, ctrlIdx) {
3007
- const ctrlLevel = records[ctrlIdx].level;
3008
- for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 50; j++) {
3009
- const r = records[j];
3010
- if (r.level <= ctrlLevel) break;
3011
- if (r.data.length >= 2) {
3012
- if (r.tagId > TAG_SHAPE_COMPONENT && r.level > ctrlLevel + 1 && r.data.length >= 4) {
3013
- const possibleId = r.data.readUInt16LE(0);
3014
- if (possibleId < 1e4) return possibleId;
3015
- }
3016
- }
3017
- }
3018
- return -1;
3019
- }
3020
- function isEquationControlId(ctrlId) {
3021
- return ctrlId === CTRL_ID_EQEDIT || ctrlId === "eqed";
3022
- }
3023
3895
  function formatEquationForMarkdown(equation) {
3024
3896
  const normalized = hwpEquationToLatex(equation);
3025
3897
  if (!normalized) return "";
3026
3898
  return `$${normalized.replace(/\$/g, "\\$")}$`;
3027
3899
  }
3028
- function extractEquationFromControl(records, ctrlIdx) {
3029
- const ctrlLevel = records[ctrlIdx].level;
3030
- for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 10; j++) {
3031
- const r = records[j];
3032
- if (r.level <= ctrlLevel) break;
3033
- if (r.tagId !== TAG_EQEDIT) continue;
3034
- const equation = extractEquationText(r.data);
3900
+ function extractEquationFromSlice(records, start, end) {
3901
+ for (let i = start; i < end; i++) {
3902
+ if (records[i].tagId !== TAG_EQEDIT) continue;
3903
+ const equation = extractEquationText(records[i].data);
3035
3904
  return equation ? formatEquationForMarkdown(equation) : null;
3036
3905
  }
3037
3906
  return null;
3038
3907
  }
3039
- function renderTextWithEquations(textRecords, equations) {
3040
- const queue = [...equations];
3041
- return textRecords.map((data) => extractTextWithControls(data, (ctrlId) => {
3042
- if (!isEquationControlId(ctrlId) || queue.length === 0) return null;
3043
- return queue.shift();
3044
- })).join("").replace(/\$\$/g, "$ $");
3908
+ function createHwp5DocState() {
3909
+ return {
3910
+ numbering: new NumberingState(),
3911
+ outlineNumberingId: 0,
3912
+ autoCounters: /* @__PURE__ */ new Map(),
3913
+ headerTexts: /* @__PURE__ */ new Set(),
3914
+ headerBlocks: [],
3915
+ footerBlocks: []
3916
+ };
3045
3917
  }
3046
- function detectImageMime(data) {
3047
- if (data.length < 4) return null;
3048
- if (data[0] === 137 && data[1] === 80 && data[2] === 78 && data[3] === 71) return "image/png";
3049
- if (data[0] === 255 && data[1] === 216 && data[2] === 255) return "image/jpeg";
3050
- if (data[0] === 71 && data[1] === 73 && data[2] === 70) return "image/gif";
3051
- if (data[0] === 66 && data[1] === 77) return "image/bmp";
3052
- if (data[0] === 215 && data[1] === 205 && data[2] === 198 && data[3] === 154) return "image/wmf";
3053
- if (data[0] === 1 && data[1] === 0 && data[2] === 0 && data[3] === 0) return "image/emf";
3054
- return null;
3918
+ function parseSection(records, docInfo, warnings, sectionNum, doc) {
3919
+ const ctx = { docInfo, warnings, sectionNum, doc: doc ?? createHwp5DocState(), depth: 0 };
3920
+ return parseParagraphList(records, 0, records.length, ctx);
3055
3921
  }
3056
- function extractHwp5Images(cfb, blocks, compressed, warnings) {
3057
- const binDataMap = /* @__PURE__ */ new Map();
3058
- const binDataRe = /\/BinData\/[Bb][Ii][Nn](\d{4})$/;
3059
- if (cfb.FileIndex) {
3060
- for (const entry of cfb.FileIndex) {
3061
- if (!entry?.name || !entry.content) continue;
3062
- const match = entry.name.match(binDataRe);
3063
- if (!match) continue;
3064
- const idx = parseInt(match[1], 10);
3065
- let data = Buffer.from(entry.content);
3066
- if (compressed) {
3067
- try {
3068
- data = decompressStream(data);
3069
- } catch {
3070
- }
3071
- }
3072
- binDataMap.set(idx, { data, name: entry.name });
3922
+ function parseParagraphList(records, start, end, ctx) {
3923
+ const blocks = [];
3924
+ let i = start;
3925
+ while (i < end) {
3926
+ if (records[i].tagId === TAG_PARA_HEADER) {
3927
+ const baseLevel = records[i].level;
3928
+ let j = i + 1;
3929
+ while (j < end && records[j].level > baseLevel) j++;
3930
+ blocks.push(...parseParagraph(records, i, j, ctx));
3931
+ i = j;
3932
+ } else {
3933
+ i++;
3073
3934
  }
3074
3935
  }
3075
- if (binDataMap.size === 0) return [];
3076
- const images = [];
3077
- let imageIndex = 0;
3078
- for (const block of blocks) {
3079
- if (block.type !== "image" || !block.text) continue;
3080
- const binId = parseInt(block.text, 10);
3081
- if (isNaN(binId)) continue;
3082
- const bin = binDataMap.get(binId);
3083
- if (!bin) {
3084
- warnings.push({ page: block.pageNumber, message: `BinData ${binId} \uC5C6\uC74C`, code: "SKIPPED_IMAGE" });
3085
- block.type = "paragraph";
3086
- block.text = `[\uC774\uBBF8\uC9C0: BinData ${binId}]`;
3936
+ return blocks;
3937
+ }
3938
+ function parseParagraph(records, start, end, ctx) {
3939
+ const header = records[start];
3940
+ const baseLevel = header.level;
3941
+ const paraShapeId = header.data.length >= 10 ? header.data.readUInt16LE(8) : -1;
3942
+ const textRecords = [];
3943
+ const charShapeIds = [];
3944
+ const ctrls = [];
3945
+ let i = start + 1;
3946
+ while (i < end) {
3947
+ const rec = records[i];
3948
+ if (rec.tagId === TAG_CTRL_HEADER && rec.level === baseLevel + 1 && rec.data.length >= 4) {
3949
+ const childStart = i + 1;
3950
+ let j = childStart;
3951
+ while (j < end && records[j].level > baseLevel + 1) j++;
3952
+ const idRaw = rec.data.readUInt32LE(0);
3953
+ ctrls.push({ id: normalizeCtrlId(idRaw), idRaw, data: rec.data, childStart, childEnd: j });
3954
+ i = j;
3087
3955
  continue;
3088
3956
  }
3089
- const mime = detectImageMime(bin.data);
3090
- if (!mime) {
3091
- warnings.push({ page: block.pageNumber, message: `BinData ${binId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`, code: "SKIPPED_IMAGE" });
3092
- block.type = "paragraph";
3093
- block.text = `[\uC774\uBBF8\uC9C0: ${bin.name}]`;
3094
- continue;
3095
- }
3096
- imageIndex++;
3097
- const ext = mime.includes("jpeg") ? "jpg" : mime.includes("png") ? "png" : mime.includes("gif") ? "gif" : mime.includes("bmp") ? "bmp" : "bin";
3098
- const filename = `image_${String(imageIndex).padStart(3, "0")}.${ext}`;
3099
- images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
3100
- block.text = filename;
3101
- block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
3102
- }
3103
- return images;
3104
- }
3105
- function extractHwp5ImagesLenient(lcfb, blocks, compressed, warnings) {
3106
- const binDataMap = /* @__PURE__ */ new Map();
3107
- const binRe = /^BIN(\d{4})/i;
3108
- for (const e of lcfb.entries()) {
3109
- const match = e.name.match(binRe);
3110
- if (!match) continue;
3111
- const idx = parseInt(match[1], 10);
3112
- let raw = lcfb.findStream(e.name);
3113
- if (!raw) continue;
3114
- if (compressed) {
3115
- try {
3116
- raw = decompressStream(raw);
3117
- } catch {
3957
+ if (rec.tagId === TAG_PARA_TEXT && rec.level === baseLevel + 1) {
3958
+ textRecords.push(rec.data);
3959
+ } else if (rec.tagId === TAG_CHAR_SHAPE && rec.level === baseLevel + 1 && rec.data.length >= 8) {
3960
+ for (let offset = 0; offset + 7 < rec.data.length; offset += 8) {
3961
+ charShapeIds.push(rec.data.readUInt32LE(offset + 4));
3118
3962
  }
3119
3963
  }
3120
- binDataMap.set(idx, { data: raw, name: e.name });
3964
+ i++;
3121
3965
  }
3122
- if (binDataMap.size === 0) return [];
3123
- const images = [];
3124
- let imageIndex = 0;
3125
- for (const block of blocks) {
3126
- if (block.type !== "image" || !block.text) continue;
3127
- const binId = parseInt(block.text, 10);
3128
- if (isNaN(binId)) continue;
3129
- const bin = binDataMap.get(binId);
3130
- if (!bin) {
3131
- warnings.push({ page: block.pageNumber, message: `BinData ${binId} \uFFFD\uFFFD\uFFFD\uC74C`, code: "SKIPPED_IMAGE" });
3132
- block.type = "paragraph";
3133
- block.text = `[\uC774\uBBF8\uC9C0: BinData ${binId}]`;
3134
- continue;
3966
+ for (const ctrl of ctrls) {
3967
+ applyCtrlEffect(ctrl, records, ctx);
3968
+ }
3969
+ const state = createParaTextState();
3970
+ const resolver = (idx, id) => {
3971
+ let ctrl = idx >= 0 && idx < ctrls.length ? ctrls[idx] : void 0;
3972
+ if (!ctrl || ctrl.idRaw !== id && ctrl.id !== id) {
3973
+ ctrl = ctrls.find((c) => !c.resolved && (c.idRaw === id || c.id === id));
3135
3974
  }
3136
- const mime = detectImageMime(bin.data);
3137
- if (!mime) {
3138
- warnings.push({ page: block.pageNumber, message: `BinData ${binId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`, code: "SKIPPED_IMAGE" });
3139
- block.type = "paragraph";
3140
- block.text = `[\uC774\uBBF8\uC9C0: ${bin.name}]`;
3141
- continue;
3975
+ if (!ctrl) return null;
3976
+ ctrl.resolved = true;
3977
+ return ctrl.inlineText ?? null;
3978
+ };
3979
+ for (const data of textRecords) {
3980
+ appendParaText(state, data, resolver);
3981
+ }
3982
+ let text = state.text;
3983
+ if (state.fieldRanges.length > 0) {
3984
+ const ranges = [...state.fieldRanges].sort((a, b) => b.start - a.start);
3985
+ const applied = [];
3986
+ for (const r of ranges) {
3987
+ const ctrl = ctrls[r.ctrlIdx];
3988
+ if (!ctrl?.href || r.end <= r.start) continue;
3989
+ if (applied.some(([s, e]) => r.start < e && r.end > s)) continue;
3990
+ const href = sanitizeHref(ctrl.href);
3991
+ if (!href) continue;
3992
+ const anchor = text.slice(r.start, r.end);
3993
+ if (!anchor.trim()) continue;
3994
+ text = text.slice(0, r.start) + `[${anchor}](${href})` + text.slice(r.end);
3995
+ applied.push([r.start, r.end]);
3142
3996
  }
3143
- imageIndex++;
3144
- const ext = mime.includes("jpeg") ? "jpg" : mime.includes("png") ? "png" : mime.includes("gif") ? "gif" : mime.includes("bmp") ? "bmp" : "bin";
3145
- const filename = `image_${String(imageIndex).padStart(3, "0")}.${ext}`;
3146
- images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
3147
- block.text = filename;
3148
- block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
3149
3997
  }
3150
- return images;
3151
- }
3152
- function parseSection(records, docInfo, warnings, sectionNum, counter) {
3153
- const blocks = [];
3154
- let i = 0;
3155
- while (i < records.length) {
3156
- const rec = records[i];
3157
- if (rec.tagId === TAG_PARA_HEADER && rec.level === 0) {
3158
- const { paragraph, tables, nextIdx, charShapeIds, paraShapeId } = parseParagraphWithTables(records, i, counter);
3159
- if (paragraph) {
3160
- const block = { type: "paragraph", text: paragraph, pageNumber: sectionNum };
3161
- if (docInfo && charShapeIds.length > 0) {
3162
- const style = resolveCharStyle(charShapeIds, docInfo);
3163
- if (style) block.style = style;
3164
- }
3165
- if (docInfo && paraShapeId >= 0 && paraShapeId < docInfo.paraShapes.length) {
3166
- const ol = docInfo.paraShapes[paraShapeId].outlineLevel;
3167
- if (ol >= 1 && ol <= 6) {
3168
- block.type = "heading";
3169
- block.level = ol;
3170
- }
3171
- }
3172
- blocks.push(block);
3173
- }
3174
- for (const t of tables) blocks.push({ type: "table", table: t, pageNumber: sectionNum });
3175
- i = nextIdx;
3176
- continue;
3998
+ const trimmed = text.replace(/\$\$/g, "$ $").trim();
3999
+ let headingLevel = 0;
4000
+ let headMarker = null;
4001
+ const ps = ctx.docInfo && paraShapeId >= 0 && paraShapeId < ctx.docInfo.paraShapes.length ? ctx.docInfo.paraShapes[paraShapeId] : null;
4002
+ if (ps && ps.headType > 0) {
4003
+ if (ps.headType === 1) {
4004
+ headingLevel = Math.min(ps.paraLevel + 1, 6);
3177
4005
  }
3178
- if (rec.tagId === TAG_CTRL_HEADER && rec.level <= 1 && rec.data.length >= 4) {
3179
- const ctrlId = rec.data.subarray(0, 4).toString("ascii");
3180
- if (ctrlId === " lbt" || ctrlId === "tbl ") {
3181
- const { table, nextIdx } = parseTableBlock(records, i, counter);
3182
- if (table) blocks.push({ type: "table", table, pageNumber: sectionNum });
3183
- i = nextIdx;
3184
- continue;
3185
- }
3186
- if (ctrlId === "gso " || ctrlId === " osg") {
3187
- const binId = extractBinDataId(records, i);
3188
- if (binId >= 0) {
3189
- blocks.push({ type: "image", text: String(binId), pageNumber: sectionNum });
3190
- } else {
3191
- const boxText = extractTextBoxText(records, i);
3192
- if (boxText) {
3193
- blocks.push({ type: "paragraph", text: boxText, pageNumber: sectionNum });
3194
- }
3195
- }
3196
- } else if (ctrlId === " elo" || ctrlId === "ole ") {
3197
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
3198
- } else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
3199
- const noteText = extractNoteText(records, i);
3200
- if (noteText && blocks.length > 0) {
3201
- const lastBlock = blocks[blocks.length - 1];
3202
- if (lastBlock.type === "paragraph") {
3203
- lastBlock.footnoteText = lastBlock.footnoteText ? lastBlock.footnoteText + "; " + noteText : noteText;
3204
- }
3205
- }
3206
- } else if (ctrlId === "%tok" || ctrlId === "klnk") {
3207
- const url = extractHyperlinkUrl(rec.data);
3208
- if (url && blocks.length > 0) {
3209
- const lastBlock = blocks[blocks.length - 1];
3210
- if (lastBlock.type === "paragraph" && !lastBlock.href) {
3211
- lastBlock.href = sanitizeHref(url) ?? void 0;
3212
- }
4006
+ if (ps.headType === 1 || ps.headType === 2) {
4007
+ const nid = ps.numberingId || (ps.headType === 1 ? ctx.doc.outlineNumberingId : 0);
4008
+ const numbering = nid >= 1 ? ctx.docInfo?.numberings[nid - 1] : void 0;
4009
+ if (numbering) {
4010
+ const counters = ctx.doc.numbering.advance(nid, ps.paraLevel);
4011
+ const fmt = numbering.levelFormats[Math.min(ps.paraLevel, 6)];
4012
+ if (fmt) {
4013
+ const headText = expandNumberingFormat(fmt, counters, numbering);
4014
+ if (headText) headMarker = headText;
3213
4015
  }
3214
4016
  }
4017
+ } else if (ps.headType === 3) {
4018
+ const bullet = ps.numberingId >= 1 ? ctx.docInfo?.bullets[ps.numberingId - 1] : void 0;
4019
+ if (bullet && bullet.char !== "\uFFFF") headMarker = bullet.char;
3215
4020
  }
3216
- i++;
4021
+ }
4022
+ const blocks = [];
4023
+ const footnotes = ctrls.filter((c) => c.footnote).map((c) => c.footnote);
4024
+ if (trimmed) {
4025
+ const block = {
4026
+ type: headingLevel > 0 ? "heading" : "paragraph",
4027
+ text: headMarker ? `${headMarker} ${trimmed}` : trimmed,
4028
+ pageNumber: ctx.sectionNum
4029
+ };
4030
+ if (headingLevel > 0) block.level = headingLevel;
4031
+ if (ctx.docInfo && charShapeIds.length > 0) {
4032
+ const style = resolveCharStyle(charShapeIds, ctx.docInfo);
4033
+ if (style) block.style = style;
4034
+ }
4035
+ if (footnotes.length > 0) block.footnoteText = footnotes.join("; ");
4036
+ blocks.push(block);
4037
+ } else if (footnotes.length > 0) {
4038
+ blocks.push({ type: "paragraph", text: `(\uC8FC: ${footnotes.join("; ")})`, pageNumber: ctx.sectionNum });
4039
+ }
4040
+ for (const ctrl of ctrls) {
4041
+ if (ctrl.afterBlocks) blocks.push(...ctrl.afterBlocks);
3217
4042
  }
3218
4043
  return blocks;
3219
4044
  }
3220
- function extractNoteText(records, ctrlIdx) {
3221
- const ctrlLevel = records[ctrlIdx].level;
3222
- const texts = [];
3223
- let textRecords = [];
3224
- let equations = [];
3225
- const flushText = () => {
3226
- const text = renderTextWithEquations(textRecords, equations).trim();
3227
- if (text) texts.push(text);
3228
- textRecords = [];
3229
- equations = [];
3230
- };
3231
- for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 100; j++) {
3232
- const r = records[j];
3233
- if (r.level <= ctrlLevel) break;
3234
- if (r.tagId === TAG_PARA_HEADER) {
3235
- flushText();
3236
- }
3237
- if (r.tagId === TAG_PARA_TEXT) {
3238
- textRecords.push(r.data);
3239
- }
3240
- if (r.tagId === TAG_CTRL_HEADER && r.data.length >= 4) {
3241
- const ctrlId = r.data.subarray(0, 4).toString("ascii");
3242
- if (isEquationControlId(ctrlId)) {
3243
- const equation = extractEquationFromControl(records, j);
3244
- if (equation) equations.push(equation);
4045
+ function applyCtrlEffect(ctrl, records, ctx) {
4046
+ switch (ctrl.id) {
4047
+ case CTRL_TBL: {
4048
+ const table = parseTableControl(ctrl, records, ctx);
4049
+ if (table) ctrl.afterBlocks = [{ type: "table", table, pageNumber: ctx.sectionNum }];
4050
+ return;
4051
+ }
4052
+ case CTRL_GSO: {
4053
+ const blocks = parseGsoControl(ctrl, records, ctx);
4054
+ if (blocks.length > 0) ctrl.afterBlocks = blocks;
4055
+ return;
4056
+ }
4057
+ case CTRL_EQED: {
4058
+ const eq = extractEquationFromSlice(records, ctrl.childStart, ctrl.childEnd);
4059
+ if (eq) ctrl.inlineText = eq;
4060
+ return;
4061
+ }
4062
+ case CTRL_FN:
4063
+ case CTRL_EN: {
4064
+ applyNoteEffect(ctrl, records, ctx, ctrl.id === CTRL_FN ? 1 : 2);
4065
+ return;
4066
+ }
4067
+ case CTRL_HEAD:
4068
+ case CTRL_FOOT: {
4069
+ applyHeaderFooterEffect(ctrl, records, ctx, ctrl.id === CTRL_HEAD);
4070
+ return;
4071
+ }
4072
+ case CTRL_ATNO: {
4073
+ if (ctrl.data.length >= 8) {
4074
+ const attr = ctrl.data.readUInt32LE(4);
4075
+ const type = attr & 15;
4076
+ const format = attr >>> 4 & 255;
4077
+ const num = ctx.doc.autoCounters.get(type) ?? 1;
4078
+ ctx.doc.autoCounters.set(type, num + 1);
4079
+ const prefix = ctrl.data.length >= 14 ? wcharAt(ctrl.data, 12) : "";
4080
+ const suffix = ctrl.data.length >= 16 ? wcharAt(ctrl.data, 14) : "";
4081
+ ctrl.inlineText = `${prefix}${formatNumber(num, shapeFormatToNumFmt(format))}${suffix}`;
3245
4082
  }
4083
+ return;
3246
4084
  }
3247
- }
3248
- flushText();
3249
- return texts.length > 0 ? texts.join(" ") : null;
3250
- }
3251
- function extractTextBoxText(records, ctrlIdx) {
3252
- const ctrlLevel = records[ctrlIdx].level;
3253
- const texts = [];
3254
- let textRecords = [];
3255
- let equations = [];
3256
- const flushText = () => {
3257
- const text = renderTextWithEquations(textRecords, equations).trim();
3258
- if (text) texts.push(text);
3259
- textRecords = [];
3260
- equations = [];
3261
- };
3262
- for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 200; j++) {
3263
- const r = records[j];
3264
- if (r.level <= ctrlLevel) break;
3265
- if (r.tagId === TAG_PARA_HEADER) {
3266
- flushText();
3267
- }
3268
- if (r.tagId === TAG_PARA_TEXT) {
3269
- textRecords.push(r.data);
3270
- }
3271
- if (r.tagId === TAG_CTRL_HEADER && r.data.length >= 4) {
3272
- const ctrlId = r.data.subarray(0, 4).toString("ascii");
3273
- if (isEquationControlId(ctrlId)) {
3274
- const equation = extractEquationFromControl(records, j);
3275
- if (equation) equations.push(equation);
4085
+ case CTRL_NWNO: {
4086
+ if (ctrl.data.length >= 10) {
4087
+ const attr = ctrl.data.readUInt32LE(4);
4088
+ const type = attr & 15;
4089
+ const num = ctrl.data.readUInt16LE(8);
4090
+ if (num > 0) ctx.doc.autoCounters.set(type, num);
3276
4091
  }
4092
+ return;
3277
4093
  }
3278
- }
3279
- flushText();
3280
- return texts.length > 0 ? texts.join("\n") : null;
3281
- }
3282
- function extractHyperlinkUrl(data) {
3283
- try {
3284
- const httpSig = Buffer.from("http", "utf16le");
3285
- const idx = data.indexOf(httpSig);
3286
- if (idx >= 0) {
3287
- let end = idx;
3288
- while (end + 1 < data.length) {
3289
- const ch = data.readUInt16LE(end);
3290
- if (ch === 0) break;
3291
- end += 2;
4094
+ case CTRL_SECD: {
4095
+ if (ctrl.data.length >= 20) {
4096
+ ctx.doc.outlineNumberingId = ctrl.data.readUInt16LE(18);
3292
4097
  }
3293
- const url = data.subarray(idx, end).toString("utf16le");
3294
- if (/^https?:\/\/.+/.test(url) && url.length < 2e3) {
3295
- return url;
4098
+ return;
4099
+ }
4100
+ case CTRL_OLE: {
4101
+ ctx.warnings.push({ page: ctx.sectionNum, message: "\uC2A4\uD0B5\uB41C OLE \uAC1C\uCCB4", code: "SKIPPED_OLE" });
4102
+ return;
4103
+ }
4104
+ // 숨은 설명/단 정의/쪽번호 위치/감추기/찾아보기/책갈피/글자겹침/덧말 — 본문 텍스트 없음 또는 의도적 스킵
4105
+ case CTRL_TCMT:
4106
+ case CTRL_COLD:
4107
+ case CTRL_PGNP:
4108
+ case CTRL_PGHD:
4109
+ case CTRL_IDXM:
4110
+ case CTRL_BOKM:
4111
+ case CTRL_TCPS:
4112
+ case CTRL_TDUT:
4113
+ case CTRL_FORM:
4114
+ return;
4115
+ default: {
4116
+ if (isFieldCtrlId(ctrl.id)) {
4117
+ applyFieldEffect(ctrl);
4118
+ return;
3296
4119
  }
4120
+ const blocks = parseListHeaderParagraphs(ctrl, records, ctx);
4121
+ if (blocks.length > 0) ctrl.afterBlocks = blocks;
3297
4122
  }
3298
- } catch {
3299
4123
  }
3300
- return null;
3301
4124
  }
3302
- function resolveCharStyle(charShapeIds, docInfo) {
3303
- if (charShapeIds.length === 0 || docInfo.charShapes.length === 0) return void 0;
3304
- const freq = /* @__PURE__ */ new Map();
3305
- let maxCount = 0, dominantId = charShapeIds[0];
3306
- for (const id of charShapeIds) {
3307
- const count = (freq.get(id) || 0) + 1;
3308
- freq.set(id, count);
3309
- if (count > maxCount) {
3310
- maxCount = count;
3311
- dominantId = id;
4125
+ function wcharAt(data, offset) {
4126
+ const code = data.readUInt16LE(offset);
4127
+ return code > 0 ? String.fromCharCode(code) : "";
4128
+ }
4129
+ function parseListHeaderParagraphs(ctrl, records, ctx) {
4130
+ if (ctx.depth >= MAX_NEST_DEPTH) return [];
4131
+ for (let i = ctrl.childStart; i < ctrl.childEnd; i++) {
4132
+ if (records[i].tagId === TAG_LIST_HEADER) {
4133
+ return parseParagraphList(records, i + 1, ctrl.childEnd, { ...ctx, depth: ctx.depth + 1 });
3312
4134
  }
3313
4135
  }
3314
- const cs = docInfo.charShapes[dominantId];
3315
- if (!cs) return void 0;
3316
- const style = {};
3317
- if (cs.fontSize > 0) style.fontSize = cs.fontSize / 10;
3318
- if (cs.attrFlags & 1) style.italic = true;
3319
- if (cs.attrFlags & 2) style.bold = true;
3320
- return style.fontSize || style.bold || style.italic ? style : void 0;
4136
+ return [];
3321
4137
  }
3322
- function parseParagraphWithTables(records, startIdx, counter) {
3323
- const startLevel = records[startIdx].level;
3324
- const textRecords = [];
3325
- const equations = [];
3326
- const tables = [];
3327
- const charShapeIds = [];
3328
- const paraHeaderData = records[startIdx].data;
3329
- const paraShapeId = paraHeaderData.length >= 10 ? paraHeaderData.readUInt16LE(8) : -1;
3330
- let i = startIdx + 1;
3331
- while (i < records.length) {
3332
- const rec = records[i];
3333
- if (rec.tagId === TAG_PARA_HEADER && rec.level <= startLevel) break;
3334
- if (rec.tagId === TAG_PARA_TEXT) {
3335
- textRecords.push(rec.data);
4138
+ function blocksPlainText(blocks, sep) {
4139
+ const parts = [];
4140
+ for (const b of blocks) {
4141
+ if (b.type === "image") continue;
4142
+ if (b.type === "table") continue;
4143
+ if (b.text) {
4144
+ let t = b.text;
4145
+ if (b.footnoteText) t += ` (\uC8FC: ${b.footnoteText})`;
4146
+ parts.push(t);
3336
4147
  }
3337
- if (rec.tagId === TAG_CHAR_SHAPE && rec.data.length >= 8) {
3338
- for (let offset = 0; offset + 7 < rec.data.length; offset += 8) {
3339
- charShapeIds.push(rec.data.readUInt32LE(offset + 4));
3340
- }
4148
+ }
4149
+ return parts.join(sep).trim();
4150
+ }
4151
+ function applyNoteEffect(ctrl, records, ctx, autoType) {
4152
+ const num = ctx.doc.autoCounters.get(autoType) ?? 1;
4153
+ let before = "";
4154
+ let after = "";
4155
+ let shape = 0;
4156
+ if (ctrl.data.length >= 12) {
4157
+ before = wcharAt(ctrl.data, 8);
4158
+ after = wcharAt(ctrl.data, 10);
4159
+ }
4160
+ if (ctrl.data.length >= 16) {
4161
+ shape = ctrl.data.readUInt32LE(12) & 255;
4162
+ }
4163
+ const formatted = formatNumber(num, shapeFormatToNumFmt(shape));
4164
+ const marker = before || after ? `${before}${formatted}${after}` : `${formatted})`;
4165
+ const content = blocksPlainText(parseListHeaderParagraphs(ctrl, records, ctx), " ");
4166
+ if ((ctx.doc.autoCounters.get(autoType) ?? 1) <= num) {
4167
+ ctx.doc.autoCounters.set(autoType, num + 1);
4168
+ }
4169
+ ctrl.inlineText = marker;
4170
+ if (content) ctrl.footnote = content.startsWith(marker) ? content : `${marker} ${content}`;
4171
+ }
4172
+ function applyHeaderFooterEffect(ctrl, records, ctx, isHeader) {
4173
+ const text = blocksPlainText(parseListHeaderParagraphs(ctrl, records, ctx), "\n");
4174
+ if (!text) return;
4175
+ const key = (isHeader ? "h:" : "f:") + text;
4176
+ if (ctx.doc.headerTexts.has(key)) return;
4177
+ ctx.doc.headerTexts.add(key);
4178
+ const block = { type: "paragraph", text, pageNumber: ctx.sectionNum };
4179
+ if (isHeader) ctx.doc.headerBlocks.push(block);
4180
+ else ctx.doc.footerBlocks.push(block);
4181
+ }
4182
+ function applyFieldEffect(ctrl) {
4183
+ if (ctrl.id === FIELD_HLK) {
4184
+ const command = parseFieldCommand(ctrl.data);
4185
+ if (command) {
4186
+ const url = hyperlinkUrlFromCommand(command);
4187
+ if (url) ctrl.href = url;
3341
4188
  }
3342
- if (rec.tagId === TAG_CTRL_HEADER && rec.data.length >= 4) {
3343
- const ctrlId = rec.data.subarray(0, 4).toString("ascii");
3344
- if (isEquationControlId(ctrlId)) {
3345
- const equation = extractEquationFromControl(records, i);
3346
- if (equation) equations.push(equation);
3347
- } else if (ctrlId === " lbt" || ctrlId === "tbl ") {
3348
- const { table, nextIdx } = parseTableBlock(records, i, counter);
3349
- if (table) tables.push(table);
3350
- i = nextIdx;
3351
- continue;
3352
- }
4189
+ }
4190
+ }
4191
+ function parseFieldCommand(data) {
4192
+ if (data.length < 11) return null;
4193
+ const cmdLen = data.readUInt16LE(9);
4194
+ if (cmdLen === 0) return null;
4195
+ const start = 11;
4196
+ const end = start + cmdLen * 2;
4197
+ if (end > data.length) return null;
4198
+ return data.subarray(start, end).toString("utf16le").replace(/\0+$/, "");
4199
+ }
4200
+ function hyperlinkUrlFromCommand(command) {
4201
+ let url = "";
4202
+ for (let i = 0; i < command.length; i++) {
4203
+ const c = command[i];
4204
+ if (c === "\\" && i + 1 < command.length) {
4205
+ url += command[i + 1];
4206
+ i++;
4207
+ continue;
3353
4208
  }
3354
- i++;
4209
+ if (c === ";") break;
4210
+ url += c;
3355
4211
  }
3356
- const text = renderTextWithEquations(textRecords, equations);
3357
- const trimmed = text.trim();
3358
- return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds, paraShapeId };
4212
+ url = url.trim();
4213
+ return url.length > 0 && url.length < 2e3 ? url : null;
3359
4214
  }
3360
- function parseTableBlock(records, startIdx, counter) {
3361
- const tableLevel = records[startIdx].level;
3362
- let i = startIdx + 1;
3363
- let rows = 0, cols = 0;
4215
+ function parseTableControl(ctrl, records, ctx) {
4216
+ if (ctx.depth >= MAX_NEST_DEPTH) return null;
4217
+ const { childStart, childEnd } = ctrl;
4218
+ let rows = 0;
4219
+ let cols = 0;
4220
+ let tableIdx = -1;
4221
+ for (let i2 = childStart; i2 < childEnd; i2++) {
4222
+ if (records[i2].tagId === TAG_TABLE && records[i2].data.length >= 8) {
4223
+ rows = Math.min(records[i2].data.readUInt16LE(4), MAX_ROWS);
4224
+ cols = Math.min(records[i2].data.readUInt16LE(6), MAX_COLS);
4225
+ tableIdx = i2;
4226
+ break;
4227
+ }
4228
+ }
4229
+ if (tableIdx < 0 || rows === 0 || cols === 0) return null;
4230
+ let caption;
4231
+ for (let i2 = childStart; i2 < tableIdx; i2++) {
4232
+ if (records[i2].tagId === TAG_LIST_HEADER) {
4233
+ const capBlocks = parseParagraphList(records, i2 + 1, tableIdx, { ...ctx, depth: ctx.depth + 1 });
4234
+ const capText = blocksPlainText(capBlocks, " ");
4235
+ if (capText) caption = capText;
4236
+ break;
4237
+ }
4238
+ }
3364
4239
  const cells = [];
3365
- while (i < records.length) {
4240
+ let i = tableIdx + 1;
4241
+ while (i < childEnd) {
3366
4242
  const rec = records[i];
3367
- if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
3368
- if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
3369
- if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
3370
- rows = Math.min(rec.data.readUInt16LE(4), MAX_ROWS);
3371
- cols = Math.min(rec.data.readUInt16LE(6), MAX_COLS);
3372
- }
3373
4243
  if (rec.tagId === TAG_LIST_HEADER) {
3374
- const { cell, nextIdx } = parseCellBlock(records, i, tableLevel, counter);
3375
- if (cell) cells.push(cell);
3376
- i = nextIdx;
4244
+ const cellLevel = rec.level;
4245
+ let j = i + 1;
4246
+ while (j < childEnd) {
4247
+ const r = records[j];
4248
+ if (r.level < cellLevel) break;
4249
+ if (r.level === cellLevel && (r.tagId === TAG_LIST_HEADER || r.tagId === TAG_TABLE)) break;
4250
+ j++;
4251
+ }
4252
+ cells.push(parseCell(records, i, j, ctx));
4253
+ i = j;
3377
4254
  continue;
3378
4255
  }
3379
4256
  i++;
3380
4257
  }
3381
- if (rows === 0 || cols === 0 || cells.length === 0) return { table: null, nextIdx: i };
4258
+ if (cells.length === 0) return null;
3382
4259
  const hasAddr = cells.some((c) => c.colAddr !== void 0 && c.rowAddr !== void 0);
3383
4260
  if (hasAddr) {
3384
4261
  const cellRows2 = arrangeCells(rows, cols, cells);
3385
- const irCells = cellRows2.map((row) => row.map((c) => ({
3386
- text: c.text.trim(),
3387
- colSpan: c.colSpan,
3388
- rowSpan: c.rowSpan
3389
- })));
3390
- return { table: { rows, cols, cells: irCells, hasHeader: rows > 1 }, nextIdx: i };
4262
+ const irCells = cellRows2.map((row) => row.map((c) => {
4263
+ const ir = { text: c.text.trim(), colSpan: c.colSpan, rowSpan: c.rowSpan };
4264
+ if (c.blocks?.length) ir.blocks = c.blocks;
4265
+ if (c.isHeader) ir.isHeader = true;
4266
+ return ir;
4267
+ }));
4268
+ const table2 = { rows, cols, cells: irCells, hasHeader: rows > 1 };
4269
+ if (caption) table2.caption = caption;
4270
+ return table2;
3391
4271
  }
3392
4272
  const cellRows = arrangeCells(rows, cols, cells);
3393
- return { table: buildTable(cellRows), nextIdx: i };
3394
- }
3395
- function parseCellBlock(records, startIdx, tableLevel, counter) {
3396
- const rec = records[startIdx];
3397
- const cellLevel = rec.level;
3398
- const texts = [];
3399
- let textRecords = [];
3400
- let equations = [];
3401
- const flushText = () => {
3402
- const text = renderTextWithEquations(textRecords, equations).trim();
3403
- if (text) texts.push(text);
3404
- textRecords = [];
3405
- equations = [];
3406
- };
4273
+ const table = buildTable(cellRows);
4274
+ if (caption && table.rows > 0) table.caption = caption;
4275
+ return table.rows > 0 ? table : null;
4276
+ }
4277
+ function parseCell(records, lhIdx, end, ctx) {
4278
+ const rec = records[lhIdx];
3407
4279
  let colSpan = 1;
3408
4280
  let rowSpan = 1;
3409
4281
  let colAddr;
3410
4282
  let rowAddr;
4283
+ let isHeader = false;
3411
4284
  if (rec.data.length >= 16) {
4285
+ isHeader = (rec.data.readUInt16LE(6) & 4) !== 0;
3412
4286
  colAddr = rec.data.readUInt16LE(8);
3413
4287
  rowAddr = rec.data.readUInt16LE(10);
3414
4288
  const cs = rec.data.readUInt16LE(12);
@@ -3416,36 +4290,30 @@ function parseCellBlock(records, startIdx, tableLevel, counter) {
3416
4290
  if (cs > 0) colSpan = Math.min(cs, MAX_COLS);
3417
4291
  if (rs > 0) rowSpan = Math.min(rs, MAX_ROWS);
3418
4292
  }
3419
- let i = startIdx + 1;
3420
- while (i < records.length) {
3421
- const r = records[i];
3422
- if (r.tagId === TAG_LIST_HEADER && r.level <= cellLevel) break;
3423
- if (r.level <= tableLevel && (r.tagId === TAG_PARA_HEADER || r.tagId === TAG_CTRL_HEADER)) break;
3424
- if (r.tagId === TAG_PARA_HEADER) {
3425
- flushText();
3426
- }
3427
- if (r.tagId === TAG_PARA_TEXT) {
3428
- textRecords.push(r.data);
3429
- }
3430
- if (r.tagId === TAG_CTRL_HEADER && r.data.length >= 4) {
3431
- const ctrlId = r.data.subarray(0, 4).toString("ascii");
3432
- if (isEquationControlId(ctrlId)) {
3433
- const equation = extractEquationFromControl(records, i);
3434
- if (equation) equations.push(equation);
3435
- } else if (ctrlId === " lbt" || ctrlId === "tbl ") {
3436
- flushText();
3437
- if (counter) {
3438
- counter.count++;
3439
- texts.push(`[\uC911\uCCA9 \uD14C\uC774\uBE14 #${counter.count}]`);
3440
- } else {
3441
- texts.push("[\uC911\uCCA9 \uD14C\uC774\uBE14]");
3442
- }
4293
+ const blocks = ctx.depth < MAX_NEST_DEPTH ? parseParagraphList(records, lhIdx + 1, end, { ...ctx, depth: ctx.depth + 1 }) : [];
4294
+ const parts = [];
4295
+ let hasStructure = false;
4296
+ for (const b of blocks) {
4297
+ if (b.type === "image" && b.text) {
4298
+ parts.push(`![image](hwp5bin:${b.text})`);
4299
+ hasStructure = true;
4300
+ } else if (b.type === "table" && b.table) {
4301
+ const flat = convertTableToText(b.table.cells);
4302
+ if (flat) parts.push(flat);
4303
+ hasStructure = true;
4304
+ } else if (b.text) {
4305
+ let t = b.text;
4306
+ if (b.footnoteText) {
4307
+ t += ` (\uC8FC: ${b.footnoteText})`;
4308
+ hasStructure = true;
3443
4309
  }
4310
+ parts.push(t);
3444
4311
  }
3445
- i++;
3446
4312
  }
3447
- flushText();
3448
- return { cell: { text: texts.join("\n"), colSpan, rowSpan, colAddr, rowAddr }, nextIdx: i };
4313
+ const cell = { text: parts.join("\n"), colSpan, rowSpan, colAddr, rowAddr };
4314
+ if (hasStructure && blocks.length > 0) cell.blocks = blocks;
4315
+ if (isHeader) cell.isHeader = true;
4316
+ return cell;
3449
4317
  }
3450
4318
  function arrangeCells(rows, cols, cells) {
3451
4319
  const grid = Array.from({ length: rows }, () => Array(cols).fill(null));
@@ -3483,6 +4351,78 @@ function arrangeCells(rows, cols, cells) {
3483
4351
  }
3484
4352
  return grid.map((row) => row.map((c) => c || { text: "", colSpan: 1, rowSpan: 1 }));
3485
4353
  }
4354
/**
 * Collects the blocks contained in one drawing-object control.
 *
 * Scans the control's child records `[childStart, childEnd)` and returns, in
 * order: the paragraph list that precedes the first shape-component record
 * (if any), every picture record that converts to an image block, and the
 * paragraph list that follows the shape component (if any).
 *
 * @param {{childStart: number, childEnd: number}} ctrl - child record range.
 * @param {Array<{tagId: number, data: *}>} records - flat record list.
 * @param {object} ctx - parse context; `ctx.depth` bounds the nesting.
 * @returns {Array<object>} parsed blocks; empty once `MAX_NEST_DEPTH` is reached.
 */
function parseGsoControl(ctrl, records, ctx) {
  if (ctx.depth >= MAX_NEST_DEPTH) return [];
  const { childStart, childEnd } = ctrl;

  // First index in [from, to) whose tag satisfies `accepts`, or -1.
  const findRecord = (from, to, accepts) => {
    for (let idx = from; idx < to; idx++) {
      if (accepts(records[idx].tagId)) return idx;
    }
    return -1;
  };
  const isListHeader = (tag) => tag === TAG_LIST_HEADER;
  const isShapeComponent = (tag) => tag === TAG_SHAPE_COMPONENT || tag === TAG_SHAPE_COMPONENT_CONTAINER;
  // Nested paragraph lists are always parsed one level deeper.
  const parseNested = (from, to) => parseParagraphList(records, from, to, { ...ctx, depth: ctx.depth + 1 });

  const collected = [];
  const shapeIdx = findRecord(childStart, childEnd, isShapeComponent);

  // A list header located before the shape component owns the paragraphs up
  // to that component (presumably the object's caption — confirm).
  if (shapeIdx > childStart) {
    const leadingListIdx = findRecord(childStart, shapeIdx, isListHeader);
    if (leadingListIdx >= 0) {
      collected.push(...parseNested(leadingListIdx + 1, shapeIdx));
    }
  }

  const scanStart = shapeIdx >= 0 ? shapeIdx + 1 : childStart;
  const textListIdx = findRecord(scanStart, childEnd, isListHeader);

  // Pictures are only looked for before the trailing text list starts.
  const pictureEnd = textListIdx >= 0 ? textListIdx : childEnd;
  for (let idx = scanStart; idx < pictureEnd; idx++) {
    if (records[idx].tagId !== TAG_SHAPE_COMPONENT_PICTURE) continue;
    const image = pictureToImageBlock(records[idx].data, ctx);
    if (image) collected.push(image);
  }

  if (textListIdx >= 0) {
    collected.push(...parseNested(textListIdx + 1, childEnd));
  }
  return collected;
}
4394
/**
 * Converts a picture record payload into an image block.
 *
 * Reads a 16-bit little-endian binary-data id at byte offset 71 and looks up
 * the 1-based entry in `ctx.docInfo.binData` when document info is present.
 *
 * @param {Buffer} data - picture record payload.
 * @param {object} ctx - parse context (`docInfo`, `warnings`, `sectionNum`).
 * @returns {{type: "image", text: string, pageNumber: *}|null} the image
 *   block, or `null` when the payload is shorter than 73 bytes, the id is 0,
 *   or the entry is an external link (a warning is pushed in that case).
 */
function pictureToImageBlock(data, ctx) {
  // Offset 71 + 2 bytes must be readable.
  if (data.length < 73) return null;
  const binDataId = data.readUInt16LE(71);
  if (binDataId === 0) return null;

  const entry = ctx.docInfo?.binData[binDataId - 1];
  if (entry?.kind === "link") {
    ctx.warnings.push({
      page: ctx.sectionNum,
      message: `\uC678\uBD80 \uC5F0\uACB0 \uC774\uBBF8\uC9C0 (binDataId ${binDataId})`,
      code: "SKIPPED_IMAGE"
    });
    return null;
  }

  // Prefer the entry's positive storage id; otherwise fall back to the raw id.
  let storageId = binDataId;
  if (entry && entry.storageId > 0) {
    storageId = entry.storageId;
  }
  return { type: "image", text: String(storageId), pageNumber: ctx.sectionNum };
}
4406
/**
 * Derives a style object from the most frequent character-shape id.
 *
 * The id that first reaches the highest occurrence count wins. The matching
 * `docInfo.charShapes` entry contributes `fontSize` (stored value divided by
 * 10), `italic` (bit 0 of `attrFlags`) and `bold` (bit 1 of `attrFlags`).
 *
 * @param {number[]} charShapeIds - shape ids used by the text.
 * @param {{charShapes: Array<{fontSize: number, attrFlags: number}>}} docInfo
 * @returns {{fontSize?: number, italic?: boolean, bold?: boolean}|undefined}
 *   `undefined` when there is nothing to resolve or no style flag is set.
 */
function resolveCharStyle(charShapeIds, docInfo) {
  if (charShapeIds.length === 0 || docInfo.charShapes.length === 0) return undefined;

  const tally = new Map();
  let dominantId = charShapeIds[0];
  let bestCount = 0;
  for (const shapeId of charShapeIds) {
    const seen = (tally.get(shapeId) || 0) + 1;
    tally.set(shapeId, seen);
    // Strictly greater: on a tie the id that got there first is kept.
    if (seen > bestCount) {
      bestCount = seen;
      dominantId = shapeId;
    }
  }

  const shape = docInfo.charShapes[dominantId];
  if (!shape) return undefined;

  const style = {};
  if (shape.fontSize > 0) style.fontSize = shape.fontSize / 10;
  if (shape.attrFlags & 1) style.italic = true;
  if (shape.attrFlags & 2) style.bold = true;

  const hasAnyStyle = style.fontSize || style.bold || style.italic;
  return hasAnyStyle ? style : undefined;
}
3486
4426
 
3487
4427
  // src/hwp3/parser.ts
3488
4428
  import { inflateRawSync as inflateRawSync3 } from "zlib";
@@ -15571,7 +16511,7 @@ function parseHwp3Document(buffer, _options) {
15571
16511
  const ctx = { paragraphs: [], warnings };
15572
16512
  try {
15573
16513
  skipFontFacesAndStyles(bodyReader);
15574
- parseParagraphList(bodyReader, ctx);
16514
+ parseParagraphList2(bodyReader, ctx);
15575
16515
  } catch (err) {
15576
16516
  warnings.push({
15577
16517
  code: "PARTIAL_PARSE",
@@ -15603,7 +16543,7 @@ function skipFontFacesAndStyles(reader) {
15603
16543
  const nStyles = reader.readU16();
15604
16544
  reader.skip(nStyles * STYLE_RECORD_SIZE);
15605
16545
  }
15606
- function parseParagraphList(reader, ctx) {
16546
+ function parseParagraphList2(reader, ctx) {
15607
16547
  for (; ; ) {
15608
16548
  if (reader.eof()) return;
15609
16549
  const followPrev = reader.readU8();
@@ -15688,17 +16628,17 @@ function parseCharStream(reader, charCount, ctx) {
15688
16628
  break;
15689
16629
  case 15: {
15690
16630
  reader.skip(8);
15691
- parseParagraphList(reader, ctx);
16631
+ parseParagraphList2(reader, ctx);
15692
16632
  break;
15693
16633
  }
15694
16634
  case 16: {
15695
16635
  reader.skip(10);
15696
- parseParagraphList(reader, ctx);
16636
+ parseParagraphList2(reader, ctx);
15697
16637
  break;
15698
16638
  }
15699
16639
  case 17: {
15700
16640
  reader.skip(14);
15701
- parseParagraphList(reader, ctx);
16641
+ parseParagraphList2(reader, ctx);
15702
16642
  break;
15703
16643
  }
15704
16644
  case 29:
@@ -15728,9 +16668,9 @@ function parseTableLike(reader, ctx) {
15728
16668
  }
15729
16669
  reader.skip(27 * cellCount);
15730
16670
  for (let i = 0; i < cellCount; i++) {
15731
- parseParagraphList(reader, ctx);
16671
+ parseParagraphList2(reader, ctx);
15732
16672
  }
15733
- parseParagraphList(reader, ctx);
16673
+ parseParagraphList2(reader, ctx);
15734
16674
  return "";
15735
16675
  }
15736
16676
  function parsePicture(reader, _ctx) {
@@ -17194,7 +18134,7 @@ function extractRun(r) {
17194
18134
  }
17195
18135
  return { text, bold, italic };
17196
18136
  }
17197
- function parseParagraph(p, styles, numbering, footnotes, rels) {
18137
+ function parseParagraph2(p, styles, numbering, footnotes, rels) {
17198
18138
  const pPrEls = getChildElements(p, "pPr");
17199
18139
  let styleId = "";
17200
18140
  let numId = "";
@@ -17310,7 +18250,7 @@ function parseTable(tbl, styles, numbering, footnotes, rels) {
17310
18250
  const cellTexts = [];
17311
18251
  const pElements = getChildElements(tc, "p");
17312
18252
  for (const p of pElements) {
17313
- const block = parseParagraph(p, styles, numbering, footnotes, rels);
18253
+ const block = parseParagraph2(p, styles, numbering, footnotes, rels);
17314
18254
  if (block?.text) cellTexts.push(block.text);
17315
18255
  }
17316
18256
  row.push({ text: cellTexts.join("\n"), colSpan, rowSpan });
@@ -17439,7 +18379,7 @@ async function parseDocxDocument(buffer, options) {
17439
18379
  const el = node;
17440
18380
  const localName3 = el.localName ?? el.tagName?.split(":").pop();
17441
18381
  if (localName3 === "p") {
17442
- const block = parseParagraph(el, styles, numbering, footnotes, rels);
18382
+ const block = parseParagraph2(el, styles, numbering, footnotes, rels);
17443
18383
  if (block) blocks.push(block);
17444
18384
  } else if (localName3 === "tbl") {
17445
18385
  const block = parseTable(el, styles, numbering, footnotes, rels);
@@ -17579,7 +18519,7 @@ function walkContent(node, blocks, paraShapeMap, sectionNum, warnings, inHeaderF
17579
18519
  }
17580
18520
  if (tag === "P") {
17581
18521
  if (!inHeaderFooter) {
17582
- parseParagraph2(el, blocks, paraShapeMap, sectionNum);
18522
+ parseParagraph3(el, blocks, paraShapeMap, sectionNum);
17583
18523
  }
17584
18524
  continue;
17585
18525
  }
@@ -17596,7 +18536,7 @@ function walkContent(node, blocks, paraShapeMap, sectionNum, warnings, inHeaderF
17596
18536
  walkContent(el, blocks, paraShapeMap, sectionNum, warnings, inHeaderFooter, depth + 1);
17597
18537
  }
17598
18538
  }
17599
- function parseParagraph2(el, blocks, paraShapeMap, sectionNum) {
18539
+ function parseParagraph3(el, blocks, paraShapeMap, sectionNum) {
17600
18540
  const paraShapeId = el.getAttribute("ParaShape") ?? "";
17601
18541
  const shapeInfo = paraShapeMap.get(paraShapeId);
17602
18542
  const text = extractParagraphText(el);
@@ -19041,6 +19981,1490 @@ function diffTableCells(a, b) {
19041
19981
  return result;
19042
19982
  }
19043
19983
 
19984
+ // src/roundtrip/patcher.ts
19985
+ import JSZip7 from "jszip";
19986
+
19987
+ // src/roundtrip/source-map.ts
19988
/**
 * Escapes the three characters that are special in XML text content.
 *
 * @param {string} text - raw text.
 * @returns {string} text with `&`, `<` and `>` replaced by entities.
 */
function escapeXmlText(text) {
  return text.replace(/[&<>]/g, (ch) => {
    if (ch === "&") return "&amp;";
    return ch === "<" ? "&lt;" : "&gt;";
  });
}
19991
/**
 * Decodes the five predefined XML entities and numeric character references.
 *
 * Decimal references accept decimal digits only and hexadecimal references
 * require the lowercase `x` marker, so a malformed reference such as `&#1a;`
 * is left untouched instead of being decoded from its numeric prefix.
 * References above U+10FFFF and unknown named entities are also returned
 * unchanged. The input is processed in a single pass, so `&amp;lt;` decodes
 * to `&lt;`, not `<`.
 *
 * @param {string} text - XML text that may contain entity references.
 * @returns {string} decoded text.
 */
function decodeXmlEntities(text) {
  const named = { lt: "<", gt: ">", amp: "&", quot: '"', apos: "'" };
  return text.replace(/&(?:(lt|gt|amp|quot|apos)|#x([0-9a-fA-F]+)|#([0-9]+));/g, (whole, name, hex, dec) => {
    if (name !== undefined) return named[name];
    const code = hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);
    // The digits are validated by the pattern, so only the upper bound can fail
    // (an over-long digit run parses to a huge number or Infinity).
    return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
  });
}
20013
/**
 * Turns the raw inner XML of a text element into plain text.
 *
 * Inline spacing/break tags (tab, fwSpace, hwSpace, br, lineBreak; opening,
 * closing or self-closing, with any namespace prefix) become one space each,
 * every other tag is removed, and entities are decoded last.
 *
 * @param {string} raw - inner XML of the text element.
 * @returns {string} plain text.
 */
function tContentToText(raw) {
  const spacingTag = /<\/?(?:[A-Za-z0-9_]+:)?(?:tab|fwSpace|hwSpace|br|lineBreak)(?:\s[^>]*)?\/?>/g;
  const anyTag = /<[^>]*>/g;
  const withSpaces = raw.replace(spacingTag, " ");
  const withoutTags = withSpaces.replace(anyTag, "");
  return decodeXmlEntities(withoutTags);
}
20018
// Tokenizer for section XML. Alternatives, in order: comment, CDATA section,
// processing instruction, `<!...>` declaration, closing tag (group 1 = name),
// opening tag (group 2 = name, group 3 = raw attributes, group 4 = "/" when
// self-closing). Global flag: callers must reset `lastIndex` before a scan.
var TAG_RE = /<!--[\s\S]*?-->|<!\[CDATA\[[\s\S]*?\]\]>|<\?[\s\S]*?\?>|<!(?:"[^"]*"|'[^']*'|[^>"'])*>|<\/([^\s>]+)\s*>|<([^\s/>!?]+)((?:"[^"]*"|'[^']*'|[^>"'])*?)(\/?)>/g;

// Local element names that stop the search for the paragraph owning a text node.
var T_BARRIER = new Set([
  "tbl", "ctrl", "caption", "pic", "shape",
  "drawingObject", "drawText", "shapeComment",
  "memogroup", "memo", "hiddenComment",
  "equation", "parameters", "subList", "p"
]);

// Local element names that act as paragraph containers when classifying a paragraph.
var PARA_CONTAINER = new Set([
  "tc", "ctrl", "caption", "drawText", "pic", "shape",
  "drawingObject", "memogroup", "memo", "hiddenComment",
  // Footnotes/endnotes — the parser absorbs them only into the host block's footnoteText.
  "footNote", "endNote", "fn", "en"
]);

// Local element names whose presence on the stack makes a table non-top-level.
var TABLE_BARRIER = new Set([
  "tbl", "ctrl", "caption",
  "memogroup", "memo", "hiddenComment"
]);
20061
/**
 * Returns the part of a qualified name after the first colon.
 *
 * @param {string} qname - element name, optionally `prefix:local`.
 * @returns {string} the local part, or `qname` itself when it has no colon.
 */
function localOf(qname) {
  const colon = qname.indexOf(":");
  if (colon < 0) return qname;
  return qname.slice(colon + 1);
}
20065
/**
 * Returns the part of a qualified name before the first colon.
 *
 * @param {string} qname - element name, optionally `prefix:local`.
 * @returns {string} the prefix, or an empty string when there is no colon.
 */
function prefixOf(qname) {
  const colon = qname.indexOf(":");
  if (colon < 0) return "";
  return qname.slice(0, colon);
}
20069
/**
 * Scans one section's XML with a tag tokenizer and records where paragraphs,
 * text ranges and tables sit in the source string.
 *
 * The scan keeps a stack of open elements and parallel stacks for paragraphs,
 * tables, rows and cells. Offsets stored on the returned records index into
 * `xml`.
 *
 * @param {string} xml - section XML.
 * @param {number} sectionIndex - copied onto every paragraph/table record.
 * @returns {{sectionIndex: number, xml: string, bodyParagraphs: Array, tables: Array,
 *   headerTexts: string[], footerTexts: string[]}} scan result.
 */
function scanSectionXml(xml, sectionIndex) {
  // Open-element stack; each frame is { local, qname, contentStart }.
  const stack = [];
  const bodyParagraphs = [];
  const tables = [];
  const headerTexts = [];
  const footerTexts = [];
  const paraStack = [];
  const tableStack = [];
  // One "row under construction" array per open table.
  const rowStack = [];
  const cellStack = [];
  // Set while inside a non-self-closing text element owned by a paragraph.
  let pendingT = null;
  // Open header/footer collectors (only those found under a ctrl element).
  const ctrlSubStack = [];
  // Classifies a paragraph by walking the open elements from innermost outward:
  // a cell wins immediately, drawText is remembered and skipped, any other
  // container excludes the paragraph.
  const classifyPara = () => {
    let sawDrawText = false;
    for (let i = stack.length - 1; i >= 0; i--) {
      const l = stack[i].local;
      if (l === "tc") return "cell";
      if (l === "drawText") {
        sawDrawText = true;
        continue;
      }
      if (PARA_CONTAINER.has(l)) return "excluded";
    }
    return sawDrawText ? "draw" : "body";
  };
  // Returns the innermost open paragraph if a `p` frame is reached before any
  // other barrier element; otherwise null.
  const owningPara = () => {
    if (paraStack.length === 0) return null;
    for (let i = stack.length - 1; i >= 0; i--) {
      const l = stack[i].local;
      if (l === "p") return paraStack[paraStack.length - 1];
      if (T_BARRIER.has(l)) return null;
    }
    return null;
  };
  // A table is top-level when no table barrier element is currently open.
  const isTableTopLevel = () => {
    for (let i = stack.length - 1; i >= 0; i--) {
      if (TABLE_BARRIER.has(stack[i].local)) return false;
    }
    return true;
  };
  const currentCtrlSub = () => ctrlSubStack.length > 0 ? ctrlSubStack[ctrlSubStack.length - 1] : null;
  // TAG_RE is a shared global regex, so its cursor must be reset before use.
  TAG_RE.lastIndex = 0;
  let m;
  while ((m = TAG_RE.exec(xml)) !== null) {
    const [full, closeName, openName, , selfClose] = m;
    // Neither group set: comment, CDATA, processing instruction or declaration.
    if (closeName === void 0 && openName === void 0) continue;
    if (closeName !== void 0) {
      // ---- closing tag ----
      const local2 = localOf(closeName);
      if (local2 === "t" && pendingT) {
        // Record the text element's content range and append its plain text.
        const { para, contentStart: contentStart2 } = pendingT;
        para.tRanges.push({ contentStart: contentStart2, contentEnd: m.index });
        para.text += tContentToText(xml.slice(contentStart2, m.index));
        pendingT = null;
      }
      // Unwind to the nearest frame with the same local name, discarding it and
      // anything opened after it; a closing tag with no matching frame is ignored.
      for (let i = stack.length - 1; i >= 0; i--) {
        if (stack[i].local === local2) {
          stack.length = i;
          break;
        }
      }
      if (local2 === "p") {
        const para = paraStack.pop();
        if (para && para.kind === "excluded") {
          // Excluded paragraphs only contribute text to an open header/footer collector.
          const sub = currentCtrlSub();
          if (sub && para.text.trim()) sub.texts.push(para.text);
        }
      } else if (local2 === "tc") {
        const cell = cellStack.pop();
        const row = rowStack[rowStack.length - 1];
        if (cell && row) row.push(cell);
      } else if (local2 === "tr") {
        const row = rowStack[rowStack.length - 1];
        const table = tableStack[tableStack.length - 1];
        // Empty rows are dropped; the working row is reset either way.
        if (row && table && row.length > 0) table.rows.push(row);
        if (rowStack.length > 0) rowStack[rowStack.length - 1] = [];
      } else if (local2 === "tbl") {
        const table = tableStack.pop();
        rowStack.pop();
        if (table) {
          finalizeTable(table);
          if (!table.topLevel) {
            // Non-top-level tables are attached to the innermost open cell, if any.
            const cell = cellStack[cellStack.length - 1];
            if (cell) cell.tables.push(table);
          }
        }
      } else if (local2 === "header" || local2 === "footer") {
        const sub = ctrlSubStack[ctrlSubStack.length - 1];
        if (sub) {
          ctrlSubStack.pop();
          const joined = sub.texts.join("\n").trim();
          if (joined) (sub.kind === "header" ? headerTexts : footerTexts).push(joined);
        }
      }
      continue;
    }
    // ---- opening or self-closing tag ----
    const qname = openName;
    const local = localOf(qname);
    const attrsRaw = m[3] || "";
    const isSelfClose = selfClose === "/";
    const contentStart = m.index + full.length;
    if (isSelfClose) {
      // Self-closing elements never enter the stack.
      if (local === "t") {
        // An empty text element: the range covers the whole tag so it can be replaced.
        const para = owningPara();
        if (para) para.tRanges.push({ contentStart: m.index, contentEnd: m.index + full.length, selfClosing: true, prefix: prefixOf(qname) });
      } else if (local === "tab" || local === "fwSpace" || local === "hwSpace" || local === "br" || local === "lineBreak") {
        // Inside an open text element the space is produced later by tContentToText.
        if (!pendingT) {
          const para = owningPara();
          if (para) para.text += " ";
        }
      } else if (local === "run" || local === "r") {
        // Remember only the first self-closing run of the paragraph.
        const para = owningPara();
        if (para && !para.selfCloseRun) para.selfCloseRun = { start: m.index, end: m.index + full.length };
      } else if (local === "cellAddr") {
        const cell = cellStack[cellStack.length - 1];
        if (cell && insideCurrentTable(stack, tableStack)) {
          const ca = parseInt(getAttr2(attrsRaw, "colAddr") || "", 10);
          const ra = parseInt(getAttr2(attrsRaw, "rowAddr") || "", 10);
          if (!isNaN(ca)) cell.colAddr = ca;
          if (!isNaN(ra)) cell.rowAddr = ra;
        }
      } else if (local === "cellSpan") {
        const cell = cellStack[cellStack.length - 1];
        if (cell && insideCurrentTable(stack, tableStack)) {
          const cs = parseInt(getAttr2(attrsRaw, "colSpan") || "1", 10);
          const rs = parseInt(getAttr2(attrsRaw, "rowSpan") || "1", 10);
          // Missing, unparsable or non-positive spans fall back to 1.
          cell.colSpan = isNaN(cs) || cs < 1 ? 1 : cs;
          cell.rowSpan = isNaN(rs) || rs < 1 ? 1 : rs;
        }
      }
      continue;
    }
    if (local === "t") {
      // Ownership is decided before the text frame itself is pushed.
      const para = owningPara();
      if (para) pendingT = { para, contentStart };
      stack.push({ local, qname, contentStart });
      continue;
    }
    stack.push({ local, qname, contentStart });
    if (local === "p") {
      const para = {
        sectionIndex,
        kind: "excluded",
        // Classification uses the stack as it is right after the push, excluding this element itself.
        start: m.index,
        tRanges: [],
        text: ""
      };
      // Temporarily remove this paragraph's own frame so it is not part of the walk.
      stack.pop();
      para.kind = classifyPara();
      stack.push({ local, qname, contentStart });
      paraStack.push(para);
      if (para.kind === "body" || para.kind === "draw") bodyParagraphs.push(para);
      else if (para.kind === "cell") {
        const cell = cellStack[cellStack.length - 1];
        if (cell) cell.paragraphs.push(para);
      }
    } else if (local === "run" || local === "r") {
      // The first run's namespace prefix is remembered on the paragraph.
      const para = owningPara();
      if (para && para.runPrefix === void 0) para.runPrefix = prefixOf(qname);
    } else if (local === "tbl") {
      const table = {
        sectionIndex,
        start: m.index,
        topLevel: false,
        rows: [],
        cellByAnchor: /* @__PURE__ */ new Map()
      };
      // Same trick as for paragraphs: test the barriers without this table's own frame.
      stack.pop();
      table.topLevel = isTableTopLevel();
      stack.push({ local, qname, contentStart });
      tableStack.push(table);
      rowStack.push([]);
      if (table.topLevel) tables.push(table);
    } else if (local === "tr") {
      if (rowStack.length > 0) rowStack[rowStack.length - 1] = [];
    } else if (local === "tc") {
      cellStack.push({ colSpan: 1, rowSpan: 1, paragraphs: [], tables: [] });
    } else if (local === "cellAddr" || local === "cellSpan") {
      // Non-self-closing form of the same two elements handled above.
      const cell = cellStack[cellStack.length - 1];
      if (cell && insideCurrentTable(stack, tableStack)) {
        if (local === "cellAddr") {
          const ca = parseInt(getAttr2(attrsRaw, "colAddr") || "", 10);
          const ra = parseInt(getAttr2(attrsRaw, "rowAddr") || "", 10);
          if (!isNaN(ca)) cell.colAddr = ca;
          if (!isNaN(ra)) cell.rowAddr = ra;
        } else {
          const cs = parseInt(getAttr2(attrsRaw, "colSpan") || "1", 10);
          const rs = parseInt(getAttr2(attrsRaw, "rowSpan") || "1", 10);
          cell.colSpan = isNaN(cs) || cs < 1 ? 1 : cs;
          cell.rowSpan = isNaN(rs) || rs < 1 ? 1 : rs;
        }
      }
    } else if (local === "header" || local === "footer") {
      // Only header/footer elements that have a ctrl ancestor start a collector.
      if (stack.some((f) => f.local === "ctrl")) {
        ctrlSubStack.push({ kind: local, texts: [] });
      }
    } else if (local === "tab" || local === "fwSpace" || local === "hwSpace" || local === "br" || local === "lineBreak") {
      // NOTE(review): unlike the self-closing branch, this does not check pendingT — confirm intended.
      const para = owningPara();
      if (para) para.text += " ";
    }
  }
  // Second pass: compute run insert positions for body paragraphs and for every
  // paragraph reachable through table cells.
  for (const para of bodyParagraphs) fillRunInsertPos(para, xml);
  const fillTableInsertPos = (table, depth = 0) => {
    // Recursion guard for nested tables.
    if (depth > 16) return;
    for (const row of table.rows) {
      for (const cell of row) {
        for (const para of cell.paragraphs) fillRunInsertPos(para, xml);
        for (const nested of cell.tables) fillTableInsertPos(nested, depth + 1);
      }
    }
  };
  for (const table of tables) fillTableInsertPos(table);
  return { sectionIndex, xml, bodyParagraphs, tables, headerTexts, footerTexts };
}
20283
/**
 * Extracts one attribute value from the raw attribute text of a tag.
 *
 * The attribute name is matched literally: regex metacharacters in `name`
 * (for example the `.` that XML names may contain) are escaped before the
 * pattern is built, so `a.b` can no longer match an attribute named `axb`.
 *
 * @param {string} attrsRaw - raw attribute text, e.g. ` colAddr="1" rowAddr="2"`.
 * @param {string} name - attribute name to look up.
 * @returns {string|undefined} the value without quotes, or `undefined` when
 *   the attribute is not present.
 */
function getAttr2(attrsRaw, name) {
  const literalName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // The name must start the string or follow whitespace, so `rowAddr` does not
  // match inside `xrowAddr`.
  const re = new RegExp(`(?:^|\\s)${literalName}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`);
  const m = attrsRaw.match(re);
  return m ? m[1] ?? m[2] : void 0;
}
20288
/**
 * Tells whether the innermost table-related open element is a cell.
 *
 * Walks the open-element stack from the top: reaching a `tc` frame first
 * yields `true`, reaching a `tbl` frame first yields `false`.
 *
 * @param {Array<{local: string}>} stack - open-element stack.
 * @param {Array} tableStack - open tables; empty means no table is open.
 * @returns {boolean} `true` only when a `tc` is found before any `tbl`.
 */
function insideCurrentTable(stack, tableStack) {
  if (tableStack.length === 0) return false;
  let depth = stack.length;
  while (depth-- > 0) {
    const tag = stack[depth].local;
    if (tag === "tc" || tag === "tbl") return tag === "tc";
  }
  return false;
}
20297
/**
 * For a paragraph without any text range, records where a new text element
 * can be inserted: just before the closing tag of the paragraph's first run.
 *
 * The closing tag is located by balancing the run element itself, so a run
 * that wraps nested content with its own runs (for example a table) yields
 * the position of the outer run's closing tag rather than the first nested
 * one. If balancing fails, the first textual closing tag is used as before.
 *
 * Sets `para.runInsertPos` and `para.runPrefix`; leaves `para` untouched when
 * it already has text ranges, when the paragraph end cannot be found, or when
 * the first run is missing or self-closing.
 *
 * @param {{start: number, tRanges: Array, runInsertPos?: number, runPrefix?: string}} para
 * @param {string} xml - the section XML `para.start` indexes into.
 * @returns {void}
 */
function fillRunInsertPos(para, xml) {
  if (para.tRanges.length > 0) return;
  const pEnd = findElementEnd(xml, para.start);
  if (pEnd < 0) return;
  const slice = xml.slice(para.start, pEnd);
  const runOpen = slice.match(/<((?:[A-Za-z0-9_]+:)?run)(?:\s(?:"[^"]*"|'[^']*'|[^>"'])*?)?(\/?)>/);
  if (!runOpen || runOpen.index === void 0) return;
  if (runOpen[2] === "/") return;
  const qname = runOpen[1];
  const runStart = para.start + runOpen.index;

  let closeStart = -1;
  const runEnd = findElementEnd(xml, runStart);
  if (runEnd > runStart && runEnd <= pEnd) {
    // runEnd points just past the closing tag; step back to its "</".
    closeStart = xml.lastIndexOf("</", runEnd - 1);
  }
  if (closeStart <= runStart) {
    // Fallback: first textual closing tag after the run's opening tag.
    const closeIdx = slice.indexOf(`</${qname}>`, runOpen.index);
    if (closeIdx < 0) return;
    closeStart = para.start + closeIdx;
  }
  para.runInsertPos = closeStart;
  para.runPrefix = prefixOf(qname);
}
20311
/**
 * Finds the end offset of the element whose opening tag starts at `start`.
 *
 * Same-named nested elements are balanced. A self-closing start element ends
 * at its own `/>` instead of being skipped. The element name is matched
 * literally (regex metacharacters are escaped), and the name is read with a
 * sticky regex so the rest of the document is not copied on every call.
 *
 * @param {string} xml - document text.
 * @param {number} start - offset of the `<` that opens the element.
 * @returns {number} offset just past the element's closing tag (or past the
 *   self-closing tag), or -1 when no tag starts at `start` or the element is
 *   never closed.
 */
function findElementEnd(xml, start) {
  const nameRe = /<([^\s/>!?]+)/y;
  nameRe.lastIndex = start;
  const open = nameRe.exec(xml);
  if (!open) return -1;
  const qname = open[1].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Alternative 1: opening/self-closing tag of the same name (group 1 = "/" when
  // self-closing). Alternative 2: closing tag of the same name.
  const re = new RegExp(`<${qname}(?=[\\s/>])(?:"[^"]*"|'[^']*'|[^>"'])*?(/?)>|</${qname}\\s*>`, "g");
  re.lastIndex = start;
  let depth = 0;
  let mm;
  while ((mm = re.exec(xml)) !== null) {
    if (mm[0].startsWith("</")) {
      depth--;
      if (depth === 0) return mm.index + mm[0].length;
    } else if (mm[1] !== "/") {
      depth++;
    } else if (depth === 0) {
      // The start element itself is self-closing: it ends right here.
      return mm.index + mm[0].length;
    }
  }
  return -1;
}
20329
/**
 * Fills `table.cellByAnchor` with one `"row,col"` key per addressed cell.
 *
 * When at least one cell already carries both `rowAddr` and `colAddr`, only
 * the cells that carry both are registered and nothing is computed.
 * Otherwise addresses are derived from row order: each cell takes the first
 * column of its row not already covered by an earlier cell's span, and the
 * computed `rowAddr`/`colAddr` are written back onto the cell.
 *
 * @param {{rows: Array<Array<object>>, cellByAnchor: Map<string, object>}} table
 * @returns {void}
 */
function finalizeTable(table) {
  const isAddressed = (cell) => cell.colAddr !== void 0 && cell.rowAddr !== void 0;

  if (table.rows.some((row) => row.some(isAddressed))) {
    for (const row of table.rows) {
      for (const cell of row) {
        if (!isAddressed(cell)) continue;
        table.cellByAnchor.set(`${cell.rowAddr},${cell.colAddr}`, cell);
      }
    }
    return;
  }

  const rowCount = table.rows.length;
  // taken[r][c] is true once some cell's span covers grid position (r, c).
  const taken = Array.from({ length: rowCount }, () => []);
  for (let r = 0; r < rowCount; r++) {
    let c = 0;
    for (const cell of table.rows[r]) {
      while (taken[r][c]) c++;
      cell.rowAddr = r;
      cell.colAddr = c;
      table.cellByAnchor.set(`${r},${c}`, cell);
      // Row spans are clipped to the table's actual row count.
      const rowLimit = Math.min(r + cell.rowSpan, rowCount);
      const colLimit = c + cell.colSpan;
      for (let rr = r; rr < rowLimit; rr++) {
        for (let cc = c; cc < colLimit; cc++) {
          taken[rr][cc] = true;
        }
      }
      c += cell.colSpan;
    }
  }
}
20359
/**
 * Builds the text splices that make a scanned paragraph read `newText`.
 *
 * Strategies, tried in order:
 *  1. The paragraph has text ranges: the first one receives the escaped text
 *     (a self-closing text element is expanded into an open/close pair) and
 *     every later non-empty, non-self-closing range is emptied.
 *  2. A run insert position is known: a new text element is inserted there.
 *  3. A self-closing run was seen and `xml` is given: the run is reopened
 *     around a new text element.
 * Strategies 2 and 3 produce no splice for empty `newText`.
 *
 * @param {object} para - scanned paragraph record.
 * @param {string} newText - replacement text (unescaped).
 * @param {string} [xml] - section XML; required only for strategy 3.
 * @returns {Array<{start: number, end: number, replacement: string}>|null}
 *   splices to apply, `[]` when nothing has to change, or `null` when the
 *   text cannot be placed.
 */
function buildParagraphSplices(para, newText, xml) {
  const escaped = escapeXmlText(newText);
  const textElement = (nsPrefix) => `<${nsPrefix}t>${escaped}</${nsPrefix}t>`;
  const withColon = (prefix) => (prefix ? prefix + ":" : "");

  if (para.tRanges.length > 0) {
    const [head, ...others] = para.tRanges;
    const headReplacement = head.selfClosing ? textElement(withColon(head.prefix)) : escaped;
    const splices = [{ start: head.contentStart, end: head.contentEnd, replacement: headReplacement }];
    for (const range of others) {
      if (range.selfClosing || range.contentStart >= range.contentEnd) continue;
      splices.push({ start: range.contentStart, end: range.contentEnd, replacement: "" });
    }
    return splices;
  }

  if (para.runInsertPos !== void 0) {
    if (!newText) return [];
    const at = para.runInsertPos;
    return [{ start: at, end: at, replacement: textElement(withColon(para.runPrefix)) }];
  }

  if (para.selfCloseRun && xml) {
    if (!newText) return [];
    const { start, end } = para.selfCloseRun;
    const tag = xml.slice(start, end);
    const nameMatch = tag.match(/^<([^\s/>]+)/);
    if (!nameMatch || !tag.endsWith("/>")) return null;
    const qname = nameMatch[1];
    const colonAt = qname.indexOf(":");
    const nsPrefix = colonAt >= 0 ? qname.slice(0, colonAt) + ":" : "";
    // Drop the trailing "/>" (and any space before it) to turn the tag into an opening tag.
    const opened = tag.slice(0, tag.length - 2).trimEnd() + ">";
    return [{ start, end, replacement: `${opened}${textElement(nsPrefix)}</${qname}>` }];
  }

  // No place to put text: fine when there is no text, a failure otherwise.
  return newText ? null : [];
}
20397
/**
 * Applies non-overlapping splices to a string.
 *
 * Splices are ordered by `start` on a copy of the input array and applied
 * from the last to the first, so earlier offsets stay valid while later
 * parts of the string change length.
 *
 * @param {string} xml - source text.
 * @param {Array<{start: number, end: number, replacement: string}>} splices
 * @returns {string} the patched text.
 * @throws {Error} when a splice starts before the previous one ends.
 */
function applySplices(xml, splices) {
  const ordered = splices.slice().sort((left, right) => left.start - right.start);
  ordered.forEach((current, idx) => {
    if (idx > 0 && current.start < ordered[idx - 1].end) {
      throw new Error("\uC18C\uC2A4\uB9F5 splice \uBC94\uC704 \uACB9\uCE68 \u2014 \uB0B4\uBD80 \uC624\uB958");
    }
  });
  return ordered.reduceRight(
    (text, splice) => text.slice(0, splice.start) + splice.replacement + text.slice(splice.end),
    xml
  );
}
20411
+
20412
+ // src/roundtrip/zip-patch.ts
20413
+ import { deflateRawSync } from "zlib";
20414
// ZIP record signatures, compared against 32-bit little-endian reads.
var EOCD_SIG = 0x06054b50; // end of central directory record
var CD_SIG = 0x02014b50; // central directory file header
var LOCAL_SIG = 0x04034b50; // local file header
var ZIP64_EOCD_LOC_SIG = 0x07064b50; // ZIP64 end of central directory locator
20418
/**
 * Returns an independent copy of a byte range.
 *
 * @param {Uint8Array} buf - source bytes.
 * @param {number} start - first byte to copy.
 * @param {number} [end] - one past the last byte; defaults to the end of `buf`.
 * @returns {Uint8Array} a new array that does not share memory with `buf`.
 */
function copyBytes(buf, start, end) {
  const window = buf.subarray(start, end);
  const copy = new Uint8Array(window.length);
  copy.set(window);
  return copy;
}
20421
/**
 * Locates the end-of-central-directory record of a ZIP archive and parses
 * every central directory entry.
 *
 * The EOCD search first requires the record plus its comment to end exactly
 * at the end of the buffer; if that fails, it accepts a record whose central
 * directory offset points at a central directory signature.
 *
 * Every central directory read is bounds-checked, so a truncated or corrupt
 * directory raises `KordocError` rather than a `RangeError` from `DataView`.
 *
 * @param {Uint8Array} buf - the whole archive.
 * @returns {{entries: Array<{cdStart: number, cdEnd: number, name: string, flags: number,
 *   method: number, crc: number, compSize: number, uncompSize: number, localOffset: number}>,
 *   cdOffset: number, cdSize: number, eocdOffset: number}}
 * @throws {KordocError} when no EOCD record is found, the archive uses ZIP64,
 *   or the central directory is corrupt or runs past the end of the buffer.
 */
function parseCentralDirectory(buf) {
  const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
  // The EOCD record is 22 bytes plus a comment of at most 65535 bytes.
  const minEocd = Math.max(0, buf.length - 22 - 65535);
  let eocdOffset = -1;
  for (let i = buf.length - 22; i >= minEocd; i--) {
    if (view.getUint32(i, true) === EOCD_SIG && i + 22 + view.getUint16(i + 20, true) === buf.length) {
      eocdOffset = i;
      break;
    }
  }
  if (eocdOffset < 0) {
    // Lenient second pass: tolerate trailing bytes after the EOCD comment.
    for (let i = buf.length - 22; i >= minEocd; i--) {
      if (view.getUint32(i, true) !== EOCD_SIG) continue;
      if (i + 22 + view.getUint16(i + 20, true) > buf.length) continue;
      const cand = view.getUint32(i + 16, true);
      if (cand < buf.length - 4 && view.getUint32(cand, true) === CD_SIG) {
        eocdOffset = i;
        break;
      }
    }
  }
  if (eocdOffset < 0) throw new KordocError("ZIP EOCD\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
  const totalEntries = view.getUint16(eocdOffset + 10, true);
  const cdSize = view.getUint32(eocdOffset + 12, true);
  const cdOffset = view.getUint32(eocdOffset + 16, true);
  // Saturated fields or a ZIP64 locator right before the EOCD mean ZIP64.
  if (cdOffset === 4294967295 || totalEntries === 65535) throw new KordocError("ZIP64\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
  if (eocdOffset >= 20 && view.getUint32(eocdOffset - 20, true) === ZIP64_EOCD_LOC_SIG) {
    throw new KordocError("ZIP64\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
  }
  const decoder = new TextDecoder("utf-8");
  const entries = [];
  let pos = cdOffset;
  for (let i = 0; i < totalEntries; i++) {
    // The fixed part of a central directory header is 46 bytes.
    if (pos + 46 > buf.length || view.getUint32(pos, true) !== CD_SIG) {
      throw new KordocError("ZIP Central Directory \uC190\uC0C1");
    }
    const flags = view.getUint16(pos + 8, true);
    const method = view.getUint16(pos + 10, true);
    const crc = view.getUint32(pos + 16, true);
    const compSize = view.getUint32(pos + 20, true);
    const uncompSize = view.getUint32(pos + 24, true);
    const nameLen = view.getUint16(pos + 28, true);
    const extraLen = view.getUint16(pos + 30, true);
    const commentLen = view.getUint16(pos + 32, true);
    const localOffset = view.getUint32(pos + 42, true);
    if (compSize === 4294967295 || uncompSize === 4294967295 || localOffset === 4294967295) {
      throw new KordocError("ZIP64\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
    }
    const cdEnd = pos + 46 + nameLen + extraLen + commentLen;
    // The variable-length tail (name, extra, comment) must fit in the buffer too.
    if (cdEnd > buf.length) throw new KordocError("ZIP Central Directory \uC190\uC0C1");
    const name = decoder.decode(buf.subarray(pos + 46, pos + 46 + nameLen));
    entries.push({ cdStart: pos, cdEnd, name, flags, method, crc, compSize, uncompSize, localOffset });
    pos = cdEnd;
  }
  return { entries, cdOffset, cdSize, eocdOffset };
}
20474
// Byte-indexed lookup table for the reflected CRC-32 polynomial 0xEDB88320.
var CRC_TABLE = Uint32Array.from({ length: 256 }, (_, byteValue) => {
  let remainder = byteValue;
  for (let bit = 0; bit < 8; bit++) {
    const shifted = remainder >>> 1;
    remainder = (remainder & 1) !== 0 ? 0xedb88320 ^ shifted : shifted;
  }
  return remainder >>> 0;
});

/**
 * Computes the CRC-32 checksum of a byte sequence.
 *
 * @param {Uint8Array} data - bytes to checksum.
 * @returns {number} the checksum as an unsigned 32-bit integer.
 */
function crc32(data) {
  let state = 0xffffffff;
  const total = data.length;
  for (let idx = 0; idx < total; idx++) {
    const slot = (state ^ data[idx]) & 0xff;
    state = CRC_TABLE[slot] ^ (state >>> 8);
  }
  // Final inversion, then force the result into the unsigned range.
  return (state ^ 0xffffffff) >>> 0;
}
20490
/**
 * Rebuilds a ZIP archive with the contents of selected entries replaced,
 * copying every other entry's bytes unchanged.
 *
 * A replaced entry keeps its original local header (name and extra field) with
 * the CRC, sizes and flags rewritten; the data-descriptor flag (bit 3) is
 * cleared because the sizes are stored in the header. Stored entries
 * (method 0) stay stored. Every other replaced entry is written as raw deflate
 * and its method id is set to 8 in both the local and the central header, so
 * an entry that used some other method remains readable.
 *
 * Central directory records are re-emitted in their original order with
 * updated local-header offsets, followed by the original EOCD record with a
 * new directory size and offset.
 *
 * @param {Uint8Array} original - the source archive.
 * @param {Map<string, Uint8Array>} replacements - entry name to new uncompressed content.
 * @returns {Uint8Array} the patched archive.
 * @throws {KordocError} when a replacement names an entry the archive does not
 *   contain, or a replaced entry's local header signature is wrong.
 */
function patchZipEntries(original, replacements) {
  const { entries, cdOffset, eocdOffset } = parseCentralDirectory(original);
  const view = new DataView(original.buffer, original.byteOffset, original.byteLength);
  const knownNames = new Set(entries.map((e) => e.name));
  for (const name of replacements.keys()) {
    if (!knownNames.has(name)) throw new KordocError(`ZIP\uC5D0 \uC5C6\uB294 \uC5D4\uD2B8\uB9AC: ${name}`);
  }
  // Local records are laid out again in their physical order.
  const byLocal = [...entries].sort((a, b) => a.localOffset - b.localOffset);
  const segments = [];
  const newLocalOffset = /* @__PURE__ */ new Map();
  const newMeta = /* @__PURE__ */ new Map();
  let offset = 0;
  for (let i = 0; i < byLocal.length; i++) {
    const e = byLocal[i];
    // An entry's bytes run up to the next local header, or to the central directory.
    const segEnd = i + 1 < byLocal.length ? byLocal[i + 1].localOffset : cdOffset;
    newLocalOffset.set(e, offset);
    const newData = replacements.get(e.name);
    if (newData === void 0) {
      const seg = original.subarray(e.localOffset, segEnd);
      segments.push(seg);
      offset += seg.length;
      continue;
    }
    if (view.getUint32(e.localOffset, true) !== LOCAL_SIG) throw new KordocError("ZIP \uB85C\uCEEC \uD5E4\uB354 \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
    const nameLen = view.getUint16(e.localOffset + 26, true);
    const extraLen = view.getUint16(e.localOffset + 28, true);
    const headerLen = 30 + nameLen + extraLen;
    const header = copyBytes(original, e.localOffset, e.localOffset + headerLen);
    const hview = new DataView(header.buffer, header.byteOffset, header.byteLength);
    // Only "stored" is preserved as-is; anything else is re-encoded as deflate (8).
    const keepStored = e.method === 0;
    const method = keepStored ? 0 : 8;
    const compData = keepStored ? newData : new Uint8Array(deflateRawSync(newData));
    const crc = crc32(newData);
    const flags = e.flags & ~8;
    hview.setUint16(6, flags, true);
    hview.setUint16(8, method, true);
    hview.setUint32(14, crc, true);
    hview.setUint32(18, compData.length, true);
    hview.setUint32(22, newData.length, true);
    // Any data descriptor that followed the old data is dropped with it.
    segments.push(header, compData);
    offset += headerLen + compData.length;
    newMeta.set(e, { crc, compSize: compData.length, uncompSize: newData.length, flags, method });
  }
  const newCdOffset = offset;
  for (const e of entries) {
    const cd = copyBytes(original, e.cdStart, e.cdEnd);
    const cview = new DataView(cd.buffer, cd.byteOffset, cd.byteLength);
    cview.setUint32(42, newLocalOffset.get(e), true);
    const meta = newMeta.get(e);
    if (meta) {
      cview.setUint16(8, meta.flags, true);
      cview.setUint16(10, meta.method, true);
      cview.setUint32(16, meta.crc, true);
      cview.setUint32(20, meta.compSize, true);
      cview.setUint32(24, meta.uncompSize, true);
    }
    segments.push(cd);
    offset += cd.length;
  }
  const newCdSize = offset - newCdOffset;
  // The EOCD record (including its comment) is copied through to the end of the input.
  const eocd = copyBytes(original, eocdOffset);
  const eview = new DataView(eocd.buffer, eocd.byteOffset, eocd.byteLength);
  eview.setUint32(12, newCdSize, true);
  eview.setUint32(16, newCdOffset, true);
  segments.push(eocd);
  offset += eocd.length;
  const result = new Uint8Array(offset);
  let pos = 0;
  for (const seg of segments) {
    result.set(seg, pos);
    pos += seg.length;
  }
  return result;
}
20560
+
20561
+ // src/roundtrip/markdown-units.ts
20562
/**
 * Splits markdown into an ordered list of block units.
 *
 * Unit kinds: "html-table" (a `<table>` line through its balancing
 * `</table>`), "gfm-table" (consecutive lines starting with `|`),
 * "separator" (`---`), "image" (`![image](...)` on its own line) and "text"
 * (a run of non-blank lines up to the next blank line or table start).
 *
 * Both LF and CRLF line endings are accepted so that `raw` never carries a
 * stray `\r`, which would make otherwise identical units compare unequal.
 *
 * @param {string} md2 - Markdown source.
 * @returns {{kind: string, raw: string, lines: string[]}[]} Units in document order.
 */
function splitMarkdownUnits(md2) {
  const lines = md2.split(/\r?\n/);
  const units = [];
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    if (!line.trim()) {
      i++;
      continue;
    }
    if (line.trim().startsWith("<table>")) {
      // Collect until the <table>/</table> counts balance, so nested tables stay in one unit.
      const collected2 = [];
      let depth = 0;
      while (i < lines.length) {
        const l = lines[i];
        collected2.push(l);
        depth += (l.match(/<table>/g) || []).length;
        depth -= (l.match(/<\/table>/g) || []).length;
        i++;
        if (depth <= 0) break;
      }
      units.push({ kind: "html-table", raw: collected2.join("\n"), lines: collected2 });
      continue;
    }
    if (line.trimStart().startsWith("|")) {
      const collected2 = [];
      while (i < lines.length && lines[i].trimStart().startsWith("|")) {
        collected2.push(lines[i]);
        i++;
      }
      units.push({ kind: "gfm-table", raw: collected2.join("\n"), lines: collected2 });
      continue;
    }
    if (/^-{3,}\s*$/.test(line.trim())) {
      units.push({ kind: "separator", raw: line.trim(), lines: [line.trim()] });
      i++;
      continue;
    }
    if (/^!\[image\]\([^)]*\)\s*$/.test(line.trim())) {
      units.push({ kind: "image", raw: line.trim(), lines: [line.trim()] });
      i++;
      continue;
    }
    // Plain text: consume until a blank line or the start of either table form.
    const collected = [];
    while (i < lines.length && lines[i].trim() && !lines[i].trimStart().startsWith("|") && !lines[i].trim().startsWith("<table>")) {
      collected.push(lines[i].trim());
      i++;
    }
    units.push({ kind: "text", raw: collected.join("\n"), lines: collected });
  }
  return units;
}
20614
/**
 * Escapes every tilde with a backslash.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with each `~` replaced by `\~`.
 */
function escapeGfm(text) {
  const pieces = text.split("~");
  return pieces.join("\\~");
}
20617
// Matches Korean shape/object alt-text phrases of the form "<shape name>입니다".
var HWP_SHAPE_ALT_TEXT_RE = /(?:모서리가 둥근 |둥근 )?(?:사각형|직사각형|정사각형|원|타원|삼각형|이등변 삼각형|직각 삼각형|선|직선|곡선|화살표|굵은 화살표|이중 화살표|오각형|육각형|팔각형|별|[4-8]점별|십자|십자형|구름|구름형|마름모|도넛|평행사변형|사다리꼴|부채꼴|호|반원|물결|번개|하트|빗금|블록 화살표|수식|표|그림|개체|그리기\s?개체|묶음\s?개체|글상자|수식\s?개체|OLE\s?개체)\s?입니다\.?/g;
/**
 * Normalizes extracted text: maps private-use glyphs via `mapPuaText`, drops
 * supplementary private-use code points and shape alt-text phrases, collapses
 * runs of spaces and trims.
 *
 * For short results (30 characters or fewer) made of three or more
 * space-separated tokens, at least 70% of which are single Hangul characters,
 * the spaces are removed.
 *
 * @param {string} text - Raw text.
 * @returns {string} Normalized text.
 */
function sanitizeText(text) {
  const mapped = mapPuaText(text);
  const withoutPua = mapped.replace(/[\u{F0000}-\u{FFFFD}]/gu, "");
  const withoutAltText = withoutPua.replace(HWP_SHAPE_ALT_TEXT_RE, "");
  const collapsed = withoutAltText.replace(/ +/g, " ").trim();
  if (collapsed.length > 30 || !collapsed.includes(" ")) return collapsed;
  const tokens = collapsed.split(" ");
  if (tokens.length < 3) return collapsed;
  let singleHangul = 0;
  for (const token of tokens) {
    if (token.length === 1 && /[가-힯ㄱ-ㆎ]/.test(token)) singleHangul++;
  }
  return singleHangul / tokens.length >= 0.7 ? tokens.join("") : collapsed;
}
20629
/**
 * Produces the comparison form of a text: sanitized, with every whitespace
 * run collapsed to one space and the ends trimmed.
 *
 * @param {string} text - Raw text.
 * @returns {string} Normalized text used for equality checks.
 */
function normForMatch(text) {
  const cleaned = sanitizeText(text);
  return cleaned.replace(/\s+/g, " ").trim();
}
20632
/**
 * Reverses tilde escaping: every `\~` becomes `~`.
 *
 * @param {string} text - Escaped text.
 * @returns {string} Text with tilde escapes removed.
 */
function unescapeGfm(text) {
  const pieces = text.split("\\~");
  return pieces.join("~");
}
20635
/**
 * Builds a one-line preview: whitespace runs collapse to a single space, and
 * anything longer than 80 characters is cut to 77 characters plus "...".
 *
 * @param {string} text - Text to preview.
 * @returns {string} Preview of at most 80 characters.
 */
function summarize(text) {
  const flat = text.replace(/\s+/g, " ").trim();
  if (flat.length <= 80) return flat;
  return flat.slice(0, 77) + "...";
}
20639
/**
 * Recomputes the rows of a GFM rendering of a table, keeping for every
 * displayed cell the grid coordinates it came from.
 *
 * Returns null for tables with no rows, no columns, or a single column.
 * Fully empty rows are dropped. A row whose only non-empty cell is the first
 * one (and which has no merge-covered positions) is held back as a "label
 * row": it is folded into the first cell of the next row when that cell is
 * empty, otherwise it is emitted as its own row.
 *
 * @param {{cells: Array, rows: number, cols: number}} table - Table IR.
 * @returns {{text: string, gridR: number, gridC: number}[][] | null} Display rows, or null.
 */
function replicateGfmTable(table) {
  const { cells, rows: numRows, cols: numCols } = table;
  if (numRows === 0 || numCols === 0 || numCols === 1) return null;
  const grid = Array.from({ length: numRows }, (_, r) => Array.from({ length: numCols }, (_2, c) => ({ text: "", gridR: r, gridC: c })));
  // Grid positions covered by another cell's rowSpan/colSpan.
  const covered = /* @__PURE__ */ new Set();
  for (let r = 0; r < numRows; r++) {
    for (let c = 0; c < numCols; c++) {
      if (covered.has(`${r},${c}`)) continue;
      const source = cells[r]?.[c];
      if (!source) continue;
      const rendered = escapeGfm(sanitizeText(source.text)).replace(/\|/g, "\\|").replace(/\n/g, "<br>");
      grid[r][c] = { text: rendered, gridR: r, gridC: c };
      for (let dr = 0; dr < source.rowSpan; dr++) {
        for (let dc = 0; dc < source.colSpan; dc++) {
          if (dr === 0 && dc === 0) continue;
          if (r + dr < numRows && c + dc < numCols) covered.add(`${r + dr},${c + dc}`);
        }
      }
      // Jump past the columns this cell spans within its own row.
      c += source.colSpan - 1;
    }
  }
  const output = [];
  let heldLabel = null;
  for (let r = 0; r < grid.length; r++) {
    const row = grid[r];
    const filled = row.filter((cell) => cell.text !== "").length;
    if (filled === 0) continue;
    const touchesMerge = row.some((_, c) => covered.has(`${r},${c}`));
    // Exactly one filled cell and it is the first one => the rest are empty.
    const isLabelRow = !touchesMerge && filled === 1 && row[0].text !== "";
    if (isLabelRow) {
      if (heldLabel) output.push(heldLabel);
      heldLabel = row;
      continue;
    }
    if (heldLabel) {
      if (row[0].text === "") {
        row[0] = heldLabel[0];
      } else {
        output.push(heldLabel);
      }
      heldLabel = null;
    }
    output.push(row);
  }
  if (heldLabel) output.push(heldLabel);
  return output.length > 0 ? output : null;
}
20687
/**
 * Parses the lines of a GFM pipe table into rows of trimmed cell strings.
 *
 * Lines that do not start with `|` and delimiter rows (`---`, `:---:`, ...)
 * are skipped. Cells are split on unescaped pipes; `\|` stays inside its
 * cell. The trailing pipe is optional, so a row written as `| a | b` still
 * yields both cells instead of losing the last one.
 *
 * @param {string[]} lines - Lines of one table unit.
 * @returns {string[][]} Data rows, each a list of trimmed cell texts.
 */
function parseGfmTable(lines) {
  const rows = [];
  for (const line of lines) {
    const trimmed = line.trim();
    if (!trimmed.startsWith("|")) continue;
    const parts = trimmed.split(/(?<!\\)\|/);
    // The first piece is whatever precedes the leading pipe (always empty here).
    parts.shift();
    // A closing pipe leaves an empty final piece; without one, the final
    // piece is real cell content and must be kept.
    if (parts.length > 0 && parts[parts.length - 1] === "") parts.pop();
    const cells = parts.map((c) => c.trim());
    if (cells.length === 0) continue;
    if (cells.every((c) => /^:?-{3,}:?$/.test(c))) continue;
    rows.push(cells);
  }
  return rows;
}
20699
/**
 * Converts GFM table cell markup back to plain text: `<br>` variants become
 * newlines, then `\|` becomes `|` and `\~` becomes `~`.
 *
 * @param {string} text - Cell text as written in the table.
 * @returns {string} Plain cell text.
 */
function unescapeGfmCell(text) {
  const withNewlines = text.replace(/<br\s*\/?>/gi, "\n");
  const withPipes = withNewlines.replace(/\\\|/g, "|");
  return withPipes.replace(/\\~/g, "~");
}
20702
/**
 * Recomputes the inner HTML of one table cell.
 *
 * A cell with blocks renders each block (nested table with optional caption,
 * image tag, or sanitized text with `<br>` for newlines), drops empty
 * results and joins the rest with `<br>`. A cell without blocks renders its
 * sanitized text.
 *
 * @param {{text: string, blocks?: Array}} cell - Cell IR.
 * @returns {string} Inner HTML for the cell.
 */
function replicateCellInnerHtml(cell) {
  if (!cell.blocks?.length) {
    return sanitizeText(cell.text).replace(/\n/g, "<br>");
  }
  const parts = [];
  for (const block of cell.blocks) {
    let html;
    if (block.type === "table" && block.table) {
      const caption = block.table.caption ? sanitizeText(block.table.caption) : "";
      html = (caption ? caption + "<br>" : "") + replicateTableToHtml(block.table);
    } else if (block.type === "image" && block.text) {
      html = `<img src="${block.text}" alt="image">`;
    } else {
      const text = sanitizeText(block.text ?? "");
      html = text ? text.replace(/\n/g, "<br>") : "";
    }
    if (html) parts.push(html);
  }
  return parts.join("<br>");
}
20716
/**
 * Recomputes the HTML rendering of a table: `<table>`, one `<tr>` line per
 * row that has cells, then `</table>`, joined with newlines. `colspan` and
 * `rowspan` attributes are written only when greater than 1.
 *
 * @param {object} table - Table IR.
 * @returns {string} HTML table markup.
 */
function replicateTableToHtml(table) {
  const out = ["<table>"];
  for (const { tag, cells } of replicateHtmlTable(table)) {
    if (cells.length === 0) continue;
    let rowHtml = "";
    for (const cell of cells) {
      let attrStr = "";
      if (cell.colSpan > 1) attrStr += ` colspan="${cell.colSpan}"`;
      if (cell.rowSpan > 1) attrStr += ` rowspan="${cell.rowSpan}"`;
      rowHtml += `<${tag}${attrStr}>${cell.inner}</${tag}>`;
    }
    out.push(`<tr>${rowHtml}</tr>`);
  }
  out.push("</table>");
  return out.join("\n");
}
20733
/**
 * Recomputes the row/cell structure of the HTML rendering of a table.
 *
 * The first grid row uses the `th` tag, later rows `td`. Positions covered
 * by another cell's span and positions without a cell are omitted, and rows
 * that end up with no cells are dropped.
 *
 * @param {{cells: Array, rows: number, cols: number}} table - Table IR.
 * @returns {{tag: string, cells: {inner: string, colSpan: number, rowSpan: number, gridR: number, gridC: number}[]}[]} Rendered rows.
 */
function replicateHtmlTable(table) {
  const { cells, rows: numRows, cols: numCols } = table;
  const covered = /* @__PURE__ */ new Set();
  const output = [];
  for (let r = 0; r < numRows; r++) {
    const rendered = [];
    for (let c = 0; c < numCols; c++) {
      if (covered.has(`${r},${c}`)) continue;
      const source = cells[r]?.[c];
      if (!source) continue;
      // Mark every other position this cell's span covers.
      for (let dr = 0; dr < source.rowSpan; dr++) {
        for (let dc = 0; dc < source.colSpan; dc++) {
          const isAnchor = dr === 0 && dc === 0;
          if (!isAnchor && r + dr < numRows && c + dc < numCols) covered.add(`${r + dr},${c + dc}`);
        }
      }
      rendered.push({
        inner: replicateCellInnerHtml(source),
        colSpan: source.colSpan,
        rowSpan: source.rowSpan,
        gridR: r,
        gridC: c
      });
    }
    if (rendered.length > 0) output.push({ tag: r === 0 ? "th" : "td", cells: rendered });
  }
  return output;
}
20762
/**
 * Parses the top-level rows and cells of an HTML table string.
 *
 * Only `<tr>`/`<td>`/`<th>` tags at table depth 1 are interpreted; nested
 * tables stay inside the enclosing cell's `inner` text. The first parsed row
 * is tagged "th" and later rows "td", whatever tags were written.
 *
 * Tag names must be followed by whitespace, `/` or `>`, so literal text such
 * as `<three>` or `<training>` inside a cell is not mistaken for `<th>` or
 * `<tr>`. `colspan`/`rowspan` values may be double-quoted, single-quoted or
 * unquoted; a missing attribute counts as 1.
 *
 * @param {string} raw - HTML markup of one table.
 * @returns {{tag: string, cells: {inner: string, colSpan: number, rowSpan: number}[]}[] | null}
 *   Parsed rows, or null when `<table>` tags are unbalanced.
 */
function parseHtmlTable(raw) {
  const re = /<(\/?)(table|tr|td|th)(?=[\s\/>])((?:"[^"]*"|'[^']*'|[^>"'])*?)>/gi;
  // Reads a span attribute; anchored on start/whitespace so e.g. `data-colspan` is ignored.
  const readSpan = (attrs, pattern) => {
    const hit = attrs.match(pattern);
    if (!hit) return 1;
    return parseInt(hit[1] ?? hit[2] ?? hit[3], 10);
  };
  const COLSPAN = /(?:^|\s)colspan\s*=\s*(?:"(\d+)"|'(\d+)'|(\d+)(?![^\s\/]))/i;
  const ROWSPAN = /(?:^|\s)rowspan\s*=\s*(?:"(\d+)"|'(\d+)'|(\d+)(?![^\s\/]))/i;
  let depth = 0;
  let currentRow = null;
  let cellStart = -1;
  let cellInfo = null;
  const rows = [];
  let m;
  while ((m = re.exec(raw)) !== null) {
    const isClose = m[1] === "/";
    const tag = m[2].toLowerCase();
    const attrs = m[3] || "";
    if (tag === "table") {
      depth += isClose ? -1 : 1;
      if (depth < 0) return null;
      continue;
    }
    // Row/cell tags of nested tables belong to the enclosing cell's content.
    if (depth !== 1) continue;
    if (tag === "tr") {
      if (!isClose) currentRow = [];
      else if (currentRow) {
        rows.push({ tag: rows.length === 0 ? "th" : "td", cells: currentRow });
        currentRow = null;
      }
    } else {
      if (!isClose) {
        cellStart = m.index + m[0].length;
        cellInfo = { colSpan: readSpan(attrs, COLSPAN), rowSpan: readSpan(attrs, ROWSPAN) };
      } else if (cellStart >= 0 && cellInfo && currentRow) {
        currentRow.push({ inner: raw.slice(cellStart, m.index), colSpan: cellInfo.colSpan, rowSpan: cellInfo.rowSpan });
        cellStart = -1;
        cellInfo = null;
      }
    }
  }
  if (depth !== 0) return null;
  return rows;
}
20802
/**
 * Reduces the inner HTML of a cell to its text lines.
 *
 * Nested tables and `<img>` tags are removed first, and their presence is
 * reported through `hadNonText`. The remainder is split on `<br>` variants,
 * trimmed, and empty pieces are dropped.
 *
 * @param {string} inner - Inner HTML of a table cell.
 * @returns {{lines: string[], hadNonText: boolean}} Text lines and a non-text flag.
 */
function htmlCellInnerToLines(inner) {
  const hasTable = /<table[\s>]/i.test(inner);
  let text = hasTable ? removeNestedTables(inner) : inner;
  // Tested after table removal so images inside nested tables are not counted.
  const hasImage = /<img\s/i.test(text);
  if (hasImage) {
    text = text.replace(/<img\s(?:"[^"]*"|'[^']*'|[^>"'])*?>/gi, "");
  }
  const lines = [];
  for (const piece of text.split(/<br\s*\/?>/gi)) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) lines.push(trimmed);
  }
  return { lines, hadNonText: hasTable || hasImage };
}
20816
/**
 * Collects the markup of every outermost `<table>...</table>` in a string.
 * Tables nested inside another table are returned as part of their parent,
 * and a stray closing tag is ignored.
 *
 * @param {string} html - HTML fragment.
 * @returns {string[]} Outermost table substrings in document order.
 */
function extractTopLevelTables(html) {
  const tables = [];
  let depth = 0;
  let openedAt = -1;
  for (const m of html.matchAll(/<(\/?)table(?:[\s>]|>)/gi)) {
    if (m[1] !== "/") {
      if (depth === 0) openedAt = m.index;
      depth += 1;
      continue;
    }
    depth -= 1;
    if (depth === 0 && openedAt >= 0) {
      tables.push(html.slice(openedAt, m.index + m[0].length));
      openedAt = -1;
    }
    // Clamp so an unmatched closing tag does not poison later tables.
    if (depth < 0) depth = 0;
  }
  return tables;
}
20837
/**
 * Returns the input with every outermost `<table>...</table>` removed.
 * If a table is still open at the end of the input, everything from its
 * opening tag onward is dropped.
 *
 * @param {string} html - HTML fragment.
 * @returns {string} Fragment without table markup.
 */
function removeNestedTables(html) {
  const kept = [];
  let depth = 0;
  let resumeAt = 0;
  for (const m of html.matchAll(/<(\/?)table(?:[\s>]|>)/gi)) {
    if (m[1] !== "/") {
      if (depth === 0) kept.push(html.slice(resumeAt, m.index));
      depth += 1;
      continue;
    }
    depth -= 1;
    if (depth === 0) resumeAt = m.index + m[0].length;
    if (depth < 0) depth = 0;
  }
  if (depth === 0) kept.push(html.slice(resumeAt));
  return kept.join("");
}
20856
+
20857
+ // src/roundtrip/table-patch.ts
20858
/**
 * Applies cell-level text edits made to a GFM-rendered table.
 *
 * The table is first re-rendered from the IR and compared with the original
 * markdown rows; any mismatch means display cells cannot be mapped to grid
 * positions reliably and the whole table is skipped. Row-count changes skip
 * the table, column-count changes skip the row, and changes to image tokens
 * skip the cell.
 *
 * @param {object} table - Table IR.
 * @param {object} scanTable - Scanned source table matching `table`.
 * @param {{lines: string[]}} orig - Original markdown unit.
 * @param {{lines: string[]}} edited - Edited markdown unit.
 * @param {object} ctx - Patch context; `ctx.skipped` collects notes.
 * @param {(reason: string) => number} skip - Records a skip reason and returns its result.
 * @returns {number} Sum of the `applyCellEdit` results, or the result of `skip`.
 */
function patchGfmTable(table, scanTable, orig, edited, ctx, skip) {
  const replica = replicateGfmTable(table);
  if (!replica) return skip("\uD45C \uB80C\uB354 \uACBD\uB85C \uC2DD\uBCC4 \uC2E4\uD328");
  const origRows = parseGfmTable(orig.lines);
  const editedRows = parseGfmTable(edited.lines);
  const replicaMatches = replica.length === origRows.length && replica.every((row, r) => row.length === origRows[r].length && row.every((cell, j) => cell.text === origRows[r][j]));
  if (!replicaMatches) {
    return skip("\uD45C \uC88C\uD45C \uC7AC\uD604 \uBD88\uC77C\uCE58 \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");
  }
  if (editedRows.length !== origRows.length) return skip("\uD45C \uD589 \uCD94\uAC00/\uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0 (\uD45C \uAD6C\uC870 \uBCC0\uACBD)");
  // Cell markdown -> plain, non-empty text lines with image tokens removed.
  const toTextLines = (cellText) => unescapeGfmCell(stripCellTokens(cellText)).split("\n").map((s) => s.trim()).filter(Boolean);
  let applied = 0;
  for (let r = 0; r < origRows.length; r++) {
    const before = origRows[r];
    const after = editedRows[r];
    if (after.length !== before.length) {
      skip(`\uD45C ${r + 1}\uD589 \uC5F4 \uC218 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0`);
      continue;
    }
    for (let c = 0; c < before.length; c++) {
      if (before[c] === after[c]) continue;
      const { gridR, gridC } = replica[r][c];
      const origTokens = extractCellTokens(before[c]);
      const editedTokens = extractCellTokens(after[c]);
      if (origTokens !== editedTokens) {
        skip("\uC140 \uB0B4 \uC774\uBBF8\uC9C0 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");
        continue;
      }
      const newLines = toTextLines(after[c]);
      const origLineCount = toTextLines(before[c]).length;
      const n = applyCellEdit(table, scanTable, gridR, gridC, newLines, ctx, before[c], after[c], origLineCount);
      if (n > 0 && origTokens) {
        // The text was applied, but the cell also holds image tokens: leave a note.
        ctx.skipped.push({
          reason: "\uC140 \uB0B4 \uC774\uBBF8\uC9C0\xB7\uD14D\uC2A4\uD2B8 \uD63C\uC7AC \u2014 \uD14D\uC2A4\uD2B8\uB9CC \uC801\uC6A9 (\uC774\uBBF8\uC9C0 \uC778\uC811 \uBC30\uCE58\uB294 <br> \uBD84\uB9AC\uB85C \uC7AC\uD604\uB428)",
          before: summarize(before[c]),
          after: summarize(after[c])
        });
      }
      applied += n;
    }
  }
  return applied;
}
20897
/**
 * Entry point for patching an HTML-rendered table unit: forwards the raw
 * markup of both units to `patchHtmlTableRaw` at nesting depth 0.
 *
 * @param {object} table - Table IR.
 * @param {object} scanTable - Scanned source table matching `table`.
 * @param {{raw: string}} orig - Original markdown unit.
 * @param {{raw: string}} edited - Edited markdown unit.
 * @param {object} ctx - Patch context.
 * @param {(reason: string) => number} skip - Records a skip reason.
 * @returns {number} Result of `patchHtmlTableRaw`.
 */
function patchHtmlTable(table, scanTable, orig, edited, ctx, skip) {
  const { raw: origRaw } = orig;
  const { raw: editedRaw } = edited;
  return patchHtmlTableRaw(table, scanTable, origRaw, editedRaw, ctx, skip, 0);
}
20900
/**
 * Applies cell-level edits made to an HTML-rendered table, recursing into
 * nested tables.
 *
 * The table is re-rendered from the IR and must equal `origRaw` exactly, and
 * the parsed original must match the replica cell for cell; otherwise the
 * table is skipped because cell boundaries cannot be trusted. Row-count
 * changes skip the table; cell-count changes skip the row; span or image
 * changes and nested-table additions/removals skip the cell.
 *
 * @param {object} table - Table IR.
 * @param {object} scanTable - Scanned source table matching `table`.
 * @param {string} origRaw - Original HTML markup.
 * @param {string} editedRaw - Edited HTML markup.
 * @param {object} ctx - Patch context.
 * @param {(reason: string) => number} skip - Records a skip reason and returns its result.
 * @param {number} depth - Current nesting depth; more than 8 is rejected.
 * @returns {number} Sum of applied edit results, or the result of `skip`.
 */
function patchHtmlTableRaw(table, scanTable, origRaw, editedRaw, ctx, skip, depth) {
  if (depth > 8) return skip("\uC911\uCCA9\uD45C \uAE4A\uC774 \uCD08\uACFC");
  if (replicateTableToHtml(table) !== origRaw) return skip("\uD45C \uC88C\uD45C \uC7AC\uD604 \uBD88\uC77C\uCE58 \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");
  const replica = replicateHtmlTable(table);
  const origRows = parseHtmlTable(origRaw);
  const parsedMatchesReplica = Boolean(origRows) && origRows.length === replica.length && origRows.every((row, i) => row.cells.length === replica[i].cells.length && row.cells.every((cell, j) => cell.inner === replica[i].cells[j].inner));
  if (!parsedMatchesReplica) {
    return skip("\uC140 \uACBD\uACC4 \uBAA8\uD638 (\uB9AC\uD130\uB7F4 \uD0DC\uADF8 \uC758\uC2EC) \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");
  }
  const editedRows = parseHtmlTable(editedRaw);
  if (!editedRows) return skip("\uD3B8\uC9D1\uB41C HTML \uD45C \uD30C\uC2F1 \uC2E4\uD328");
  if (editedRows.length !== replica.length) return skip("\uD45C \uD589 \uCD94\uAC00/\uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0 (\uD45C \uAD6C\uC870 \uBCC0\uACBD)");
  let applied = 0;
  for (let r = 0; r < replica.length; r++) {
    const replicaCells = replica[r].cells;
    const editedCells = editedRows[r].cells;
    if (editedCells.length !== replicaCells.length) {
      skip(`\uD45C ${r + 1}\uD589 \uC140 \uC218 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0`);
      continue;
    }
    for (let c = 0; c < replicaCells.length; c++) {
      const oc = replicaCells[c];
      const ec = editedCells[c];
      if (oc.colSpan !== ec.colSpan || oc.rowSpan !== ec.rowSpan) {
        skip(`\uC140 \uBCD1\uD569(colspan/rowspan) \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0`);
        continue;
      }
      if (oc.inner === ec.inner) continue;
      const origContent = htmlCellInnerToLines(oc.inner);
      const editedContent = htmlCellInnerToLines(ec.inner);
      if (origContent.hadNonText || editedContent.hadNonText) {
        if (extractImgTags(oc.inner) !== extractImgTags(ec.inner)) {
          skip("\uC140 \uB0B4 \uC774\uBBF8\uC9C0 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");
          continue;
        }
        const origTables = extractTopLevelTables(oc.inner);
        const editedTables = extractTopLevelTables(ec.inner);
        if (origTables.length !== editedTables.length) {
          skip("\uC140 \uB0B4 \uC911\uCCA9\uD45C \uCD94\uAC00/\uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0");
          continue;
        }
        if (origTables.join("\n") !== editedTables.join("\n")) {
          applied += patchNestedTables(table, scanTable, oc, origTables, editedTables, ctx, skip, depth);
        }
      }
      // Text outside nested tables/images is patched independently of them.
      const textChanged = origContent.lines.join("\n") !== editedContent.lines.join("\n");
      if (textChanged) {
        const newLines = editedContent.lines.map((l) => unescapeGfm(l));
        applied += applyCellEdit(table, scanTable, oc.gridR, oc.gridC, newLines, ctx, oc.inner, ec.inner, origContent.lines.length);
      }
    }
  }
  return applied;
}
20950
/**
 * Patches the nested tables of one cell, pairing the k-th nested table
 * markup with the k-th nested table IR block and the k-th scanned table.
 *
 * Skips when the scanned cell is missing or the three counts disagree.
 *
 * @param {object} table - Enclosing table IR.
 * @param {object} scanTable - Scanned enclosing table; looked up via `cellByAnchor`.
 * @param {{gridR: number, gridC: number}} oc - Replica cell holding the nested tables.
 * @param {string[]} origTables - Original nested table markup.
 * @param {string[]} editedTables - Edited nested table markup (same length).
 * @param {object} ctx - Patch context.
 * @param {(reason: string) => number} skip - Records a skip reason and returns its result.
 * @param {number} depth - Nesting depth of the enclosing table.
 * @returns {number} Sum of applied edit results, or the result of `skip`.
 */
function patchNestedTables(table, scanTable, oc, origTables, editedTables, ctx, skip, depth) {
  const anchorKey = `${oc.gridR},${oc.gridC}`;
  const irCell = table.cells[oc.gridR]?.[oc.gridC];
  const scanCell = scanTable.cellByAnchor.get(anchorKey);
  const nestedIRs = [];
  for (const block of irCell?.blocks ?? []) {
    if (block.type === "table" && block.table) nestedIRs.push(block.table);
  }
  const expected = origTables.length;
  if (!scanCell || nestedIRs.length !== expected || scanCell.tables.length !== expected) {
    return skip("\uC911\uCCA9\uD45C \uC18C\uC2A4\uB9F5 \uB9E4\uD551 \uC2E4\uD328");
  }
  let applied = 0;
  origTables.forEach((origMarkup, k) => {
    if (origMarkup === editedTables[k]) return;
    applied += patchHtmlTableRaw(nestedIRs[k], scanCell.tables[k], origMarkup, editedTables[k], ctx, skip, depth + 1);
  });
  return applied;
}
20964
/**
 * Returns all `<img ...>` tags found in a cell's inner HTML, joined with a
 * single space, so two cells' images can be compared as one string.
 *
 * @param {string} inner - Inner HTML of a table cell.
 * @returns {string} Space-joined image tags, or "" when there are none.
 */
function extractImgTags(inner) {
  const tags = inner.match(/<img\s(?:"[^"]*"|'[^']*'|[^>"'])*?>/gi);
  return tags === null ? "" : tags.join(" ");
}
20967
// Image tokens as they appear in cell markdown: `![image](...)` or `[이미지: ...]`.
var CELL_TOKEN_RE = /!\[image\]\([^)]*\)|\[이미지: [^\]]*\]/g;
/**
 * Returns all image tokens found in a cell's text, joined with a single
 * space, so two cells' tokens can be compared as one string.
 *
 * @param {string} text - Cell text.
 * @returns {string} Space-joined tokens, or "" when there are none.
 */
function extractCellTokens(text) {
  const tokens = text.match(CELL_TOKEN_RE);
  return tokens === null ? "" : tokens.join(" ");
}
20971
/**
 * Removes every image token (as matched by `CELL_TOKEN_RE`) from a cell's text.
 *
 * @param {string} text - Cell text.
 * @returns {string} Text without image tokens.
 */
function stripCellTokens(text) {
  const textParts = text.split(CELL_TOKEN_RE);
  return textParts.join("");
}
20974
/**
 * Applies edits to a table that was rendered as plain text lines rather than
 * as table markup. Two layouts are handled:
 *
 * - 1x1 table: the single cell renders one line per text line, with lines
 *   that start like "1. " wrapped in `**...**`.
 * - single-column table with two or more rows: each non-empty row renders
 *   one line.
 *
 * In both cases the rendering is recomputed and must equal the original
 * unit's lines, otherwise the table is skipped. Any other shape is skipped.
 *
 * @param {{rows: number, cols: number, cells: Array}} table - Table IR.
 * @param {object} scanTable - Scanned source table matching `table`.
 * @param {{raw: string, lines: string[]}} orig - Original markdown unit.
 * @param {{raw: string, lines: string[]}} edited - Edited markdown unit.
 * @param {object} ctx - Patch context.
 * @param {(reason: string) => number} skip - Records a skip reason and returns its result.
 * @returns {number} Sum of applied edit results, or the result of `skip`.
 */
function patchTextChunkTable(table, scanTable, orig, edited, ctx, skip) {
  const isSingleCell = table.rows === 1 && table.cols === 1;
  const isSingleColumn = table.cols === 1 && table.rows >= 2;
  if (isSingleCell) {
    const content = sanitizeText(table.cells[0][0].text);
    const replicaLines = [];
    for (const rawLine of content.split(/\n/)) {
      const t = rawLine.trim();
      if (!t) continue;
      const rendered = /^\d+\.\s/.test(t) ? `**${escapeGfm(t)}**` : escapeGfm(t);
      if (rendered) replicaLines.push(rendered);
    }
    if (replicaLines.join("\n") !== orig.lines.join("\n")) return skip("\uD45C \uC88C\uD45C \uC7AC\uD604 \uBD88\uC77C\uCE58 \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");
    if (extractCellTokens(orig.raw) !== extractCellTokens(edited.raw)) return skip("\uC140 \uB0B4 \uC774\uBBF8\uC9C0 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");
    const newLines = [];
    for (const l of edited.lines) {
      // Undo the bold wrapper only when the wrapped text looks like a numbered item.
      const m = l.match(/^\*\*([\s\S]*)\*\*$/);
      const unwrap = m && /^\d+\.\s/.test(unescapeGfm(m[1]));
      const plain = stripCellTokens(unescapeGfm(unwrap ? m[1] : l)).trim();
      if (plain) newLines.push(plain);
    }
    return applyCellEdit(table, scanTable, 0, 0, newLines, ctx, orig.raw, edited.raw, orig.lines.length);
  }
  if (isSingleColumn) {
    const replica = [];
    for (let r = 0; r < table.rows; r++) {
      const line = escapeGfm(sanitizeText(table.cells[r][0].text)).replace(/\n/g, " ");
      if (line) replica.push({ line, gridR: r });
    }
    if (replica.map((x) => x.line).join("\n") !== orig.lines.join("\n")) return skip("\uD45C \uC88C\uD45C \uC7AC\uD604 \uBD88\uC77C\uCE58 \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");
    if (edited.lines.length !== replica.length) return skip("\uD45C \uD589 \uCD94\uAC00/\uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0 (\uD45C \uAD6C\uC870 \uBCC0\uACBD)");
    let applied = 0;
    replica.forEach((entry, i) => {
      const editedLine = edited.lines[i];
      if (entry.line === editedLine) return;
      if (extractCellTokens(entry.line) !== extractCellTokens(editedLine)) {
        skip("\uC140 \uB0B4 \uC774\uBBF8\uC9C0 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");
        return;
      }
      const newLines = [stripCellTokens(unescapeGfm(editedLine)).trim()].filter(Boolean);
      applied += applyCellEdit(table, scanTable, entry.gridR, 0, newLines, ctx, entry.line, editedLine, 1);
    });
    return applied;
  }
  return skip("\uD45C \uB80C\uB354 \uACBD\uB85C \uC2DD\uBCC4 \uC2E4\uD328");
}
21014
/**
 * Queues the source splices needed to change one table cell's text to `newLines`.
 *
 * The cell is located in the scanned table by its anchor coordinates. The
 * edit is skipped (with a note on `ctx.skipped`) when the scanned paragraphs
 * do not correspond to the IR cell's text, when the original line count does
 * not match the number of non-empty paragraphs, or when any new line is not
 * stable under `sanitizeText`. Lines beyond the available paragraphs are
 * merged with spaces into the last paragraph (or into the first paragraph of
 * an empty cell) and a note is recorded.
 *
 * @param {object} table - Table IR.
 * @param {object} scanTable - Scanned source table; cells are found via `cellByAnchor`.
 * @param {number} gridR - Grid row of the cell.
 * @param {number} gridC - Grid column of the cell.
 * @param {string[]} newLines - Desired text lines.
 * @param {object} ctx - Patch context (`skipped`, `scans`, `sectionSplices`).
 * @param {string} before - Original cell rendering, used in notes.
 * @param {string} after - Edited cell rendering, used in notes.
 * @param {number} [origLineCount] - Number of text lines in the original rendering.
 * @returns {number} 1 if splices were queued, 0 otherwise.
 */
function applyCellEdit(table, scanTable, gridR, gridC, newLines, ctx, before, after, origLineCount) {
  const skip = (reason) => {
    ctx.skipped.push({ reason, before: summarize(before), after: summarize(after) });
    return 0;
  };
  const cell = scanTable.cellByAnchor.get(`${gridR},${gridC}`);
  if (!cell) return skip("\uC140 \uC88C\uD45C \uB9E4\uD551 \uC2E4\uD328 (\uBCD1\uD569 \uC601\uC5ED\uC758 \uBE48 \uCE78\uC774\uAC70\uB098 \uC88C\uD45C \uBD88\uC77C\uCE58)");
  const irCell = table.cells[gridR]?.[gridC];
  const scanJoined = cell.paragraphs.map((p) => p.text).filter((t) => normForMatch(t)).join("\n");
  const scanNorm = normForMatch(scanJoined);
  if (irCell && scanNorm !== normForMatch(stripCellTokens(irCell.text))) {
    const bothEmpty = normForMatch(irCell.text) === "" && scanNorm === "";
    if (!bothEmpty) {
      // Fall back to comparing against the cell's paragraph/heading blocks only.
      const flatJoined = (irCell.blocks ?? []).filter((b) => b.type === "paragraph" || b.type === "heading").map((b) => b.text ?? "").join("\n");
      if (scanNorm !== normForMatch(flatJoined)) {
        return skip("\uC140 \uCF58\uD150\uCE20 \uAD6C\uC870 \uBCF5\uC7A1 (\uC911\uCCA9\uD45C/\uAE00\uC0C1\uC790) \u2014 \uB9E4\uD551 \uC2E0\uB8B0 \uBD88\uAC00");
      }
    }
  }
  const nonEmpty = cell.paragraphs.filter((p) => normForMatch(p.text) !== "");
  if (origLineCount !== void 0 && nonEmpty.length > 0 && origLineCount !== nonEmpty.length) {
    return skip("\uC140 \uC904 \uACBD\uACC4 \uB9E4\uD551 \uBAA8\uD638 (\uB9AC\uD130\uB7F4 <br>/\uBB38\uB2E8 \uB0B4 \uC904\uBC14\uAFC8) \u2014 \uBBF8\uC9C0\uC6D0");
  }
  const unstable = newLines.find((l) => sanitizeText(l) !== l);
  if (unstable !== void 0) return skip("\uACF5\uBC31 \uC815\uADDC\uD654 \uBD88\uC548\uC815 \uD14D\uC2A4\uD2B8 \u2014 \uD328\uCE58 \uC2DC \uC6D0\uBB38 \uBCF4\uC874 \uBD88\uAC00\uB85C \uBBF8\uC9C0\uC6D0");
  const splices = [];
  let sectionIndex = -1;
  if (nonEmpty.length === 0) {
    // Empty cell: put all new text into its first paragraph.
    if (newLines.length === 0) return 0;
    const target = cell.paragraphs[0];
    if (!target) return skip("\uBE48 \uC140\uC5D0 \uBB38\uB2E8\uC774 \uC5C6\uC5B4 \uD14D\uC2A4\uD2B8 \uC0BD\uC785 \uBD88\uAC00");
    const sp = buildParagraphSplices(target, newLines.join(" "), ctx.scans[target.sectionIndex]?.xml);
    if (sp === null) return skip("\uC140 \uBB38\uB2E8\uC5D0 \uD14D\uC2A4\uD2B8 \uB178\uB4DC\uB97C \uB9CC\uB4E4 \uC218 \uC5C6\uC74C");
    splices.push(...sp);
    sectionIndex = target.sectionIndex;
    if (newLines.length > 1) {
      ctx.skipped.push({ reason: "\uC140 \uB0B4 \uC904 \uCD94\uAC00\uB294 \uBB38\uB2E8 \uC0DD\uC131 \uBBF8\uC9C0\uC6D0 \u2014 \uD55C \uBB38\uB2E8\uC73C\uB85C \uBCD1\uD569 \uC801\uC6A9", after: summarize(after) });
    }
  } else {
    const overflow = newLines.length > nonEmpty.length;
    const lastIdx = nonEmpty.length - 1;
    // One target text per existing paragraph; surplus lines fold into the last one.
    const assigned = nonEmpty.map((_, i) => {
      if (i >= newLines.length) return "";
      return i === lastIdx && overflow ? newLines.slice(i).join(" ") : newLines[i];
    });
    if (overflow) {
      ctx.skipped.push({ reason: "\uC140 \uB0B4 \uC904 \uCD94\uAC00\uB294 \uBB38\uB2E8 \uC0DD\uC131 \uBBF8\uC9C0\uC6D0 \u2014 \uB9C8\uC9C0\uB9C9 \uBB38\uB2E8\uC5D0 \uBCD1\uD569 \uC801\uC6A9", after: summarize(after) });
    }
    for (let i = 0; i < nonEmpty.length; i++) {
      const para = nonEmpty[i];
      const unchanged = assigned[i] === para.text || normForMatch(assigned[i]) === normForMatch(para.text);
      if (unchanged) continue;
      const sp = buildParagraphSplices(para, assigned[i], ctx.scans[para.sectionIndex]?.xml);
      if (sp === null) return skip("\uC140 \uBB38\uB2E8\uC5D0 \uD14D\uC2A4\uD2B8 \uB178\uB4DC\uB97C \uB9CC\uB4E4 \uC218 \uC5C6\uC74C");
      splices.push(...sp);
      sectionIndex = para.sectionIndex;
    }
  }
  if (splices.length === 0) return 0;
  ctx.sectionSplices[sectionIndex].push(...splices);
  return 1;
}
21075
+
21076
+ // src/roundtrip/patcher.ts
21077
/**
 * Applies edits made to a document's markdown back onto the original HWPX
 * archive, changing only the section XML entries that need it.
 *
 * The original is parsed, its blocks are rendered to markdown units and
 * aligned with the units of `editedMarkdown`; each modified pair is handed to
 * `handleModifiedUnit`. Added and removed blocks are not applied and are
 * reported in `skipped`. Unless `options.verify` is `false`, the patched
 * archive is re-parsed and compared with the edited units.
 *
 * Every failure, including a section entry that cannot be read or scanned,
 * is reported through the returned object rather than a rejected promise.
 *
 * @param {Uint8Array} original - Bytes of the source HWPX archive.
 * @param {string} editedMarkdown - Edited markdown of the document.
 * @param {{verify?: boolean}} [options] - Set `verify: false` to skip the re-parse check.
 * @returns {Promise<{success: boolean, data?: Uint8Array, applied: number, skipped: object[], verification?: unknown, error?: string}>}
 *   Patch result; `data` is present only on success.
 */
async function patchHwpx(original, editedMarkdown, options) {
  const skipped = [];
  let applied = 0;
  const describe = (err) => err instanceof Error ? err.message : String(err);
  let origBlocks;
  try {
    const parsed = await parseHwpxDocument(u8ToArrayBuffer(original));
    origBlocks = parsed.blocks;
  } catch (err) {
    return { success: false, applied: 0, skipped, error: `\uC6D0\uBCF8 HWPX \uD30C\uC2F1 \uC2E4\uD328: ${describe(err)}` };
  }
  let zip;
  try {
    zip = await JSZip7.loadAsync(original);
  } catch {
    return { success: false, applied: 0, skipped, error: "ZIP \uB85C\uB4DC \uC2E4\uD328" };
  }
  let sectionPaths;
  try {
    sectionPaths = await resolveSectionEntryNames(zip);
  } catch (err) {
    return { success: false, applied: 0, skipped, error: `\uC139\uC158 \uB85C\uB4DC \uC2E4\uD328: ${describe(err)}` };
  }
  if (sectionPaths.length === 0) {
    return { success: false, applied: 0, skipped, error: "HWPX \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4" };
  }
  const scans = [];
  try {
    for (let i = 0; i < sectionPaths.length; i++) {
      const entry = zip.file(sectionPaths[i]);
      // zip.file() yields null for a missing entry; fail with the path instead of a TypeError.
      if (!entry) throw new Error(sectionPaths[i]);
      const xml = await entry.async("text");
      scans.push(scanSectionXml(xml, i));
    }
  } catch (err) {
    return { success: false, applied: 0, skipped, error: `\uC139\uC158 \uB85C\uB4DC \uC2E4\uD328: ${describe(err)}` };
  }
  const origUnits = buildOrigUnits(origBlocks);
  const editedUnits = splitMarkdownUnits(editedMarkdown);
  const pairs = alignUnits(origUnits.map((u) => u.raw), editedUnits.map((u) => u.raw));
  const paraMap = resolveParagraphMappings(origBlocks, scans);
  const scanTables = scans.flatMap((s) => s.tables.filter((t) => t.rows.length > 0));
  const obTableOrdinals = buildTableOrdinals(origBlocks);
  const sectionSplices = scans.map(() => []);
  for (const [oi, ei] of pairs) {
    if (oi !== null && ei !== null) {
      const orig = origUnits[oi];
      const edited = editedUnits[ei];
      if (orig.raw === edited.raw) continue;
      applied += handleModifiedUnit(orig, edited, {
        origBlocks,
        paraMap,
        scans,
        scanTables,
        obTableOrdinals,
        sectionSplices,
        skipped
      });
    } else if (oi !== null) {
      skipped.push({ reason: "\uBE14\uB85D \uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0 (v1) \u2014 \uC6D0\uBCF8 \uC720\uC9C0", before: summarize(origUnits[oi].raw) });
    } else if (ei !== null) {
      skipped.push({ reason: "\uBE14\uB85D \uCD94\uAC00\uB294 \uBBF8\uC9C0\uC6D0 (v1)", after: summarize(editedUnits[ei].raw) });
    }
  }
  // Re-serialize only the sections that actually received splices.
  const replacements = /* @__PURE__ */ new Map();
  const encoder = new TextEncoder();
  try {
    for (let i = 0; i < scans.length; i++) {
      if (sectionSplices[i].length === 0) continue;
      const newXml = applySplices(scans[i].xml, sectionSplices[i]);
      replacements.set(sectionPaths[i], encoder.encode(newXml));
    }
  } catch (err) {
    return { success: false, applied: 0, skipped, error: `\uC18C\uC2A4\uB9F5 splice \uC2E4\uD328: ${describe(err)}` };
  }
  let data;
  if (replacements.size === 0) {
    // Nothing to patch: hand back a copy of the original bytes.
    data = new Uint8Array(original);
  } else {
    try {
      data = patchZipEntries(original, replacements);
    } catch (err) {
      return { success: false, applied: 0, skipped, error: `ZIP \uC7AC\uC870\uB9BD \uC2E4\uD328: ${describe(err)}` };
    }
  }
  let verification;
  if (options?.verify !== false) {
    try {
      const reparsed = await parseHwpxDocument(u8ToArrayBuffer(data));
      verification = diffUnitLists(splitMarkdownUnits(reparsed.markdown), editedUnits);
    } catch (err) {
      return { success: false, applied, skipped, error: `\uD328\uCE58\uBCF8 \uC7AC\uD30C\uC2F1 \uC2E4\uD328 \u2014 \uD328\uCE58 \uC911\uB2E8: ${describe(err)}` };
    }
  }
  return { success: true, data, applied, skipped, verification };
}
21161
/**
 * Renders the original blocks to markdown and splits the result into units,
 * tagging each unit with the index of the block it came from.
 *
 * A paragraph whose sanitized text starts with "[별표 N" is rendered together
 * with the following paragraph when that one ends with "관련" or "관련)".
 * Units from such a pair, or from a paragraph/heading that rendered to more
 * than one unit, get `fragment: true`. The first unit of a captioned table
 * gets `role: "caption"` when it is a text unit starting with `**` and the
 * table rendered to more than one unit.
 *
 * @param {object[]} blocks - Document blocks.
 * @returns {object[]} Markdown units extended with `blockIdx`, `fragment` and optionally `role`.
 */
function buildOrigUnits(blocks) {
  const units = [];
  let i = 0;
  while (i < blocks.length) {
    const block = blocks[i];
    let consume = 1;
    const isAppendixHeading = block.type === "paragraph" && block.text && /^\[별표\s*\d+/.test(sanitizeText(block.text));
    if (isAppendixHeading) {
      const next = blocks[i + 1];
      if (next?.type === "paragraph" && next.text && /관련\)?$/.test(next.text)) consume = 2;
    }
    const chunk = blocksToMarkdown(blocks.slice(i, i + consume));
    if (chunk) {
      const subUnits = splitMarkdownUnits(chunk);
      const isTextBlock = block.type === "paragraph" || block.type === "heading";
      const isFragment = consume === 2 || isTextBlock && subUnits.length > 1;
      const hasCaption = block.type === "table" && block.table?.caption;
      subUnits.forEach((sub, s) => {
        const unit = { ...sub, blockIdx: i, fragment: isFragment || void 0 };
        if (hasCaption && s === 0 && subUnits.length > 1 && unit.kind === "text" && unit.raw.startsWith("**")) {
          unit.role = "caption";
        }
        units.push(unit);
      });
    }
    i += consume;
  }
  return units;
}
21189
/**
 * Maps the block index of every table block that carries a `table` payload
 * to its zero-based position among such blocks.
 *
 * @param {object[]} blocks - Document blocks.
 * @returns {Map<number, number>} Block index -> table ordinal.
 */
function buildTableOrdinals(blocks) {
  const ordinals = /* @__PURE__ */ new Map();
  blocks.forEach((block, idx) => {
    // Keys are unique, so the current size is the next ordinal.
    if (block.type === "table" && block.table) ordinals.set(idx, ordinals.size);
  });
  return ordinals;
}
21197
/**
 * Aligns two sequences of strings and returns index pairs in order.
 *
 * Each pair is `[indexInA, indexInB]`; a `null` on either side marks an
 * element that has no counterpart in the other sequence.
 *
 * Strategy:
 *  - When `a.length * b.length` exceeds 4e6 the quadratic table is skipped:
 *    the common prefix and suffix are paired, and the middle is paired
 *    positionally if both middles have equal length, otherwise reported as
 *    all-removed followed by all-added.
 *  - Otherwise a longest-common-subsequence table provides exact-match
 *    anchors, and the gaps between anchors are paired positionally (equal
 *    length) or greedily by `normalizedSimilarity` (threshold 0.4).
 *
 * @param {string[]} a Original units.
 * @param {string[]} b Edited units.
 * @returns {Array<[number|null, number|null]>} Alignment pairs.
 */
function alignUnits(a, b) {
  const lenA = a.length;
  const lenB = b.length;

  if (lenA * lenB > 4e6) {
    const pairs = [];
    let head = 0;
    while (head < lenA && head < lenB && a[head] === b[head]) {
      pairs.push([head, head]);
      head += 1;
    }
    let tail = 0;
    while (tail < lenA - head && tail < lenB - head && a[lenA - 1 - tail] === b[lenB - 1 - tail]) {
      tail += 1;
    }
    const midA = lenA - head - tail;
    const midB = lenB - head - tail;
    if (midA === midB) {
      for (let k = 0; k < midA; k++) pairs.push([head + k, head + k]);
    } else {
      for (let k = 0; k < midA; k++) pairs.push([head + k, null]);
      for (let k = 0; k < midB; k++) pairs.push([null, head + k]);
    }
    // Emit the common suffix front-to-back.
    for (let k = tail; k > 0; k--) pairs.push([lenA - k, lenB - k]);
    return pairs;
  }

  // LCS length table: table[r][c] covers a[0..r) against b[0..c).
  const table = Array.from({ length: lenA + 1 }, () => new Int32Array(lenB + 1));
  for (let r = 1; r <= lenA; r++) {
    const above = table[r - 1];
    const row = table[r];
    for (let c = 1; c <= lenB; c++) {
      if (a[r - 1] === b[c - 1]) {
        row[c] = above[c - 1] + 1;
      } else {
        row[c] = Math.max(above[c], row[c - 1]);
      }
    }
  }

  // Walk back from the bottom-right corner to recover the exact-match anchors.
  const anchors = [];
  let r = lenA;
  let c = lenB;
  while (r > 0 && c > 0) {
    if (a[r - 1] === b[c - 1] && table[r][c] === table[r - 1][c - 1] + 1) {
      anchors.push([r - 1, c - 1]);
      r--;
      c--;
    } else if (table[r - 1][c] >= table[r][c - 1]) {
      r--;
    } else {
      c--;
    }
  }
  anchors.reverse();

  const aligned = [];
  let posA = 0;
  let posB = 0;

  // Pairs everything between the current cursors and (stopA, stopB).
  const drainGap = (stopA, stopB) => {
    if (stopA - posA === stopB - posB) {
      while (posA < stopA) aligned.push([posA++, posB++]);
      return;
    }
    while (posA < stopA && posB < stopB) {
      const leftA = stopA - posA;
      const leftB = stopB - posB;
      const sim = normalizedSimilarity(a[posA], b[posB]);
      if (!(sim >= 0.4)) {
        // Too dissimilar: drop from whichever side has more left over.
        if (leftA >= leftB) aligned.push([posA++, null]);
        else aligned.push([null, posB++]);
        continue;
      }
      // Similar enough, but defer if a later candidate on the longer side
      // matches the current opposite element even better.
      if (leftA > leftB && bestSimInRange(a, posA + 1, posA + leftA - leftB, b[posB]) > sim) {
        aligned.push([posA++, null]);
      } else if (leftB > leftA && bestSimInRange(b, posB + 1, posB + leftB - leftA, a[posA]) > sim) {
        aligned.push([null, posB++]);
      } else {
        aligned.push([posA++, posB++]);
      }
    }
    while (posA < stopA) aligned.push([posA++, null]);
    while (posB < stopB) aligned.push([null, posB++]);
  };

  for (const [anchorA, anchorB] of anchors) {
    drainGap(anchorA, anchorB);
    aligned.push([posA++, posB++]);
  }
  drainGap(lenA, lenB);
  return aligned;
}
21265
/**
 * Highest `normalizedSimilarity` between `target` and any element of
 * `arr[from..to]` (inclusive, clipped to the end of the array).
 *
 * @param {string[]} arr Candidates.
 * @param {number} from First index to inspect.
 * @param {number} to Last index to inspect (inclusive).
 * @param {string} target Value compared against each candidate.
 * @returns {number} Best score found, or 0 when the range is empty.
 */
function bestSimInRange(arr, from, to, target) {
  const lastIdx = Math.min(to, arr.length - 1);
  let top = 0;
  for (let idx = from; idx <= lastIdx; idx++) {
    const score = normalizedSimilarity(arr[idx], target);
    if (score > top) {
      top = score;
    }
  }
  return top;
}
21273
/**
 * Maps each paragraph/heading block to the scanned source paragraph it most
 * likely came from, using normalized text as the join key.
 *
 * - Leading blocks whose text matches a scanned header text, and trailing
 *   blocks whose text matches a scanned footer text, map to `{}`.
 * - If a block's full text has no bucket, the text after its first space is
 *   tried instead, and a hit is flagged with `prefixStripped: true`.
 * - Blocks sharing a key consume the scanned paragraphs of that key in
 *   order; once they run out the block maps to `{}`.
 *
 * @param {Array<object>} blocks Parsed document blocks.
 * @param {Array<object>} scans Per-section scans exposing `bodyParagraphs`,
 *   `headerTexts` and `footerTexts`.
 * @returns {Map<number, {para?: object, prefixStripped?: boolean}>} Block
 *   index -> mapping; only paragraph/heading blocks with text get an entry.
 */
function resolveParagraphMappings(blocks, scans) {
  // Normalized text -> scanned paragraphs, in scan order.
  const parasByKey = new Map();
  for (const { bodyParagraphs } of scans) {
    for (const para of bodyParagraphs) {
      const normalized = normForMatch(para.text);
      if (!normalized) continue;
      const bucket = parasByKey.get(normalized);
      if (bucket) {
        bucket.push(para);
      } else {
        parasByKey.set(normalized, [para]);
      }
    }
  }

  const headerNorms = new Set(scans.flatMap((s) => s.headerTexts.map(normForMatch)).filter(Boolean));
  const footerNorms = new Set(scans.flatMap((s) => s.footerTexts.map(normForMatch)).filter(Boolean));

  const isTextBlock = (blk) => (blk.type === "paragraph" || blk.type === "heading") && Boolean(blk.text);

  // Indices of the contiguous header run at the top and footer run at the bottom.
  const pageTextIdx = new Set();
  for (let idx = 0; idx < blocks.length; idx++) {
    const blk = blocks[idx];
    if (!(isTextBlock(blk) && headerNorms.has(normForMatch(blk.text)))) break;
    pageTextIdx.add(idx);
  }
  for (let idx = blocks.length - 1; idx >= 0; idx--) {
    const blk = blocks[idx];
    if (!(isTextBlock(blk) && footerNorms.has(normForMatch(blk.text)))) break;
    pageTextIdx.add(idx);
  }

  const usedPerKey = new Map();
  const mappings = new Map();
  blocks.forEach((blk, idx) => {
    if (!isTextBlock(blk)) return;
    if (pageTextIdx.has(idx)) {
      mappings.set(idx, {});
      return;
    }
    let lookupKey = normForMatch(blk.text);
    let prefixStripped = false;
    if (!parasByKey.has(lookupKey)) {
      // Retry without the first space-delimited token.
      const firstSpace = blk.text.indexOf(" ");
      if (firstSpace > 0) {
        const withoutPrefix = normForMatch(blk.text.slice(firstSpace + 1));
        if (withoutPrefix && parasByKey.has(withoutPrefix)) {
          lookupKey = withoutPrefix;
          prefixStripped = true;
        }
      }
    }
    const candidates = parasByKey.get(lookupKey);
    if (!candidates) {
      mappings.set(idx, {});
      return;
    }
    const seen = usedPerKey.get(lookupKey) ?? 0;
    usedPerKey.set(lookupKey, seen + 1);
    if (seen < candidates.length) {
      mappings.set(idx, { para: candidates[seen], prefixStripped });
    } else {
      mappings.set(idx, {});
    }
  });
  return mappings;
}
21329
/**
 * Routes one modified unit (original vs. edited) to the matching patcher, or
 * records why it cannot be applied.
 *
 * Every rejection pushes `{ reason, before, after }` onto `ctx.skipped` and
 * yields 0. Otherwise the return value is whatever the selected patcher
 * returns.
 *
 * @param {object} orig Original unit (`raw`, `kind`, `blockIdx`, optional
 *   `role` / `fragment`).
 * @param {object} edited Edited unit (`raw`, `kind`).
 * @param {object} ctx Patch context; reads `origBlocks`, `obTableOrdinals`,
 *   `scanTables` and appends to `skipped`.
 * @returns {number} 0 when skipped, else the patcher's result.
 */
function handleModifiedUnit(orig, edited, ctx) {
  const block = ctx.origBlocks[orig.blockIdx];
  const skip = (reason) => {
    ctx.skipped.push({ reason, before: summarize(orig.raw), after: summarize(edited.raw) });
    return 0;
  };

  // Table caption edits are unsupported.
  if (orig.role === "caption") {
    return skip("\uD45C \uCEA1\uC158 \uC218\uC815\uC740 \uBBF8\uC9C0\uC6D0 (v1)");
  }
  // Image / separator changes are unsupported.
  if (orig.kind === "separator" || orig.kind === "image") {
    return skip("\uC774\uBBF8\uC9C0/\uAD6C\uBD84\uC120 \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");
  }
  // The unit does not resolve to a known block.
  if (!block) {
    return skip("\uBE14\uB85D \uB9E4\uD551 \uC2E4\uD328");
  }
  // Units flagged as fragments cannot be edited individually.
  if (orig.fragment) {
    return skip("\uBB38\uB2E8 \uBD84\uC808(\uAC15\uC81C \uC904\uBC14\uAFC8/\uBCD1\uD569 \uC720\uB2DB) \u2014 \uBD80\uBD84 \uC218\uC815\uC740 \uBBF8\uC9C0\uC6D0 (v1)");
  }

  if (block.type === "table" && block.table) {
    if (orig.kind !== edited.kind) {
      return skip("\uD45C \u2194 \uBE44\uD45C \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0 (\uD45C \uAD6C\uC870 \uBCC0\uACBD)");
    }
    // Ordinal lookup is only used when both sides saw the same number of tables.
    if (ctx.obTableOrdinals.size !== ctx.scanTables.length) {
      return skip("\uD45C \uAC1C\uC218 \uBD88\uC77C\uCE58 \u2014 \uC18C\uC2A4\uB9F5 \uC2E0\uB8B0 \uBD88\uAC00");
    }
    const ordinal = ctx.obTableOrdinals.get(orig.blockIdx);
    const scanTable = ordinal === void 0 ? void 0 : ctx.scanTables[ordinal];
    if (!scanTable) {
      return skip("\uD45C \uC18C\uC2A4\uB9F5 \uB9E4\uD551 \uC2E4\uD328");
    }
    switch (orig.kind) {
      case "gfm-table":
        return patchGfmTable(block.table, scanTable, orig, edited, ctx, skip);
      case "html-table":
        return patchHtmlTable(block.table, scanTable, orig, edited, ctx, skip);
      default:
        return patchTextChunkTable(block.table, scanTable, orig, edited, ctx, skip);
    }
  }

  const isTextualBlock = block.type === "paragraph" || block.type === "heading";
  if (isTextualBlock && orig.kind === "text" && edited.kind === "text") {
    return patchParagraphUnit(block, orig, edited, ctx, skip);
  }
  return skip("\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uBE14\uB85D \uC720\uD615 \uBCC0\uACBD");
}
21354
/**
 * Converts an edited paragraph/heading unit into XML splices for its source
 * paragraph and queues them on the owning section.
 *
 * Rejections go through `skip(reason)` and return its result. Partial
 * concerns (footnote text changed or removed, unrecognised numbering prefix)
 * are pushed to `ctx.skipped` while the body text is still applied.
 *
 * @param {object} block Original block (`text`, optional `footnoteText`).
 * @param {object} orig Original unit (`raw`, `blockIdx`).
 * @param {object} edited Edited unit (`raw`).
 * @param {object} ctx Patch context; reads `paraMap` and `scans`, appends to
 *   `skipped` and `sectionSplices`.
 * @param {(reason: string) => number} skip Rejection callback.
 * @returns {number} 1 when splices were queued, else the value from `skip`.
 */
function patchParagraphUnit(block, orig, edited, ctx, skip) {
  const mapping = ctx.paraMap.get(orig.blockIdx);
  if (!mapping?.para) {
    return skip("\uBB38\uB2E8 \uC18C\uC2A4\uB9F5 \uB9E4\uD551 \uC2E4\uD328 (\uBA38\uB9AC\uB9D0/\uAE00\uC0C1\uC790/\uCEA1\uC158 \uC601\uC5ED\uC774\uAC70\uB098 \uD14D\uC2A4\uD2B8 \uBD88\uC77C\uCE58)");
  }
  // Paragraphs containing a forced line break are not patched.
  if (block.text && block.text.includes("\n")) {
    return skip("\uBB38\uB2E8 \uB0B4 \uAC15\uC81C \uC904\uBC14\uAFC8 \uD3EC\uD568 \u2014 \uC218\uC815 \uC2DC \uC904\uBC14\uAFC8 \uBCF4\uC874 \uBD88\uAC00\uB85C \uBBF8\uC9C0\uC6D0 (v1)");
  }

  const { para } = mapping;
  const previousPlain = textUnitToPlain(orig.raw, block);
  let nextPlain = textUnitToPlain(edited.raw, block);

  if (block.footnoteText) {
    // Strip the trailing "(주: ...)" footnote marker before comparing body text.
    const footnote = nextPlain.match(/\s*\(주: ([\s\S]*)\)$/);
    if (!footnote) {
      ctx.skipped.push({ reason: "\uAC01\uC8FC \uD45C\uAE30 \uC0AD\uC81C\uB294 \uBBF8\uC9C0\uC6D0 \u2014 \uAC01\uC8FC \uC720\uC9C0, \uBCF8\uBB38\uB9CC \uC801\uC6A9", before: `(\uC8FC: ${block.footnoteText})` });
    } else {
      nextPlain = nextPlain.slice(0, footnote.index).trimEnd();
      const footnoteChanged = normForMatch(footnote[1]) !== normForMatch(block.footnoteText);
      if (footnoteChanged) {
        ctx.skipped.push({ reason: "\uAC01\uC8FC \uD14D\uC2A4\uD2B8 \uC218\uC815\uC740 \uBBF8\uC9C0\uC6D0 \u2014 \uBCF8\uBB38\uB9CC \uC801\uC6A9", before: block.footnoteText, after: footnote[1] });
      }
    }
  }

  if (mapping.prefixStripped) {
    // The source paragraph matched only after dropping the block's first
    // token, so drop the corresponding leading token from the edit too.
    const originalPrefix = block.text.split(" ", 1)[0];
    const spaceAt = nextPlain.indexOf(" ");
    const leadingToken = spaceAt > 0 ? nextPlain.slice(0, spaceAt) : nextPlain;
    const looksLikeNumbering = /^(?:[0-90-9a-zA-Z가-힣]{1,6}[.)\]:]|[([][0-90-9a-zA-Z가-힣]{1,6}[)\]][.:]?|[ⅰ-ⅹⅠ-Ⅹ①-⑮][.)\]:]?)$/u;
    if (leadingToken === originalPrefix || looksLikeNumbering.test(leadingToken)) {
      nextPlain = spaceAt > 0 ? nextPlain.slice(spaceAt + 1) : "";
    } else {
      ctx.skipped.push({ reason: "\uC790\uB3D9\uBC88\uD638 \uC811\uB450 \uC2DD\uBCC4 \uC2E4\uD328 \u2014 \uBC88\uD638 \uD3EC\uD568 \uD14D\uC2A4\uD2B8\uB85C \uC801\uC6A9 (\uBDF0\uC5B4\uC5D0\uC11C \uC911\uBCF5 \uD45C\uC2DC \uAC00\uB2A5)", after: summarize(nextPlain) });
    }
  }

  // The plain text did not change, so the edit only touched markup.
  if (nextPlain === previousPlain) {
    return skip("\uD14D\uC2A4\uD2B8 \uC678 \uBCC0\uACBD(\uD5E4\uB529 \uB808\uBCA8/\uC11C\uC2DD)\uB9CC \uAC10\uC9C0 \u2014 \uC2A4\uD0C0\uC77C \uBCC0\uACBD\uC740 \uBBF8\uC9C0\uC6D0");
  }
  // Text that `sanitizeText` would alter is rejected.
  if (sanitizeText(nextPlain) !== nextPlain) {
    return skip("\uACF5\uBC31 \uC815\uADDC\uD654 \uBD88\uC548\uC815 \uD14D\uC2A4\uD2B8 \u2014 \uD328\uCE58 \uC2DC \uC6D0\uBB38 \uBCF4\uC874 \uBD88\uAC00\uB85C \uBBF8\uC9C0\uC6D0");
  }

  const splices = buildParagraphSplices(para, nextPlain, ctx.scans[para.sectionIndex]?.xml);
  if (splices === null) {
    return skip("\uBB38\uB2E8\uC5D0 \uD14D\uC2A4\uD2B8 \uB178\uB4DC\uB97C \uB9CC\uB4E4 \uC218 \uC5C6\uC74C");
  }
  ctx.sectionSplices[para.sectionIndex].push(...splices);
  return 1;
}
21392
/**
 * Reduces a markdown text unit to the plain text of its source block.
 *
 * Steps, in order: join the trimmed non-empty lines with single spaces;
 * strip a leading `#`..`######` marker for headings and for blocks whose
 * sanitized text starts with `[별표 <digits>`; unwrap `[label](url)` when the
 * block has an `href`; strip one pair of surrounding `*` when the block's
 * sanitized text has the form `(...조...관련)`; finally run `unescapeGfm`.
 *
 * @param {string} raw Markdown of the unit.
 * @param {object} block Source block (`type`, optional `text` / `href`).
 * @returns {string} Plain text as returned by `unescapeGfm`.
 */
function textUnitToPlain(raw, block) {
  const parts = [];
  for (const line of raw.split("\n")) {
    const trimmed = line.trim();
    if (trimmed) parts.push(trimmed);
  }
  let text = parts.join(" ");

  const renderedAsHeading = block.type === "heading" || (block.text && /^\[별표\s*\d+/.test(sanitizeText(block.text)));
  if (renderedAsHeading) {
    text = text.replace(/^#{1,6}\s+/, "");
  }

  if (block.href) {
    const link = /^\[([\s\S]*)\]\([^)]*\)$/.exec(text);
    if (link) {
      text = link[1];
    }
  }

  const wrappedInAsterisks = /^\*[^*][\s\S]*\*$/.test(text);
  if (wrappedInAsterisks && block.text && /^\([^)]*조[^)]*관련\)$/.test(sanitizeText(block.text))) {
    text = text.slice(1, -1);
  }

  return unescapeGfm(text);
}
21406
/**
 * Diffs two unit lists by aligning their `raw` strings.
 *
 * @param {Array<{raw: string}>} a Units before.
 * @param {Array<{raw: string}>} b Units after.
 * @returns {{stats: {added: number, removed: number, modified: number,
 *   unchanged: number}, diffs: Array<object>}} Counters plus one entry per
 *   non-identical pair; `modified` entries also carry a `similarity` score.
 */
function diffUnitLists(a, b) {
  const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
  const diffs = [];
  const aligned = alignUnits(a.map((u) => u.raw), b.map((u) => u.raw));

  for (const [left, right] of aligned) {
    if (left === null) {
      if (right === null) continue;
      // Present only in `b`.
      stats.added++;
      diffs.push({ type: "added", after: unitToBlock(b[right]) });
      continue;
    }
    if (right === null) {
      // Present only in `a`.
      stats.removed++;
      diffs.push({ type: "removed", before: unitToBlock(a[left]) });
      continue;
    }
    const before = a[left];
    const after = b[right];
    if (before.raw === after.raw) {
      stats.unchanged++;
      continue;
    }
    stats.modified++;
    diffs.push({
      type: "modified",
      before: unitToBlock(before),
      after: unitToBlock(after),
      similarity: normalizedSimilarity(before.raw, after.raw),
    });
  }

  return { stats, diffs };
}
21428
/**
 * Wraps a unit's raw markdown in a paragraph-shaped block.
 *
 * @param {{raw: string}} u Unit to wrap.
 * @returns {{type: "paragraph", text: string}} Block carrying `u.raw` as text.
 */
function unitToBlock(u) {
  const { raw } = u;
  return { type: "paragraph", text: raw };
}
21431
/**
 * Copies exactly the bytes viewed by a typed array into a new buffer.
 *
 * @param {Uint8Array} u8 View whose bytes are copied.
 * @returns {ArrayBuffer} Result of slicing `u8.buffer` to the view's range.
 */
function u8ToArrayBuffer(u8) {
  const { buffer, byteOffset, byteLength } = u8;
  const end = byteOffset + byteLength;
  return buffer.slice(byteOffset, end);
}
21434
/**
 * Resolves the archive entry names of the document's section XML files, in
 * document order.
 *
 * The manifest (`Contents/content.hpf`, then `content.hpf`) is consulted
 * first; paths it lists that do not exist in the archive are dropped. When no
 * manifest yields a usable path, every entry matching `[Ss]ection<N>.xml` is
 * returned instead.
 *
 * The fallback orders entries by directory prefix and then by the numeric
 * value of `<N>`. A plain lexicographic sort would place `section10.xml`
 * before `section2.xml` and scramble documents with more than ten sections.
 *
 * @param {object} zip Archive exposing `file(name)` (an entry with
 *   `async("text")`, or null) and a `files` map keyed by entry name.
 * @returns {Promise<string[]>} Section entry names in document order.
 */
async function resolveSectionEntryNames(zip) {
  for (const manifestPath of ["Contents/content.hpf", "content.hpf"]) {
    const manifest = zip.file(manifestPath);
    if (!manifest) continue;
    const xml = await manifest.async("text");
    const paths = sectionPathsFromManifest(xml).filter((p) => zip.file(p) !== null);
    if (paths.length > 0) return paths;
  }

  const compare = (p, q) => (p < q ? -1 : p > q ? 1 : 0);
  return Object.keys(zip.files)
    .filter((name) => /[Ss]ection\d+\.xml$/.test(name))
    .map((name) => {
      // The filter above guarantees a trailing "<digits>.xml".
      const parts = /^([\s\S]*?)(\d+)\.xml$/.exec(name);
      return { name, prefix: parts[1], index: Number(parts[2]) };
    })
    .sort((x, y) => compare(x.prefix, y.prefix) || x.index - y.index || compare(x.name, y.name))
    .map((entry) => entry.name);
}
21444
/**
 * Extracts section file paths from a package manifest (`content.hpf`) string.
 *
 * `<opf:item>` elements are indexed by `id` when the id looks like a section
 * (starts with "s"/"S" or contains "section") or the media type mentions
 * "xml". Section-like items with a relative `href` outside `Contents/` get
 * that prefix. If the manifest has `<opf:itemref>` elements, their order
 * decides the result; otherwise the section-like items are returned sorted by
 * id.
 *
 * Differences from a naive implementation:
 *  - Items without an `href` are ignored, so a bare `"Contents/"` path is
 *    never produced.
 *  - The id fallback sort is numeric-aware, so `section2` precedes
 *    `section10`.
 *
 * @param {string} xml Manifest XML text.
 * @returns {string[]} Section paths in document order (possibly empty).
 */
function sectionPathsFromManifest(xml) {
  const isSectionId = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
  // Reads a single- or double-quoted attribute from a tag's attribute text.
  const attr = (tag, name) => {
    const m = tag.match(new RegExp(`(?:^|\\s)${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`));
    return m ? m[1] ?? m[2] : "";
  };

  const idToHref = new Map();
  for (const m of xml.matchAll(/<opf:item(\s(?:"[^"]*"|'[^']*'|[^>"'])*?)\/?>/g)) {
    const id = attr(m[1], "id");
    let href = attr(m[1], "href");
    const mediaType = attr(m[1], "media-type");
    if (!isSectionId(id) && !mediaType.includes("xml")) continue;
    // An item that names no file cannot be a section.
    if (!id || !href) continue;
    if (!href.startsWith("/") && !href.startsWith("Contents/") && isSectionId(id)) {
      href = `Contents/${href}`;
    }
    idToHref.set(id, href);
  }

  const ordered = [];
  for (const m of xml.matchAll(/<opf:itemref(\s(?:"[^"]*"|'[^']*'|[^>"'])*?)\/?>/g)) {
    const href = idToHref.get(attr(m[1], "idref"));
    if (href) ordered.push(href);
  }
  if (ordered.length > 0) return ordered;

  // No itemref order available: fall back to natural (numeric-aware) id order.
  return Array.from(idToHref.entries())
    .filter(([id]) => isSectionId(id))
    .sort((a, b) => a[0].localeCompare(b[0], undefined, { numeric: true }))
    .map(([, href]) => href);
}
21467
+
19044
21468
  // src/print/renderer.ts
19045
21469
  import { existsSync } from "fs";
19046
21470
  import MarkdownIt from "markdown-it";
@@ -19281,7 +21705,7 @@ async function parseHwp(buffer, options) {
19281
21705
  async function parsePdf(buffer, options) {
19282
21706
  let parsePdfDocument;
19283
21707
  try {
19284
- const mod = await import("./parser-OMPBVEFU.js");
21708
+ const mod = await import("./parser-4IVYHKSL.js");
19285
21709
  parsePdfDocument = mod.parsePdfDocument;
19286
21710
  } catch {
19287
21711
  return {
@@ -19399,6 +21823,7 @@ export {
19399
21823
  parsePdf,
19400
21824
  parseXls,
19401
21825
  parseXlsx,
21826
+ patchHwpx,
19402
21827
  renderHtml
19403
21828
  };
19404
21829
  //# sourceMappingURL=index.js.map