kordoc 2.2.5 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/README.md +16 -4
  2. package/dist/{chunk-UU2O6D3R.js → chunk-JFTFC2BB.js} +2 -2
  3. package/dist/{chunk-JH5XLWJQ.js.map → chunk-JFTFC2BB.js.map} +1 -1
  4. package/dist/{chunk-5Y2Q3BRW.js → chunk-M3E3C5GS.js} +8 -1
  5. package/dist/chunk-M3E3C5GS.js.map +1 -0
  6. package/dist/{chunk-RQWICKON.js → chunk-OEJJPCMM.js} +369 -73
  7. package/dist/chunk-OEJJPCMM.js.map +1 -0
  8. package/dist/{chunk-JH5XLWJQ.js → chunk-Z7UPTVMX.js} +2 -2
  9. package/dist/{chunk-UU2O6D3R.js.map → chunk-Z7UPTVMX.js.map} +1 -1
  10. package/dist/{chunk-OJ4QR33V.cjs → chunk-ZNJPRRIA.cjs} +2 -2
  11. package/dist/{chunk-OJ4QR33V.cjs.map → chunk-ZNJPRRIA.cjs.map} +1 -1
  12. package/dist/cli.js +7 -4
  13. package/dist/cli.js.map +1 -1
  14. package/dist/{detect-GYK3HKD5.js → detect-I7YIS4Q6.js} +4 -2
  15. package/dist/index.cjs +463 -160
  16. package/dist/index.cjs.map +1 -1
  17. package/dist/index.d.cts +4 -2
  18. package/dist/index.d.ts +4 -2
  19. package/dist/index.js +387 -84
  20. package/dist/index.js.map +1 -1
  21. package/dist/mcp.js +5 -5
  22. package/dist/{parser-OIRWPKIQ.js → parser-25LF2S2J.js} +45 -42
  23. package/dist/{parser-OIRWPKIQ.js.map → parser-25LF2S2J.js.map} +1 -1
  24. package/dist/{parser-PXD73E4H.js → parser-4LKJXBPP.js} +45 -42
  25. package/dist/{parser-PXD73E4H.js.map → parser-4LKJXBPP.js.map} +1 -1
  26. package/dist/{parser-CYBX5MP4.cjs → parser-KBQZB3QY.cjs} +61 -58
  27. package/dist/{parser-CYBX5MP4.cjs.map → parser-KBQZB3QY.cjs.map} +1 -1
  28. package/dist/{watch-NSBABJ4A.js → watch-GXRBLW3Y.js} +4 -4
  29. package/package.json +2 -2
  30. package/dist/chunk-5Y2Q3BRW.js.map +0 -1
  31. package/dist/chunk-RQWICKON.js.map +0 -1
  32. /package/dist/{detect-GYK3HKD5.js.map → detect-I7YIS4Q6.js.map} +0 -0
  33. /package/dist/{watch-NSBABJ4A.js.map → watch-GXRBLW3Y.js.map} +0 -0
package/dist/index.cjs CHANGED
@@ -16,7 +16,7 @@
16
16
 
17
17
 
18
18
 
19
- var _chunkOJ4QR33Vcjs = require('./chunk-OJ4QR33V.cjs');
19
+ var _chunkZNJPRRIAcjs = require('./chunk-ZNJPRRIA.cjs');
20
20
 
21
21
 
22
22
  var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
@@ -44,11 +44,17 @@ function isPdfFile(buffer) {
44
44
  const b = magicBytes(buffer);
45
45
  return b[0] === 37 && b[1] === 80 && b[2] === 68 && b[3] === 70;
46
46
  }
47
+ function isHwpmlFile(buffer) {
48
+ const bytes = new Uint8Array(buffer, 0, Math.min(512, buffer.byteLength));
49
+ const head = new TextDecoder("utf-8", { fatal: false }).decode(bytes).replace(/^\uFEFF/, "");
50
+ return head.trimStart().startsWith("<?xml") && head.includes("<HWPML");
51
+ }
47
52
  function detectFormat(buffer) {
48
53
  if (buffer.byteLength < 4) return "unknown";
49
54
  if (isZipFile(buffer)) return "hwpx";
50
55
  if (isOldHwpFile(buffer)) return "hwp";
51
56
  if (isPdfFile(buffer)) return "pdf";
57
+ if (isHwpmlFile(buffer)) return "hwpml";
52
58
  return "unknown";
53
59
  }
54
60
  async function detectZipFormat(buffer) {
@@ -78,7 +84,7 @@ var MAX_XML_DEPTH = 200;
78
84
  function createXmlParser(warnings) {
79
85
  return new (0, _xmldom.DOMParser)({
80
86
  onError(level, msg) {
81
- if (level === "fatalError") throw new (0, _chunkOJ4QR33Vcjs.KordocError)(`XML \uD30C\uC2F1 \uC2E4\uD328: ${msg}`);
87
+ if (level === "fatalError") throw new (0, _chunkZNJPRRIAcjs.KordocError)(`XML \uD30C\uC2F1 \uC2E4\uD328: ${msg}`);
82
88
  _optionalChain([warnings, 'optionalAccess', _2 => _2.push, 'call', _3 => _3({ code: "MALFORMED_XML", message: `XML ${level === "warn" ? "\uACBD\uACE0" : "\uC624\uB958"}: ${msg}` })]);
83
89
  }
84
90
  });
@@ -97,10 +103,10 @@ async function extractHwpxStyles(zip, decompressed) {
97
103
  const xml = await file.async("text");
98
104
  if (decompressed) {
99
105
  decompressed.total += xml.length * 2;
100
- if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
106
+ if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkZNJPRRIAcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
101
107
  }
102
108
  const parser = createXmlParser();
103
- const doc = parser.parseFromString(_chunkOJ4QR33Vcjs.stripDtd.call(void 0, xml), "text/xml");
109
+ const doc = parser.parseFromString(_chunkZNJPRRIAcjs.stripDtd.call(void 0, xml), "text/xml");
104
110
  if (!doc.documentElement) continue;
105
111
  parseCharProperties(doc, result.charProperties);
106
112
  parseStyleElements(doc, result.styles);
@@ -162,7 +168,7 @@ function parseStyleElements(doc, map) {
162
168
  }
163
169
  }
164
170
  async function parseHwpxDocument(buffer, options) {
165
- _chunkOJ4QR33Vcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
171
+ _chunkZNJPRRIAcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
166
172
  let zip;
167
173
  try {
168
174
  zip = await _jszip2.default.loadAsync(buffer);
@@ -171,7 +177,7 @@ async function parseHwpxDocument(buffer, options) {
171
177
  }
172
178
  const actualEntryCount = Object.keys(zip.files).length;
173
179
  if (actualEntryCount > MAX_ZIP_ENTRIES) {
174
- throw new (0, _chunkOJ4QR33Vcjs.KordocError)("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
180
+ throw new (0, _chunkZNJPRRIAcjs.KordocError)("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
175
181
  }
176
182
  const decompressed = { total: 0 };
177
183
  const metadata = {};
@@ -179,11 +185,12 @@ async function parseHwpxDocument(buffer, options) {
179
185
  const styleMap = await extractHwpxStyles(zip, decompressed);
180
186
  const warnings = [];
181
187
  const sectionPaths = await resolveSectionPaths(zip);
182
- if (sectionPaths.length === 0) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
188
+ if (sectionPaths.length === 0) throw new (0, _chunkZNJPRRIAcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
183
189
  metadata.pageCount = sectionPaths.length;
184
190
  const pageFilter = _optionalChain([options, 'optionalAccess', _4 => _4.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sectionPaths.length) : null;
185
191
  const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
186
192
  const blocks = [];
193
+ const nestedTableCounter = { count: 0 };
187
194
  let parsedSections = 0;
188
195
  for (let si = 0; si < sectionPaths.length; si++) {
189
196
  if (pageFilter && !pageFilter.has(si + 1)) continue;
@@ -192,19 +199,19 @@ async function parseHwpxDocument(buffer, options) {
192
199
  try {
193
200
  const xml = await file.async("text");
194
201
  decompressed.total += xml.length * 2;
195
- if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
196
- blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1));
202
+ if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkZNJPRRIAcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
203
+ blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, nestedTableCounter));
197
204
  parsedSections++;
198
205
  _optionalChain([options, 'optionalAccess', _5 => _5.onProgress, 'optionalCall', _6 => _6(parsedSections, totalTarget)]);
199
206
  } catch (secErr) {
200
- if (secErr instanceof _chunkOJ4QR33Vcjs.KordocError) throw secErr;
207
+ if (secErr instanceof _chunkZNJPRRIAcjs.KordocError) throw secErr;
201
208
  warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
202
209
  }
203
210
  }
204
211
  const images = await extractImagesFromZip(zip, blocks, decompressed, warnings);
205
212
  detectHwpxHeadings(blocks, styleMap);
206
213
  const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
207
- const markdown = _chunkOJ4QR33Vcjs.blocksToMarkdown.call(void 0, blocks);
214
+ const markdown = _chunkZNJPRRIAcjs.blocksToMarkdown.call(void 0, blocks);
208
215
  return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
209
216
  }
210
217
  function imageExtToMime(ext) {
@@ -254,16 +261,29 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
254
261
  ref
255
262
  // 절대 경로일 수도 있음
256
263
  ];
264
+ let resolvedPath = null;
265
+ if (!ref.includes(".")) {
266
+ const prefixes = [`BinData/${ref}`, `Contents/BinData/${ref}`];
267
+ for (const prefix of prefixes) {
268
+ const match = zip.file(new RegExp(`^${prefix.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\.[a-zA-Z0-9]+$`));
269
+ if (match.length > 0) {
270
+ resolvedPath = match[0].name;
271
+ break;
272
+ }
273
+ }
274
+ }
257
275
  let found = false;
258
- for (const path of candidates) {
259
- if (_chunkOJ4QR33Vcjs.isPathTraversal.call(void 0, path)) continue;
276
+ const allCandidates = resolvedPath ? [resolvedPath, ...candidates] : candidates;
277
+ for (const path of allCandidates) {
278
+ if (_chunkZNJPRRIAcjs.isPathTraversal.call(void 0, path)) continue;
260
279
  const file = zip.file(path);
261
280
  if (!file) continue;
262
281
  try {
263
282
  const data = await file.async("uint8array");
264
283
  decompressed.total += data.length;
265
- if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
266
- const ext = ref.includes(".") ? ref.split(".").pop() || "png" : "png";
284
+ if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkZNJPRRIAcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
285
+ const actualPath = path;
286
+ const ext = actualPath.includes(".") ? actualPath.split(".").pop() || "png" : "png";
267
287
  const mimeType = imageExtToMime(ext);
268
288
  imageIndex++;
269
289
  const filename = `image_${String(imageIndex).padStart(3, "0")}.${mimeToExt(mimeType)}`;
@@ -273,7 +293,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
273
293
  found = true;
274
294
  break;
275
295
  } catch (err) {
276
- if (err instanceof _chunkOJ4QR33Vcjs.KordocError) throw err;
296
+ if (err instanceof _chunkZNJPRRIAcjs.KordocError) throw err;
277
297
  }
278
298
  }
279
299
  if (!found) {
@@ -293,7 +313,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
293
313
  const xml = await file.async("text");
294
314
  if (decompressed) {
295
315
  decompressed.total += xml.length * 2;
296
- if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
316
+ if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkZNJPRRIAcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
297
317
  }
298
318
  parseDublinCoreMetadata(xml, metadata);
299
319
  if (metadata.title || metadata.author) return;
@@ -303,7 +323,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
303
323
  }
304
324
  function parseDublinCoreMetadata(xml, metadata) {
305
325
  const parser = createXmlParser();
306
- const doc = parser.parseFromString(_chunkOJ4QR33Vcjs.stripDtd.call(void 0, xml), "text/xml");
326
+ const doc = parser.parseFromString(_chunkZNJPRRIAcjs.stripDtd.call(void 0, xml), "text/xml");
307
327
  if (!doc.documentElement) return;
308
328
  const getText = (tagNames) => {
309
329
  for (const tag of tagNames) {
@@ -336,6 +356,7 @@ function extractFromBrokenZip(buffer) {
336
356
  let totalDecompressed = 0;
337
357
  let entryCount = 0;
338
358
  let sectionNum = 0;
359
+ const nestedTableCounter = { count: 0 };
339
360
  while (pos < data.length - 30) {
340
361
  if (data[pos] !== 80 || data[pos + 1] !== 75 || data[pos + 2] !== 3 || data[pos + 3] !== 4) {
341
362
  pos++;
@@ -362,7 +383,7 @@ function extractFromBrokenZip(buffer) {
362
383
  }
363
384
  const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
364
385
  const name = new TextDecoder().decode(nameBytes);
365
- if (_chunkOJ4QR33Vcjs.isPathTraversal.call(void 0, name)) {
386
+ if (_chunkZNJPRRIAcjs.isPathTraversal.call(void 0, name)) {
366
387
  pos = fileStart + compSize;
367
388
  continue;
368
389
  }
@@ -380,15 +401,15 @@ function extractFromBrokenZip(buffer) {
380
401
  continue;
381
402
  }
382
403
  totalDecompressed += content.length * 2;
383
- if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
404
+ if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0, _chunkZNJPRRIAcjs.KordocError)("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
384
405
  sectionNum++;
385
- blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum));
406
+ blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, nestedTableCounter));
386
407
  } catch (e6) {
387
408
  continue;
388
409
  }
389
410
  }
390
- if (blocks.length === 0) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
391
- const markdown = _chunkOJ4QR33Vcjs.blocksToMarkdown.call(void 0, blocks);
411
+ if (blocks.length === 0) throw new (0, _chunkZNJPRRIAcjs.KordocError)("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
412
+ const markdown = _chunkZNJPRRIAcjs.blocksToMarkdown.call(void 0, blocks);
392
413
  return { markdown, blocks, warnings: warnings.length > 0 ? warnings : void 0 };
393
414
  }
394
415
  async function resolveSectionPaths(zip) {
@@ -406,7 +427,7 @@ async function resolveSectionPaths(zip) {
406
427
  }
407
428
  function parseSectionPathsFromManifest(xml) {
408
429
  const parser = createXmlParser();
409
- const doc = parser.parseFromString(_chunkOJ4QR33Vcjs.stripDtd.call(void 0, xml), "text/xml");
430
+ const doc = parser.parseFromString(_chunkZNJPRRIAcjs.stripDtd.call(void 0, xml), "text/xml");
410
431
  const items = doc.getElementsByTagName("opf:item");
411
432
  const spine = doc.getElementsByTagName("opf:itemref");
412
433
  const isSectionId = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
@@ -453,9 +474,9 @@ function detectHwpxHeadings(blocks, styleMap) {
453
474
  let level = 0;
454
475
  if (baseFontSize > 0 && _optionalChain([block, 'access', _15 => _15.style, 'optionalAccess', _16 => _16.fontSize])) {
455
476
  const ratio = block.style.fontSize / baseFontSize;
456
- if (ratio >= _chunkOJ4QR33Vcjs.HEADING_RATIO_H1) level = 1;
457
- else if (ratio >= _chunkOJ4QR33Vcjs.HEADING_RATIO_H2) level = 2;
458
- else if (ratio >= _chunkOJ4QR33Vcjs.HEADING_RATIO_H3) level = 3;
477
+ if (ratio >= _chunkZNJPRRIAcjs.HEADING_RATIO_H1) level = 1;
478
+ else if (ratio >= _chunkZNJPRRIAcjs.HEADING_RATIO_H2) level = 2;
479
+ else if (ratio >= _chunkZNJPRRIAcjs.HEADING_RATIO_H3) level = 3;
459
480
  }
460
481
  const compactText = text.replace(/\s+/g, "");
461
482
  if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
@@ -467,12 +488,40 @@ function detectHwpxHeadings(blocks, styleMap) {
467
488
  }
468
489
  }
469
490
  }
470
- function parseSectionXml(xml, styleMap, warnings, sectionNum) {
491
+ function makeNestedTableMarker(counter, rows) {
492
+ counter.count++;
493
+ const firstRow = _nullishCoalesce(rows[0], () => ( []));
494
+ const hint = firstRow.map((c) => c.text.trim().replace(/\n/g, " ")).filter(Boolean).join(" | ");
495
+ const hintChars = [...hint];
496
+ const truncated = hintChars.length > 60 ? hintChars.slice(0, 60).join("") + "\u2026" : hint;
497
+ return truncated ? `[\uC911\uCCA9 \uD14C\uC774\uBE14 #${counter.count}: ${truncated}]` : `[\uC911\uCCA9 \uD14C\uC774\uBE14 #${counter.count}]`;
498
+ }
499
+ function handleNestedTable(newTable, tableStack, blocks, ctx) {
500
+ const parentTable = tableStack.pop();
501
+ let nestedCols = 0;
502
+ for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
503
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
504
+ blocks.push({ type: "table", table: _chunkZNJPRRIAcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
505
+ if (parentTable.cell) {
506
+ const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
507
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker;
508
+ }
509
+ } else {
510
+ const nestedText = _chunkZNJPRRIAcjs.convertTableToText.call(void 0, newTable.rows);
511
+ if (parentTable.cell) {
512
+ const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
513
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker + "\n" + nestedText;
514
+ }
515
+ }
516
+ return parentTable;
517
+ }
518
+ function parseSectionXml(xml, styleMap, warnings, sectionNum, counter) {
471
519
  const parser = createXmlParser(warnings);
472
- const doc = parser.parseFromString(_chunkOJ4QR33Vcjs.stripDtd.call(void 0, xml), "text/xml");
520
+ const doc = parser.parseFromString(_chunkZNJPRRIAcjs.stripDtd.call(void 0, xml), "text/xml");
473
521
  if (!doc.documentElement) return [];
474
522
  const blocks = [];
475
- walkSection(doc.documentElement, blocks, null, [], styleMap, warnings, sectionNum);
523
+ const ctx = { styleMap, warnings, sectionNum, counter };
524
+ walkSection(doc.documentElement, blocks, null, [], ctx);
476
525
  return blocks;
477
526
  }
478
527
  function extractImageRef(el) {
@@ -493,7 +542,7 @@ function extractImageRef(el) {
493
542
  if (directRef) return directRef;
494
543
  return null;
495
544
  }
496
- function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth = 0) {
545
+ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
497
546
  if (depth > MAX_XML_DEPTH) return;
498
547
  const children = node.childNodes;
499
548
  if (!children) return;
@@ -506,23 +555,12 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
506
555
  case "tbl": {
507
556
  if (tableCtx) tableStack.push(tableCtx);
508
557
  const newTable = { rows: [], currentRow: [], cell: null };
509
- walkSection(el, blocks, newTable, tableStack, styleMap, warnings, sectionNum, depth + 1);
558
+ walkSection(el, blocks, newTable, tableStack, ctx, depth + 1);
510
559
  if (newTable.rows.length > 0) {
511
560
  if (tableStack.length > 0) {
512
- const parentTable = tableStack.pop();
513
- let nestedCols = 0;
514
- for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
515
- if (newTable.rows.length >= 3 && nestedCols >= 2) {
516
- blocks.push({ type: "table", table: _chunkOJ4QR33Vcjs.buildTable.call(void 0, newTable.rows), pageNumber: sectionNum });
517
- } else {
518
- const nestedText = _chunkOJ4QR33Vcjs.convertTableToText.call(void 0, newTable.rows);
519
- if (parentTable.cell) {
520
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
521
- }
522
- }
523
- tableCtx = parentTable;
561
+ tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
524
562
  } else {
525
- blocks.push({ type: "table", table: _chunkOJ4QR33Vcjs.buildTable.call(void 0, newTable.rows), pageNumber: sectionNum });
563
+ blocks.push({ type: "table", table: _chunkZNJPRRIAcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
526
564
  tableCtx = null;
527
565
  }
528
566
  } else {
@@ -533,7 +571,7 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
533
571
  case "tr":
534
572
  if (tableCtx) {
535
573
  tableCtx.currentRow = [];
536
- walkSection(el, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth + 1);
574
+ walkSection(el, blocks, tableCtx, tableStack, ctx, depth + 1);
537
575
  if (tableCtx.currentRow.length > 0) tableCtx.rows.push(tableCtx.currentRow);
538
576
  tableCtx.currentRow = [];
539
577
  }
@@ -541,7 +579,7 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
541
579
  case "tc":
542
580
  if (tableCtx) {
543
581
  tableCtx.cell = { text: "", colSpan: 1, rowSpan: 1 };
544
- walkSection(el, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth + 1);
582
+ walkSection(el, blocks, tableCtx, tableStack, ctx, depth + 1);
545
583
  if (tableCtx.cell) {
546
584
  tableCtx.currentRow.push(tableCtx.cell);
547
585
  tableCtx.cell = null;
@@ -562,24 +600,24 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
562
600
  const cs = isNaN(rawCs) ? 1 : rawCs;
563
601
  const rawRs = parseInt(el.getAttribute("rowSpan") || "1", 10);
564
602
  const rs = isNaN(rawRs) ? 1 : rawRs;
565
- tableCtx.cell.colSpan = clampSpan(cs, _chunkOJ4QR33Vcjs.MAX_COLS);
566
- tableCtx.cell.rowSpan = clampSpan(rs, _chunkOJ4QR33Vcjs.MAX_ROWS);
603
+ tableCtx.cell.colSpan = clampSpan(cs, _chunkZNJPRRIAcjs.MAX_COLS);
604
+ tableCtx.cell.rowSpan = clampSpan(rs, _chunkZNJPRRIAcjs.MAX_ROWS);
567
605
  }
568
606
  break;
569
607
  case "p": {
570
- const { text, href, footnote, style } = extractParagraphInfo(el, styleMap);
608
+ const { text, href, footnote, style } = extractParagraphInfo(el, ctx.styleMap);
571
609
  if (text) {
572
610
  if (_optionalChain([tableCtx, 'optionalAccess', _19 => _19.cell])) {
573
611
  tableCtx.cell.text += (tableCtx.cell.text ? "\n" : "") + text;
574
612
  } else if (!tableCtx) {
575
- const block = { type: "paragraph", text, pageNumber: sectionNum };
613
+ const block = { type: "paragraph", text, pageNumber: ctx.sectionNum };
576
614
  if (style) block.style = style;
577
615
  if (href) block.href = href;
578
616
  if (footnote) block.footnoteText = footnote;
579
617
  blocks.push(block);
580
618
  }
581
619
  }
582
- tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth + 1);
620
+ tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, ctx, depth + 1);
583
621
  break;
584
622
  }
585
623
  // 이미지/그림 — 경로 추출 또는 경고
@@ -588,19 +626,19 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
588
626
  case "drawingObject": {
589
627
  const imgRef = extractImageRef(el);
590
628
  if (imgRef) {
591
- blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
592
- } else if (warnings && sectionNum) {
593
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
629
+ blocks.push({ type: "image", text: imgRef, pageNumber: ctx.sectionNum });
630
+ } else if (ctx.warnings && ctx.sectionNum) {
631
+ ctx.warnings.push({ page: ctx.sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
594
632
  }
595
633
  break;
596
634
  }
597
635
  default:
598
- walkSection(el, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth + 1);
636
+ walkSection(el, blocks, tableCtx, tableStack, ctx, depth + 1);
599
637
  break;
600
638
  }
601
639
  }
602
640
  }
603
- function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth = 0) {
641
+ function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
604
642
  if (depth > MAX_XML_DEPTH) return tableCtx;
605
643
  const children = node.childNodes;
606
644
  if (!children) return tableCtx;
@@ -616,23 +654,12 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
616
654
  if (localTag === "tbl") {
617
655
  if (tableCtx) tableStack.push(tableCtx);
618
656
  const newTable = { rows: [], currentRow: [], cell: null };
619
- walkSection(el, blocks, newTable, tableStack, styleMap, warnings, sectionNum, d + 1);
657
+ walkSection(el, blocks, newTable, tableStack, ctx, d + 1);
620
658
  if (newTable.rows.length > 0) {
621
659
  if (tableStack.length > 0) {
622
- const parentTable = tableStack.pop();
623
- let nestedCols = 0;
624
- for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
625
- if (newTable.rows.length >= 3 && nestedCols >= 2) {
626
- blocks.push({ type: "table", table: _chunkOJ4QR33Vcjs.buildTable.call(void 0, newTable.rows), pageNumber: sectionNum });
627
- } else {
628
- const nestedText = _chunkOJ4QR33Vcjs.convertTableToText.call(void 0, newTable.rows);
629
- if (parentTable.cell) {
630
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
631
- }
632
- }
633
- tableCtx = parentTable;
660
+ tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
634
661
  } else {
635
- blocks.push({ type: "table", table: _chunkOJ4QR33Vcjs.buildTable.call(void 0, newTable.rows), pageNumber: sectionNum });
662
+ blocks.push({ type: "table", table: _chunkZNJPRRIAcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
636
663
  tableCtx = null;
637
664
  }
638
665
  } else {
@@ -641,21 +668,21 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
641
668
  } else if (localTag === "pic" || localTag === "shape" || localTag === "drawingObject") {
642
669
  const drawTextChild = findDescendant(el, "drawText");
643
670
  if (drawTextChild) {
644
- extractDrawTextBlocks(drawTextChild, blocks, styleMap, sectionNum);
671
+ extractDrawTextBlocks(drawTextChild, blocks, ctx.styleMap, ctx.sectionNum);
645
672
  } else {
646
673
  const imgRef = extractImageRef(el);
647
674
  if (imgRef) {
648
- blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
649
- } else if (warnings && sectionNum) {
650
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
675
+ blocks.push({ type: "image", text: imgRef, pageNumber: ctx.sectionNum });
676
+ } else if (ctx.warnings && ctx.sectionNum) {
677
+ ctx.warnings.push({ page: ctx.sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
651
678
  }
652
679
  }
653
680
  } else if (localTag === "drawText") {
654
- extractDrawTextBlocks(el, blocks, styleMap, sectionNum);
681
+ extractDrawTextBlocks(el, blocks, ctx.styleMap, ctx.sectionNum);
655
682
  } else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
656
683
  walkChildren(el, d + 1);
657
684
  } else if (localTag === "run") {
658
- tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, styleMap, warnings, sectionNum, depth + 1);
685
+ tableCtx = walkParagraphChildren(el, blocks, tableCtx, tableStack, ctx, depth + 1);
659
686
  }
660
687
  }
661
688
  };
@@ -740,7 +767,7 @@ function extractParagraphInfo(para, styleMap) {
740
767
  case "hyperlink": {
741
768
  const url = child.getAttribute("url") || child.getAttribute("href") || "";
742
769
  if (url) {
743
- const safe = _chunkOJ4QR33Vcjs.sanitizeHref.call(void 0, url);
770
+ const safe = _chunkZNJPRRIAcjs.sanitizeHref.call(void 0, url);
744
771
  if (safe) href = safe;
745
772
  }
746
773
  walk(child);
@@ -880,7 +907,7 @@ function decompressStream(data) {
880
907
  return _zlib.inflateRawSync.call(void 0, data, opts);
881
908
  }
882
909
  function parseFileHeader(data) {
883
- if (data.length < 40) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("FileHeader\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 40\uBC14\uC774\uD2B8)");
910
+ if (data.length < 40) throw new (0, _chunkZNJPRRIAcjs.KordocError)("FileHeader\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 40\uBC14\uC774\uD2B8)");
884
911
  const sig = data.subarray(0, 32).toString("utf8").replace(/\0+$/, "");
885
912
  return {
886
913
  signature: sig,
@@ -1899,7 +1926,7 @@ function parseHwp5Document(buffer, options) {
1899
1926
  lenientCfb = parseLenientCfb(buffer);
1900
1927
  warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
1901
1928
  } catch (e11) {
1902
- throw new (0, _chunkOJ4QR33Vcjs.KordocError)("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
1929
+ throw new (0, _chunkZNJPRRIAcjs.KordocError)("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
1903
1930
  }
1904
1931
  }
1905
1932
  const findStream = (path) => {
@@ -1910,11 +1937,11 @@ function parseHwp5Document(buffer, options) {
1910
1937
  return lenientCfb.findStream(path);
1911
1938
  };
1912
1939
  const headerData = findStream("/FileHeader");
1913
- if (!headerData) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
1940
+ if (!headerData) throw new (0, _chunkZNJPRRIAcjs.KordocError)("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
1914
1941
  const header = parseFileHeader(headerData);
1915
- if (header.signature !== "HWP Document File") throw new (0, _chunkOJ4QR33Vcjs.KordocError)("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
1916
- if (header.flags & FLAG_ENCRYPTED) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1917
- if (header.flags & FLAG_DRM) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1942
+ if (header.signature !== "HWP Document File") throw new (0, _chunkZNJPRRIAcjs.KordocError)("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
1943
+ if (header.flags & FLAG_ENCRYPTED) throw new (0, _chunkZNJPRRIAcjs.KordocError)("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1944
+ if (header.flags & FLAG_DRM) throw new (0, _chunkZNJPRRIAcjs.KordocError)("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1918
1945
  const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
1919
1946
  const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
1920
1947
  const metadata = {
@@ -1923,11 +1950,12 @@ function parseHwp5Document(buffer, options) {
1923
1950
  if (cfb) extractHwp5Metadata(cfb, metadata);
1924
1951
  const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
1925
1952
  const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
1926
- if (sections.length === 0) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
1953
+ if (sections.length === 0) throw new (0, _chunkZNJPRRIAcjs.KordocError)("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
1927
1954
  metadata.pageCount = sections.length;
1928
1955
  const pageFilter = _optionalChain([options, 'optionalAccess', _21 => _21.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sections.length) : null;
1929
1956
  const totalTarget = pageFilter ? pageFilter.size : sections.length;
1930
1957
  const blocks = [];
1958
+ const nestedTableCounter = { count: 0 };
1931
1959
  let totalDecompressed = 0;
1932
1960
  let parsedSections = 0;
1933
1961
  for (let si = 0; si < sections.length; si++) {
@@ -1936,24 +1964,24 @@ function parseHwp5Document(buffer, options) {
1936
1964
  const sectionData = sections[si];
1937
1965
  const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
1938
1966
  totalDecompressed += data.length;
1939
- if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
1967
+ if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkZNJPRRIAcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
1940
1968
  const records = readRecords(data);
1941
- const sectionBlocks = parseSection(records, docInfo, warnings, si + 1);
1969
+ const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, nestedTableCounter);
1942
1970
  blocks.push(...sectionBlocks);
1943
1971
  parsedSections++;
1944
1972
  _optionalChain([options, 'optionalAccess', _22 => _22.onProgress, 'optionalCall', _23 => _23(parsedSections, totalTarget)]);
1945
1973
  } catch (secErr) {
1946
- if (secErr instanceof _chunkOJ4QR33Vcjs.KordocError) throw secErr;
1974
+ if (secErr instanceof _chunkZNJPRRIAcjs.KordocError) throw secErr;
1947
1975
  warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
1948
1976
  }
1949
1977
  }
1950
1978
  const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
1951
- const flatBlocks = _chunkOJ4QR33Vcjs.flattenLayoutTables.call(void 0, blocks);
1979
+ const flatBlocks = _chunkZNJPRRIAcjs.flattenLayoutTables.call(void 0, blocks);
1952
1980
  if (docInfo) {
1953
1981
  detectHwp5Headings(flatBlocks, docInfo);
1954
1982
  }
1955
1983
  const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
1956
- const markdown = _chunkOJ4QR33Vcjs.blocksToMarkdown.call(void 0, flatBlocks);
1984
+ const markdown = _chunkZNJPRRIAcjs.blocksToMarkdown.call(void 0, flatBlocks);
1957
1985
  return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
1958
1986
  }
1959
1987
  function parseDocInfoStream(cfb, compressed) {
@@ -2013,9 +2041,9 @@ function detectHwp5Headings(blocks, docInfo) {
2013
2041
  let level = 0;
2014
2042
  if (_optionalChain([block, 'access', _28 => _28.style, 'optionalAccess', _29 => _29.fontSize]) && baseFontSize > 0) {
2015
2043
  const ratio = block.style.fontSize / baseFontSize;
2016
- if (ratio >= _chunkOJ4QR33Vcjs.HEADING_RATIO_H1) level = 1;
2017
- else if (ratio >= _chunkOJ4QR33Vcjs.HEADING_RATIO_H2) level = 2;
2018
- else if (ratio >= _chunkOJ4QR33Vcjs.HEADING_RATIO_H3) level = 3;
2044
+ if (ratio >= _chunkZNJPRRIAcjs.HEADING_RATIO_H1) level = 1;
2045
+ else if (ratio >= _chunkZNJPRRIAcjs.HEADING_RATIO_H2) level = 2;
2046
+ else if (ratio >= _chunkZNJPRRIAcjs.HEADING_RATIO_H3) level = 3;
2019
2047
  }
2020
2048
  if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
2021
2049
  if (level === 0) level = 2;
@@ -2100,7 +2128,7 @@ function findSectionsLenient(lcfb, compressed) {
2100
2128
  if (!raw) break;
2101
2129
  const content = compressed ? decompressStream(raw) : raw;
2102
2130
  totalDecompressed += content.length;
2103
- if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
2131
+ if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkZNJPRRIAcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
2104
2132
  sections.push({ idx: i, content });
2105
2133
  }
2106
2134
  if (sections.length === 0) {
@@ -2112,7 +2140,7 @@ function findSectionsLenient(lcfb, compressed) {
2112
2140
  if (raw) {
2113
2141
  const content = compressed ? decompressStream(raw) : raw;
2114
2142
  totalDecompressed += content.length;
2115
- if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
2143
+ if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkZNJPRRIAcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
2116
2144
  sections.push({ idx, content });
2117
2145
  }
2118
2146
  }
@@ -2129,7 +2157,7 @@ function findViewTextSectionsLenient(lcfb, compressed) {
2129
2157
  try {
2130
2158
  const content = decryptViewText(raw, compressed);
2131
2159
  totalDecompressed += content.length;
2132
- if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkOJ4QR33Vcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
2160
+ if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkZNJPRRIAcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
2133
2161
  sections.push({ idx: i, content });
2134
2162
  } catch (e16) {
2135
2163
  break;
@@ -2258,13 +2286,13 @@ function extractHwp5ImagesLenient(lcfb, blocks, compressed, warnings) {
2258
2286
  }
2259
2287
  return images;
2260
2288
  }
2261
- function parseSection(records, docInfo, warnings, sectionNum) {
2289
+ function parseSection(records, docInfo, warnings, sectionNum, counter) {
2262
2290
  const blocks = [];
2263
2291
  let i = 0;
2264
2292
  while (i < records.length) {
2265
2293
  const rec = records[i];
2266
2294
  if (rec.tagId === TAG_PARA_HEADER && rec.level === 0) {
2267
- const { paragraph, tables, nextIdx, charShapeIds, paraShapeId } = parseParagraphWithTables(records, i);
2295
+ const { paragraph, tables, nextIdx, charShapeIds, paraShapeId } = parseParagraphWithTables(records, i, counter);
2268
2296
  if (paragraph) {
2269
2297
  const block = { type: "paragraph", text: paragraph, pageNumber: sectionNum };
2270
2298
  if (docInfo && charShapeIds.length > 0) {
@@ -2287,7 +2315,7 @@ function parseSection(records, docInfo, warnings, sectionNum) {
2287
2315
  if (rec.tagId === TAG_CTRL_HEADER && rec.level <= 1 && rec.data.length >= 4) {
2288
2316
  const ctrlId = rec.data.subarray(0, 4).toString("ascii");
2289
2317
  if (ctrlId === " lbt" || ctrlId === "tbl ") {
2290
- const { table, nextIdx } = parseTableBlock(records, i);
2318
+ const { table, nextIdx } = parseTableBlock(records, i, counter);
2291
2319
  if (table) blocks.push({ type: "table", table, pageNumber: sectionNum });
2292
2320
  i = nextIdx;
2293
2321
  continue;
@@ -2317,7 +2345,7 @@ function parseSection(records, docInfo, warnings, sectionNum) {
2317
2345
  if (url && blocks.length > 0) {
2318
2346
  const lastBlock = blocks[blocks.length - 1];
2319
2347
  if (lastBlock.type === "paragraph" && !lastBlock.href) {
2320
- lastBlock.href = _nullishCoalesce(_chunkOJ4QR33Vcjs.sanitizeHref.call(void 0, url), () => ( void 0));
2348
+ lastBlock.href = _nullishCoalesce(_chunkZNJPRRIAcjs.sanitizeHref.call(void 0, url), () => ( void 0));
2321
2349
  }
2322
2350
  }
2323
2351
  }
@@ -2392,7 +2420,7 @@ function resolveCharStyle(charShapeIds, docInfo) {
2392
2420
  if (cs.attrFlags & 2) style.bold = true;
2393
2421
  return style.fontSize || style.bold || style.italic ? style : void 0;
2394
2422
  }
2395
- function parseParagraphWithTables(records, startIdx) {
2423
+ function parseParagraphWithTables(records, startIdx, counter) {
2396
2424
  const startLevel = records[startIdx].level;
2397
2425
  let text = "";
2398
2426
  const tables = [];
@@ -2414,7 +2442,7 @@ function parseParagraphWithTables(records, startIdx) {
2414
2442
  if (rec.tagId === TAG_CTRL_HEADER && rec.data.length >= 4) {
2415
2443
  const ctrlId = rec.data.subarray(0, 4).toString("ascii");
2416
2444
  if (ctrlId === " lbt" || ctrlId === "tbl ") {
2417
- const { table, nextIdx } = parseTableBlock(records, i);
2445
+ const { table, nextIdx } = parseTableBlock(records, i, counter);
2418
2446
  if (table) tables.push(table);
2419
2447
  i = nextIdx;
2420
2448
  continue;
@@ -2425,7 +2453,7 @@ function parseParagraphWithTables(records, startIdx) {
2425
2453
  const trimmed = text.trim();
2426
2454
  return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds, paraShapeId };
2427
2455
  }
2428
- function parseTableBlock(records, startIdx) {
2456
+ function parseTableBlock(records, startIdx, counter) {
2429
2457
  const tableLevel = records[startIdx].level;
2430
2458
  let i = startIdx + 1;
2431
2459
  let rows = 0, cols = 0;
@@ -2435,11 +2463,11 @@ function parseTableBlock(records, startIdx) {
2435
2463
  if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
2436
2464
  if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
2437
2465
  if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
2438
- rows = Math.min(rec.data.readUInt16LE(4), _chunkOJ4QR33Vcjs.MAX_ROWS);
2439
- cols = Math.min(rec.data.readUInt16LE(6), _chunkOJ4QR33Vcjs.MAX_COLS);
2466
+ rows = Math.min(rec.data.readUInt16LE(4), _chunkZNJPRRIAcjs.MAX_ROWS);
2467
+ cols = Math.min(rec.data.readUInt16LE(6), _chunkZNJPRRIAcjs.MAX_COLS);
2440
2468
  }
2441
2469
  if (rec.tagId === TAG_LIST_HEADER) {
2442
- const { cell, nextIdx } = parseCellBlock(records, i, tableLevel);
2470
+ const { cell, nextIdx } = parseCellBlock(records, i, tableLevel, counter);
2443
2471
  if (cell) cells.push(cell);
2444
2472
  i = nextIdx;
2445
2473
  continue;
@@ -2458,9 +2486,9 @@ function parseTableBlock(records, startIdx) {
2458
2486
  return { table: { rows, cols, cells: irCells, hasHeader: rows > 1 }, nextIdx: i };
2459
2487
  }
2460
2488
  const cellRows = arrangeCells(rows, cols, cells);
2461
- return { table: _chunkOJ4QR33Vcjs.buildTable.call(void 0, cellRows), nextIdx: i };
2489
+ return { table: _chunkZNJPRRIAcjs.buildTable.call(void 0, cellRows), nextIdx: i };
2462
2490
  }
2463
- function parseCellBlock(records, startIdx, tableLevel) {
2491
+ function parseCellBlock(records, startIdx, tableLevel, counter) {
2464
2492
  const rec = records[startIdx];
2465
2493
  const cellLevel = rec.level;
2466
2494
  const texts = [];
@@ -2473,8 +2501,8 @@ function parseCellBlock(records, startIdx, tableLevel) {
2473
2501
  rowAddr = rec.data.readUInt16LE(10);
2474
2502
  const cs = rec.data.readUInt16LE(12);
2475
2503
  const rs = rec.data.readUInt16LE(14);
2476
- if (cs > 0) colSpan = Math.min(cs, _chunkOJ4QR33Vcjs.MAX_COLS);
2477
- if (rs > 0) rowSpan = Math.min(rs, _chunkOJ4QR33Vcjs.MAX_ROWS);
2504
+ if (cs > 0) colSpan = Math.min(cs, _chunkZNJPRRIAcjs.MAX_COLS);
2505
+ if (rs > 0) rowSpan = Math.min(rs, _chunkZNJPRRIAcjs.MAX_ROWS);
2478
2506
  }
2479
2507
  let i = startIdx + 1;
2480
2508
  while (i < records.length) {
@@ -2485,6 +2513,17 @@ function parseCellBlock(records, startIdx, tableLevel) {
2485
2513
  const t = extractText(r.data).trim();
2486
2514
  if (t) texts.push(t);
2487
2515
  }
2516
+ if (r.tagId === TAG_CTRL_HEADER && r.data.length >= 4) {
2517
+ const ctrlId = r.data.subarray(0, 4).toString("ascii");
2518
+ if (ctrlId === " lbt" || ctrlId === "tbl ") {
2519
+ if (counter) {
2520
+ counter.count++;
2521
+ texts.push(`[\uC911\uCCA9 \uD14C\uC774\uBE14 #${counter.count}]`);
2522
+ } else {
2523
+ texts.push("[\uC911\uCCA9 \uD14C\uC774\uBE14]");
2524
+ }
2525
+ }
2526
+ }
2488
2527
  i++;
2489
2528
  }
2490
2529
  return { cell: { text: texts.join("\n"), colSpan, rowSpan, colAddr, rowAddr }, nextIdx: i };
@@ -2565,7 +2604,7 @@ function getTextContent(el) {
2565
2604
  return _nullishCoalesce(_optionalChain([el, 'access', _37 => _37.textContent, 'optionalAccess', _38 => _38.trim, 'call', _39 => _39()]), () => ( ""));
2566
2605
  }
2567
2606
  function parseXml(text) {
2568
- return new (0, _xmldom.DOMParser)().parseFromString(_chunkOJ4QR33Vcjs.stripDtd.call(void 0, text), "text/xml");
2607
+ return new (0, _xmldom.DOMParser)().parseFromString(_chunkZNJPRRIAcjs.stripDtd.call(void 0, text), "text/xml");
2569
2608
  }
2570
2609
  function parseSharedStrings(xml) {
2571
2610
  const doc = parseXml(xml);
@@ -2709,7 +2748,7 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
2709
2748
  cellRows.push(row);
2710
2749
  }
2711
2750
  if (cellRows.length > 0) {
2712
- const table = _chunkOJ4QR33Vcjs.buildTable.call(void 0, cellRows);
2751
+ const table = _chunkZNJPRRIAcjs.buildTable.call(void 0, cellRows);
2713
2752
  if (table.rows > 0) {
2714
2753
  blocks.push({ type: "table", table, pageNumber: sheetIndex + 1 });
2715
2754
  }
@@ -2717,12 +2756,12 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
2717
2756
  return blocks;
2718
2757
  }
2719
2758
  async function parseXlsxDocument(buffer, options) {
2720
- _chunkOJ4QR33Vcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE3);
2759
+ _chunkZNJPRRIAcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE3);
2721
2760
  const zip = await _jszip2.default.loadAsync(buffer);
2722
2761
  const warnings = [];
2723
2762
  const workbookFile = zip.file("xl/workbook.xml");
2724
2763
  if (!workbookFile) {
2725
- throw new (0, _chunkOJ4QR33Vcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 XLSX \uD30C\uC77C: xl/workbook.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
2764
+ throw new (0, _chunkZNJPRRIAcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 XLSX \uD30C\uC77C: xl/workbook.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
2726
2765
  }
2727
2766
  let sharedStrings = [];
2728
2767
  const ssFile = zip.file("xl/sharedStrings.xml");
@@ -2731,7 +2770,7 @@ async function parseXlsxDocument(buffer, options) {
2731
2770
  }
2732
2771
  const sheets = parseWorkbook(await workbookFile.async("text"));
2733
2772
  if (sheets.length === 0) {
2734
- throw new (0, _chunkOJ4QR33Vcjs.KordocError)("XLSX \uD30C\uC77C\uC5D0 \uC2DC\uD2B8\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
2773
+ throw new (0, _chunkZNJPRRIAcjs.KordocError)("XLSX \uD30C\uC77C\uC5D0 \uC2DC\uD2B8\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
2735
2774
  }
2736
2775
  let relsMap = /* @__PURE__ */ new Map();
2737
2776
  const relsFile = zip.file("xl/_rels/workbook.xml.rels");
@@ -2803,7 +2842,7 @@ async function parseXlsxDocument(buffer, options) {
2803
2842
  } catch (e20) {
2804
2843
  }
2805
2844
  }
2806
- const markdown = _chunkOJ4QR33Vcjs.blocksToMarkdown.call(void 0, blocks);
2845
+ const markdown = _chunkZNJPRRIAcjs.blocksToMarkdown.call(void 0, blocks);
2807
2846
  return { markdown, blocks, metadata, warnings: warnings.length > 0 ? warnings : void 0 };
2808
2847
  }
2809
2848
 
@@ -2811,21 +2850,21 @@ async function parseXlsxDocument(buffer, options) {
2811
2850
 
2812
2851
 
2813
2852
  var MAX_DECOMPRESS_SIZE4 = 100 * 1024 * 1024;
2814
- function getChildElements(parent, localName2) {
2853
+ function getChildElements(parent, localName3) {
2815
2854
  const result = [];
2816
2855
  const children = parent.childNodes;
2817
2856
  for (let i = 0; i < children.length; i++) {
2818
2857
  const node = children[i];
2819
2858
  if (node.nodeType === 1) {
2820
2859
  const el = node;
2821
- if (el.localName === localName2 || _optionalChain([el, 'access', _45 => _45.tagName, 'optionalAccess', _46 => _46.endsWith, 'call', _47 => _47(`:${localName2}`)])) {
2860
+ if (el.localName === localName3 || _optionalChain([el, 'access', _45 => _45.tagName, 'optionalAccess', _46 => _46.endsWith, 'call', _47 => _47(`:${localName3}`)])) {
2822
2861
  result.push(el);
2823
2862
  }
2824
2863
  }
2825
2864
  }
2826
2865
  return result;
2827
2866
  }
2828
- function findElements(parent, localName2) {
2867
+ function findElements(parent, localName3) {
2829
2868
  const result = [];
2830
2869
  const walk = (node) => {
2831
2870
  const children = node.childNodes;
@@ -2833,7 +2872,7 @@ function findElements(parent, localName2) {
2833
2872
  const child = children[i];
2834
2873
  if (child.nodeType === 1) {
2835
2874
  const el = child;
2836
- if (el.localName === localName2 || _optionalChain([el, 'access', _48 => _48.tagName, 'optionalAccess', _49 => _49.endsWith, 'call', _50 => _50(`:${localName2}`)])) {
2875
+ if (el.localName === localName3 || _optionalChain([el, 'access', _48 => _48.tagName, 'optionalAccess', _49 => _49.endsWith, 'call', _50 => _50(`:${localName3}`)])) {
2837
2876
  result.push(el);
2838
2877
  }
2839
2878
  walk(el);
@@ -2843,16 +2882,16 @@ function findElements(parent, localName2) {
2843
2882
  walk(parent);
2844
2883
  return result;
2845
2884
  }
2846
- function getAttr(el, localName2) {
2885
+ function getAttr(el, localName3) {
2847
2886
  const attrs = el.attributes;
2848
2887
  for (let i = 0; i < attrs.length; i++) {
2849
2888
  const attr = attrs[i];
2850
- if (attr.localName === localName2 || attr.name === localName2) return attr.value;
2889
+ if (attr.localName === localName3 || attr.name === localName3) return attr.value;
2851
2890
  }
2852
2891
  return null;
2853
2892
  }
2854
2893
  function parseXml2(text) {
2855
- return new (0, _xmldom.DOMParser)().parseFromString(_chunkOJ4QR33Vcjs.stripDtd.call(void 0, text), "text/xml");
2894
+ return new (0, _xmldom.DOMParser)().parseFromString(_chunkZNJPRRIAcjs.stripDtd.call(void 0, text), "text/xml");
2856
2895
  }
2857
2896
  function parseStyles(xml) {
2858
2897
  const doc = parseXml2(xml);
@@ -3145,12 +3184,12 @@ async function extractImages(zip, rels, doc) {
3145
3184
  return { blocks, images };
3146
3185
  }
3147
3186
  async function parseDocxDocument(buffer, options) {
3148
- _chunkOJ4QR33Vcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE4);
3187
+ _chunkZNJPRRIAcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE4);
3149
3188
  const zip = await _jszip2.default.loadAsync(buffer);
3150
3189
  const warnings = [];
3151
3190
  const docFile = zip.file("word/document.xml");
3152
3191
  if (!docFile) {
3153
- throw new (0, _chunkOJ4QR33Vcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 DOCX \uD30C\uC77C: word/document.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
3192
+ throw new (0, _chunkZNJPRRIAcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 DOCX \uD30C\uC77C: word/document.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
3154
3193
  }
3155
3194
  let rels = /* @__PURE__ */ new Map();
3156
3195
  const relsFile = zip.file("word/_rels/document.xml.rels");
@@ -3185,7 +3224,7 @@ async function parseDocxDocument(buffer, options) {
3185
3224
  const doc = parseXml2(docXml);
3186
3225
  const body = findElements(doc, "body");
3187
3226
  if (body.length === 0) {
3188
- throw new (0, _chunkOJ4QR33Vcjs.KordocError)("DOCX \uBCF8\uBB38(w:body)\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
3227
+ throw new (0, _chunkZNJPRRIAcjs.KordocError)("DOCX \uBCF8\uBB38(w:body)\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
3189
3228
  }
3190
3229
  const blocks = [];
3191
3230
  const bodyEl = body[0];
@@ -3194,11 +3233,11 @@ async function parseDocxDocument(buffer, options) {
3194
3233
  const node = children[i];
3195
3234
  if (node.nodeType !== 1) continue;
3196
3235
  const el = node;
3197
- const localName2 = _nullishCoalesce(el.localName, () => ( _optionalChain([el, 'access', _65 => _65.tagName, 'optionalAccess', _66 => _66.split, 'call', _67 => _67(":"), 'access', _68 => _68.pop, 'call', _69 => _69()])));
3198
- if (localName2 === "p") {
3236
+ const localName3 = _nullishCoalesce(el.localName, () => ( _optionalChain([el, 'access', _65 => _65.tagName, 'optionalAccess', _66 => _66.split, 'call', _67 => _67(":"), 'access', _68 => _68.pop, 'call', _69 => _69()])));
3237
+ if (localName3 === "p") {
3199
3238
  const block = parseParagraph(el, styles, numbering, footnotes, rels);
3200
3239
  if (block) blocks.push(block);
3201
- } else if (localName2 === "tbl") {
3240
+ } else if (localName3 === "tbl") {
3202
3241
  const block = parseTable(el, styles, numbering, footnotes, rels);
3203
3242
  if (block) blocks.push(block);
3204
3243
  }
@@ -3225,7 +3264,7 @@ async function parseDocxDocument(buffer, options) {
3225
3264
  }
3226
3265
  }
3227
3266
  const outline = blocks.filter((b) => b.type === "heading").map((b) => ({ level: _nullishCoalesce(b.level, () => ( 2)), text: _nullishCoalesce(b.text, () => ( "")) }));
3228
- const markdown = _chunkOJ4QR33Vcjs.blocksToMarkdown.call(void 0, blocks);
3267
+ const markdown = _chunkZNJPRRIAcjs.blocksToMarkdown.call(void 0, blocks);
3229
3268
  return {
3230
3269
  markdown,
3231
3270
  blocks,
@@ -3236,6 +3275,259 @@ async function parseDocxDocument(buffer, options) {
3236
3275
  };
3237
3276
  }
3238
3277
 
3278
+ // src/hwpml/parser.ts
3279
+
3280
// Deepest element nesting the recursive HWPML walkers (walkContent,
// collectCharText) follow before they stop descending.
var MAX_XML_DEPTH2 = 200;
// Tables declaring more rows/columns than this are skipped with a warning;
// cell spans are clamped to the same limits.
var MAX_TABLE_ROWS = 5000;
var MAX_TABLE_COLS = 500;
// Largest HWPML input accepted by parseHwpmlDocument, in bytes (50 MiB).
var MAX_HWPML_BYTES = 50 * 1024 * 1024;
3284
/**
 * Parses an HWPML (XML) document into markdown plus block-level IR.
 *
 * The bytes are decoded as UTF-8, a leading BOM is removed, `&nbsp;` is
 * rewritten to a numeric character reference, the DTD is stripped and the
 * result is parsed with xmldom. DOCSUMMARY supplies metadata; every SECTION
 * that is a direct child of BODY is walked for paragraphs and tables.
 *
 * @param {ArrayBuffer|Uint8Array} buffer Raw file contents (assumed to be
 *   something TextDecoder accepts and that exposes `byteLength`).
 * @param {{pages?: *}} [options] When `pages` is set it is handed to
 *   parsePageRange together with the section count; only sections whose
 *   1-based index is in the resulting set are parsed.
 * @returns {{markdown: string, blocks: Array, metadata?: object,
 *   outline?: Array, warnings?: Array}} Empty collections are reported as
 *   `undefined` on every return path.
 * @throws {KordocError} When the input is larger than MAX_HWPML_BYTES.
 */
function parseHwpmlDocument(buffer, options) {
  if (buffer.byteLength > MAX_HWPML_BYTES) {
    // KordocError rather than a bare Error, matching every other parser in this bundle.
    // NOTE(review): assumes KordocError subclasses Error so existing `catch` sites keep working.
    throw new _chunkZNJPRRIAcjs.KordocError(`HWPML \uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC (${(buffer.byteLength / 1024 / 1024).toFixed(1)}MB > 50MB)`);
  }

  const warnings = [];
  const listOrUndefined = (list) => (list.length > 0 ? list : void 0);
  const metadataOrUndefined = (meta) => (Object.keys(meta).length > 0 ? meta : void 0);

  const text = new TextDecoder("utf-8").decode(buffer).replace(/^\uFEFF/, "");
  // `&nbsp;` is not one of XML's predefined entities; use a numeric reference instead.
  const normalized = text.replace(/&nbsp;/g, "&#160;");
  const xml = _chunkZNJPRRIAcjs.stripDtd.call(void 0, normalized);

  const parser = new _xmldom.DOMParser({
    // Parser diagnostics are downgraded to warnings so a partially broken file still yields output.
    onError: (_level, msg) => {
      warnings.push({ message: `HWPML XML \uD30C\uC2F1 \uACBD\uACE0: ${msg}`, code: "MALFORMED_XML" });
    }
  });
  const doc = parser.parseFromString(xml, "text/xml");
  const root = doc ? doc.documentElement : null;
  if (!root) {
    return { markdown: "", blocks: [], warnings: listOrUndefined(warnings) };
  }

  // Only non-empty DOCSUMMARY fields are recorded, so an empty summary does not
  // produce a defined-but-empty metadata object.
  const metadata = {};
  const docSummary = findChild(root, "DOCSUMMARY");
  if (docSummary) {
    const summaryFields = [["TITLE", "title"], ["AUTHOR", "author"], ["DATE", "createdAt"]];
    for (const [tag, key] of summaryFields) {
      const node = findChild(docSummary, tag);
      const value = node ? textContent(node).trim() : "";
      if (value) metadata[key] = value;
    }
  }

  const body = findChild(root, "BODY");
  if (!body) {
    return { markdown: "", blocks: [], metadata: metadataOrUndefined(metadata), warnings: listOrUndefined(warnings) };
  }

  const paraShapeMap = buildParaShapeMap(root);
  const blocks = [];
  const pageFilter = options != null && options.pages
    ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, countSections(body))
    : null;

  // Sections are numbered from 1 in document order; the number doubles as pageNumber.
  let sectionIdx = 0;
  const children = body.childNodes;
  for (let i = 0; i < children.length; i++) {
    const el = children[i];
    if (el.nodeType !== 1 || localName(el) !== "SECTION") continue;
    sectionIdx++;
    if (pageFilter && !pageFilter.has(sectionIdx)) continue;
    parseSection2(el, blocks, paraShapeMap, sectionIdx, warnings);
  }

  const outline = blocks
    .filter((b) => b.type === "heading" && b.text)
    .map((b) => ({ level: b.level != null ? b.level : 1, text: b.text, pageNumber: b.pageNumber }));
  const markdown = _chunkZNJPRRIAcjs.blocksToMarkdown.call(void 0, blocks);

  return {
    markdown,
    blocks,
    metadata: metadataOrUndefined(metadata),
    outline: listOrUndefined(outline),
    warnings: listOrUndefined(warnings)
  };
}
3339
/**
 * Builds a lookup from PARASHAPE `Id` to its derived heading level.
 *
 * Reads HEAD > MAPPINGTABLE > PARASHAPELIST under `root`. A shape whose
 * `HeadingType` is "Outline" maps to heading level `Level + 1`, capped at 6;
 * an unparsable or negative `Level` counts as 0. Every other shape maps to
 * `headingLevel: null`.
 *
 * @param {Element} root Document element of the parsed HWPML.
 * @returns {Map<string, {headingLevel: (number|null)}>} Empty when any of
 *   the three container elements is missing.
 */
function buildParaShapeMap(root) {
  const shapes = /* @__PURE__ */ new Map();

  // Descend the fixed container path, bailing out at the first missing level.
  let container = root;
  for (const tag of ["HEAD", "MAPPINGTABLE", "PARASHAPELIST"]) {
    container = findChild(container, tag);
    if (!container) return shapes;
  }

  const attrOr = (el, name, fallback) => {
    const value = el.getAttribute(name);
    return value != null ? value : fallback;
  };

  const nodes = container.childNodes;
  for (let idx = 0; idx < nodes.length; idx += 1) {
    const shape = nodes[idx];
    if (shape.nodeType !== 1) continue;
    if (localName(shape) !== "PARASHAPE") continue;

    const id = attrOr(shape, "Id", "");
    const headingType = attrOr(shape, "HeadingType", "None");
    const level = parseInt(attrOr(shape, "Level", "0"), 10);

    let headingLevel = null;
    if (headingType === "Outline") {
      const safeLevel = isNaN(level) ? 0 : Math.max(0, level);
      headingLevel = Math.min(safeLevel + 1, 6);
    }
    shapes.set(id, { headingLevel });
  }
  return shapes;
}
3363
/**
 * Parses one HWPML SECTION element by walking its content from depth 0,
 * appending the resulting blocks to `blocks`.
 *
 * @param {Element} section SECTION element to parse.
 * @param {Array} blocks Output list, appended to in place.
 * @param {Map} paraShapeMap Paragraph-shape lookup from buildParaShapeMap.
 * @param {number} sectionNum Section number recorded as each block's pageNumber.
 * @param {Array} warnings Warning list, appended to in place.
 */
function parseSection2(section, blocks, paraShapeMap, sectionNum, warnings) {
  // A section is never itself header/footer content.
  const inHeaderFooter = false;
  walkContent(section, blocks, paraShapeMap, sectionNum, warnings, inHeaderFooter);
}
3366
+ function walkContent(node, blocks, paraShapeMap, sectionNum, warnings, inHeaderFooter, depth = 0) {
3367
+ if (depth > MAX_XML_DEPTH2) return;
3368
+ const children = node.childNodes;
3369
+ for (let i = 0; i < children.length; i++) {
3370
+ const el = children[i];
3371
+ if (el.nodeType !== 1) continue;
3372
+ const tag = localName(el);
3373
+ if (tag === "HEADER" || tag === "FOOTER") {
3374
+ continue;
3375
+ }
3376
+ if (tag === "P") {
3377
+ if (!inHeaderFooter) {
3378
+ parseParagraph2(el, blocks, paraShapeMap, sectionNum);
3379
+ }
3380
+ continue;
3381
+ }
3382
+ if (tag === "TABLE") {
3383
+ if (!inHeaderFooter) {
3384
+ parseTable2(el, blocks, paraShapeMap, sectionNum, warnings);
3385
+ }
3386
+ continue;
3387
+ }
3388
+ if (tag === "PARALIST" || tag === "SECTION" || tag === "COLDEF") {
3389
+ walkContent(el, blocks, paraShapeMap, sectionNum, warnings, inHeaderFooter, depth + 1);
3390
+ continue;
3391
+ }
3392
+ walkContent(el, blocks, paraShapeMap, sectionNum, warnings, inHeaderFooter, depth + 1);
3393
+ }
3394
+ }
3395
/**
 * Converts one HWPML P element into a heading or paragraph block.
 *
 * Nothing is emitted when the paragraph has no text. The block is a heading
 * when the paragraph's `ParaShape` id resolves to a shape with a non-null
 * heading level, and a plain paragraph otherwise.
 *
 * @param {Element} el P element.
 * @param {Array} blocks Output list, appended to in place.
 * @param {Map<string, {headingLevel: (number|null)}>} paraShapeMap Shape lookup.
 * @param {number} sectionNum Section number stored as pageNumber.
 */
function parseParagraph2(el, blocks, paraShapeMap, sectionNum) {
  const shapeAttr = el.getAttribute("ParaShape");
  const shapeInfo = paraShapeMap.get(shapeAttr != null ? shapeAttr : "");

  const text = extractParagraphText(el);
  if (!text) return;

  const isHeading = shapeInfo != null && shapeInfo.headingLevel != null;
  const block = isHeading
    ? { type: "heading", text, level: shapeInfo.headingLevel, pageNumber: sectionNum }
    : { type: "paragraph", text, pageNumber: sectionNum };
  blocks.push(block);
}
3406
/**
 * Returns the text of a P element: all fragments gathered by collectCharText,
 * concatenated without a separator and trimmed.
 *
 * @param {Element} p P element.
 * @returns {string} Trimmed text, or "" when the paragraph has none.
 */
function extractParagraphText(p) {
  const fragments = [];
  collectCharText(p, fragments);
  const joined = fragments.join("");
  return joined.trim();
}
3411
/**
 * Appends the text of every CHAR element below `node` to `parts`, in
 * document order.
 *
 * TABLE, PICTURE, SHAPEOBJECT and AUTONUM subtrees contribute nothing; any
 * other element is searched recursively. Empty CHAR text is not appended.
 *
 * @param {Node} node Element whose descendants are scanned.
 * @param {string[]} parts Output list, appended to in place.
 * @param {number} [depth=0] Current recursion depth; scanning stops beyond
 *   MAX_XML_DEPTH2.
 */
function collectCharText(node, parts, depth = 0) {
  if (depth > MAX_XML_DEPTH2) return;

  const ignoredTags = ["TABLE", "PICTURE", "SHAPEOBJECT", "AUTONUM"];
  const kids = node.childNodes;
  for (let idx = 0; idx < kids.length; idx += 1) {
    const child = kids[idx];
    if (child.nodeType !== 1) continue;

    const tag = localName(child);
    if (ignoredTags.includes(tag)) continue;

    if (tag !== "CHAR") {
      collectCharText(child, parts, depth + 1);
      continue;
    }
    const chunk = textContent(child);
    if (chunk) parts.push(chunk);
  }
}
3428
/**
 * Converts one HWPML TABLE element into a table block appended to `blocks`.
 *
 * The grid is sized from the `RowCount`/`ColCount` attributes. Each
 * ROW > CELL is placed at its `RowAddr`/`ColAddr`, and the remaining
 * positions covered by its span are filled with empty placeholder cells.
 *
 * Nothing is emitted when the declared dimensions are missing, unparsable or
 * not positive, or when the table has no CELL elements. Tables larger than
 * MAX_TABLE_ROWS x MAX_TABLE_COLS are skipped with a TRUNCATED_TABLE warning.
 * Cells whose address is unparsable, negative or outside the declared grid
 * are ignored.
 *
 * @param {Element} el TABLE element.
 * @param {Array} blocks Output list, appended to in place.
 * @param {Map} paraShapeMap Unused; kept for signature parity with the other parsers.
 * @param {number} sectionNum Section number stored as pageNumber.
 * @param {Array} warnings Warning list, appended to in place.
 */
function parseTable2(el, blocks, paraShapeMap, sectionNum, warnings) {
  const intAttr = (node, name, fallback) => {
    const raw = node.getAttribute(name);
    return parseInt(raw != null ? raw : fallback, 10);
  };

  const rowCount = intAttr(el, "RowCount", "0");
  const colCount = intAttr(el, "ColCount", "0");
  // `!(x > 0)` rejects NaN, zero and negative counts in one test.
  if (!(rowCount > 0) || !(colCount > 0)) return;
  if (rowCount > MAX_TABLE_ROWS || colCount > MAX_TABLE_COLS) {
    warnings.push({ message: `\uD14C\uC774\uBE14 \uD06C\uAE30 \uCD08\uACFC (${rowCount}x${colCount}) \u2014 \uC2A4\uD0B5`, code: "TRUNCATED_TABLE" });
    return;
  }

  const cells = [];
  const children = el.childNodes;
  for (let i = 0; i < children.length; i++) {
    const rowEl = children[i];
    if (rowEl.nodeType !== 1 || localName(rowEl) !== "ROW") continue;
    const rowCells = rowEl.childNodes;
    for (let j = 0; j < rowCells.length; j++) {
      const cellEl = rowCells[j];
      if (cellEl.nodeType !== 1 || localName(cellEl) !== "CELL") continue;
      const colAddr = intAttr(cellEl, "ColAddr", "0");
      const rowAddr = intAttr(cellEl, "RowAddr", "0");
      // Spans are at least 1 and clamped to the table limits.
      const colSpan = Math.min(Math.max(1, intAttr(cellEl, "ColSpan", "1") || 1), MAX_TABLE_COLS);
      const rowSpan = Math.min(Math.max(1, intAttr(cellEl, "RowSpan", "1") || 1), MAX_TABLE_ROWS);
      const cellText = extractCellText(cellEl);
      cells.push({ text: cellText, colSpan, rowSpan, colAddr, rowAddr });
    }
  }
  if (cells.length === 0) return;

  const grid = Array.from({ length: rowCount }, () => Array(colCount).fill(null));
  for (const cell of cells) {
    const r = cell.rowAddr;
    const c = cell.colAddr;
    // NaN fails every comparison, so this also rejects unparsable addresses.
    // Negative addresses must be rejected too: grid[-1] is undefined.
    if (!(r >= 0 && r < rowCount && c >= 0 && c < colCount)) continue;
    grid[r][c] = cell;
    // Clamp the covered area to the grid so an oversized span costs nothing extra.
    const rowEnd = Math.min(r + cell.rowSpan, rowCount);
    const colEnd = Math.min(c + cell.colSpan, colCount);
    for (let rr = r; rr < rowEnd; rr++) {
      for (let cc = c; cc < colEnd; cc++) {
        if (rr === r && cc === c) continue;
        grid[rr][cc] = { text: "", colSpan: 1, rowSpan: 1 };
      }
    }
  }

  const cellRows = grid.map(
    (row) => row.map((cell) => (cell != null ? cell : { text: "", colSpan: 1, rowSpan: 1 }))
  );
  const table = _chunkZNJPRRIAcjs.buildTable.call(void 0, cellRows);
  blocks.push({ type: "table", table, pageNumber: sectionNum });
}
3475
/**
 * Returns the text of a table CELL element: the non-empty parts gathered by
 * collectCellText, joined with newlines and trimmed.
 *
 * @param {Element} cellEl CELL element.
 * @returns {string} Cell text, or "" when the cell has none.
 */
function extractCellText(cellEl) {
  const collected = [];
  collectCellText(cellEl, collected, 0);
  const nonEmpty = collected.filter((part) => Boolean(part));
  return nonEmpty.join("\n").trim();
}
3480
/**
 * Appends the text of each paragraph found below `node` to `parts`, in
 * document order.
 *
 * A P element contributes its extracted text when non-empty. A TABLE element
 * contributes a fixed nested-table placeholder and is not descended into.
 * Any other element is searched recursively.
 *
 * @param {Node} node Element whose children are scanned.
 * @param {string[]} parts Output list, appended to in place.
 * @param {number} depth Current recursion depth; scanning stops beyond 20.
 */
function collectCellText(node, parts, depth) {
  if (depth > 20) return;

  const kids = node.childNodes;
  for (let idx = 0; idx < kids.length; idx += 1) {
    const child = kids[idx];
    if (child.nodeType !== 1) continue;

    switch (localName(child)) {
      case "P": {
        const paragraph = extractParagraphText(child);
        if (paragraph) parts.push(paragraph);
        break;
      }
      case "TABLE":
        parts.push("[\uC911\uCCA9 \uD14C\uC774\uBE14]");
        break;
      default:
        collectCellText(child, parts, depth + 1);
    }
  }
}
3497
/**
 * Returns an element's name with a leading namespace prefix removed.
 *
 * The name is taken from `tagName`, falling back to `localName`, then "".
 * Only a non-empty prefix up to the first colon is dropped.
 *
 * @param {{tagName?: string, localName?: string}} el Element-like object.
 * @returns {string} Unprefixed name.
 */
function localName(el) {
  const qualified = el.tagName || el.localName || "";
  const colonAt = qualified.indexOf(":");
  // A colon at index 0 has no prefix in front of it, so the name is left untouched.
  return colonAt > 0 ? qualified.slice(colonAt + 1) : qualified;
}
3500
/**
 * Finds the first direct element child of `parent` whose unprefixed name
 * equals `tag`.
 *
 * @param {Node} parent Node whose `childNodes` are searched (not descendants).
 * @param {string} tag Name to match, compared exactly.
 * @returns {Element|null} The matching child, or null when there is none.
 */
function findChild(parent, tag) {
  const kids = parent.childNodes;
  let idx = 0;
  while (idx < kids.length) {
    const candidate = kids[idx];
    const isElement = candidate.nodeType === 1;
    if (isElement && localName(candidate) === tag) {
      return candidate;
    }
    idx += 1;
  }
  return null;
}
3508
/**
 * Concatenates the text below `el` in document order.
 *
 * Text nodes contribute their value, element children are processed
 * recursively, and every other node type is ignored.
 *
 * @param {Node} el Node whose `childNodes` are read.
 * @returns {string} Combined text, "" when there is none.
 */
function textContent(el) {
  const kids = el.childNodes;
  let combined = "";
  for (let idx = 0; idx < kids.length; idx += 1) {
    const child = kids[idx];
    switch (child.nodeType) {
      case 3:
        combined += child.nodeValue || "";
        break;
      case 1:
        combined += textContent(child);
        break;
      default:
        break;
    }
  }
  return combined;
}
3521
/**
 * Counts the SECTION elements that are direct children of `body`.
 *
 * @param {Node} body BODY element.
 * @returns {number} Number of direct SECTION element children.
 */
function countSections(body) {
  const kids = body.childNodes;
  let total = 0;
  for (let idx = 0; idx < kids.length; idx += 1) {
    const child = kids[idx];
    if (child.nodeType !== 1) continue;
    if (localName(child) === "SECTION") total += 1;
  }
  return total;
}
3530
+
3239
3531
  // src/form/recognize.ts
3240
3532
  var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
3241
3533
  "\uC131\uBA85",
@@ -3469,7 +3761,7 @@ function fillFormFields(blocks, values) {
3469
3761
  if (block.type !== "table" || !block.table) continue;
3470
3762
  for (let r = 0; r < block.table.rows; r++) {
3471
3763
  for (let c = 0; c < block.table.cols; c++) {
3472
- const cell = _optionalChain([block, 'access', _70 => _70.table, 'access', _71 => _71.cells, 'access', _72 => _72[r], 'optionalAccess', _73 => _73[c]]);
3764
+ const cell = _optionalChain([block, 'access', _72 => _72.table, 'access', _73 => _73.cells, 'access', _74 => _74[r], 'optionalAccess', _75 => _75[c]]);
3473
3765
  if (!cell) continue;
3474
3766
  const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
3475
3767
  if (result) {
@@ -3508,7 +3800,7 @@ function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
3508
3800
  const matchKey = findMatchingKey(normalizedCellLabel, values);
3509
3801
  if (matchKey === void 0) continue;
3510
3802
  const newValue = values.get(matchKey);
3511
- if (_optionalChain([patternFilledCells, 'optionalAccess', _74 => _74.has, 'call', _75 => _75(`${r},${c + 1}`)])) {
3803
+ if (_optionalChain([patternFilledCells, 'optionalAccess', _76 => _76.has, 'call', _77 => _77(`${r},${c + 1}`)])) {
3512
3804
  valueCell.text = newValue + " " + valueCell.text;
3513
3805
  } else {
3514
3806
  valueCell.text = newValue;
@@ -3578,7 +3870,7 @@ async function fillHwpx(hwpxBuffer, values) {
3578
3870
  const normalizedValues = normalizeValues(values);
3579
3871
  const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
3580
3872
  if (sectionFiles.length === 0) {
3581
- throw new (0, _chunkOJ4QR33Vcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
3873
+ throw new (0, _chunkZNJPRRIAcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
3582
3874
  }
3583
3875
  const xmlParser = new (0, _xmldom.DOMParser)();
3584
3876
  const xmlSerializer = new (0, _xmldom.XMLSerializer)();
@@ -3586,7 +3878,7 @@ async function fillHwpx(hwpxBuffer, values) {
3586
3878
  const zipEntry = zip.file(sectionPath);
3587
3879
  if (!zipEntry) continue;
3588
3880
  const rawXml = await zipEntry.async("text");
3589
- const doc = xmlParser.parseFromString(_chunkOJ4QR33Vcjs.stripDtd.call(void 0, rawXml), "text/xml");
3881
+ const doc = xmlParser.parseFromString(_chunkZNJPRRIAcjs.stripDtd.call(void 0, rawXml), "text/xml");
3590
3882
  if (!doc.documentElement) continue;
3591
3883
  let modified = false;
3592
3884
  const tables = findAllElements(doc.documentElement, "tbl");
@@ -3612,10 +3904,10 @@ async function fillHwpx(hwpxBuffer, values) {
3612
3904
  const trEl = rows[rowIdx];
3613
3905
  const cells = findDirectChildren(trEl, "tc");
3614
3906
  for (let colIdx = 0; colIdx < cells.length - 1; colIdx++) {
3615
- const labelText = extractCellText(cells[colIdx]);
3907
+ const labelText = extractCellText2(cells[colIdx]);
3616
3908
  if (!isLabelCell(labelText)) continue;
3617
3909
  const valueCell = cells[colIdx + 1];
3618
- const valueText = extractCellText(valueCell);
3910
+ const valueText = extractCellText2(valueCell);
3619
3911
  if (isKeywordLabel(valueText)) continue;
3620
3912
  const normalizedCellLabel = normalizeLabel(labelText);
3621
3913
  if (!normalizedCellLabel) continue;
@@ -3640,14 +3932,14 @@ async function fillHwpx(hwpxBuffer, values) {
3640
3932
  if (rows.length >= 2) {
3641
3933
  const headerCells = findDirectChildren(rows[0], "tc");
3642
3934
  const allLabels = headerCells.every((cell) => {
3643
- const t = extractCellText(cell).trim();
3935
+ const t = extractCellText2(cell).trim();
3644
3936
  return t.length > 0 && t.length <= 20 && isLabelCell(t);
3645
3937
  });
3646
3938
  if (allLabels) {
3647
3939
  for (let rowIdx = 1; rowIdx < rows.length; rowIdx++) {
3648
3940
  const dataCells = findDirectChildren(rows[rowIdx], "tc");
3649
3941
  for (let colIdx = 0; colIdx < Math.min(headerCells.length, dataCells.length); colIdx++) {
3650
- const headerLabel = normalizeLabel(extractCellText(headerCells[colIdx]));
3942
+ const headerLabel = normalizeLabel(extractCellText2(headerCells[colIdx]));
3651
3943
  const matchKey = findMatchingKey(headerLabel, normalizedValues);
3652
3944
  if (matchKey === void 0) continue;
3653
3945
  if (matchedLabels.has(matchKey)) continue;
@@ -3655,7 +3947,7 @@ async function fillHwpx(hwpxBuffer, values) {
3655
3947
  replaceCellText(dataCells[colIdx], newValue);
3656
3948
  matchedLabels.add(matchKey);
3657
3949
  filled.push({
3658
- label: extractCellText(headerCells[colIdx]).trim(),
3950
+ label: extractCellText2(headerCells[colIdx]).trim(),
3659
3951
  value: newValue,
3660
3952
  row: rowIdx,
3661
3953
  col: colIdx
@@ -3697,7 +3989,7 @@ async function fillHwpx(hwpxBuffer, values) {
3697
3989
  const buffer = await zip.generateAsync({ type: "arraybuffer" });
3698
3990
  return { buffer, filled, unmatched };
3699
3991
  }
3700
- function localName(el) {
3992
+ function localName2(el) {
3701
3993
  return (el.tagName || el.localName || "").replace(/^[^:]+:/, "");
3702
3994
  }
3703
3995
  function findAllElements(node, tagLocalName) {
@@ -3708,7 +4000,7 @@ function findAllElements(node, tagLocalName) {
3708
4000
  for (let i = 0; i < children.length; i++) {
3709
4001
  const child = children[i];
3710
4002
  if (child.nodeType !== 1) continue;
3711
- if (localName(child) === tagLocalName) result.push(child);
4003
+ if (localName2(child) === tagLocalName) result.push(child);
3712
4004
  walk(child);
3713
4005
  }
3714
4006
  };
@@ -3721,7 +4013,7 @@ function findDirectChildren(parent, tagLocalName) {
3721
4013
  if (!children) return result;
3722
4014
  for (let i = 0; i < children.length; i++) {
3723
4015
  const child = children[i];
3724
- if (child.nodeType === 1 && localName(child) === tagLocalName) {
4016
+ if (child.nodeType === 1 && localName2(child) === tagLocalName) {
3725
4017
  result.push(child);
3726
4018
  }
3727
4019
  }
@@ -3730,12 +4022,12 @@ function findDirectChildren(parent, tagLocalName) {
3730
4022
  function isInsideTable(el) {
3731
4023
  let parent = el.parentNode;
3732
4024
  while (parent) {
3733
- if (parent.nodeType === 1 && localName(parent) === "tbl") return true;
4025
+ if (parent.nodeType === 1 && localName2(parent) === "tbl") return true;
3734
4026
  parent = parent.parentNode;
3735
4027
  }
3736
4028
  return false;
3737
4029
  }
3738
- function extractCellText(tcEl) {
4030
+ function extractCellText2(tcEl) {
3739
4031
  const parts = [];
3740
4032
  const walk = (node) => {
3741
4033
  const children = node.childNodes;
@@ -3745,7 +4037,7 @@ function extractCellText(tcEl) {
3745
4037
  if (child.nodeType === 3) {
3746
4038
  parts.push(child.textContent || "");
3747
4039
  } else if (child.nodeType === 1) {
3748
- const tag = localName(child);
4040
+ const tag = localName2(child);
3749
4041
  if (tag === "t") walk(child);
3750
4042
  else if (tag === "run" || tag === "r" || tag === "p" || tag === "subList") walk(child);
3751
4043
  else if (tag === "tab") parts.push(" ");
@@ -4447,13 +4739,13 @@ async function parse(input, options) {
4447
4739
  if (typeof input === "string") {
4448
4740
  try {
4449
4741
  const buf = await _promises.readFile.call(void 0, input);
4450
- buffer = _chunkOJ4QR33Vcjs.toArrayBuffer.call(void 0, buf);
4742
+ buffer = _chunkZNJPRRIAcjs.toArrayBuffer.call(void 0, buf);
4451
4743
  } catch (err) {
4452
4744
  const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
4453
4745
  return { success: false, fileType: "unknown", error: msg, code: "PARSE_ERROR" };
4454
4746
  }
4455
4747
  } else if (Buffer.isBuffer(input)) {
4456
- buffer = _chunkOJ4QR33Vcjs.toArrayBuffer.call(void 0, input);
4748
+ buffer = _chunkZNJPRRIAcjs.toArrayBuffer.call(void 0, input);
4457
4749
  } else {
4458
4750
  buffer = input;
4459
4751
  }
@@ -4470,6 +4762,8 @@ async function parse(input, options) {
4470
4762
  }
4471
4763
  case "hwp":
4472
4764
  return parseHwp(buffer, options);
4765
+ case "hwpml":
4766
+ return parseHwpml(buffer, options);
4473
4767
  case "pdf":
4474
4768
  return parsePdf(buffer, options);
4475
4769
  default:
@@ -4479,23 +4773,23 @@ async function parse(input, options) {
4479
4773
  async function parseHwpx(buffer, options) {
4480
4774
  try {
4481
4775
  const { markdown, blocks, metadata, outline, warnings, images } = await parseHwpxDocument(buffer, options);
4482
- return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _76 => _76.length]) ? images : void 0 };
4776
+ return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _78 => _78.length]) ? images : void 0 };
4483
4777
  } catch (err) {
4484
- return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkOJ4QR33Vcjs.classifyError.call(void 0, err) };
4778
+ return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZNJPRRIAcjs.classifyError.call(void 0, err) };
4485
4779
  }
4486
4780
  }
4487
4781
  async function parseHwp(buffer, options) {
4488
4782
  try {
4489
4783
  const { markdown, blocks, metadata, outline, warnings, images } = parseHwp5Document(Buffer.from(buffer), options);
4490
- return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _77 => _77.length]) ? images : void 0 };
4784
+ return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _79 => _79.length]) ? images : void 0 };
4491
4785
  } catch (err) {
4492
- return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code: _chunkOJ4QR33Vcjs.classifyError.call(void 0, err) };
4786
+ return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZNJPRRIAcjs.classifyError.call(void 0, err) };
4493
4787
  }
4494
4788
  }
4495
4789
  async function parsePdf(buffer, options) {
4496
4790
  let parsePdfDocument;
4497
4791
  try {
4498
- const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-CYBX5MP4.cjs")));
4792
+ const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-KBQZB3QY.cjs")));
4499
4793
  parsePdfDocument = mod.parsePdfDocument;
4500
4794
  } catch (e26) {
4501
4795
  return {
@@ -4510,7 +4804,7 @@ async function parsePdf(buffer, options) {
4510
4804
  return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
4511
4805
  } catch (err) {
4512
4806
  const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
4513
- return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: _chunkOJ4QR33Vcjs.classifyError.call(void 0, err), isImageBased };
4807
+ return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZNJPRRIAcjs.classifyError.call(void 0, err), isImageBased };
4514
4808
  }
4515
4809
  }
4516
4810
  async function parseXlsx(buffer, options) {
@@ -4518,24 +4812,32 @@ async function parseXlsx(buffer, options) {
4518
4812
  const { markdown, blocks, metadata, warnings } = await parseXlsxDocument(buffer, options);
4519
4813
  return { success: true, fileType: "xlsx", markdown, blocks, metadata, warnings };
4520
4814
  } catch (err) {
4521
- return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkOJ4QR33Vcjs.classifyError.call(void 0, err) };
4815
+ return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZNJPRRIAcjs.classifyError.call(void 0, err) };
4522
4816
  }
4523
4817
  }
4524
4818
  async function parseDocx(buffer, options) {
4525
4819
  try {
4526
4820
  const { markdown, blocks, metadata, outline, warnings, images } = await parseDocxDocument(buffer, options);
4527
- return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _78 => _78.length]) ? images : void 0 };
4821
+ return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _80 => _80.length]) ? images : void 0 };
4528
4822
  } catch (err) {
4529
- return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkOJ4QR33Vcjs.classifyError.call(void 0, err) };
4823
+ return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZNJPRRIAcjs.classifyError.call(void 0, err) };
4824
+ }
4825
+ }
4826
+ async function parseHwpml(buffer, options) {
4827
+ try {
4828
+ const { markdown, blocks, metadata, outline, warnings } = parseHwpmlDocument(buffer, options);
4829
+ return { success: true, fileType: "hwpml", markdown, blocks, metadata, outline, warnings };
4830
+ } catch (err) {
4831
+ return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZNJPRRIAcjs.classifyError.call(void 0, err) };
4530
4832
  }
4531
4833
  }
4532
4834
  async function fillForm(input, values, outputFormat = "markdown") {
4533
4835
  let buffer;
4534
4836
  if (typeof input === "string") {
4535
4837
  const buf = await _promises.readFile.call(void 0, input);
4536
- buffer = _chunkOJ4QR33Vcjs.toArrayBuffer.call(void 0, buf);
4838
+ buffer = _chunkZNJPRRIAcjs.toArrayBuffer.call(void 0, buf);
4537
4839
  } else if (Buffer.isBuffer(input)) {
4538
- buffer = _chunkOJ4QR33Vcjs.toArrayBuffer.call(void 0, input);
4840
+ buffer = _chunkZNJPRRIAcjs.toArrayBuffer.call(void 0, input);
4539
4841
  } else {
4540
4842
  buffer = input;
4541
4843
  }
@@ -4561,7 +4863,7 @@ async function fillForm(input, values, outputFormat = "markdown") {
4561
4863
  throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
4562
4864
  }
4563
4865
  const fill = fillFormFields(parsed.blocks, values);
4564
- const markdown = _chunkOJ4QR33Vcjs.blocksToMarkdown.call(void 0, fill.blocks);
4866
+ const markdown = _chunkZNJPRRIAcjs.blocksToMarkdown.call(void 0, fill.blocks);
4565
4867
  if (outputFormat === "hwpx") {
4566
4868
  const hwpxBuffer = await markdownToHwpx(markdown);
4567
4869
  return { output: hwpxBuffer, format: "hwpx", fill };
@@ -4591,5 +4893,6 @@ async function fillForm(input, values, outputFormat = "markdown") {
4591
4893
 
4592
4894
 
4593
4895
 
4594
- exports.VERSION = _chunkOJ4QR33Vcjs.VERSION; exports.blocksToMarkdown = _chunkOJ4QR33Vcjs.blocksToMarkdown; exports.compare = compare; exports.detectFormat = detectFormat; exports.detectZipFormat = detectZipFormat; exports.diffBlocks = diffBlocks; exports.extractFormFields = extractFormFields; exports.fillForm = fillForm; exports.fillFormFields = fillFormFields; exports.fillHwpx = fillHwpx; exports.isHwpxFile = isHwpxFile; exports.isLabelCell = isLabelCell; exports.isOldHwpFile = isOldHwpFile; exports.isPdfFile = isPdfFile; exports.isZipFile = isZipFile; exports.markdownToHwpx = markdownToHwpx; exports.parse = parse; exports.parseDocx = parseDocx; exports.parseHwp = parseHwp; exports.parseHwpx = parseHwpx; exports.parsePdf = parsePdf; exports.parseXlsx = parseXlsx;
4896
+
4897
+ exports.VERSION = _chunkZNJPRRIAcjs.VERSION; exports.blocksToMarkdown = _chunkZNJPRRIAcjs.blocksToMarkdown; exports.compare = compare; exports.detectFormat = detectFormat; exports.detectZipFormat = detectZipFormat; exports.diffBlocks = diffBlocks; exports.extractFormFields = extractFormFields; exports.fillForm = fillForm; exports.fillFormFields = fillFormFields; exports.fillHwpx = fillHwpx; exports.isHwpxFile = isHwpxFile; exports.isLabelCell = isLabelCell; exports.isOldHwpFile = isOldHwpFile; exports.isPdfFile = isPdfFile; exports.isZipFile = isZipFile; exports.markdownToHwpx = markdownToHwpx; exports.parse = parse; exports.parseDocx = parseDocx; exports.parseHwp = parseHwp; exports.parseHwpml = parseHwpml; exports.parseHwpx = parseHwpx; exports.parsePdf = parsePdf; exports.parseXlsx = parseXlsx;
4595
4898
  //# sourceMappingURL=index.cjs.map