kordoc 1.9.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -181,13 +181,83 @@ var import_jszip2 = __toESM(require("jszip"), 1);
181
181
  var import_zlib = require("zlib");
182
182
  var import_xmldom = require("@xmldom/xmldom");
183
183
 
184
- // src/table/builder.ts
184
+ // src/utils.ts
185
+ var VERSION = true ? "2.0.0" : "0.0.0-dev";
186
/**
 * Returns an ArrayBuffer holding exactly the bytes that `buf` views.
 *
 * When the view starts at offset 0 and spans its whole backing buffer, the
 * backing buffer itself is returned (no copy). Otherwise only the viewed
 * range is copied out with `slice`.
 *
 * @param {{buffer: ArrayBuffer, byteOffset: number, byteLength: number}} buf
 *   A Buffer / typed-array style view.
 * @returns {ArrayBuffer} The backing buffer or a copy of the viewed range.
 */
function toArrayBuffer(buf) {
  const { buffer: backing, byteOffset: start, byteLength: size } = buf;
  const spansWholeBacking = start === 0 && size === backing.byteLength;
  return spansWholeBacking ? backing : backing.slice(start, start + size);
}
192
/**
 * Error subclass used for kordoc-specific failures.
 * Instances carry an own `name` property of "KordocError".
 */
var KordocError = class extends Error {
  // Own instance property, so `err.name` and `String(err)` report "KordocError".
  name = "KordocError";

  /** @param {string} message Human-readable description of the failure. */
  constructor(message) {
    super(message);
  }
};
198
/**
 * Reports whether an archive entry name looks unsafe to use as a relative path.
 *
 * A name is rejected when it contains a NUL character, contains ".." anywhere
 * (after backslashes are converted to "/"), starts with "/", or starts with a
 * drive-letter prefix such as "C:".
 *
 * @param {string} name Entry name as stored in the archive.
 * @returns {boolean} `true` when the name must be rejected.
 */
function isPathTraversal(name) {
  if (name.includes("\0")) {
    return true;
  }
  const posixName = name.split("\\").join("/");
  if (posixName.includes("..")) {
    return true;
  }
  if (posixName.startsWith("/")) {
    return true;
  }
  return /^[A-Za-z]:/.test(posixName);
}
203
/**
 * Inspects a ZIP central directory before extraction and rejects archives that
 * declare too many entries or too large a total uncompressed size.
 *
 * The function locates the end-of-central-directory (EOCD) record by scanning
 * backwards from the end of the buffer, then sums the uncompressed-size field
 * of each central directory header.
 *
 * @param {ArrayBuffer} buffer Raw ZIP bytes.
 * @param {number} [maxUncompressedSize] Limit for the summed uncompressed size, in bytes.
 * @param {number} [maxEntries] Limit for the entry count declared in the EOCD record.
 * @returns {{totalUncompressed: number, entryCount: number}} Declared totals;
 *   zeros when no EOCD record is found or when reading the structure fails.
 * @throws {KordocError} When either limit is exceeded.
 */
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
  const EOCD_SIGNATURE = 101010256; // 0x06054b50
  const CENTRAL_HEADER_SIGNATURE = 33639248; // 0x02014b50
  try {
    const view = new DataView(buffer);
    const total = buffer.byteLength;

    // Scan backwards over the last 65557 bytes at most for the EOCD signature.
    const searchFloor = Math.max(0, total - 65557);
    let eocd = -1;
    let probe = total - 22;
    while (probe >= searchFloor) {
      if (view.getUint32(probe, true) === EOCD_SIGNATURE) {
        eocd = probe;
        break;
      }
      probe -= 1;
    }
    if (eocd < 0) {
      return { totalUncompressed: 0, entryCount: 0 };
    }

    const entryCount = view.getUint16(eocd + 10, true);
    if (entryCount > maxEntries) {
      throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
    }

    const cdSize = view.getUint32(eocd + 12, true);
    const cdOffset = view.getUint32(eocd + 16, true);
    const cdEnd = cdOffset + cdSize;
    if (cdEnd > total) {
      // The declared central directory does not fit in the buffer; report the count only.
      return { totalUncompressed: 0, entryCount };
    }

    let totalUncompressed = 0;
    let cursor = cdOffset;
    let seen = 0;
    while (seen < entryCount && cursor + 46 <= cdEnd) {
      if (view.getUint32(cursor, true) !== CENTRAL_HEADER_SIGNATURE) {
        break;
      }
      totalUncompressed += view.getUint32(cursor + 24, true);
      const nameLen = view.getUint16(cursor + 28, true);
      const extraLen = view.getUint16(cursor + 30, true);
      const commentLen = view.getUint16(cursor + 32, true);
      cursor += 46 + nameLen + extraLen + commentLen;
      seen += 1;
    }

    if (totalUncompressed > maxUncompressedSize) {
      throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
    }
    return { totalUncompressed, entryCount };
  } catch (err) {
    // Limit violations propagate; any other failure is treated as "nothing measurable".
    if (err instanceof KordocError) {
      throw err;
    }
    return { totalUncompressed: 0, entryCount: 0 };
  }
}
185
241
  var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
186
242
  function sanitizeHref(href) {
187
243
  const trimmed = href.trim();
188
244
  if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
189
245
  return trimmed;
190
246
  }
247
/**
 * Maps a thrown value to a coarse error code by looking for known substrings
 * in its message. Rules are evaluated in order and the first match wins, so
 * the ZIP-specific bomb rule must stay ahead of the generic bomb rule.
 *
 * @param {unknown} err The caught value.
 * @returns {string} One of "ENCRYPTED", "DRM_PROTECTED", "ZIP_BOMB",
 *   "DECOMPRESSION_BOMB", "IMAGE_BASED_PDF", "NO_SECTIONS", "CORRUPTED",
 *   or "PARSE_ERROR" (also used for non-Error values).
 */
function classifyError(err) {
  const FALLBACK = "PARSE_ERROR";
  if (!(err instanceof Error)) {
    return FALLBACK;
  }
  const text = err.message;
  const has = (needle) => text.includes(needle);
  const rules = [
    ["ENCRYPTED", () => has("\uC554\uD638\uD654")],
    ["DRM_PROTECTED", () => has("DRM")],
    ["ZIP_BOMB", () => has("ZIP bomb") || has("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || has("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")],
    ["DECOMPRESSION_BOMB", () => has("bomb") || has("\uD06C\uAE30 \uCD08\uACFC") || has("\uC555\uCD95 \uD574\uC81C")],
    ["IMAGE_BASED_PDF", () => has("\uC774\uBBF8\uC9C0 \uAE30\uBC18")],
    ["NO_SECTIONS", () => has("\uC139\uC158") && (has("\uCC3E\uC744 \uC218 \uC5C6") || has("\uC5C6\uC74C"))],
    ["CORRUPTED", () => has("\uC2DC\uADF8\uB2C8\uCC98") || has("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")],
  ];
  for (const [code, matches] of rules) {
    if (matches()) {
      return code;
    }
  }
  return FALLBACK;
}
259
+
260
+ // src/table/builder.ts
191
261
  var MAX_COLS = 200;
192
262
  var MAX_ROWS = 1e4;
193
263
  function buildTable(rows) {
@@ -438,82 +508,6 @@ var HEADING_RATIO_H1 = 1.5;
438
508
  var HEADING_RATIO_H2 = 1.3;
439
509
  var HEADING_RATIO_H3 = 1.15;
440
510
 
441
- // src/utils.ts
442
- var VERSION = true ? "1.9.0" : "0.0.0-dev";
443
- function toArrayBuffer(buf) {
444
- if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
445
- return buf.buffer;
446
- }
447
- return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
448
- }
449
- var KordocError = class extends Error {
450
- constructor(message) {
451
- super(message);
452
- this.name = "KordocError";
453
- }
454
- };
455
- function isPathTraversal(name) {
456
- if (name.includes("\0")) return true;
457
- const normalized = name.replace(/\\/g, "/");
458
- return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
459
- }
460
- function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
461
- try {
462
- const data = new DataView(buffer);
463
- const len = buffer.byteLength;
464
- let eocdOffset = -1;
465
- for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
466
- if (data.getUint32(i, true) === 101010256) {
467
- eocdOffset = i;
468
- break;
469
- }
470
- }
471
- if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
472
- const entryCount = data.getUint16(eocdOffset + 10, true);
473
- if (entryCount > maxEntries) {
474
- throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
475
- }
476
- const cdSize = data.getUint32(eocdOffset + 12, true);
477
- const cdOffset = data.getUint32(eocdOffset + 16, true);
478
- if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
479
- let totalUncompressed = 0;
480
- let pos = cdOffset;
481
- for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
482
- if (data.getUint32(pos, true) !== 33639248) break;
483
- totalUncompressed += data.getUint32(pos + 24, true);
484
- const nameLen = data.getUint16(pos + 28, true);
485
- const extraLen = data.getUint16(pos + 30, true);
486
- const commentLen = data.getUint16(pos + 32, true);
487
- pos += 46 + nameLen + extraLen + commentLen;
488
- }
489
- if (totalUncompressed > maxUncompressedSize) {
490
- throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
491
- }
492
- return { totalUncompressed, entryCount };
493
- } catch (err) {
494
- if (err instanceof KordocError) throw err;
495
- return { totalUncompressed: 0, entryCount: 0 };
496
- }
497
- }
498
- var SAFE_HREF_RE2 = /^(?:https?:|mailto:|tel:|#)/i;
499
- function sanitizeHref2(href) {
500
- const trimmed = href.trim();
501
- if (!trimmed || !SAFE_HREF_RE2.test(trimmed)) return null;
502
- return trimmed;
503
- }
504
- function classifyError(err) {
505
- if (!(err instanceof Error)) return "PARSE_ERROR";
506
- const msg = err.message;
507
- if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
508
- if (msg.includes("DRM")) return "DRM_PROTECTED";
509
- if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
510
- if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
511
- if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
512
- if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
513
- if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
514
- return "PARSE_ERROR";
515
- }
516
-
517
511
  // src/hwpx/parser.ts
518
512
  init_page_range();
519
513
  var MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024;
@@ -1120,7 +1114,7 @@ function extractParagraphInfo(para, styleMap) {
1120
1114
  case "hyperlink": {
1121
1115
  const url = child.getAttribute("url") || child.getAttribute("href") || "";
1122
1116
  if (url) {
1123
- const safe = sanitizeHref2(url);
1117
+ const safe = sanitizeHref(url);
1124
1118
  if (safe) href = safe;
1125
1119
  }
1126
1120
  walk(child);
@@ -1213,13 +1207,16 @@ var TAG_TABLE = 77;
1213
1207
  var TAG_DOC_CHAR_SHAPE = 55;
1214
1208
  var TAG_DOC_STYLE = 58;
1215
1209
  var CHAR_LINE = 0;
1210
+ var CHAR_SECTION_BREAK = 10;
1216
1211
  var CHAR_PARA = 13;
1217
1212
  var CHAR_TAB = 9;
1218
1213
  var CHAR_HYPHEN = 30;
1219
1214
  var CHAR_NBSP = 31;
1220
1215
  var CHAR_FIXED_NBSP = 24;
1216
+ var CHAR_FIXED_WIDTH = 25;
1221
1217
  var FLAG_COMPRESSED = 1 << 0;
1222
1218
  var FLAG_ENCRYPTED = 1 << 1;
1219
+ var FLAG_DISTRIBUTION = 1 << 2;
1223
1220
  var FLAG_DRM = 1 << 4;
1224
1221
  var MAX_RECORDS = 5e5;
1225
1222
  function readRecords(data) {
@@ -1314,27 +1311,41 @@ function extractText(data) {
1314
1311
  const ch = data.readUInt16LE(i);
1315
1312
  i += 2;
1316
1313
  switch (ch) {
1314
+ // ── char 타입 (2바이트만, 확장 데이터 없음) ──
1317
1315
  case CHAR_LINE:
1318
1316
  result += "\n";
1319
1317
  break;
1320
- case CHAR_PARA:
1321
- break;
1322
- case CHAR_TAB:
1323
- result += " ";
1318
+ case CHAR_SECTION_BREAK:
1319
+ result += "\n";
1324
1320
  if (i + 14 <= data.length) i += 14;
1325
1321
  break;
1322
+ case CHAR_PARA:
1323
+ break;
1324
+ // 문단 끝
1326
1325
  case CHAR_HYPHEN:
1327
1326
  result += "-";
1328
1327
  break;
1329
1328
  case CHAR_NBSP:
1329
+ result += " ";
1330
+ break;
1330
1331
  case CHAR_FIXED_NBSP:
1332
+ result += "\xA0";
1333
+ break;
1334
+ // 진짜 NBSP
1335
+ case CHAR_FIXED_WIDTH:
1331
1336
  result += " ";
1332
1337
  break;
1338
+ // 고정폭 공백
1339
+ // ── inline 타입 (2바이트 + 14바이트 확장) ──
1340
+ case CHAR_TAB:
1341
+ result += " ";
1342
+ if (i + 14 <= data.length) i += 14;
1343
+ break;
1333
1344
  default:
1334
1345
  if (ch >= 1 && ch <= 31) {
1335
- const isExt = ch >= 1 && ch <= 3 || ch >= 10 && ch <= 18 || ch >= 21 && ch <= 23;
1346
+ const isExtended = ch >= 1 && ch <= 3 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= 18 || ch >= 21 && ch <= 23;
1336
1347
  const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
1337
- if ((isExt || isInline) && i + 14 <= data.length) i += 14;
1348
+ if ((isExtended || isInline) && i + 14 <= data.length) i += 14;
1338
1349
  } else if (ch >= 32) {
1339
1350
  if (ch >= 55296 && ch <= 56319 && i + 1 < data.length) {
1340
1351
  const lo = data.readUInt16LE(i);
@@ -1353,6 +1364,886 @@ function extractText(data) {
1353
1364
  return result;
1354
1365
  }
1355
1366
 
1367
+ // src/hwp5/aes.ts
1368
+ var S_BOX = new Uint8Array([
1369
+ 99,
1370
+ 124,
1371
+ 119,
1372
+ 123,
1373
+ 242,
1374
+ 107,
1375
+ 111,
1376
+ 197,
1377
+ 48,
1378
+ 1,
1379
+ 103,
1380
+ 43,
1381
+ 254,
1382
+ 215,
1383
+ 171,
1384
+ 118,
1385
+ 202,
1386
+ 130,
1387
+ 201,
1388
+ 125,
1389
+ 250,
1390
+ 89,
1391
+ 71,
1392
+ 240,
1393
+ 173,
1394
+ 212,
1395
+ 162,
1396
+ 175,
1397
+ 156,
1398
+ 164,
1399
+ 114,
1400
+ 192,
1401
+ 183,
1402
+ 253,
1403
+ 147,
1404
+ 38,
1405
+ 54,
1406
+ 63,
1407
+ 247,
1408
+ 204,
1409
+ 52,
1410
+ 165,
1411
+ 229,
1412
+ 241,
1413
+ 113,
1414
+ 216,
1415
+ 49,
1416
+ 21,
1417
+ 4,
1418
+ 199,
1419
+ 35,
1420
+ 195,
1421
+ 24,
1422
+ 150,
1423
+ 5,
1424
+ 154,
1425
+ 7,
1426
+ 18,
1427
+ 128,
1428
+ 226,
1429
+ 235,
1430
+ 39,
1431
+ 178,
1432
+ 117,
1433
+ 9,
1434
+ 131,
1435
+ 44,
1436
+ 26,
1437
+ 27,
1438
+ 110,
1439
+ 90,
1440
+ 160,
1441
+ 82,
1442
+ 59,
1443
+ 214,
1444
+ 179,
1445
+ 41,
1446
+ 227,
1447
+ 47,
1448
+ 132,
1449
+ 83,
1450
+ 209,
1451
+ 0,
1452
+ 237,
1453
+ 32,
1454
+ 252,
1455
+ 177,
1456
+ 91,
1457
+ 106,
1458
+ 203,
1459
+ 190,
1460
+ 57,
1461
+ 74,
1462
+ 76,
1463
+ 88,
1464
+ 207,
1465
+ 208,
1466
+ 239,
1467
+ 170,
1468
+ 251,
1469
+ 67,
1470
+ 77,
1471
+ 51,
1472
+ 133,
1473
+ 69,
1474
+ 249,
1475
+ 2,
1476
+ 127,
1477
+ 80,
1478
+ 60,
1479
+ 159,
1480
+ 168,
1481
+ 81,
1482
+ 163,
1483
+ 64,
1484
+ 143,
1485
+ 146,
1486
+ 157,
1487
+ 56,
1488
+ 245,
1489
+ 188,
1490
+ 182,
1491
+ 218,
1492
+ 33,
1493
+ 16,
1494
+ 255,
1495
+ 243,
1496
+ 210,
1497
+ 205,
1498
+ 12,
1499
+ 19,
1500
+ 236,
1501
+ 95,
1502
+ 151,
1503
+ 68,
1504
+ 23,
1505
+ 196,
1506
+ 167,
1507
+ 126,
1508
+ 61,
1509
+ 100,
1510
+ 93,
1511
+ 25,
1512
+ 115,
1513
+ 96,
1514
+ 129,
1515
+ 79,
1516
+ 220,
1517
+ 34,
1518
+ 42,
1519
+ 144,
1520
+ 136,
1521
+ 70,
1522
+ 238,
1523
+ 184,
1524
+ 20,
1525
+ 222,
1526
+ 94,
1527
+ 11,
1528
+ 219,
1529
+ 224,
1530
+ 50,
1531
+ 58,
1532
+ 10,
1533
+ 73,
1534
+ 6,
1535
+ 36,
1536
+ 92,
1537
+ 194,
1538
+ 211,
1539
+ 172,
1540
+ 98,
1541
+ 145,
1542
+ 149,
1543
+ 228,
1544
+ 121,
1545
+ 231,
1546
+ 200,
1547
+ 55,
1548
+ 109,
1549
+ 141,
1550
+ 213,
1551
+ 78,
1552
+ 169,
1553
+ 108,
1554
+ 86,
1555
+ 244,
1556
+ 234,
1557
+ 101,
1558
+ 122,
1559
+ 174,
1560
+ 8,
1561
+ 186,
1562
+ 120,
1563
+ 37,
1564
+ 46,
1565
+ 28,
1566
+ 166,
1567
+ 180,
1568
+ 198,
1569
+ 232,
1570
+ 221,
1571
+ 116,
1572
+ 31,
1573
+ 75,
1574
+ 189,
1575
+ 139,
1576
+ 138,
1577
+ 112,
1578
+ 62,
1579
+ 181,
1580
+ 102,
1581
+ 72,
1582
+ 3,
1583
+ 246,
1584
+ 14,
1585
+ 97,
1586
+ 53,
1587
+ 87,
1588
+ 185,
1589
+ 134,
1590
+ 193,
1591
+ 29,
1592
+ 158,
1593
+ 225,
1594
+ 248,
1595
+ 152,
1596
+ 17,
1597
+ 105,
1598
+ 217,
1599
+ 142,
1600
+ 148,
1601
+ 155,
1602
+ 30,
1603
+ 135,
1604
+ 233,
1605
+ 206,
1606
+ 85,
1607
+ 40,
1608
+ 223,
1609
+ 140,
1610
+ 161,
1611
+ 137,
1612
+ 13,
1613
+ 191,
1614
+ 230,
1615
+ 66,
1616
+ 104,
1617
+ 65,
1618
+ 153,
1619
+ 45,
1620
+ 15,
1621
+ 176,
1622
+ 84,
1623
+ 187,
1624
+ 22
1625
+ ]);
1626
+ var INV_S_BOX = new Uint8Array([
1627
+ 82,
1628
+ 9,
1629
+ 106,
1630
+ 213,
1631
+ 48,
1632
+ 54,
1633
+ 165,
1634
+ 56,
1635
+ 191,
1636
+ 64,
1637
+ 163,
1638
+ 158,
1639
+ 129,
1640
+ 243,
1641
+ 215,
1642
+ 251,
1643
+ 124,
1644
+ 227,
1645
+ 57,
1646
+ 130,
1647
+ 155,
1648
+ 47,
1649
+ 255,
1650
+ 135,
1651
+ 52,
1652
+ 142,
1653
+ 67,
1654
+ 68,
1655
+ 196,
1656
+ 222,
1657
+ 233,
1658
+ 203,
1659
+ 84,
1660
+ 123,
1661
+ 148,
1662
+ 50,
1663
+ 166,
1664
+ 194,
1665
+ 35,
1666
+ 61,
1667
+ 238,
1668
+ 76,
1669
+ 149,
1670
+ 11,
1671
+ 66,
1672
+ 250,
1673
+ 195,
1674
+ 78,
1675
+ 8,
1676
+ 46,
1677
+ 161,
1678
+ 102,
1679
+ 40,
1680
+ 217,
1681
+ 36,
1682
+ 178,
1683
+ 118,
1684
+ 91,
1685
+ 162,
1686
+ 73,
1687
+ 109,
1688
+ 139,
1689
+ 209,
1690
+ 37,
1691
+ 114,
1692
+ 248,
1693
+ 246,
1694
+ 100,
1695
+ 134,
1696
+ 104,
1697
+ 152,
1698
+ 22,
1699
+ 212,
1700
+ 164,
1701
+ 92,
1702
+ 204,
1703
+ 93,
1704
+ 101,
1705
+ 182,
1706
+ 146,
1707
+ 108,
1708
+ 112,
1709
+ 72,
1710
+ 80,
1711
+ 253,
1712
+ 237,
1713
+ 185,
1714
+ 218,
1715
+ 94,
1716
+ 21,
1717
+ 70,
1718
+ 87,
1719
+ 167,
1720
+ 141,
1721
+ 157,
1722
+ 132,
1723
+ 144,
1724
+ 216,
1725
+ 171,
1726
+ 0,
1727
+ 140,
1728
+ 188,
1729
+ 211,
1730
+ 10,
1731
+ 247,
1732
+ 228,
1733
+ 88,
1734
+ 5,
1735
+ 184,
1736
+ 179,
1737
+ 69,
1738
+ 6,
1739
+ 208,
1740
+ 44,
1741
+ 30,
1742
+ 143,
1743
+ 202,
1744
+ 63,
1745
+ 15,
1746
+ 2,
1747
+ 193,
1748
+ 175,
1749
+ 189,
1750
+ 3,
1751
+ 1,
1752
+ 19,
1753
+ 138,
1754
+ 107,
1755
+ 58,
1756
+ 145,
1757
+ 17,
1758
+ 65,
1759
+ 79,
1760
+ 103,
1761
+ 220,
1762
+ 234,
1763
+ 151,
1764
+ 242,
1765
+ 207,
1766
+ 206,
1767
+ 240,
1768
+ 180,
1769
+ 230,
1770
+ 115,
1771
+ 150,
1772
+ 172,
1773
+ 116,
1774
+ 34,
1775
+ 231,
1776
+ 173,
1777
+ 53,
1778
+ 133,
1779
+ 226,
1780
+ 249,
1781
+ 55,
1782
+ 232,
1783
+ 28,
1784
+ 117,
1785
+ 223,
1786
+ 110,
1787
+ 71,
1788
+ 241,
1789
+ 26,
1790
+ 113,
1791
+ 29,
1792
+ 41,
1793
+ 197,
1794
+ 137,
1795
+ 111,
1796
+ 183,
1797
+ 98,
1798
+ 14,
1799
+ 170,
1800
+ 24,
1801
+ 190,
1802
+ 27,
1803
+ 252,
1804
+ 86,
1805
+ 62,
1806
+ 75,
1807
+ 198,
1808
+ 210,
1809
+ 121,
1810
+ 32,
1811
+ 154,
1812
+ 219,
1813
+ 192,
1814
+ 254,
1815
+ 120,
1816
+ 205,
1817
+ 90,
1818
+ 244,
1819
+ 31,
1820
+ 221,
1821
+ 168,
1822
+ 51,
1823
+ 136,
1824
+ 7,
1825
+ 199,
1826
+ 49,
1827
+ 177,
1828
+ 18,
1829
+ 16,
1830
+ 89,
1831
+ 39,
1832
+ 128,
1833
+ 236,
1834
+ 95,
1835
+ 96,
1836
+ 81,
1837
+ 127,
1838
+ 169,
1839
+ 25,
1840
+ 181,
1841
+ 74,
1842
+ 13,
1843
+ 45,
1844
+ 229,
1845
+ 122,
1846
+ 159,
1847
+ 147,
1848
+ 201,
1849
+ 156,
1850
+ 239,
1851
+ 160,
1852
+ 224,
1853
+ 59,
1854
+ 77,
1855
+ 174,
1856
+ 42,
1857
+ 245,
1858
+ 176,
1859
+ 200,
1860
+ 235,
1861
+ 187,
1862
+ 60,
1863
+ 131,
1864
+ 83,
1865
+ 153,
1866
+ 97,
1867
+ 23,
1868
+ 43,
1869
+ 4,
1870
+ 126,
1871
+ 186,
1872
+ 119,
1873
+ 214,
1874
+ 38,
1875
+ 225,
1876
+ 105,
1877
+ 20,
1878
+ 99,
1879
+ 85,
1880
+ 33,
1881
+ 12,
1882
+ 125
1883
+ ]);
1884
+ var RCON = new Uint8Array([1, 2, 4, 8, 16, 32, 64, 128, 27, 54]);
1885
/**
 * Multiplies two bytes in GF(2^8) using shift-and-add, reducing with 0x1b
 * whenever the running multiplicand overflows bit 7.
 *
 * @param {number} a First factor (byte).
 * @param {number} b Second factor (byte).
 * @returns {number} The field product.
 */
function gmul(a, b) {
  let product = 0;
  let multiplicand = a;
  let multiplier = b;
  for (let bit = 0; bit < 8; bit += 1) {
    if ((multiplier & 1) !== 0) {
      product ^= multiplicand;
    }
    const overflowed = (multiplicand & 0x80) !== 0;
    multiplicand = (multiplicand << 1) & 0xff;
    if (overflowed) {
      multiplicand ^= 0x1b;
    }
    multiplier >>= 1;
  }
  return product;
}
1896
/**
 * Expands a 16-byte key into 44 big-endian 32-bit round-key words.
 * Every fourth word is rotated left by one byte, substituted through S_BOX
 * and XORed with the round constant before being combined with the word
 * four positions back.
 *
 * @param {ArrayLike<number>} key 16 key bytes.
 * @returns {Uint32Array} 44 round-key words (4 per round, rounds 0..10).
 */
function expandKey(key) {
  const words = new Uint32Array(44);
  for (let n = 0; n < 4; n += 1) {
    const at = 4 * n;
    words[n] = key[at] << 24 | key[at + 1] << 16 | key[at + 2] << 8 | key[at + 3];
  }
  for (let n = 4; n < 44; n += 1) {
    let mixed = words[n - 1];
    if (n % 4 === 0) {
      // RotWord, then SubWord, then XOR the round constant into the top byte.
      const rotated = (mixed << 8 | mixed >>> 24) >>> 0;
      const substituted =
        S_BOX[rotated >>> 24 & 255] << 24 |
        S_BOX[rotated >>> 16 & 255] << 16 |
        S_BOX[rotated >>> 8 & 255] << 8 |
        S_BOX[rotated & 255];
      mixed = (substituted ^ RCON[n / 4 - 1] << 24) >>> 0;
    }
    words[n] = (words[n - 4] ^ mixed) >>> 0;
  }
  return words;
}
1912
/**
 * Decrypts one 16-byte block with a pre-expanded round-key schedule.
 * The input is copied first, so `block` is never modified.
 *
 * @param {ArrayLike<number>} block 16 ciphertext bytes.
 * @param {Uint32Array} roundKeys Schedule produced by `expandKey`.
 * @returns {Uint8Array} 16 plaintext bytes.
 */
function decryptBlock(block, roundKeys) {
  const state = new Uint8Array(16);
  state.forEach((_, idx) => {
    state[idx] = block[idx];
  });

  addRoundKey(state, roundKeys, 10);
  let round = 9;
  while (round >= 1) {
    invShiftRows(state);
    invSubBytes(state);
    addRoundKey(state, roundKeys, round);
    invMixColumns(state);
    round -= 1;
  }
  // The final round omits the column mix.
  invShiftRows(state);
  invSubBytes(state);
  addRoundKey(state, roundKeys, 0);
  return state;
}
1927
/**
 * XORs one round key into the state, in place.
 * Each of the four key words for `round` is applied big-endian to the four
 * consecutive state bytes of the matching column.
 *
 * @param {Uint8Array} s 16-byte state, modified in place.
 * @param {Uint32Array} w Round-key words.
 * @param {number} round Round index; words `round * 4 .. round * 4 + 3` are used.
 */
function addRoundKey(s, w, round) {
  for (let col = 0; col < 4; col += 1) {
    const word = w[round * 4 + col];
    const at = col * 4;
    for (let row = 0; row < 4; row += 1) {
      s[at + row] ^= (word >>> (24 - 8 * row)) & 255;
    }
  }
}
1937
/**
 * Replaces each of the 16 state bytes with its INV_S_BOX lookup, in place.
 *
 * @param {Uint8Array} s 16-byte state, modified in place.
 */
function invSubBytes(s) {
  let idx = 0;
  while (idx < 16) {
    s[idx] = INV_S_BOX[s[idx]];
    idx += 1;
  }
}
1940
/**
 * Applies the inverse row shift to a 16-byte state, in place.
 * Bytes at index `col * 4 + row` form row `row`; row 0 is left alone and rows
 * 1, 2 and 3 are rotated by one, two and three columns respectively.
 *
 * @param {Uint8Array} s 16-byte state, modified in place.
 */
function invShiftRows(s) {
  // SOURCE_INDEX[i] is the position whose old value ends up at position i.
  const SOURCE_INDEX = [0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3];
  const before = SOURCE_INDEX.map((_, idx) => s[idx]);
  for (let idx = 1; idx < 16; idx += 1) {
    if (SOURCE_INDEX[idx] !== idx) {
      s[idx] = before[SOURCE_INDEX[idx]];
    }
  }
}
1958
/**
 * Applies the inverse column mix to a 16-byte state, in place.
 * Each 4-byte column is multiplied by the circulant matrix built from the
 * coefficients (14, 11, 13, 9) using `gmul`.
 *
 * @param {Uint8Array} s 16-byte state, modified in place.
 */
function invMixColumns(s) {
  const FIRST_ROW = [14, 11, 13, 9];
  for (let col = 0; col < 4; col += 1) {
    const base = col * 4;
    const column = [s[base], s[base + 1], s[base + 2], s[base + 3]];
    for (let row = 0; row < 4; row += 1) {
      let mixed = 0;
      for (let k = 0; k < 4; k += 1) {
        // Row `row` of the matrix is FIRST_ROW rotated right by `row`.
        mixed ^= gmul(column[k], FIRST_ROW[(k - row + 4) % 4]);
      }
      s[base + row] = mixed;
    }
  }
}
1968
/**
 * Decrypts data block by block (each 16-byte block independently) with a
 * 16-byte key.
 *
 * @param {Uint8Array} data Ciphertext; its length must be a multiple of 16.
 * @param {ArrayLike<number>} key 16 key bytes.
 * @returns {Uint8Array} Plaintext of the same length as `data`.
 * @throws {Error} When the key is not 16 bytes or the data length is not a multiple of 16.
 */
function aes128EcbDecrypt(data, key) {
  if (key.length !== 16) {
    throw new Error("AES-128 \uD0A4\uB294 16\uBC14\uC774\uD2B8\uC5EC\uC57C \uD569\uB2C8\uB2E4");
  }
  if (data.length % 16 !== 0) {
    throw new Error("AES ECB \uC785\uB825\uC740 16\uBC14\uC774\uD2B8\uC758 \uBC30\uC218\uC5EC\uC57C \uD569\uB2C8\uB2E4");
  }
  const schedule = expandKey(key);
  const plain = new Uint8Array(data.length);
  const blockCount = data.length / 16;
  for (let b = 0; b < blockCount; b += 1) {
    const at = b * 16;
    plain.set(decryptBlock(data.subarray(at, at + 16), schedule), at);
  }
  return plain;
}
1980
+
1981
+ // src/hwp5/crypto.ts
1982
/**
 * Linear congruential generator with multiplier 214013 and increment 2531011,
 * keeping its state as an unsigned 32-bit integer.
 */
var MsvcLcg = class {
  seed;

  /** @param {number} seed Initial state; coerced to an unsigned 32-bit integer. */
  constructor(seed) {
    this.seed = seed >>> 0;
  }

  /**
   * Advances the generator and returns a value in the range 0..0x7FFF
   * (compatible with MSVC `rand()`).
   *
   * @returns {number} The next 15-bit pseudo-random value.
   */
  rand() {
    const MULTIPLIER = 214013;
    const INCREMENT = 2531011;
    const next = (Math.imul(this.seed, MULTIPLIER) + INCREMENT) >>> 0;
    this.seed = next;
    return (next >>> 16) & 32767;
  }
};
1993
/**
 * Unscrambles the first 256 bytes of a distribution-document payload.
 *
 * Bytes 0-3 are copied as-is and also form the little-endian LCG seed. From
 * byte 4 on, bytes are XORed in runs: each run draws a key byte
 * (`rand() & 255`) and a run length (`(rand() & 15) + 1`) from the generator.
 *
 * NOTE(review): other implementations of this scheme are believed to start
 * the run counter at byte 0 (skipping only the XOR for bytes 0-3), whereas the
 * first run here starts at byte 4 — confirm against the format reference.
 *
 * @param {ArrayLike<number>} payload At least 256 scrambled bytes.
 * @returns {Uint8Array} 256 unscrambled bytes.
 * @throws {Error} When `payload` is shorter than 256 bytes.
 */
function decryptDistributePayload(payload) {
  if (payload.length < 256) {
    throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
  }
  const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
  const generator = new MsvcLcg(seed);
  const output = new Uint8Array(256);
  for (let k = 0; k < 4; k += 1) {
    output[k] = payload[k];
  }

  let keyByte = 0;
  let runLeft = 0;
  for (let pos = 4; pos < 256; pos += 1) {
    if (runLeft === 0) {
      // Start a new run: key byte first, then the run length.
      keyByte = generator.rand() & 255;
      runLeft = (generator.rand() & 15) + 1;
    }
    output[pos] = payload[pos] ^ keyByte;
    runLeft -= 1;
  }
  return output;
}
2012
/**
 * Pulls the 16-byte AES key out of an unscrambled distribution payload.
 * The key starts at offset `4 + (low nibble of byte 0)`.
 *
 * @param {Uint8Array} decryptedPayload Unscrambled payload bytes.
 * @returns {Uint8Array} A copy of the 16 key bytes.
 * @throws {Error} When the key would extend past the end of the payload.
 */
function extractAesKey(decryptedPayload) {
  const KEY_LENGTH = 16;
  const keyStart = 4 + (decryptedPayload[0] & 15);
  const keyEnd = keyStart + KEY_LENGTH;
  if (keyEnd > decryptedPayload.length) {
    throw new Error("AES \uD0A4 \uCD94\uCD9C \uC2E4\uD328: \uC624\uD504\uC14B\uC774 payload \uBC94\uC704\uB97C \uCD08\uACFC\uD569\uB2C8\uB2E4");
  }
  return decryptedPayload.slice(keyStart, keyEnd);
}
2019
/**
 * Decodes a record header at `offset`.
 *
 * The 32-bit little-endian header holds the tag id in bits 0-9 and the size
 * in bits 20-31. A size field of 4095 means the real size follows as a second
 * little-endian 32-bit word, making the header 8 bytes long.
 *
 * @param {ArrayLike<number>} data Record bytes.
 * @param {number} offset Position of the header within `data`.
 * @returns {{tagId: number, size: number, headerSize: number}} Decoded fields.
 * @throws {Error} When fewer bytes than the header needs are available.
 */
function parseRecordHeader(data, offset) {
  const readU32 = (at) =>
    (data[at] | data[at + 1] << 8 | data[at + 2] << 16 | data[at + 3] << 24) >>> 0;

  if (offset + 4 > data.length) {
    throw new Error("\uB808\uCF54\uB4DC \uD5E4\uB354 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  }
  const word = readU32(offset);
  const tagId = word & 1023;
  const inlineSize = (word >>> 20) & 4095;
  if (inlineSize !== 4095) {
    return { tagId, size: inlineSize, headerSize: 4 };
  }
  if (offset + 8 > data.length) {
    throw new Error("\uD655\uC7A5 \uB808\uCF54\uB4DC \uD06C\uAE30 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  }
  return { tagId, size: readU32(offset + 4), headerSize: 8 };
}
2032
// Tag id expected on the first record of a distribution-document stream.
var TAG_DISTRIBUTE_DOC_DATA = 16 + 28;
/**
 * Decrypts a distribution-document stream.
 *
 * The stream must start with a DISTRIBUTE_DOC_DATA record whose payload is at
 * least 256 bytes. That payload yields the AES key; everything after the
 * record is decrypted, with any trailing partial 16-byte block dropped.
 *
 * @param {ArrayLike<number>} viewTextRaw Raw stream bytes (copied, not modified).
 * @param {boolean} compressed Whether to try decompressing the decrypted bytes.
 * @returns {Buffer} The decompressed bytes, or the decrypted bytes when
 *   `compressed` is false or decompression throws.
 * @throws {Error} When the first record, payload or encrypted body is invalid.
 */
function decryptViewText(viewTextRaw, compressed) {
  const bytes = new Uint8Array(viewTextRaw);
  const { tagId, size: payloadSize, headerSize } = parseRecordHeader(bytes, 0);
  if (tagId !== TAG_DISTRIBUTE_DOC_DATA) {
    throw new Error(`\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC758 \uCCAB \uB808\uCF54\uB4DC\uAC00 DISTRIBUTE_DOC_DATA(${TAG_DISTRIBUTE_DOC_DATA})\uAC00 \uC544\uB2D9\uB2C8\uB2E4 (\uC2E4\uC81C: ${tagId})`);
  }

  const bodyStart = headerSize + payloadSize;
  if (bodyStart > bytes.length || payloadSize < 256) {
    throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 \uC720\uD6A8\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
  }
  const scrambled = bytes.subarray(headerSize, headerSize + 256);
  const aesKey = extractAesKey(decryptDistributePayload(scrambled));

  const cipherText = bytes.subarray(bodyStart);
  if (cipherText.length === 0) {
    throw new Error("\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC5D0 \uC554\uD638\uD654\uB41C \uBCF8\uBB38 \uB370\uC774\uD130\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
  }
  // Only whole 16-byte blocks can be decrypted.
  const usableLength = cipherText.length - cipherText.length % 16;
  if (usableLength === 0) {
    throw new Error("\uC554\uD638\uD654\uB41C \uB370\uC774\uD130\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (16\uBC14\uC774\uD2B8 \uBBF8\uB9CC)");
  }
  const decrypted = aes128EcbDecrypt(cipherText.subarray(0, usableLength), aesKey);

  if (!compressed) {
    return Buffer.from(decrypted);
  }
  try {
    return decompressStream(Buffer.from(decrypted));
  } catch {
    // Best effort: hand back the decrypted bytes when decompression fails.
    return Buffer.from(decrypted);
  }
}
2067
+
2068
+ // src/hwp5/cfb-lenient.ts
2069
// Signature bytes found at the start of every compound file.
var CFB_MAGIC = Buffer.from([208, 207, 17, 224, 161, 177, 26, 225]);
// Sector-chain terminator and unused-sector markers used in FAT/DIFAT tables.
var END_OF_CHAIN = 4294967294;
var FREE_SECT = 4294967295;
// Safety limits applied while walking untrusted structures.
var MAX_CHAIN_LENGTH = 1e6;
var MAX_DIR_ENTRIES = 1e5;
var MAX_STREAM_SIZE = 100 * 1024 * 1024;
/**
 * Best-effort compound-file reader for damaged containers.
 *
 * It reads the header, FAT, mini FAT and directory directly, tolerating
 * missing or out-of-range sectors instead of failing: unreadable sectors yield
 * empty data, and chains stop on loops or when they leave the FAT. Streams are
 * looked up by name only; the storage hierarchy is not reconstructed.
 *
 * @param {Buffer} data Entire file contents.
 * @returns {{findStream: (path: string) => (Buffer|null), entries: () => Array<{name: string, type: number, startSector: number, size: number}>}}
 *   `findStream` returns the stream bytes or `null` when the stream is missing
 *   or empty; `entries` lists every stream-type directory entry.
 * @throws {Error} When the file is shorter than 512 bytes, the signature does
 *   not match, a sector-size shift is out of range, or a requested stream is
 *   larger than MAX_STREAM_SIZE.
 */
function parseLenientCfb(data) {
  if (data.length < 512) throw new Error("CFB \uD30C\uC77C\uC774 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 512\uBC14\uC774\uD2B8)");
  if (!data.subarray(0, 8).equals(CFB_MAGIC)) throw new Error("CFB \uB9E4\uC9C1 \uBC14\uC774\uD2B8 \uBD88\uC77C\uCE58");
  const sectorSizeShift = data.readUInt16LE(30);
  if (sectorSizeShift < 7 || sectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + sectorSizeShift);
  const sectorSize = 1 << sectorSizeShift;
  const miniSectorSizeShift = data.readUInt16LE(32);
  if (miniSectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uBBF8\uB2C8 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + miniSectorSizeShift);
  const miniSectorSize = 1 << miniSectorSizeShift;
  const fatSectorCount = data.readUInt32LE(44);
  const firstDirSector = data.readUInt32LE(48);
  const miniStreamCutoff = data.readUInt32LE(56);
  const firstMiniFatSector = data.readUInt32LE(60);
  const miniFatSectorCount = data.readUInt32LE(64);
  const firstDifatSector = data.readUInt32LE(68);
  const difatSectorCount = data.readUInt32LE(72);

  /** Byte offset of sector `id`; sector 0 starts right after the 512-byte header. */
  function sectorOffset(id) {
    return 512 + id * sectorSize;
  }

  /** Returns the bytes of sector `id`, or an empty Buffer when it is not fully inside the file. */
  function readSectorData(id) {
    const off = sectorOffset(id);
    if (off + sectorSize > data.length) return Buffer.alloc(0);
    return data.subarray(off, off + sectorSize);
  }

  // The header count is untrusted. A file cannot hold more FAT sectors than it
  // has sectors, so cap the list there; this bounds the FAT table allocation
  // below to roughly the size of the file itself.
  const totalSectors = Math.ceil((data.length - 512) / sectorSize);
  const fatSectorLimit = Math.min(fatSectorCount, totalSectors);

  // The first 109 FAT sector ids live in the header itself.
  const fatSectors = [];
  for (let i = 0; i < 109 && fatSectors.length < fatSectorLimit; i++) {
    const sid = data.readUInt32LE(76 + i * 4);
    if (sid === FREE_SECT || sid === END_OF_CHAIN) break;
    fatSectors.push(sid);
  }

  // Remaining FAT sector ids are listed in the DIFAT chain; the last word of
  // each DIFAT sector points to the next one.
  const entriesPerDifatSector = sectorSize / 4 - 1;
  const visitedDifat = new Set();
  let difatSector = firstDifatSector;
  for (let d = 0; d < difatSectorCount && difatSector !== END_OF_CHAIN && difatSector !== FREE_SECT; d++) {
    if (visitedDifat.has(difatSector)) break;
    visitedDifat.add(difatSector);
    const buf = readSectorData(difatSector);
    // A DIFAT sector outside the file cannot be read; stop following the chain
    // and keep what was collected instead of throwing on the empty buffer.
    if (buf.length < sectorSize) break;
    for (let i = 0; i < entriesPerDifatSector && fatSectors.length < fatSectorLimit; i++) {
      const sid = buf.readUInt32LE(i * 4);
      if (sid === FREE_SECT || sid === END_OF_CHAIN) continue;
      fatSectors.push(sid);
    }
    difatSector = buf.readUInt32LE(entriesPerDifatSector * 4);
  }

  // Flatten every FAT sector into one lookup table; unreadable sectors become FREE_SECT rows.
  const entriesPerFatSector = sectorSize / 4;
  const fatTable = new Uint32Array(fatSectors.length * entriesPerFatSector);
  for (let fi = 0; fi < fatSectors.length; fi++) {
    const buf = readSectorData(fatSectors[fi]);
    for (let i = 0; i < entriesPerFatSector; i++) {
      fatTable[fi * entriesPerFatSector + i] = i * 4 + 3 < buf.length ? buf.readUInt32LE(i * 4) : FREE_SECT;
    }
  }

  /** Follows a regular sector chain and returns at most `maxBytes` bytes. */
  function readChain(startSector, maxBytes) {
    if (startSector === END_OF_CHAIN || startSector === FREE_SECT) return Buffer.alloc(0);
    if (maxBytes > MAX_STREAM_SIZE) throw new Error("\uC2A4\uD2B8\uB9BC\uC774 \uB108\uBB34 \uD07D\uB2C8\uB2E4");
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < maxBytes) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const buf = readSectorData(current);
      const remaining = maxBytes - totalRead;
      chunks.push(remaining < sectorSize ? buf.subarray(0, remaining) : buf);
      totalRead += Math.min(buf.length, remaining);
      current = current < fatTable.length ? fatTable[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }

  let miniFatTable = null;
  /** Lazily loads the mini FAT; empty when the header declares none. */
  function getMiniFatTable() {
    if (miniFatTable) return miniFatTable;
    if (miniFatSectorCount === 0 || firstMiniFatSector === END_OF_CHAIN) {
      miniFatTable = new Uint32Array(0);
      return miniFatTable;
    }
    const miniFatData = readChain(firstMiniFatSector, miniFatSectorCount * sectorSize);
    const entries = miniFatData.length / 4;
    miniFatTable = new Uint32Array(entries);
    for (let i = 0; i < entries; i++) {
      miniFatTable[i] = miniFatData.readUInt32LE(i * 4);
    }
    return miniFatTable;
  }

  // Directory: fixed 128-byte entries. Entries with an invalid name length are
  // kept as blank placeholders so that entry indices stay aligned.
  const dirData = readChain(firstDirSector, MAX_DIR_ENTRIES * 128);
  const dirEntries = [];
  for (let offset = 0; offset + 128 <= dirData.length && dirEntries.length < MAX_DIR_ENTRIES; offset += 128) {
    const nameLen = dirData.readUInt16LE(offset + 64);
    if (nameLen <= 0 || nameLen > 64) {
      dirEntries.push({ name: "", type: 0, startSector: 0, size: 0 });
      continue;
    }
    // The stored length includes the 2-byte UTF-16 terminator.
    const nameBytes = nameLen - 2;
    const name = nameBytes > 0 ? dirData.subarray(offset, offset + nameBytes).toString("utf16le") : "";
    const type = dirData[offset + 66];
    const startSector = dirData.readUInt32LE(offset + 116);
    const size = dirData.readUInt32LE(offset + 120);
    dirEntries.push({ name, type, startSector, size });
  }

  let miniStreamData = null;
  /** Lazily loads the mini stream container referenced by the root entry (type 5). */
  function getMiniStream() {
    if (miniStreamData) return miniStreamData;
    const root = dirEntries[0];
    if (!root || root.type !== 5) {
      miniStreamData = Buffer.alloc(0);
      return miniStreamData;
    }
    miniStreamData = readChain(root.startSector, root.size || MAX_STREAM_SIZE);
    return miniStreamData;
  }

  /** Follows a mini-sector chain inside the mini stream; unreadable mini sectors are skipped. */
  function readMiniStream(startSector, size) {
    const mft = getMiniFatTable();
    const ms = getMiniStream();
    if (mft.length === 0 || ms.length === 0) return Buffer.alloc(0);
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < size) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const off = current * miniSectorSize;
      const remaining = size - totalRead;
      const chunkSize = Math.min(miniSectorSize, remaining);
      if (off + chunkSize <= ms.length) {
        chunks.push(ms.subarray(off, off + chunkSize));
      }
      totalRead += chunkSize;
      current = current < mft.length ? mft[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }

  /** Reads a stream entry, trying the mini stream first for small streams. */
  function readStreamData(entry) {
    if (entry.size === 0) return Buffer.alloc(0);
    if (entry.size < miniStreamCutoff) {
      const miniResult = readMiniStream(entry.startSector, entry.size);
      if (miniResult.length > 0) return miniResult;
    }
    // Fall back to the regular FAT chain when the mini stream yields nothing.
    return readChain(entry.startSector, entry.size);
  }

  /**
   * Finds a stream entry (type 2) by name. For "storage/stream" paths the
   * storage part is ignored: the remainder is matched first, then the last
   * path component.
   */
  function findEntryByPath(path) {
    const parts = path.replace(/^\//, "").split("/");
    if (parts.length === 1) {
      return dirEntries.find((e) => e.name === parts[0] && e.type === 2) ?? null;
    }
    const streamName = parts.slice(1).join("/");
    for (const e of dirEntries) {
      if (e.type === 2 && e.name === streamName) {
        return e;
      }
    }
    const lastPart = parts[parts.length - 1];
    return dirEntries.find((e) => e.type === 2 && e.name === lastPart) ?? null;
  }

  return {
    findStream(path) {
      const normalized = path.replace(/^\//, "");
      const entry = findEntryByPath(normalized);
      if (!entry || entry.type !== 2) return null;
      const stream = readStreamData(entry);
      return stream.length > 0 ? stream : null;
    },
    entries() {
      return dirEntries.filter((e) => e.type === 2);
    }
  };
}
2246
+
1356
2247
  // src/hwp5/parser.ts
1357
2248
  init_page_range();
1358
2249
  var import_module = require("module");
@@ -1362,21 +2253,40 @@ var CFB = require2("cfb");
1362
2253
  var MAX_SECTIONS = 100;
1363
2254
  var MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024;
1364
2255
  function parseHwp5Document(buffer, options) {
1365
- const cfb = CFB.parse(buffer);
1366
- const headerEntry = CFB.find(cfb, "/FileHeader");
1367
- if (!headerEntry?.content) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
1368
- const header = parseFileHeader(Buffer.from(headerEntry.content));
2256
+ let cfb = null;
2257
+ let lenientCfb = null;
2258
+ const warnings = [];
2259
+ try {
2260
+ cfb = CFB.parse(buffer);
2261
+ } catch {
2262
+ try {
2263
+ lenientCfb = parseLenientCfb(buffer);
2264
+ warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
2265
+ } catch {
2266
+ throw new KordocError("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
2267
+ }
2268
+ }
2269
+ const findStream = (path) => {
2270
+ if (cfb) {
2271
+ const entry = CFB.find(cfb, path);
2272
+ return entry?.content ? Buffer.from(entry.content) : null;
2273
+ }
2274
+ return lenientCfb.findStream(path);
2275
+ };
2276
+ const headerData = findStream("/FileHeader");
2277
+ if (!headerData) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
2278
+ const header = parseFileHeader(headerData);
1369
2279
  if (header.signature !== "HWP Document File") throw new KordocError("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
1370
2280
  if (header.flags & FLAG_ENCRYPTED) throw new KordocError("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1371
2281
  if (header.flags & FLAG_DRM) throw new KordocError("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1372
2282
  const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
2283
+ const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
1373
2284
  const metadata = {
1374
2285
  version: `${header.versionMajor}.x`
1375
2286
  };
1376
- extractHwp5Metadata(cfb, metadata);
1377
- const docInfo = parseDocInfoStream(cfb, compressed);
1378
- const warnings = [];
1379
- const sections = findSections(cfb);
2287
+ if (cfb) extractHwp5Metadata(cfb, metadata);
2288
+ const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
2289
+ const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
1380
2290
  if (sections.length === 0) throw new KordocError("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
1381
2291
  metadata.pageCount = sections.length;
1382
2292
  const pageFilter = options?.pages ? parsePageRange(options.pages, sections.length) : null;
@@ -1388,7 +2298,7 @@ function parseHwp5Document(buffer, options) {
1388
2298
  if (pageFilter && !pageFilter.has(si + 1)) continue;
1389
2299
  try {
1390
2300
  const sectionData = sections[si];
1391
- const data = compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
2301
+ const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
1392
2302
  totalDecompressed += data.length;
1393
2303
  if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new KordocError("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
1394
2304
  const records = readRecords(data);
@@ -1401,7 +2311,7 @@ function parseHwp5Document(buffer, options) {
1401
2311
  warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
1402
2312
  }
1403
2313
  }
1404
- const images = extractHwp5Images(cfb, blocks, compressed, warnings);
2314
+ const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
1405
2315
  if (docInfo) {
1406
2316
  detectHwp5Headings(blocks, docInfo);
1407
2317
  }
@@ -1420,6 +2330,15 @@ function parseDocInfoStream(cfb, compressed) {
1420
2330
  return null;
1421
2331
  }
1422
2332
  }
2333
/**
 * Parse DocInfo from an already-extracted stream buffer.
 *
 * Best-effort: any failure while inflating or parsing yields null rather than
 * an exception.
 *
 * @param {Buffer|null} raw Raw DocInfo stream bytes, or a falsy value when absent.
 * @param {boolean} compressed Whether the stream must be inflated first.
 * @returns {object|null} Result of `parseDocInfo`, or null when unavailable.
 */
function parseDocInfoFromStream(raw, compressed) {
  if (raw) {
    try {
      let payload = raw;
      if (compressed) {
        payload = decompressStream(raw);
      }
      const records = readRecords(payload);
      return parseDocInfo(records);
    } catch {
      // Deliberately ignored: the function reports failure by returning null.
    }
  }
  return null;
}
1423
2342
  function detectHwp5Headings(blocks, docInfo) {
1424
2343
  let baseFontSize = 0;
1425
2344
  for (const style of docInfo.styles) {
@@ -1499,6 +2418,20 @@ function extractHwp5Metadata(cfb, metadata) {
1499
2418
  } catch {
1500
2419
  }
1501
2420
  }
2421
/**
 * Collect the decrypted `/ViewText/SectionN` streams of a strictly parsed
 * container, in ascending section order.
 *
 * Scanning stops at the first missing section, at the first section that
 * fails to decrypt, or after MAX_SECTIONS sections.
 *
 * @param {object} cfb Container returned by `CFB.parse`.
 * @param {boolean} compressed Forwarded to `decryptViewText`.
 * @returns {Array} Decrypted section payloads.
 */
function findViewTextSections(cfb, compressed) {
  // Sections are appended in increasing index order, so the result is already
  // sorted by section number.
  const contents = [];
  let sectionNo = 0;
  while (sectionNo < MAX_SECTIONS) {
    const found = CFB.find(cfb, `/ViewText/Section${sectionNo}`);
    if (!found?.content) {
      break;
    }
    let plain;
    try {
      plain = decryptViewText(Buffer.from(found.content), compressed);
    } catch {
      break;
    }
    contents.push(plain);
    sectionNo += 1;
  }
  return contents;
}
1502
2435
  function findSections(cfb) {
1503
2436
  const sections = [];
1504
2437
  for (let i = 0; i < MAX_SECTIONS; i++) {
@@ -1517,6 +2450,38 @@ function findSections(cfb) {
1517
2450
  }
1518
2451
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
1519
2452
  }
2453
/**
 * Collect the raw body-text section streams from a leniently parsed container.
 *
 * The streams are returned exactly as stored, matching what the strict
 * `findSections(cfb)` path hands to the caller. The caller inflates each
 * section itself, inside its per-section try/catch. Inflating here as well
 * would decompress every section twice, and a corrupt section would abort the
 * whole parse instead of producing a per-section warning.
 *
 * Lookup order:
 *   1. `/BodyText/SectionN` (then bare `SectionN`) for N = 0, 1, ... until the
 *      first gap or MAX_SECTIONS.
 *   2. If nothing was found, any stream whose name starts with "Section",
 *      ordered by the number parsed from the name (0 when unparsable).
 *
 * @param {{findStream: function(string): (Buffer|null), entries: function(): Array}} lcfb
 *   Lenient container reader.
 * @param {boolean} compressed Accepted for signature compatibility; not used,
 *   because decompression is the caller's responsibility.
 * @returns {Buffer[]} Raw section streams sorted by section index.
 */
function findSectionsLenient(lcfb, compressed) {
  const sections = [];
  for (let i = 0; i < MAX_SECTIONS; i++) {
    const raw = lcfb.findStream(`/BodyText/Section${i}`) ?? lcfb.findStream(`Section${i}`);
    if (!raw) break;
    sections.push({ idx: i, content: raw });
  }

  if (sections.length === 0) {
    // Directory tree unusable: fall back to scanning stream names directly.
    for (const e of lcfb.entries()) {
      if (sections.length >= MAX_SECTIONS) break;
      if (!e.name.startsWith("Section")) continue;
      const idx = Number.parseInt(e.name.replace("Section", ""), 10) || 0;
      const raw = lcfb.findStream(e.name);
      if (raw) sections.push({ idx, content: raw });
    }
  }

  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
}
2472
/**
 * Collect the decrypted ViewText section streams from a leniently parsed
 * container, in ascending section order.
 *
 * Each section is looked up as `/ViewText/SectionN`, then as bare `SectionN`.
 * Scanning stops at the first missing section, at the first section that
 * fails to decrypt, or after MAX_SECTIONS sections.
 *
 * @param {{findStream: function(string): (Buffer|null)}} lcfb Lenient container reader.
 * @param {boolean} compressed Forwarded to `decryptViewText`.
 * @returns {Array} Decrypted section payloads.
 */
function findViewTextSectionsLenient(lcfb, compressed) {
  // Sections are appended in increasing index order, so no re-sorting is needed.
  const contents = [];
  for (let sectionNo = 0; sectionNo < MAX_SECTIONS; sectionNo += 1) {
    const encrypted =
      lcfb.findStream(`/ViewText/Section${sectionNo}`) ?? lcfb.findStream(`Section${sectionNo}`);
    if (!encrypted) {
      break;
    }
    try {
      contents.push(decryptViewText(encrypted, compressed));
    } catch {
      break;
    }
  }
  return contents;
}
1520
2485
  var TAG_SHAPE_COMPONENT = 74;
1521
2486
  function extractBinDataId(records, ctrlIdx) {
1522
2487
  const ctrlLevel = records[ctrlIdx].level;
@@ -1591,6 +2556,53 @@ function extractHwp5Images(cfb, blocks, compressed, warnings) {
1591
2556
  }
1592
2557
  return images;
1593
2558
  }
2559
/**
 * Resolve image blocks against BinData streams of a leniently parsed container.
 *
 * Streams named `BINdddd...` are indexed by their four-digit number. Every
 * block of type "image" whose `text` parses as a number is then either:
 *   - rewritten to point at a generated file name and given `imageData`, or
 *   - downgraded to a placeholder paragraph, with a SKIPPED_IMAGE warning,
 *     when the stream is missing or its format is not recognised.
 *
 * `blocks` and `warnings` are mutated in place.
 *
 * @param {{findStream: function(string): (Buffer|null), entries: function(): Array}} lcfb
 *   Lenient container reader.
 * @param {Array<object>} blocks Parsed blocks; image blocks are updated in place.
 * @param {boolean} compressed Whether BinData streams should be inflated.
 * @param {Array<object>} warnings Sink for SKIPPED_IMAGE warnings.
 * @returns {Array<{filename: string, data: Uint8Array, mimeType: string}>} Extracted images.
 */
function extractHwp5ImagesLenient(lcfb, blocks, compressed, warnings) {
  const binDataMap = new Map();
  const binRe = /^BIN(\d{4})/i;
  for (const e of lcfb.entries()) {
    const match = e.name.match(binRe);
    if (!match) continue;
    const idx = Number.parseInt(match[1], 10);
    let raw = lcfb.findStream(e.name);
    if (!raw) continue;
    if (compressed) {
      try {
        raw = decompressStream(raw);
      } catch {
        // Best-effort: keep the stored bytes when inflation fails; the MIME
        // sniffing below decides whether they are usable.
      }
    }
    binDataMap.set(idx, { data: raw, name: e.name });
  }
  if (binDataMap.size === 0) return [];

  // Checked in order; the first MIME substring that matches picks the extension.
  const extensionByMimePart = [
    ["jpeg", "jpg"],
    ["png", "png"],
    ["gif", "gif"],
    ["bmp", "bmp"],
  ];

  const images = [];
  let imageIndex = 0;
  for (const block of blocks) {
    if (block.type !== "image" || !block.text) continue;
    const binId = Number.parseInt(block.text, 10);
    if (Number.isNaN(binId)) continue;

    const bin = binDataMap.get(binId);
    if (!bin) {
      // Message reads "BinData N missing"; the original literal was mojibake
      // (U+FFFD replacement characters) instead of the intended Hangul.
      warnings.push({ page: block.pageNumber, message: `BinData ${binId} \uC5C6\uC74C`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: BinData ${binId}]`;
      continue;
    }

    const mime = detectImageMime(bin.data);
    if (!mime) {
      warnings.push({ page: block.pageNumber, message: `BinData ${binId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: ${bin.name}]`;
      continue;
    }

    imageIndex++;
    const ext = extensionByMimePart.find(([part]) => mime.includes(part))?.[1] ?? "bin";
    const filename = `image_${String(imageIndex).padStart(3, "0")}.${ext}`;
    // Two independent copies, so the returned image and the block never alias.
    images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
    block.text = filename;
    block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
  }
  return images;
}
1594
2606
  function parseSection(records, docInfo, warnings, sectionNum) {
1595
2607
  const blocks = [];
1596
2608
  let i = 0;
@@ -1627,12 +2639,61 @@ function parseSection(records, docInfo, warnings, sectionNum) {
1627
2639
  }
1628
2640
  } else if (ctrlId === " elo" || ctrlId === "ole ") {
1629
2641
  warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
2642
+ } else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
2643
+ const noteText = extractNoteText(records, i);
2644
+ if (noteText && blocks.length > 0) {
2645
+ const lastBlock = blocks[blocks.length - 1];
2646
+ if (lastBlock.type === "paragraph") {
2647
+ lastBlock.footnoteText = lastBlock.footnoteText ? lastBlock.footnoteText + "; " + noteText : noteText;
2648
+ }
2649
+ }
2650
+ } else if (ctrlId === "%tok" || ctrlId === "klnk") {
2651
+ const url = extractHyperlinkUrl(rec.data);
2652
+ if (url && blocks.length > 0) {
2653
+ const lastBlock = blocks[blocks.length - 1];
2654
+ if (lastBlock.type === "paragraph" && !lastBlock.href) {
2655
+ lastBlock.href = sanitizeHref(url) ?? void 0;
2656
+ }
2657
+ }
1630
2658
  }
1631
2659
  }
1632
2660
  i++;
1633
2661
  }
1634
2662
  return blocks;
1635
2663
  }
2664
/**
 * Gather the text of a footnote/endnote control.
 *
 * Walks the records that follow the control while they are nested deeper than
 * it, looking at no more than 99 records, and joins the trimmed, non-empty
 * text of every TAG_PARA_TEXT record with single spaces.
 *
 * @param {Array<{level: number, tagId: number, data: Buffer}>} records Section records.
 * @param {number} ctrlIdx Index of the note control record.
 * @returns {string|null} Joined note text, or null when no text was found.
 */
function extractNoteText(records, ctrlIdx) {
  const parentLevel = records[ctrlIdx].level;
  const scanLimit = Math.min(records.length, ctrlIdx + 100);
  const pieces = [];

  let cursor = ctrlIdx + 1;
  while (cursor < scanLimit) {
    const record = records[cursor];
    cursor += 1;
    // A record at or above the control's level marks the end of its subtree.
    if (record.level <= parentLevel) {
      break;
    }
    if (record.tagId !== TAG_PARA_TEXT) {
      continue;
    }
    const text = extractText(record.data).trim();
    if (text) {
      pieces.push(text);
    }
  }

  if (pieces.length === 0) {
    return null;
  }
  return pieces.join(" ");
}
2677
/**
 * Best-effort extraction of an http(s) URL from a control record payload.
 *
 * Searches the buffer for the UTF-16LE byte sequence of "http" and reads
 * 16-bit code units from there until a terminator or the end of the buffer.
 * Any C0 control character (including NUL) or DEL ends the URL: such code
 * units cannot be part of a URL, and stopping only at NUL let bytes of the
 * binary fields that follow the string leak into the result.
 *
 * @param {Buffer} data Raw record payload.
 * @returns {string|null} The URL when it starts with `http://` or `https://`
 *   and is shorter than 2000 characters; otherwise null. Never throws.
 */
function extractHyperlinkUrl(data) {
  try {
    const httpSig = Buffer.from("http", "utf16le");
    const idx = data.indexOf(httpSig);
    if (idx >= 0) {
      let end = idx;
      // `end + 1 < data.length` guarantees a full 16-bit unit is readable,
      // so a dangling trailing byte is ignored.
      while (end + 1 < data.length) {
        const ch = data.readUInt16LE(end);
        if (ch < 0x20 || ch === 0x7f) break;
        end += 2;
      }
      const url = data.subarray(idx, end).toString("utf16le");
      if (/^https?:\/\/.+/.test(url) && url.length < 2e3) {
        return url;
      }
    }
  } catch {
    // Deliberately ignored: malformed or non-Buffer input simply yields null.
  }
  return null;
}
1636
2697
  function resolveCharStyle(charShapeIds, docInfo) {
1637
2698
  if (charShapeIds.length === 0 || docInfo.charShapes.length === 0) return void 0;
1638
2699
  const freq = /* @__PURE__ */ new Map();