kordoc 1.9.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -137,13 +137,83 @@ import JSZip2 from "jszip";
137
137
  import { inflateRawSync } from "zlib";
138
138
  import { DOMParser } from "@xmldom/xmldom";
139
139
 
140
- // src/table/builder.ts
140
+ // src/utils.ts
141
// Package version string baked in at build time.
var VERSION = "2.0.0";
142
/**
 * Returns an ArrayBuffer holding exactly the bytes visible through `buf`.
 *
 * When the view spans its whole backing buffer, that backing buffer is
 * returned as-is (no copy); otherwise the visible window is copied out.
 *
 * @param {{buffer: ArrayBuffer, byteOffset: number, byteLength: number}} buf
 * @returns {ArrayBuffer}
 */
function toArrayBuffer(buf) {
  const { buffer: backing, byteOffset: start, byteLength: size } = buf;
  const coversWholeBacking = start === 0 && size === backing.byteLength;
  return coversWholeBacking ? backing : backing.slice(start, start + size);
}
148
/**
 * Error type thrown for kordoc-specific failures; instances report
 * `name === "KordocError"`.
 */
var KordocError = class extends Error {
  // Own instance property, set once the base Error has been constructed.
  name = "KordocError";

  /** @param {string} message Human-readable failure description. */
  constructor(message) {
    super(message);
  }
};
154
/**
 * Reports whether an archive entry name looks unsafe to use as a path.
 *
 * A name is rejected when it contains a NUL character, contains ".."
 * anywhere, starts with "/" or starts with a drive prefix such as "C:".
 * Backslashes are treated as forward slashes before checking.
 *
 * @param {string} name Entry name to inspect.
 * @returns {boolean} true when the name must be rejected.
 */
function isPathTraversal(name) {
  if (name.indexOf("\0") !== -1) return true;
  const unixStyle = name.split("\\").join("/");
  if (unixStyle.indexOf("..") !== -1) return true;
  if (unixStyle[0] === "/") return true;
  return /^[A-Za-z]:/.test(unixStyle);
}
159
/**
 * Cheap ZIP-bomb pre-check: reads the end-of-central-directory record and the
 * central directory headers to total the declared uncompressed sizes without
 * inflating anything.
 *
 * Accepts an ArrayBuffer or any ArrayBuffer view (Buffer, Uint8Array,
 * DataView). Previously a view made `new DataView(buffer)` throw, the
 * catch-all below swallowed it, and the size/entry limits were silently
 * skipped for such inputs.
 *
 * @param {ArrayBuffer | ArrayBufferView} buffer Raw bytes of the ZIP archive.
 * @param {number} [maxUncompressedSize] Limit for the summed uncompressed sizes, in bytes.
 * @param {number} [maxEntries] Limit for the entry count declared in the EOCD record.
 * @returns {{totalUncompressed: number, entryCount: number}} Declared totals;
 *   zeros when the archive structure cannot be read (best-effort check).
 * @throws {KordocError} When the entry count or the summed size exceeds its limit.
 */
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
  try {
    // Honour the view's own window so offsets stay relative to the archive start.
    const data = ArrayBuffer.isView(buffer)
      ? new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength)
      : new DataView(buffer);
    const len = data.byteLength;
    // Scan backwards for the EOCD signature (22-byte record plus up to 65535 bytes of comment).
    let eocdOffset = -1;
    for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
      if (data.getUint32(i, true) === 101010256) {
        eocdOffset = i;
        break;
      }
    }
    if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
    const entryCount = data.getUint16(eocdOffset + 10, true);
    if (entryCount > maxEntries) {
      throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
    }
    const cdSize = data.getUint32(eocdOffset + 12, true);
    const cdOffset = data.getUint32(eocdOffset + 16, true);
    if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
    let totalUncompressed = 0;
    let pos = cdOffset;
    for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
      // Stop at the first record that is not a central directory file header.
      if (data.getUint32(pos, true) !== 33639248) break;
      totalUncompressed += data.getUint32(pos + 24, true);
      const nameLen = data.getUint16(pos + 28, true);
      const extraLen = data.getUint16(pos + 30, true);
      const commentLen = data.getUint16(pos + 32, true);
      pos += 46 + nameLen + extraLen + commentLen;
    }
    if (totalUncompressed > maxUncompressedSize) {
      throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
    }
    return { totalUncompressed, entryCount };
  } catch (err) {
    // Limit violations must surface; any other failure keeps the check best-effort.
    if (err instanceof KordocError) throw err;
    return { totalUncompressed: 0, entryCount: 0 };
  }
}
141
197
// Allow-list of link prefixes: http(s), mailto, tel and in-document anchors.
var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;

/**
 * Trims a link target and returns it only when it starts with an allowed prefix.
 *
 * @param {string} href Raw link target.
 * @returns {string | null} The trimmed target, or null when empty or not allowed.
 */
function sanitizeHref(href) {
  const candidate = href.trim();
  return candidate !== "" && SAFE_HREF_RE.test(candidate) ? candidate : null;
}
203
/**
 * Maps a thrown value to an error code by looking for known substrings in its
 * message. Rules are evaluated in order and the first match wins; anything
 * that is not an Error, or matches no rule, is "PARSE_ERROR".
 *
 * @param {unknown} err The caught value.
 * @returns {string} One of the error codes listed in the rule table below.
 */
function classifyError(err) {
  if (!(err instanceof Error)) return "PARSE_ERROR";
  const text = err.message;
  const hasAny = (...needles) => needles.some((needle) => text.includes(needle));
  // Order matters: the specific ZIP rules must run before the generic bomb rule.
  const rules = [
    ["ENCRYPTED", () => hasAny("\uC554\uD638\uD654")],
    ["DRM_PROTECTED", () => hasAny("DRM")],
    ["ZIP_BOMB", () => hasAny("ZIP bomb", "ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC", "ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")],
    ["DECOMPRESSION_BOMB", () => hasAny("bomb", "\uD06C\uAE30 \uCD08\uACFC", "\uC555\uCD95 \uD574\uC81C")],
    ["IMAGE_BASED_PDF", () => hasAny("\uC774\uBBF8\uC9C0 \uAE30\uBC18")],
    ["NO_SECTIONS", () => hasAny("\uC139\uC158") && hasAny("\uCC3E\uC744 \uC218 \uC5C6", "\uC5C6\uC74C")],
    ["CORRUPTED", () => hasAny("\uC2DC\uADF8\uB2C8\uCC98", "\uBCF5\uAD6C\uD560 \uC218 \uC5C6")],
  ];
  for (const [code, matches] of rules) {
    if (matches()) return code;
  }
  return "PARSE_ERROR";
}
215
+
216
+ // src/table/builder.ts
147
217
  var MAX_COLS = 200;
148
218
  var MAX_ROWS = 1e4;
149
219
  function buildTable(rows) {
@@ -394,82 +464,6 @@ var HEADING_RATIO_H1 = 1.5;
394
464
  var HEADING_RATIO_H2 = 1.3;
395
465
  var HEADING_RATIO_H3 = 1.15;
396
466
 
397
- // src/utils.ts
398
- var VERSION = true ? "1.9.0" : "0.0.0-dev";
399
- function toArrayBuffer(buf) {
400
- if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
401
- return buf.buffer;
402
- }
403
- return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
404
- }
405
- var KordocError = class extends Error {
406
- constructor(message) {
407
- super(message);
408
- this.name = "KordocError";
409
- }
410
- };
411
- function isPathTraversal(name) {
412
- if (name.includes("\0")) return true;
413
- const normalized = name.replace(/\\/g, "/");
414
- return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
415
- }
416
- function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
417
- try {
418
- const data = new DataView(buffer);
419
- const len = buffer.byteLength;
420
- let eocdOffset = -1;
421
- for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
422
- if (data.getUint32(i, true) === 101010256) {
423
- eocdOffset = i;
424
- break;
425
- }
426
- }
427
- if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
428
- const entryCount = data.getUint16(eocdOffset + 10, true);
429
- if (entryCount > maxEntries) {
430
- throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
431
- }
432
- const cdSize = data.getUint32(eocdOffset + 12, true);
433
- const cdOffset = data.getUint32(eocdOffset + 16, true);
434
- if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
435
- let totalUncompressed = 0;
436
- let pos = cdOffset;
437
- for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
438
- if (data.getUint32(pos, true) !== 33639248) break;
439
- totalUncompressed += data.getUint32(pos + 24, true);
440
- const nameLen = data.getUint16(pos + 28, true);
441
- const extraLen = data.getUint16(pos + 30, true);
442
- const commentLen = data.getUint16(pos + 32, true);
443
- pos += 46 + nameLen + extraLen + commentLen;
444
- }
445
- if (totalUncompressed > maxUncompressedSize) {
446
- throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
447
- }
448
- return { totalUncompressed, entryCount };
449
- } catch (err) {
450
- if (err instanceof KordocError) throw err;
451
- return { totalUncompressed: 0, entryCount: 0 };
452
- }
453
- }
454
- var SAFE_HREF_RE2 = /^(?:https?:|mailto:|tel:|#)/i;
455
- function sanitizeHref2(href) {
456
- const trimmed = href.trim();
457
- if (!trimmed || !SAFE_HREF_RE2.test(trimmed)) return null;
458
- return trimmed;
459
- }
460
- function classifyError(err) {
461
- if (!(err instanceof Error)) return "PARSE_ERROR";
462
- const msg = err.message;
463
- if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
464
- if (msg.includes("DRM")) return "DRM_PROTECTED";
465
- if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
466
- if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
467
- if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
468
- if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
469
- if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
470
- return "PARSE_ERROR";
471
- }
472
-
473
467
  // src/hwpx/parser.ts
474
468
  init_page_range();
475
469
  var MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024;
@@ -1076,7 +1070,7 @@ function extractParagraphInfo(para, styleMap) {
1076
1070
  case "hyperlink": {
1077
1071
  const url = child.getAttribute("url") || child.getAttribute("href") || "";
1078
1072
  if (url) {
1079
- const safe = sanitizeHref2(url);
1073
+ const safe = sanitizeHref(url);
1080
1074
  if (safe) href = safe;
1081
1075
  }
1082
1076
  walk(child);
@@ -1169,13 +1163,16 @@ var TAG_TABLE = 77;
1169
1163
  var TAG_DOC_CHAR_SHAPE = 55;
1170
1164
  var TAG_DOC_STYLE = 58;
1171
1165
  var CHAR_LINE = 0;
1166
+ var CHAR_SECTION_BREAK = 10;
1172
1167
  var CHAR_PARA = 13;
1173
1168
  var CHAR_TAB = 9;
1174
1169
  var CHAR_HYPHEN = 30;
1175
1170
  var CHAR_NBSP = 31;
1176
1171
  var CHAR_FIXED_NBSP = 24;
1172
+ var CHAR_FIXED_WIDTH = 25;
1177
1173
  var FLAG_COMPRESSED = 1 << 0;
1178
1174
  var FLAG_ENCRYPTED = 1 << 1;
1175
+ var FLAG_DISTRIBUTION = 1 << 2;
1179
1176
  var FLAG_DRM = 1 << 4;
1180
1177
  var MAX_RECORDS = 5e5;
1181
1178
  function readRecords(data) {
@@ -1270,27 +1267,41 @@ function extractText(data) {
1270
1267
  const ch = data.readUInt16LE(i);
1271
1268
  i += 2;
1272
1269
  switch (ch) {
1270
+ // ── char 타입 (2바이트만, 확장 데이터 없음) ──
1273
1271
  case CHAR_LINE:
1274
1272
  result += "\n";
1275
1273
  break;
1276
- case CHAR_PARA:
1277
- break;
1278
- case CHAR_TAB:
1279
- result += " ";
1274
+ case CHAR_SECTION_BREAK:
1275
+ result += "\n";
1280
1276
  if (i + 14 <= data.length) i += 14;
1281
1277
  break;
1278
+ case CHAR_PARA:
1279
+ break;
1280
+ // 문단 끝
1282
1281
  case CHAR_HYPHEN:
1283
1282
  result += "-";
1284
1283
  break;
1285
1284
  case CHAR_NBSP:
1285
+ result += " ";
1286
+ break;
1286
1287
  case CHAR_FIXED_NBSP:
1288
+ result += "\xA0";
1289
+ break;
1290
+ // 진짜 NBSP
1291
+ case CHAR_FIXED_WIDTH:
1287
1292
  result += " ";
1288
1293
  break;
1294
+ // 고정폭 공백
1295
+ // ── inline 타입 (2바이트 + 14바이트 확장) ──
1296
+ case CHAR_TAB:
1297
+ result += " ";
1298
+ if (i + 14 <= data.length) i += 14;
1299
+ break;
1289
1300
  default:
1290
1301
  if (ch >= 1 && ch <= 31) {
1291
- const isExt = ch >= 1 && ch <= 3 || ch >= 10 && ch <= 18 || ch >= 21 && ch <= 23;
1302
+ const isExtended = ch >= 1 && ch <= 3 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= 18 || ch >= 21 && ch <= 23;
1292
1303
  const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
1293
- if ((isExt || isInline) && i + 14 <= data.length) i += 14;
1304
+ if ((isExtended || isInline) && i + 14 <= data.length) i += 14;
1294
1305
  } else if (ch >= 32) {
1295
1306
  if (ch >= 55296 && ch <= 56319 && i + 1 < data.length) {
1296
1307
  const lo = data.readUInt16LE(i);
@@ -1309,6 +1320,886 @@ function extractText(data) {
1309
1320
  return result;
1310
1321
  }
1311
1322
 
1323
+ // src/hwp5/aes.ts
1324
// AES forward substitution table, one row per high nibble of the input byte.
var S_BOX = new Uint8Array([
  0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
  0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
  0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
  0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
  0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
  0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
  0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
  0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
  0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
  0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
  0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
  0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
  0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
  0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
  0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
  0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
]);

// AES inverse substitution table: INV_S_BOX[S_BOX[x]] === x for every byte x.
var INV_S_BOX = new Uint8Array([
  0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
  0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
  0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
  0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
  0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
  0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
  0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
  0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
  0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
  0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
  0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
  0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
  0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
  0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
  0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
  0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
]);

// Round constants used by the AES-128 key schedule (one per round, 10 rounds).
var RCON = new Uint8Array([0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36]);
1841
/**
 * Multiplies two bytes in the AES finite field (shift-and-add over 8 bits,
 * reducing with 0x1b whenever the running multiplicand overflows bit 7).
 *
 * @param {number} a First factor.
 * @param {number} b Second factor.
 * @returns {number} The field product.
 */
function gmul(a, b) {
  let product = 0;
  let multiplicand = a;
  let multiplier = b;
  for (let bit = 0; bit < 8; bit++) {
    if ((multiplier & 1) !== 0) product ^= multiplicand;
    const overflow = (multiplicand & 0x80) !== 0;
    multiplicand = (multiplicand << 1) & 0xff;
    if (overflow) multiplicand ^= 0x1b;
    multiplier >>= 1;
  }
  return product;
}
1852
/**
 * Expands a 16-byte AES-128 key into 44 round-key words (11 round keys of
 * four big-endian 32-bit words each).
 *
 * @param {ArrayLike<number>} key The 16 key bytes.
 * @returns {Uint32Array} The 44-word key schedule.
 */
function expandKey(key) {
  const words = new Uint32Array(44);
  // The first four words are the key itself, packed big-endian.
  for (let col = 0; col < 4; col++) {
    const at = col * 4;
    words[col] = key[at] << 24 | key[at + 1] << 16 | key[at + 2] << 8 | key[at + 3];
  }
  for (let idx = 4; idx < 44; idx++) {
    const previous = words[idx - 1];
    let mixed = previous;
    if (idx % 4 === 0) {
      // Every fourth word: rotate left one byte, substitute each byte, add the round constant.
      const rotated = (previous << 8 | previous >>> 24) >>> 0;
      const substituted = S_BOX[rotated >>> 24 & 255] << 24 | S_BOX[rotated >>> 16 & 255] << 16 | S_BOX[rotated >>> 8 & 255] << 8 | S_BOX[rotated & 255];
      mixed = (substituted ^ RCON[idx / 4 - 1] << 24) >>> 0;
    }
    words[idx] = (words[idx - 4] ^ mixed) >>> 0;
  }
  return words;
}
1868
/**
 * Decrypts one 16-byte block with the AES-128 inverse cipher.
 *
 * @param {ArrayLike<number>} block The 16 ciphertext bytes (not modified).
 * @param {Uint32Array} roundKeys Key schedule as produced by expandKey.
 * @returns {Uint8Array} A new 16-byte array holding the plaintext block.
 */
function decryptBlock(block, roundKeys) {
  // Work on a private copy so the caller's block is left untouched.
  const state = Uint8Array.from({ length: 16 }, (_, i) => block[i]);
  addRoundKey(state, roundKeys, 10);
  let round = 9;
  while (round >= 1) {
    invShiftRows(state);
    invSubBytes(state);
    addRoundKey(state, roundKeys, round);
    invMixColumns(state);
    round -= 1;
  }
  // The last round has no InvMixColumns step.
  invShiftRows(state);
  invSubBytes(state);
  addRoundKey(state, roundKeys, 0);
  return state;
}
1883
/**
 * XORs one round key into the 16-byte state, in place.
 *
 * State byte i is combined with byte (i mod 4), counted from the most
 * significant end, of key-schedule word `round * 4 + floor(i / 4)`.
 *
 * @param {Uint8Array} s The state, modified in place.
 * @param {Uint32Array} w The key schedule.
 * @param {number} round Index of the round key to apply.
 */
function addRoundKey(s, w, round) {
  const firstWord = round * 4;
  for (let i = 0; i < 16; i++) {
    const word = w[firstWord + (i >> 2)];
    const shift = 24 - 8 * (i & 3);
    s[i] ^= word >>> shift & 255;
  }
}
1893
/**
 * Replaces each of the 16 state bytes with its INV_S_BOX entry, in place.
 *
 * @param {Uint8Array} s The state, modified in place.
 */
function invSubBytes(s) {
  // Each byte is substituted independently, so the traversal order is irrelevant.
  for (let i = 15; i >= 0; i--) {
    s[i] = INV_S_BOX[s[i]];
  }
}
1896
/**
 * Applies the AES InvShiftRows step in place. The state is laid out
 * column-major (row r of column c at index c * 4 + r); row r is rotated
 * right by r positions and row 0 is left untouched.
 *
 * @param {Uint8Array} s The 16-byte state, modified in place.
 */
function invShiftRows(s) {
  // Row 1 (indices 1, 5, 9, 13): rotate right by one.
  const r1c0 = s[1], r1c1 = s[5], r1c2 = s[9], r1c3 = s[13];
  s[1] = r1c3;
  s[5] = r1c0;
  s[9] = r1c1;
  s[13] = r1c2;
  // Row 2 (indices 2, 6, 10, 14): rotate by two, i.e. swap opposite columns.
  const r2c0 = s[2], r2c1 = s[6];
  s[2] = s[10];
  s[6] = s[14];
  s[10] = r2c0;
  s[14] = r2c1;
  // Row 3 (indices 3, 7, 11, 15): rotate right by three.
  const r3c0 = s[3], r3c1 = s[7], r3c2 = s[11], r3c3 = s[15];
  s[3] = r3c1;
  s[7] = r3c2;
  s[11] = r3c3;
  s[15] = r3c0;
}
1914
/**
 * Applies the AES InvMixColumns step in place: each 4-byte column is
 * multiplied by the circulant matrix built from (14, 11, 13, 9).
 *
 * @param {Uint8Array} s The 16-byte state, modified in place.
 */
function invMixColumns(s) {
  // One output byte; the four rows use the same coefficients on rotated inputs.
  const mix = (p, q, r, t) => gmul(p, 14) ^ gmul(q, 11) ^ gmul(r, 13) ^ gmul(t, 9);
  for (let base = 0; base < 16; base += 4) {
    const b0 = s[base];
    const b1 = s[base + 1];
    const b2 = s[base + 2];
    const b3 = s[base + 3];
    s[base] = mix(b0, b1, b2, b3);
    s[base + 1] = mix(b1, b2, b3, b0);
    s[base + 2] = mix(b2, b3, b0, b1);
    s[base + 3] = mix(b3, b0, b1, b2);
  }
}
1924
/**
 * Decrypts data with AES-128 in ECB mode (each 16-byte block independently).
 *
 * @param {Uint8Array} data Ciphertext; its length must be a multiple of 16.
 * @param {Uint8Array} key The 16-byte key.
 * @returns {Uint8Array} A new array of the same length holding the plaintext.
 * @throws {Error} When the key is not 16 bytes or the data is not block-aligned.
 */
function aes128EcbDecrypt(data, key) {
  if (key.length !== 16) throw new Error("AES-128 \uD0A4\uB294 16\uBC14\uC774\uD2B8\uC5EC\uC57C \uD569\uB2C8\uB2E4");
  if (data.length % 16 !== 0) throw new Error("AES ECB \uC785\uB825\uC740 16\uBC14\uC774\uD2B8\uC758 \uBC30\uC218\uC5EC\uC57C \uD569\uB2C8\uB2E4");
  const schedule = expandKey(key);
  const plain = new Uint8Array(data.length);
  const blockCount = data.length / 16;
  for (let n = 0; n < blockCount; n++) {
    const start = n * 16;
    plain.set(decryptBlock(data.subarray(start, start + 16), schedule), start);
  }
  return plain;
}
1936
+
1937
+ // src/hwp5/crypto.ts
1938
/**
 * Linear congruential generator with multiplier 214013 and increment 2531011
 * on a 32-bit state (described in the original source as MSVC rand() compatible).
 */
var MsvcLcg = class {
  /** Current 32-bit unsigned generator state. */
  seed;

  /** @param {number} seed Initial state; coerced to an unsigned 32-bit integer. */
  constructor(seed) {
    this.seed = seed >>> 0;
  }

  /**
   * Advances the state and returns the next value.
   *
   * @returns {number} An integer in the range 0..0x7FFF.
   */
  rand() {
    const next = (Math.imul(this.seed, 214013) + 2531011) >>> 0;
    this.seed = next;
    return (next >>> 16) & 0x7fff;
  }
};
1949
/**
 * De-obfuscates the first 256 bytes of a distribution-document payload.
 *
 * The first four bytes (little-endian) seed an MsvcLcg and are copied through
 * unchanged. The remaining bytes are XORed in runs: each run draws one key
 * byte and then a run length of 1..16 from the generator.
 *
 * @param {Uint8Array} payload At least 256 payload bytes (not modified).
 * @returns {Uint8Array} A new 256-byte array with the de-obfuscated payload.
 * @throws {Error} When fewer than 256 bytes are supplied.
 */
function decryptDistributePayload(payload) {
  if (payload.length < 256) throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
  const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
  const generator = new MsvcLcg(seed);
  const plain = new Uint8Array(256);
  // The seed bytes themselves are stored in the clear.
  for (let k = 0; k < 4; k++) plain[k] = payload[k];
  let cursor = 4;
  while (cursor < 256) {
    // Draw order matters: key byte first, then the run length.
    const mask = generator.rand() & 255;
    const runEnd = Math.min(256, cursor + (generator.rand() & 15) + 1);
    for (; cursor < runEnd; cursor++) {
      plain[cursor] = payload[cursor] ^ mask;
    }
  }
  return plain;
}
1968
/**
 * Copies the 16-byte AES key out of a de-obfuscated distribution payload.
 * The key starts at offset 4 plus the low nibble of the first byte.
 *
 * @param {Uint8Array} decryptedPayload De-obfuscated payload bytes.
 * @returns {Uint8Array} A copy of the 16 key bytes.
 * @throws {Error} When the key would extend past the end of the payload.
 */
function extractAesKey(decryptedPayload) {
  const keyStart = (decryptedPayload[0] & 0x0f) + 4;
  const keyEnd = keyStart + 16;
  if (keyEnd > decryptedPayload.length) {
    throw new Error("AES \uD0A4 \uCD94\uCD9C \uC2E4\uD328: \uC624\uD504\uC14B\uC774 payload \uBC94\uC704\uB97C \uCD08\uACFC\uD569\uB2C8\uB2E4");
  }
  return decryptedPayload.slice(keyStart, keyEnd);
}
1975
/**
 * Decodes a record header at `offset`.
 *
 * The 32-bit little-endian header word carries the tag id in bits 0-9 and the
 * payload size in bits 20-31. A size field of 0xFFF means the real size
 * follows as a separate 32-bit little-endian value, making the header 8 bytes.
 *
 * @param {Uint8Array} data Bytes containing the record.
 * @param {number} offset Position of the header within `data`.
 * @returns {{tagId: number, size: number, headerSize: number}}
 * @throws {Error} When `data` ends before the header (or its extended size) is complete.
 */
function parseRecordHeader(data, offset) {
  const u32At = (pos) => (data[pos] | data[pos + 1] << 8 | data[pos + 2] << 16 | data[pos + 3] << 24) >>> 0;
  if (data.length < offset + 4) throw new Error("\uB808\uCF54\uB4DC \uD5E4\uB354 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  const word = u32At(offset);
  const tagId = word & 0x3ff;
  const inlineSize = word >>> 20 & 0xfff;
  if (inlineSize !== 0xfff) {
    return { tagId, size: inlineSize, headerSize: 4 };
  }
  if (data.length < offset + 8) throw new Error("\uD655\uC7A5 \uB808\uCF54\uB4DC \uD06C\uAE30 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  return { tagId, size: u32At(offset + 4), headerSize: 8 };
}
1988
// Tag id the first record of a distribution ViewText stream is expected to carry.
var TAG_DISTRIBUTE_DOC_DATA = 16 + 28;

/**
 * Decrypts a distribution-document ViewText stream.
 *
 * The stream must start with a DISTRIBUTE_DOC_DATA record of at least 256
 * payload bytes; the AES key is derived from that payload. Everything after
 * the record is decrypted with AES-128-ECB, ignoring trailing bytes that do
 * not fill a 16-byte block.
 *
 * @param {Uint8Array} viewTextRaw Raw stream bytes.
 * @param {boolean} compressed Whether to try decompressing the decrypted body.
 * @returns {Buffer} The decompressed body, or the decrypted bytes as-is when
 *   `compressed` is false or decompression throws.
 * @throws {Error} When the leading record is missing or invalid, or no
 *   decryptable body follows it.
 */
function decryptViewText(viewTextRaw, compressed) {
  const bytes = new Uint8Array(viewTextRaw);
  const { tagId, size, headerSize } = parseRecordHeader(bytes, 0);
  if (tagId !== TAG_DISTRIBUTE_DOC_DATA) {
    throw new Error(`\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC758 \uCCAB \uB808\uCF54\uB4DC\uAC00 DISTRIBUTE_DOC_DATA(${TAG_DISTRIBUTE_DOC_DATA})\uAC00 \uC544\uB2D9\uB2C8\uB2E4 (\uC2E4\uC81C: ${tagId})`);
  }
  const bodyStart = headerSize + size;
  if (bodyStart > bytes.length || size < 256) {
    throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 \uC720\uD6A8\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
  }
  // Only the first 256 payload bytes take part in key derivation.
  const keyMaterial = decryptDistributePayload(bytes.subarray(headerSize, headerSize + 256));
  const aesKey = extractAesKey(keyMaterial);
  const cipher = bytes.subarray(bodyStart);
  if (cipher.length === 0) {
    throw new Error("\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC5D0 \uC554\uD638\uD654\uB41C \uBCF8\uBB38 \uB370\uC774\uD130\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
  }
  const usableLength = cipher.length - cipher.length % 16;
  if (usableLength === 0) {
    throw new Error("\uC554\uD638\uD654\uB41C \uB370\uC774\uD130\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (16\uBC14\uC774\uD2B8 \uBBF8\uB9CC)");
  }
  const decrypted = aes128EcbDecrypt(cipher.subarray(0, usableLength), aesKey);
  if (!compressed) return Buffer.from(decrypted);
  try {
    return decompressStream(Buffer.from(decrypted));
  } catch {
    // Best effort: hand back the decrypted bytes when they do not decompress.
    return Buffer.from(decrypted);
  }
}
2023
+
2024
+ // src/hwp5/cfb-lenient.ts
2025
// Compound File Binary signature found in the first 8 bytes of the file.
var CFB_MAGIC = Buffer.from([208, 207, 17, 224, 161, 177, 26, 225]);
// Sector-chain terminator (0xFFFFFFFE) and unused-sector marker (0xFFFFFFFF).
var END_OF_CHAIN = 4294967294;
var FREE_SECT = 4294967295;
// Hard caps that keep a malformed file from causing unbounded work or memory use.
var MAX_CHAIN_LENGTH = 1e6;
var MAX_DIR_ENTRIES = 1e5;
var MAX_STREAM_SIZE = 100 * 1024 * 1024;

/**
 * Tolerant Compound File Binary (CFB) reader for containers that a strict
 * parser rejects. Structural damage is skipped where possible instead of
 * raising an error.
 *
 * Changes over the previous implementation:
 *  - sector offsets account for containers whose sector size exceeds 512
 *    bytes, where the header occupies one whole sector;
 *  - an out-of-range DIFAT sector ends DIFAT processing instead of raising a
 *    RangeError that aborted the whole parse;
 *  - stream lookup follows the storage hierarchy first, so same-named streams
 *    in different storages (e.g. "BodyText/Section0" and "ViewText/Section0")
 *    are no longer confused; the old name-only search remains as a fallback
 *    for damaged directory trees.
 *
 * @param {Buffer} data Whole file contents.
 * @returns {{findStream(path: string): (Buffer|null), entries(): Array<object>}}
 *   `findStream` returns the stream bytes or null when missing or empty;
 *   `entries` lists all stream directory entries.
 * @throws {Error} When the file is shorter than 512 bytes, the signature does
 *   not match, a sector-size shift is out of range, or a chain is requested
 *   beyond MAX_STREAM_SIZE.
 */
function parseLenientCfb(data) {
  if (data.length < 512) throw new Error("CFB \uD30C\uC77C\uC774 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 512\uBC14\uC774\uD2B8)");
  if (!data.subarray(0, 8).equals(CFB_MAGIC)) throw new Error("CFB \uB9E4\uC9C1 \uBC14\uC774\uD2B8 \uBD88\uC77C\uCE58");
  const sectorSizeShift = data.readUInt16LE(30);
  if (sectorSizeShift < 7 || sectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + sectorSizeShift);
  const sectorSize = 1 << sectorSizeShift;
  const miniSectorSizeShift = data.readUInt16LE(32);
  if (miniSectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uBBF8\uB2C8 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + miniSectorSizeShift);
  const miniSectorSize = 1 << miniSectorSizeShift;
  const fatSectorCount = data.readUInt32LE(44);
  const firstDirSector = data.readUInt32LE(48);
  const miniStreamCutoff = data.readUInt32LE(56);
  const firstMiniFatSector = data.readUInt32LE(60);
  const miniFatSectorCount = data.readUInt32LE(64);
  const firstDifatSector = data.readUInt32LE(68);
  const difatSectorCount = data.readUInt32LE(72);

  // Directory-entry object types and the "no entry" link value.
  const TYPE_STORAGE = 1;
  const TYPE_STREAM = 2;
  const TYPE_ROOT = 5;
  const NO_STREAM = 4294967295;

  // The header fills the first sector; for sectors larger than 512 bytes it is
  // padded up to a full sector, so sector 0 starts at max(512, sectorSize).
  const headerSpan = Math.max(512, sectorSize);
  function sectorOffset(id) {
    return headerSpan + id * sectorSize;
  }
  // Returns the sector's bytes, or an empty buffer when it lies outside the file.
  function readSectorData(id) {
    const off = sectorOffset(id);
    if (off + sectorSize > data.length) return Buffer.alloc(0);
    return data.subarray(off, off + sectorSize);
  }

  // FAT sector list: the first 109 entries live in the header ...
  const fatSectors = [];
  for (let i = 0; i < 109 && fatSectors.length < fatSectorCount; i++) {
    const sid = data.readUInt32LE(76 + i * 4);
    if (sid === FREE_SECT || sid === END_OF_CHAIN) break;
    fatSectors.push(sid);
  }
  // ... and the rest in a chain of DIFAT sectors whose last slot links to the next one.
  let difatSector = firstDifatSector;
  const visitedDifat = /* @__PURE__ */ new Set();
  for (let d = 0; d < difatSectorCount && difatSector !== END_OF_CHAIN && difatSector !== FREE_SECT; d++) {
    if (visitedDifat.has(difatSector)) break;
    visitedDifat.add(difatSector);
    const buf = readSectorData(difatSector);
    // Out-of-range DIFAT sector: keep the FAT sectors gathered so far.
    if (buf.length < sectorSize) break;
    const entriesPerSector = sectorSize / 4 - 1;
    for (let i = 0; i < entriesPerSector && fatSectors.length < fatSectorCount; i++) {
      const sid = buf.readUInt32LE(i * 4);
      if (sid === FREE_SECT || sid === END_OF_CHAIN) continue;
      fatSectors.push(sid);
    }
    difatSector = buf.readUInt32LE(entriesPerSector * 4);
  }

  // Flatten all FAT sectors into one next-sector table; unreadable slots read as free.
  const entriesPerFatSector = sectorSize / 4;
  const fatTable = new Uint32Array(fatSectors.length * entriesPerFatSector);
  for (let fi = 0; fi < fatSectors.length; fi++) {
    const buf = readSectorData(fatSectors[fi]);
    for (let i = 0; i < entriesPerFatSector; i++) {
      fatTable[fi * entriesPerFatSector + i] = i * 4 + 3 < buf.length ? buf.readUInt32LE(i * 4) : FREE_SECT;
    }
  }

  // Follows a FAT chain and returns at most maxBytes; stops on loops, on
  // overlong chains and at the first sector the FAT table does not cover.
  function readChain(startSector, maxBytes) {
    if (startSector === END_OF_CHAIN || startSector === FREE_SECT) return Buffer.alloc(0);
    if (maxBytes > MAX_STREAM_SIZE) throw new Error("\uC2A4\uD2B8\uB9BC\uC774 \uB108\uBB34 \uD07D\uB2C8\uB2E4");
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < maxBytes) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const buf = readSectorData(current);
      const remaining = maxBytes - totalRead;
      chunks.push(remaining < sectorSize ? buf.subarray(0, remaining) : buf);
      totalRead += Math.min(buf.length, remaining);
      current = current < fatTable.length ? fatTable[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }

  // The mini FAT is loaded on first use and cached.
  let miniFatTable = null;
  function getMiniFatTable() {
    if (miniFatTable) return miniFatTable;
    if (miniFatSectorCount === 0 || firstMiniFatSector === END_OF_CHAIN || firstMiniFatSector === FREE_SECT) {
      miniFatTable = new Uint32Array(0);
      return miniFatTable;
    }
    const miniFatData = readChain(firstMiniFatSector, miniFatSectorCount * sectorSize);
    const entries = Math.floor(miniFatData.length / 4);
    miniFatTable = new Uint32Array(entries);
    for (let i = 0; i < entries; i++) {
      miniFatTable[i] = miniFatData.readUInt32LE(i * 4);
    }
    return miniFatTable;
  }

  // Directory: fixed 128-byte entries. Entries with an unusable name length are
  // kept as placeholders so that entry indices stay aligned with the file.
  const dirData = readChain(firstDirSector, MAX_DIR_ENTRIES * 128);
  const dirEntries = [];
  for (let offset = 0; offset + 128 <= dirData.length && dirEntries.length < MAX_DIR_ENTRIES; offset += 128) {
    const nameLen = dirData.readUInt16LE(offset + 64);
    if (nameLen <= 0 || nameLen > 64) {
      dirEntries.push({ name: "", type: 0, startSector: 0, size: 0, left: NO_STREAM, right: NO_STREAM, child: NO_STREAM });
      continue;
    }
    // nameLen counts the terminating UTF-16 NUL, which is not part of the name.
    const nameBytes = nameLen - 2;
    const name = nameBytes > 0 ? dirData.subarray(offset, offset + nameBytes).toString("utf16le") : "";
    const type = dirData[offset + 66];
    // Sibling and child links (entry indices) that form each storage's tree.
    const left = dirData.readUInt32LE(offset + 68);
    const right = dirData.readUInt32LE(offset + 72);
    const child = dirData.readUInt32LE(offset + 76);
    const startSector = dirData.readUInt32LE(offset + 116);
    const size = dirData.readUInt32LE(offset + 120);
    dirEntries.push({ name, type, startSector, size, left, right, child });
  }

  // The mini stream (the root entry's data) is loaded on first use and cached.
  let miniStreamData = null;
  function getMiniStream() {
    if (miniStreamData) return miniStreamData;
    const root = dirEntries[0];
    if (!root || root.type !== TYPE_ROOT) {
      miniStreamData = Buffer.alloc(0);
      return miniStreamData;
    }
    miniStreamData = readChain(root.startSector, root.size || MAX_STREAM_SIZE);
    return miniStreamData;
  }

  // Follows a mini-FAT chain inside the mini stream; mini sectors lying outside
  // the mini stream are skipped.
  function readMiniStream(startSector, size) {
    const mft = getMiniFatTable();
    const ms = getMiniStream();
    if (mft.length === 0 || ms.length === 0) return Buffer.alloc(0);
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < size) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const off = current * miniSectorSize;
      const remaining = size - totalRead;
      const chunkSize = Math.min(miniSectorSize, remaining);
      if (off + chunkSize <= ms.length) {
        chunks.push(ms.subarray(off, off + chunkSize));
      }
      totalRead += chunkSize;
      current = current < mft.length ? mft[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }

  // Streams below the cutoff normally live in the mini stream; if that yields
  // nothing, fall back to reading the entry as a regular sector chain.
  function readStreamData(entry) {
    if (entry.size === 0) return Buffer.alloc(0);
    if (entry.size < miniStreamCutoff) {
      const miniResult = readMiniStream(entry.startSector, entry.size);
      if (miniResult.length > 0) return miniResult;
    }
    return readChain(entry.startSector, entry.size);
  }

  // Collects the direct children of a storage by walking its sibling tree.
  // Invalid links and cycles are ignored rather than treated as errors.
  function childrenOf(parent) {
    const found = [];
    const seen = /* @__PURE__ */ new Set();
    const pending = [parent.child];
    while (pending.length > 0) {
      const id = pending.pop();
      if (id === NO_STREAM || id >= dirEntries.length || seen.has(id)) continue;
      seen.add(id);
      const entry = dirEntries[id];
      found.push(entry);
      pending.push(entry.left, entry.right);
    }
    return found;
  }

  function findEntryByPath(path) {
    const parts = path.replace(/^\//, "").split("/");
    const leaf = parts[parts.length - 1];

    // 1) Hierarchy-aware lookup: descend through the named storages, then look
    //    for the stream among the final storage's children.
    let scope = dirEntries[0] ?? null;
    for (let i = 0; i < parts.length - 1 && scope; i++) {
      scope = childrenOf(scope).find((e) => (e.type === TYPE_STORAGE || e.type === TYPE_ROOT) && e.name === parts[i]) ?? null;
    }
    if (scope) {
      const hit = childrenOf(scope).find((e) => e.type === TYPE_STREAM && e.name === leaf);
      if (hit) return hit;
    }

    // 2) Fallback for damaged trees: match by name only, in directory order.
    const flatNames = parts.length === 1 ? [parts[0]] : [parts.slice(1).join("/"), leaf];
    for (const candidate of flatNames) {
      const hit = dirEntries.find((e) => e.type === TYPE_STREAM && e.name === candidate);
      if (hit) return hit;
    }
    return null;
  }

  return {
    findStream(path) {
      const normalized = path.replace(/^\//, "");
      const entry = findEntryByPath(normalized);
      if (!entry || entry.type !== TYPE_STREAM) return null;
      const stream = readStreamData(entry);
      return stream.length > 0 ? stream : null;
    },
    entries() {
      return dirEntries.filter((e) => e.type === TYPE_STREAM);
    }
  };
}
2202
+
1312
2203
  // src/hwp5/parser.ts
1313
2204
  init_page_range();
1314
2205
  import { createRequire } from "module";
@@ -1317,21 +2208,40 @@ var CFB = require2("cfb");
1317
2208
  var MAX_SECTIONS = 100;
1318
2209
  var MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024;
1319
2210
  function parseHwp5Document(buffer, options) {
1320
- const cfb = CFB.parse(buffer);
1321
- const headerEntry = CFB.find(cfb, "/FileHeader");
1322
- if (!headerEntry?.content) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
1323
- const header = parseFileHeader(Buffer.from(headerEntry.content));
2211
+ let cfb = null;
2212
+ let lenientCfb = null;
2213
+ const warnings = [];
2214
+ try {
2215
+ cfb = CFB.parse(buffer);
2216
+ } catch {
2217
+ try {
2218
+ lenientCfb = parseLenientCfb(buffer);
2219
+ warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
2220
+ } catch {
2221
+ throw new KordocError("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
2222
+ }
2223
+ }
2224
+ const findStream = (path) => {
2225
+ if (cfb) {
2226
+ const entry = CFB.find(cfb, path);
2227
+ return entry?.content ? Buffer.from(entry.content) : null;
2228
+ }
2229
+ return lenientCfb.findStream(path);
2230
+ };
2231
+ const headerData = findStream("/FileHeader");
2232
+ if (!headerData) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
2233
+ const header = parseFileHeader(headerData);
1324
2234
  if (header.signature !== "HWP Document File") throw new KordocError("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
1325
2235
  if (header.flags & FLAG_ENCRYPTED) throw new KordocError("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1326
2236
  if (header.flags & FLAG_DRM) throw new KordocError("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1327
2237
  const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
2238
+ const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
1328
2239
  const metadata = {
1329
2240
  version: `${header.versionMajor}.x`
1330
2241
  };
1331
- extractHwp5Metadata(cfb, metadata);
1332
- const docInfo = parseDocInfoStream(cfb, compressed);
1333
- const warnings = [];
1334
- const sections = findSections(cfb);
2242
+ if (cfb) extractHwp5Metadata(cfb, metadata);
2243
+ const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
2244
+ const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
1335
2245
  if (sections.length === 0) throw new KordocError("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
1336
2246
  metadata.pageCount = sections.length;
1337
2247
  const pageFilter = options?.pages ? parsePageRange(options.pages, sections.length) : null;
@@ -1343,7 +2253,7 @@ function parseHwp5Document(buffer, options) {
1343
2253
  if (pageFilter && !pageFilter.has(si + 1)) continue;
1344
2254
  try {
1345
2255
  const sectionData = sections[si];
1346
- const data = compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
2256
+ const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
1347
2257
  totalDecompressed += data.length;
1348
2258
  if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new KordocError("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
1349
2259
  const records = readRecords(data);
@@ -1356,7 +2266,7 @@ function parseHwp5Document(buffer, options) {
1356
2266
  warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
1357
2267
  }
1358
2268
  }
1359
- const images = extractHwp5Images(cfb, blocks, compressed, warnings);
2269
+ const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
1360
2270
  if (docInfo) {
1361
2271
  detectHwp5Headings(blocks, docInfo);
1362
2272
  }
@@ -1375,6 +2285,15 @@ function parseDocInfoStream(cfb, compressed) {
1375
2285
  return null;
1376
2286
  }
1377
2287
  }
2288
/**
 * Parse DocInfo from an already-extracted stream buffer.
 *
 * Best effort: a missing stream, or any failure while inflating or parsing
 * it, yields null rather than an exception.
 *
 * @param {Buffer|null} raw - Raw DocInfo stream bytes, or null if absent.
 * @param {boolean} compressed - Whether the stream must be inflated first.
 * @returns {object|null} Parsed DocInfo, or null when unavailable.
 */
function parseDocInfoFromStream(raw, compressed) {
  let docInfo = null;
  if (raw) {
    try {
      let payload = raw;
      if (compressed) {
        payload = decompressStream(raw);
      }
      docInfo = parseDocInfo(readRecords(payload));
    } catch {
      // DocInfo is optional for the caller; treat a corrupt stream as absent.
      docInfo = null;
    }
  }
  return docInfo;
}
1378
2297
  function detectHwp5Headings(blocks, docInfo) {
1379
2298
  let baseFontSize = 0;
1380
2299
  for (const style of docInfo.styles) {
@@ -1454,6 +2373,20 @@ function extractHwp5Metadata(cfb, metadata) {
1454
2373
  } catch {
1455
2374
  }
1456
2375
  }
2376
/**
 * Collect the decrypted `/ViewText/SectionN` streams of a parsed CFB
 * container, in ascending section order.
 *
 * The scan stops at the first missing section, at the first section that
 * fails to decrypt, or after MAX_SECTIONS sections.
 *
 * @param {object} cfb - Container returned by `CFB.parse`.
 * @param {boolean} compressed - Passed through to `decryptViewText`.
 * @returns {Array} Decrypted section payloads, ordered by section index.
 */
function findViewTextSections(cfb, compressed) {
  const decryptedSections = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const entry = CFB.find(cfb, `/ViewText/Section${index}`);
    if (!entry?.content) {
      break;
    }
    try {
      decryptedSections.push(decryptViewText(Buffer.from(entry.content), compressed));
    } catch {
      // A section that cannot be decrypted ends the scan.
      break;
    }
    index += 1;
  }
  // Sections are appended in ascending index order, so no sort is needed.
  return decryptedSections;
}
1457
2390
  function findSections(cfb) {
1458
2391
  const sections = [];
1459
2392
  for (let i = 0; i < MAX_SECTIONS; i++) {
@@ -1472,6 +2405,38 @@ function findSections(cfb) {
1472
2405
  }
1473
2406
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
1474
2407
  }
2408
/**
 * Collect the raw BodyText section streams from a leniently parsed CFB
 * container, ordered by section index.
 *
 * The streams are returned exactly as stored. parseHwp5Document inflates
 * each section itself when the document is compressed, so inflating here as
 * well would decompress every section twice and make each one fail to parse.
 *
 * Lookup first probes `/BodyText/SectionN` (then bare `SectionN`) for
 * N = 0, 1, ... until one is missing. If nothing is found that way, every
 * stream whose name starts with "Section" is taken instead.
 *
 * @param {{ findStream(path: string): (Buffer|null), entries(): Array<{name: string}> }} lcfb
 *   Lenient container.
 * @param {boolean} compressed - Unused; kept so existing call sites stay valid.
 * @returns {Buffer[]} Raw section streams sorted by section index.
 */
function findSectionsLenient(lcfb, compressed) {
  const sections = [];
  for (let i = 0; i < MAX_SECTIONS; i++) {
    const raw = lcfb.findStream(`/BodyText/Section${i}`) ?? lcfb.findStream(`Section${i}`);
    if (!raw) break;
    sections.push({ idx: i, content: raw });
  }
  if (sections.length === 0) {
    // Fallback for containers where Section0 is missing or unreachable.
    for (const e of lcfb.entries()) {
      if (sections.length >= MAX_SECTIONS) break;
      if (!e.name.startsWith("Section")) continue;
      // A non-numeric suffix parses to NaN and is filed under index 0.
      const idx = parseInt(e.name.replace("Section", ""), 10) || 0;
      const raw = lcfb.findStream(e.name);
      if (raw) sections.push({ idx, content: raw });
    }
  }
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
}
2427
/**
 * Collect the decrypted ViewText section streams from a leniently parsed
 * CFB container, in ascending section order.
 *
 * Each section is looked up as `/ViewText/SectionN`, then as bare
 * `SectionN`. The scan stops at the first missing section, at the first
 * section that fails to decrypt, or after MAX_SECTIONS sections.
 *
 * @param {{ findStream(path: string): (Buffer|null) }} lcfb - Lenient container.
 * @param {boolean} compressed - Passed through to `decryptViewText`.
 * @returns {Array} Decrypted section payloads, ordered by section index.
 */
function findViewTextSectionsLenient(lcfb, compressed) {
  const decryptedSections = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const stream = lcfb.findStream(`/ViewText/Section${index}`) ?? lcfb.findStream(`Section${index}`);
    if (!stream) {
      break;
    }
    try {
      decryptedSections.push(decryptViewText(stream, compressed));
    } catch {
      // A section that cannot be decrypted ends the scan.
      break;
    }
    index += 1;
  }
  // Sections are appended in ascending index order, so no sort is needed.
  return decryptedSections;
}
1475
2440
  var TAG_SHAPE_COMPONENT = 74;
1476
2441
  function extractBinDataId(records, ctrlIdx) {
1477
2442
  const ctrlLevel = records[ctrlIdx].level;
@@ -1546,6 +2511,53 @@ function extractHwp5Images(cfb, blocks, compressed, warnings) {
1546
2511
  }
1547
2512
  return images;
1548
2513
  }
2514
/**
 * Resolve image blocks against the BIN#### streams of a leniently parsed
 * CFB container.
 *
 * Every stream whose name starts with "BIN" plus four decimal digits is
 * indexed by that number. Each image block (whose `text` holds the BinData
 * id) is then either
 *   - given a generated filename and `imageData`, with the image appended
 *     to the returned list, or
 *   - downgraded to a placeholder paragraph, with a SKIPPED_IMAGE warning,
 *     when the stream is missing or its format is not recognised.
 *
 * Mutates `blocks` and `warnings` in place.
 *
 * @param {{ entries(): Array<{name: string}>, findStream(path: string): (Buffer|null) }} lcfb
 *   Lenient container.
 * @param {Array<object>} blocks - Parsed blocks; image blocks are updated in place.
 * @param {boolean} compressed - Whether BinData streams should be inflated.
 * @param {Array<object>} warnings - Receives SKIPPED_IMAGE warnings.
 * @returns {Array<{filename: string, data: Uint8Array, mimeType: string}>} Extracted images.
 */
function extractHwp5ImagesLenient(lcfb, blocks, compressed, warnings) {
  const binDataMap = /* @__PURE__ */ new Map();
  const binRe = /^BIN(\d{4})/i;
  for (const e of lcfb.entries()) {
    const match = e.name.match(binRe);
    if (!match) continue;
    const idx = parseInt(match[1], 10);
    let raw = lcfb.findStream(e.name);
    if (!raw) continue;
    if (compressed) {
      try {
        raw = decompressStream(raw);
      } catch {
        // Best effort: if the stream does not inflate, keep the stored bytes.
      }
    }
    binDataMap.set(idx, { data: raw, name: e.name });
  }
  if (binDataMap.size === 0) return [];
  const images = [];
  let imageIndex = 0;
  for (const block of blocks) {
    if (block.type !== "image" || !block.text) continue;
    const binId = parseInt(block.text, 10);
    if (Number.isNaN(binId)) continue;
    const bin = binDataMap.get(binId);
    if (!bin) {
      // The message previously contained U+FFFD mojibake in place of U+C5C6.
      warnings.push({ page: block.pageNumber, message: `BinData ${binId} \uC5C6\uC74C`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: BinData ${binId}]`;
      continue;
    }
    const mime = detectImageMime(bin.data);
    if (!mime) {
      warnings.push({ page: block.pageNumber, message: `BinData ${binId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: ${bin.name}]`;
      continue;
    }
    imageIndex++;
    const ext = mime.includes("jpeg") ? "jpg" : mime.includes("png") ? "png" : mime.includes("gif") ? "gif" : mime.includes("bmp") ? "bmp" : "bin";
    const filename = `image_${String(imageIndex).padStart(3, "0")}.${ext}`;
    images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
    block.text = filename;
    block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
  }
  return images;
}
1549
2561
  function parseSection(records, docInfo, warnings, sectionNum) {
1550
2562
  const blocks = [];
1551
2563
  let i = 0;
@@ -1582,12 +2594,61 @@ function parseSection(records, docInfo, warnings, sectionNum) {
1582
2594
  }
1583
2595
  } else if (ctrlId === " elo" || ctrlId === "ole ") {
1584
2596
  warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
2597
+ } else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
2598
+ const noteText = extractNoteText(records, i);
2599
+ if (noteText && blocks.length > 0) {
2600
+ const lastBlock = blocks[blocks.length - 1];
2601
+ if (lastBlock.type === "paragraph") {
2602
+ lastBlock.footnoteText = lastBlock.footnoteText ? lastBlock.footnoteText + "; " + noteText : noteText;
2603
+ }
2604
+ }
2605
+ } else if (ctrlId === "%tok" || ctrlId === "klnk") {
2606
+ const url = extractHyperlinkUrl(rec.data);
2607
+ if (url && blocks.length > 0) {
2608
+ const lastBlock = blocks[blocks.length - 1];
2609
+ if (lastBlock.type === "paragraph" && !lastBlock.href) {
2610
+ lastBlock.href = sanitizeHref(url) ?? void 0;
2611
+ }
2612
+ }
1585
2613
  }
1586
2614
  }
1587
2615
  i++;
1588
2616
  }
1589
2617
  return blocks;
1590
2618
  }
2619
/**
 * Gather the text of a footnote/endnote control.
 *
 * Walks the records after the control while they are nested deeper than
 * it, looking at no more than 99 following records, and joins the trimmed
 * text of every non-empty TAG_PARA_TEXT record with single spaces.
 *
 * @param {Array<{level: number, tagId: number, data: Buffer}>} records - Section records.
 * @param {number} ctrlIdx - Index of the note control record.
 * @returns {string|null} Joined note text, or null when the note has no text.
 */
function extractNoteText(records, ctrlIdx) {
  const parentLevel = records[ctrlIdx].level;
  const scanLimit = Math.min(records.length, ctrlIdx + 100);
  const pieces = [];
  for (let pos = ctrlIdx + 1; pos < scanLimit; pos++) {
    const rec = records[pos];
    // A record at or above the control's level ends the note's subtree.
    if (rec.level <= parentLevel) break;
    if (rec.tagId !== TAG_PARA_TEXT) continue;
    const text = extractText(rec.data).trim();
    if (text) {
      pieces.push(text);
    }
  }
  return pieces.length === 0 ? null : pieces.join(" ");
}
2632
/**
 * Best-effort extraction of an http(s) URL from a control record payload.
 *
 * Searches the buffer for UTF-16LE "http" and reads code units from there
 * up to the next NUL (or the end of the buffer). The first candidate that
 * looks like `http://…` or `https://…` and is shorter than 2000 characters
 * is returned. Earlier "http" occurrences that are not URLs (e.g. "httpd")
 * are skipped instead of aborting the search.
 *
 * NOTE(review): the URL is assumed to be NUL-terminated; confirm against
 * the HWP record layout, since a length-prefixed string could be followed
 * directly by non-text bytes.
 *
 * @param {Buffer} data - Raw control record payload.
 * @returns {string|null} The URL, or null if none is found or the input is unusable.
 */
function extractHyperlinkUrl(data) {
  try {
    const httpSig = Buffer.from("http", "utf16le");
    let idx = data.indexOf(httpSig);
    while (idx >= 0) {
      let end = idx;
      while (end + 1 < data.length && data.readUInt16LE(end) !== 0) {
        end += 2;
      }
      const url = data.subarray(idx, end).toString("utf16le");
      const looksLikeUrl = /^https?:\/\/.+/.test(url);
      if (looksLikeUrl && url.length < 2e3) {
        return url;
      }
      // An overlong URL is skipped entirely so no fragment of it is returned;
      // a non-URL match only skips the "http" signature itself.
      idx = data.indexOf(httpSig, looksLikeUrl ? end : idx + httpSig.length);
    }
  } catch {
    // Malformed or non-Buffer input: treat as "no hyperlink".
  }
  return null;
}
1591
2652
  function resolveCharStyle(charShapeIds, docInfo) {
1592
2653
  if (charShapeIds.length === 0 || docInfo.charShapes.length === 0) return void 0;
1593
2654
  const freq = /* @__PURE__ */ new Map();