kordoc 1.9.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ import {
6
6
  precheckZipSize,
7
7
  sanitizeHref,
8
8
  toArrayBuffer
9
- } from "./chunk-AHW56LNX.js";
9
+ } from "./chunk-UMO6QQO5.js";
10
10
  import {
11
11
  parsePageRange
12
12
  } from "./chunk-MOL7MDBG.js";
@@ -50,12 +50,6 @@ async function detectZipFormat(buffer) {
50
50
  }
51
51
 
52
52
  // src/table/builder.ts
53
- var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
54
- function sanitizeHref2(href) {
55
- const trimmed = href.trim();
56
- if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
57
- return trimmed;
58
- }
59
53
  var MAX_COLS = 200;
60
54
  var MAX_ROWS = 1e4;
61
55
  function buildTable(rows) {
@@ -219,7 +213,7 @@ function blocksToMarkdown(blocks) {
219
213
  continue;
220
214
  }
221
215
  if (block.href) {
222
- const href = sanitizeHref2(block.href);
216
+ const href = sanitizeHref(block.href);
223
217
  if (href) text = `[${text}](${href})`;
224
218
  }
225
219
  if (block.footnoteText) {
@@ -1022,13 +1016,16 @@ var TAG_TABLE = 77;
1022
1016
  var TAG_DOC_CHAR_SHAPE = 55;
1023
1017
  var TAG_DOC_STYLE = 58;
1024
1018
  var CHAR_LINE = 0;
1019
+ var CHAR_SECTION_BREAK = 10;
1025
1020
  var CHAR_PARA = 13;
1026
1021
  var CHAR_TAB = 9;
1027
1022
  var CHAR_HYPHEN = 30;
1028
1023
  var CHAR_NBSP = 31;
1029
1024
  var CHAR_FIXED_NBSP = 24;
1025
+ var CHAR_FIXED_WIDTH = 25;
1030
1026
  var FLAG_COMPRESSED = 1 << 0;
1031
1027
  var FLAG_ENCRYPTED = 1 << 1;
1028
+ var FLAG_DISTRIBUTION = 1 << 2;
1032
1029
  var FLAG_DRM = 1 << 4;
1033
1030
  var MAX_RECORDS = 5e5;
1034
1031
  function readRecords(data) {
@@ -1123,27 +1120,41 @@ function extractText(data) {
1123
1120
  const ch = data.readUInt16LE(i);
1124
1121
  i += 2;
1125
1122
  switch (ch) {
1123
+ // ── char 타입 (2바이트만, 확장 데이터 없음) ──
1126
1124
  case CHAR_LINE:
1127
1125
  result += "\n";
1128
1126
  break;
1129
- case CHAR_PARA:
1130
- break;
1131
- case CHAR_TAB:
1132
- result += " ";
1127
+ case CHAR_SECTION_BREAK:
1128
+ result += "\n";
1133
1129
  if (i + 14 <= data.length) i += 14;
1134
1130
  break;
1131
+ case CHAR_PARA:
1132
+ break;
1133
+ // 문단 끝
1135
1134
  case CHAR_HYPHEN:
1136
1135
  result += "-";
1137
1136
  break;
1138
1137
  case CHAR_NBSP:
1138
+ result += " ";
1139
+ break;
1139
1140
  case CHAR_FIXED_NBSP:
1141
+ result += "\xA0";
1142
+ break;
1143
+ // 진짜 NBSP
1144
+ case CHAR_FIXED_WIDTH:
1140
1145
  result += " ";
1141
1146
  break;
1147
+ // 고정폭 공백
1148
+ // ── inline 타입 (2바이트 + 14바이트 확장) ──
1149
+ case CHAR_TAB:
1150
+ result += " ";
1151
+ if (i + 14 <= data.length) i += 14;
1152
+ break;
1142
1153
  default:
1143
1154
  if (ch >= 1 && ch <= 31) {
1144
- const isExt = ch >= 1 && ch <= 3 || ch >= 10 && ch <= 18 || ch >= 21 && ch <= 23;
1155
+ const isExtended = ch >= 1 && ch <= 3 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= 18 || ch >= 21 && ch <= 23;
1145
1156
  const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
1146
- if ((isExt || isInline) && i + 14 <= data.length) i += 14;
1157
+ if ((isExtended || isInline) && i + 14 <= data.length) i += 14;
1147
1158
  } else if (ch >= 32) {
1148
1159
  if (ch >= 55296 && ch <= 56319 && i + 1 < data.length) {
1149
1160
  const lo = data.readUInt16LE(i);
@@ -1162,6 +1173,886 @@ function extractText(data) {
1162
1173
  return result;
1163
1174
  }
1164
1175
 
1176
+ // src/hwp5/aes.ts
1177
// AES forward substitution table, indexed by input byte (16 entries per row).
var S_BOX = new Uint8Array([
  99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118,
  202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192,
  183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21,
  4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117,
  9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132,
  83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207,
  208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168,
  81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210,
  205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115,
  96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219,
  224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121,
  231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8,
  186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138,
  112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158,
  225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223,
  140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22
]);
// Inverse substitution table: INV_S_BOX[S_BOX[x]] === x for every byte x.
var INV_S_BOX = new Uint8Array([
  82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251,
  124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203,
  84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78,
  8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37,
  114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146,
  108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132,
  144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6,
  208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107,
  58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115,
  150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110,
  71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27,
  252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244,
  31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95,
  96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239,
  160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97,
  23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125
]);
// Round constants consumed by the key schedule, one per expansion round.
var RCON = new Uint8Array([1, 2, 4, 8, 16, 32, 64, 128, 27, 54]);
1694
/**
 * Multiplies two bytes as polynomials over GF(2), reducing with 0x1b whenever
 * the running multiplicand overflows bit 7.
 *
 * @param {number} a - Multiplicand.
 * @param {number} b - Multiplier; only its low 8 bits are consumed.
 * @returns {number} The reduced product.
 */
function gmul(a, b) {
  let product = 0;
  let multiplicand = a;
  let multiplier = b;
  for (let bit = 0; bit < 8; bit += 1) {
    if ((multiplier & 1) !== 0) product ^= multiplicand;
    // Remember the top bit before shifting so the reduction can be applied.
    const overflow = (multiplicand & 0x80) !== 0;
    multiplicand = (multiplicand << 1) & 0xff;
    if (overflow) multiplicand ^= 0x1b;
    multiplier >>= 1;
  }
  return product;
}
1705
/**
 * Expands a 16-byte key into 44 big-endian 32-bit round-key words
 * (11 round keys of 4 words each).
 *
 * @param {ArrayLike<number>} key - Key bytes; the first 16 are read.
 * @returns {Uint32Array} The 44-word key schedule.
 */
function expandKey(key) {
  const schedule = new Uint32Array(44);
  // The first four words are the key itself, packed big-endian.
  for (let word = 0; word < 4; word += 1) {
    const at = word * 4;
    schedule[word] = key[at] << 24 | key[at + 1] << 16 | key[at + 2] << 8 | key[at + 3];
  }
  for (let word = 4; word < 44; word += 1) {
    let mixed = schedule[word - 1];
    if (word % 4 === 0) {
      // Every fourth word: rotate left one byte, substitute each byte, then
      // fold the round constant into the top byte.
      const rotated = (mixed << 8 | mixed >>> 24) >>> 0;
      const substituted = S_BOX[rotated >>> 24 & 255] << 24
        | S_BOX[rotated >>> 16 & 255] << 16
        | S_BOX[rotated >>> 8 & 255] << 8
        | S_BOX[rotated & 255];
      mixed = (substituted ^ (RCON[word / 4 - 1] << 24)) >>> 0;
    }
    schedule[word] = (schedule[word - 4] ^ mixed) >>> 0;
  }
  return schedule;
}
1721
/**
 * Decrypts one 16-byte block with an expanded key schedule.
 *
 * @param {ArrayLike<number>} block - The 16 ciphertext bytes (not modified).
 * @param {Uint32Array} roundKeys - Schedule produced by `expandKey`.
 * @returns {Uint8Array} A new 16-byte array holding the decrypted block.
 */
function decryptBlock(block, roundKeys) {
  // Work on a private copy so the caller's block is left untouched.
  const state = new Uint8Array(16);
  for (let idx = 0; idx < 16; idx += 1) state[idx] = block[idx];
  let round = 10;
  addRoundKey(state, roundKeys, round);
  // Rounds 9 down to 1 include the inverse column mix.
  while (--round > 0) {
    invShiftRows(state);
    invSubBytes(state);
    addRoundKey(state, roundKeys, round);
    invMixColumns(state);
  }
  // The final round (key 0) has no column mix.
  invShiftRows(state);
  invSubBytes(state);
  addRoundKey(state, roundKeys, 0);
  return state;
}
1736
/**
 * XORs one round key into the 16-byte state in place.
 *
 * @param {Uint8Array} s - State bytes, four consecutive bytes per column.
 * @param {Uint32Array} w - Key schedule of big-endian 32-bit words.
 * @param {number} round - Round index; words `round*4 .. round*4+3` are used.
 */
function addRoundKey(s, w, round) {
  const firstWord = round * 4;
  for (let col = 0; col < 4; col += 1) {
    const word = w[firstWord + col];
    const at = col * 4;
    // Most significant byte of the word goes to the first byte of the column.
    for (let row = 0; row < 4; row += 1) {
      s[at + row] ^= (word >>> (24 - 8 * row)) & 255;
    }
  }
}
1746
/**
 * Replaces each of the 16 state bytes with its INV_S_BOX entry, in place.
 *
 * @param {Uint8Array} s - State bytes to substitute.
 */
function invSubBytes(s) {
  let idx = 0;
  while (idx < 16) {
    s[idx] = INV_S_BOX[s[idx]];
    idx += 1;
  }
}
1749
/**
 * Rotates rows 1, 2 and 3 of the state right by 1, 2 and 3 columns, in place.
 * The state stores each column in four consecutive bytes, so row `r` lives at
 * indices `r, r + 4, r + 8, r + 12`. Row 0 is left as is.
 *
 * @param {Uint8Array} s - The 16 state bytes.
 */
function invShiftRows(s) {
  const before = Array.from(s);
  for (let row = 1; row < 4; row += 1) {
    for (let col = 0; col < 4; col += 1) {
      // The byte landing in column `col` comes from `row` columns to the left.
      const sourceCol = (col - row + 4) % 4;
      s[col * 4 + row] = before[sourceCol * 4 + row];
    }
  }
}
1767
/**
 * Applies the inverse column mix to each 4-byte column of the state in place.
 * Every output byte is the XOR of the four column bytes multiplied (via
 * `gmul`) by a rotation of the coefficients 14, 11, 13, 9.
 *
 * @param {Uint8Array} s - The 16 state bytes.
 */
function invMixColumns(s) {
  const coefficients = [14, 11, 13, 9];
  for (let base = 0; base < 16; base += 4) {
    // Snapshot the column first: every output byte needs all four inputs.
    const column = [s[base], s[base + 1], s[base + 2], s[base + 3]];
    for (let row = 0; row < 4; row += 1) {
      s[base + row] = column.reduce(
        (acc, value, k) => acc ^ gmul(value, coefficients[(k - row + 4) % 4]),
        0
      );
    }
  }
}
1777
/**
 * Decrypts a buffer block by block with a single 16-byte key; each 16-byte
 * block is decrypted independently of the others.
 *
 * @param {Uint8Array} data - Ciphertext whose length is a multiple of 16.
 * @param {Uint8Array} key - The 16-byte key.
 * @returns {Uint8Array} A new array of the same length holding the plaintext.
 * @throws {Error} If the key is not 16 bytes or the data is not block-aligned.
 */
function aes128EcbDecrypt(data, key) {
  if (key.length !== 16) {
    throw new Error("AES-128 \uD0A4\uB294 16\uBC14\uC774\uD2B8\uC5EC\uC57C \uD569\uB2C8\uB2E4");
  }
  if (data.length % 16 !== 0) {
    throw new Error("AES ECB \uC785\uB825\uC740 16\uBC14\uC774\uD2B8\uC758 \uBC30\uC218\uC5EC\uC57C \uD569\uB2C8\uB2E4");
  }
  const schedule = expandKey(key);
  const plaintext = new Uint8Array(data.length);
  let cursor = 0;
  while (cursor < data.length) {
    const next = cursor + 16;
    plaintext.set(decryptBlock(data.subarray(cursor, next), schedule), cursor);
    cursor = next;
  }
  return plaintext;
}
1789
+
1790
+ // src/hwp5/crypto.ts
1791
/**
 * Linear congruential generator that keeps a 32-bit unsigned state and is
 * intended to reproduce the output of MSVC's `rand()`.
 */
var MsvcLcg = class {
  seed;
  /**
   * @param {number} seed - Initial state; coerced to an unsigned 32-bit value.
   */
  constructor(seed) {
    this.seed = seed >>> 0;
  }
  /**
   * Advances the state and returns a value in the range 0 to 0x7FFF
   * (compatible with MSVC rand()).
   *
   * @returns {number} The next pseudo-random value.
   */
  rand() {
    const next = (Math.imul(this.seed, 214013) + 2531011) >>> 0;
    this.seed = next;
    // Output is bits 16..30 of the new state.
    return (next >>> 16) & 0x7fff;
  }
};
1802
/**
 * De-obfuscates the first 256 bytes of a distribution-document payload.
 * Bytes 0-3 are copied verbatim and also form the little-endian seed of an
 * `MsvcLcg`; the remaining bytes are XORed with a key byte that changes after
 * runs of 1 to 16 bytes, with both the key byte and the run length drawn from
 * the generator.
 *
 * NOTE(review): the first XOR run here begins at byte 4. Other HWP readers
 * appear to begin the first run at byte 0 and merely skip the XOR for bytes
 * 0-3, which would shift the key stream; verify against a real
 * distribution document.
 *
 * @param {ArrayLike<number>} payload - At least 256 payload bytes.
 * @returns {Uint8Array} A new 256-byte array with the decoded payload.
 * @throws {Error} If the payload is shorter than 256 bytes.
 */
function decryptDistributePayload(payload) {
  if (payload.length < 256) {
    throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
  }
  const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
  const generator = new MsvcLcg(seed);
  const decoded = new Uint8Array(256);
  for (let k = 0; k < 4; k += 1) decoded[k] = payload[k];
  let pos = 4;
  while (pos < 256) {
    // Draw order matters: key byte first, then the run length.
    const mask = generator.rand() & 255;
    const runEnd = Math.min(256, pos + (generator.rand() & 15) + 1);
    for (; pos < runEnd; pos += 1) {
      decoded[pos] = payload[pos] ^ mask;
    }
  }
  return decoded;
}
1821
/**
 * Picks the 16-byte key out of a decoded distribution payload. The key starts
 * at offset `4 + (low nibble of byte 0)`.
 *
 * @param {Uint8Array} decryptedPayload - Output of `decryptDistributePayload`.
 * @returns {Uint8Array} The 16 key bytes, as returned by `slice`.
 * @throws {Error} If the 16 bytes would run past the end of the payload.
 */
function extractAesKey(decryptedPayload) {
  const start = (decryptedPayload[0] & 15) + 4;
  const end = start + 16;
  if (end <= decryptedPayload.length) {
    return decryptedPayload.slice(start, end);
  }
  throw new Error("AES \uD0A4 \uCD94\uCD9C \uC2E4\uD328: \uC624\uD504\uC14B\uC774 payload \uBC94\uC704\uB97C \uCD08\uACFC\uD569\uB2C8\uB2E4");
}
1828
/**
 * Decodes a record header at `offset`. The 32-bit little-endian header word
 * carries the tag id in bits 0-9 and a 12-bit size in bits 20-31; a size field
 * of 4095 means the real size follows as another 32-bit little-endian word.
 *
 * @param {ArrayLike<number>} data - Bytes containing the record.
 * @param {number} offset - Index of the first header byte.
 * @returns {{tagId: number, size: number, headerSize: number}} Tag id, payload
 *   size, and the number of header bytes consumed (4 or 8).
 * @throws {Error} If the header, or its size extension, runs past the data.
 */
function parseRecordHeader(data, offset) {
  const u32At = (pos) =>
    (data[pos] | data[pos + 1] << 8 | data[pos + 2] << 16 | data[pos + 3] << 24) >>> 0;
  if (data.length < offset + 4) {
    throw new Error("\uB808\uCF54\uB4DC \uD5E4\uB354 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  }
  const word = u32At(offset);
  const tagId = word & 1023;
  const inlineSize = (word >>> 20) & 4095;
  if (inlineSize !== 4095) {
    return { tagId, size: inlineSize, headerSize: 4 };
  }
  if (data.length < offset + 8) {
    throw new Error("\uD655\uC7A5 \uB808\uCF54\uB4DC \uD06C\uAE30 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  }
  return { tagId, size: u32At(offset + 4), headerSize: 8 };
}
1841
// Tag id expected on the first record of a distribution-document stream.
var TAG_DISTRIBUTE_DOC_DATA = 16 + 28;
/**
 * Decrypts one ViewText section stream of a distribution document.
 *
 * The stream must open with a DISTRIBUTE_DOC_DATA record whose payload is at
 * least 256 bytes. That payload yields the AES key, and everything after the
 * record is decrypted with it; trailing bytes that do not fill a whole
 * 16-byte block are dropped.
 *
 * @param {Uint8Array} viewTextRaw - Raw stream bytes.
 * @param {boolean} compressed - When true, the decrypted bytes are passed
 *   through `decompressStream`; if that throws, the decrypted bytes are
 *   returned unchanged (best effort).
 * @returns {Buffer} The decrypted, and possibly decompressed, section data.
 * @throws {Error} If the leading record or the encrypted body is missing or
 *   malformed.
 */
function decryptViewText(viewTextRaw, compressed) {
  const bytes = new Uint8Array(viewTextRaw);
  const header = parseRecordHeader(bytes, 0);
  if (header.tagId !== TAG_DISTRIBUTE_DOC_DATA) {
    throw new Error(`\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC758 \uCCAB \uB808\uCF54\uB4DC\uAC00 DISTRIBUTE_DOC_DATA(${TAG_DISTRIBUTE_DOC_DATA})\uAC00 \uC544\uB2D9\uB2C8\uB2E4 (\uC2E4\uC81C: ${header.tagId})`);
  }
  const bodyStart = header.headerSize + header.size;
  if (bodyStart > bytes.length || header.size < 256) {
    throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 \uC720\uD6A8\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
  }
  // Only the first 256 payload bytes take part in key derivation.
  const seedPayload = bytes.subarray(header.headerSize, header.headerSize + 256);
  const key = extractAesKey(decryptDistributePayload(seedPayload));
  const cipher = bytes.subarray(bodyStart);
  if (cipher.length === 0) {
    throw new Error("\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC5D0 \uC554\uD638\uD654\uB41C \uBCF8\uBB38 \uB370\uC774\uD130\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
  }
  // Round the length down to a whole number of 16-byte blocks.
  const usable = cipher.length - cipher.length % 16;
  if (usable === 0) {
    throw new Error("\uC554\uD638\uD654\uB41C \uB370\uC774\uD130\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (16\uBC14\uC774\uD2B8 \uBBF8\uB9CC)");
  }
  const decrypted = aes128EcbDecrypt(cipher.subarray(0, usable), key);
  if (!compressed) return Buffer.from(decrypted);
  try {
    return decompressStream(Buffer.from(decrypted));
  } catch {
    // Decompression failed: hand back the decrypted bytes as they are.
    return Buffer.from(decrypted);
  }
}
1876
+
1877
+ // src/hwp5/cfb-lenient.ts
1878
// Signature bytes that must open a CFB container.
var CFB_MAGIC = Buffer.from([208, 207, 17, 224, 161, 177, 26, 225]);
// FAT sentinel values: end of a sector chain, and an unallocated sector.
var END_OF_CHAIN = 4294967294;
var FREE_SECT = 4294967295;
// Safety limits applied while walking chains and directories of untrusted files.
var MAX_CHAIN_LENGTH = 1e6;
var MAX_DIR_ENTRIES = 1e5;
var MAX_STREAM_SIZE = 100 * 1024 * 1024;
/**
 * Best-effort CFB container reader. It tolerates damage by treating
 * out-of-range sectors as empty, stopping on chain loops, and looking streams
 * up by name only (storage names in a path are ignored).
 *
 * @param {Buffer} data - The whole container file.
 * @returns {{findStream: (path: string) => (Buffer|null), entries: () => Array<{name: string, type: number, startSector: number, size: number}>}}
 *   `findStream` returns the bytes of the first stream whose name matches the
 *   path (or null when it is missing or empty); `entries` lists every stream
 *   directory entry.
 * @throws {Error} If the file is shorter than 512 bytes, the signature does
 *   not match, or a sector-size shift is out of range. `findStream` throws if
 *   a requested stream is larger than MAX_STREAM_SIZE.
 */
function parseLenientCfb(data) {
  if (data.length < 512) throw new Error("CFB \uD30C\uC77C\uC774 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 512\uBC14\uC774\uD2B8)");
  if (!data.subarray(0, 8).equals(CFB_MAGIC)) throw new Error("CFB \uB9E4\uC9C1 \uBC14\uC774\uD2B8 \uBD88\uC77C\uCE58");
  const sectorSizeShift = data.readUInt16LE(30);
  if (sectorSizeShift < 7 || sectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + sectorSizeShift);
  const sectorSize = 1 << sectorSizeShift;
  const miniSectorSizeShift = data.readUInt16LE(32);
  if (miniSectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uBBF8\uB2C8 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + miniSectorSizeShift);
  const miniSectorSize = 1 << miniSectorSizeShift;
  const fatSectorCount = data.readUInt32LE(44);
  const firstDirSector = data.readUInt32LE(48);
  const miniStreamCutoff = data.readUInt32LE(56);
  const firstMiniFatSector = data.readUInt32LE(60);
  const miniFatSectorCount = data.readUInt32LE(64);
  const firstDifatSector = data.readUInt32LE(68);
  const difatSectorCount = data.readUInt32LE(72);
  // The header is padded to a full sector, so with sectors larger than 512
  // bytes (e.g. 4096-byte sectors) sector 0 starts at `sectorSize`, not 512.
  // For sectors of 512 bytes or less the 512-byte header is kept as before.
  const headerSpan = Math.max(512, sectorSize);
  function sectorOffset(id) {
    return headerSpan + id * sectorSize;
  }
  // Returns the sector's bytes, or an empty buffer when it lies outside the file.
  function readSectorData(id) {
    const off = sectorOffset(id);
    if (off + sectorSize > data.length) return Buffer.alloc(0);
    return data.subarray(off, off + sectorSize);
  }
  // FAT sector ids: first the (up to) 109 slots stored in the header ...
  const fatSectors = [];
  for (let i = 0; i < 109 && fatSectors.length < fatSectorCount; i++) {
    const sid = data.readUInt32LE(76 + i * 4);
    if (sid === FREE_SECT || sid === END_OF_CHAIN) break;
    fatSectors.push(sid);
  }
  // ... then any listed in the chained DIFAT sectors.
  let difatSector = firstDifatSector;
  const visitedDifat = /* @__PURE__ */ new Set();
  const entriesPerDifatSector = sectorSize / 4 - 1;
  for (let d = 0; d < difatSectorCount && difatSector !== END_OF_CHAIN && difatSector !== FREE_SECT; d++) {
    if (visitedDifat.has(difatSector)) break;
    visitedDifat.add(difatSector);
    const buf = readSectorData(difatSector);
    // An out-of-range DIFAT sector yields an empty buffer; stop walking the
    // chain instead of letting readUInt32LE throw on it.
    if (buf.length < sectorSize) break;
    for (let i = 0; i < entriesPerDifatSector && fatSectors.length < fatSectorCount; i++) {
      const sid = buf.readUInt32LE(i * 4);
      if (sid === FREE_SECT || sid === END_OF_CHAIN) continue;
      fatSectors.push(sid);
    }
    // The last slot of a DIFAT sector links to the next DIFAT sector.
    difatSector = buf.readUInt32LE(entriesPerDifatSector * 4);
  }
  // Flatten all FAT sectors into one table; unreadable slots count as free.
  const entriesPerFatSector = sectorSize / 4;
  const fatTable = new Uint32Array(fatSectors.length * entriesPerFatSector);
  for (let fi = 0; fi < fatSectors.length; fi++) {
    const buf = readSectorData(fatSectors[fi]);
    for (let i = 0; i < entriesPerFatSector; i++) {
      fatTable[fi * entriesPerFatSector + i] = i * 4 + 3 < buf.length ? buf.readUInt32LE(i * 4) : FREE_SECT;
    }
  }
  // Follows a FAT chain from startSector and returns at most maxBytes bytes.
  function readChain(startSector, maxBytes) {
    if (startSector === END_OF_CHAIN || startSector === FREE_SECT) return Buffer.alloc(0);
    if (maxBytes > MAX_STREAM_SIZE) throw new Error("\uC2A4\uD2B8\uB9BC\uC774 \uB108\uBB34 \uD07D\uB2C8\uB2E4");
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < maxBytes) {
      // Stop on a loop in the chain or an implausibly long chain.
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const buf = readSectorData(current);
      const remaining = maxBytes - totalRead;
      chunks.push(remaining < sectorSize ? buf.subarray(0, remaining) : buf);
      totalRead += Math.min(buf.length, remaining);
      current = current < fatTable.length ? fatTable[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }
  // Mini FAT, loaded on first use.
  let miniFatTable = null;
  function getMiniFatTable() {
    if (miniFatTable) return miniFatTable;
    if (miniFatSectorCount === 0 || firstMiniFatSector === END_OF_CHAIN) {
      miniFatTable = new Uint32Array(0);
      return miniFatTable;
    }
    const miniFatData = readChain(firstMiniFatSector, miniFatSectorCount * sectorSize);
    const entries = miniFatData.length / 4;
    miniFatTable = new Uint32Array(entries);
    for (let i = 0; i < entries; i++) {
      miniFatTable[i] = miniFatData.readUInt32LE(i * 4);
    }
    return miniFatTable;
  }
  // Directory: fixed 128-byte entries. Entries with an invalid name length are
  // kept as empty placeholders so that entry indices stay aligned.
  const dirData = readChain(firstDirSector, MAX_DIR_ENTRIES * 128);
  const dirEntries = [];
  for (let offset = 0; offset + 128 <= dirData.length && dirEntries.length < MAX_DIR_ENTRIES; offset += 128) {
    const nameLen = dirData.readUInt16LE(offset + 64);
    if (nameLen <= 0 || nameLen > 64) {
      dirEntries.push({ name: "", type: 0, startSector: 0, size: 0 });
      continue;
    }
    // The stored length includes a 2-byte terminator that is not part of the name.
    const nameBytes = nameLen - 2;
    const name = nameBytes > 0 ? dirData.subarray(offset, offset + nameBytes).toString("utf16le") : "";
    const type = dirData[offset + 66];
    const startSector = dirData.readUInt32LE(offset + 116);
    const size = dirData.readUInt32LE(offset + 120);
    dirEntries.push({ name, type, startSector, size });
  }
  // Mini stream (the chain owned by the type-5 root entry), loaded on first use.
  let miniStreamData = null;
  function getMiniStream() {
    if (miniStreamData) return miniStreamData;
    const root = dirEntries[0];
    if (!root || root.type !== 5) {
      miniStreamData = Buffer.alloc(0);
      return miniStreamData;
    }
    miniStreamData = readChain(root.startSector, root.size || MAX_STREAM_SIZE);
    return miniStreamData;
  }
  // Follows a mini-FAT chain inside the mini stream; chunks that fall outside
  // the mini stream are skipped.
  function readMiniStream(startSector, size) {
    const mft = getMiniFatTable();
    const ms = getMiniStream();
    if (mft.length === 0 || ms.length === 0) return Buffer.alloc(0);
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < size) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const off = current * miniSectorSize;
      const remaining = size - totalRead;
      const chunkSize = Math.min(miniSectorSize, remaining);
      if (off + chunkSize <= ms.length) {
        chunks.push(ms.subarray(off, off + chunkSize));
      }
      totalRead += chunkSize;
      current = current < mft.length ? mft[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }
  // Streams below the cutoff are tried in the mini stream first; if that
  // yields nothing, the regular FAT chain is used as a fallback.
  function readStreamData(entry) {
    if (entry.size === 0) return Buffer.alloc(0);
    if (entry.size < miniStreamCutoff) {
      const miniResult = readMiniStream(entry.startSector, entry.size);
      if (miniResult.length > 0) return miniResult;
    }
    return readChain(entry.startSector, entry.size);
  }
  // Name-only lookup: the storage part of a path is not checked.
  function findEntryByPath(path) {
    const parts = path.replace(/^\//, "").split("/");
    const findStreamNamed = (wanted) => dirEntries.find((e) => e.type === 2 && e.name === wanted) ?? null;
    if (parts.length === 1) return findStreamNamed(parts[0]);
    // Try everything after the first segment, then only the last segment.
    return findStreamNamed(parts.slice(1).join("/")) ?? findStreamNamed(parts[parts.length - 1]);
  }
  return {
    findStream(path) {
      const normalized = path.replace(/^\//, "");
      const entry = findEntryByPath(normalized);
      if (!entry || entry.type !== 2) return null;
      const stream = readStreamData(entry);
      return stream.length > 0 ? stream : null;
    },
    entries() {
      return dirEntries.filter((e) => e.type === 2);
    }
  };
}
2055
+
1165
2056
  // src/hwp5/parser.ts
1166
2057
  import { createRequire } from "module";
1167
2058
  var require2 = createRequire(import.meta.url);
@@ -1169,21 +2060,40 @@ var CFB = require2("cfb");
1169
2060
  var MAX_SECTIONS = 100;
1170
2061
  var MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024;
1171
2062
  function parseHwp5Document(buffer, options) {
1172
- const cfb = CFB.parse(buffer);
1173
- const headerEntry = CFB.find(cfb, "/FileHeader");
1174
- if (!headerEntry?.content) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
1175
- const header = parseFileHeader(Buffer.from(headerEntry.content));
2063
+ let cfb = null;
2064
+ let lenientCfb = null;
2065
+ const warnings = [];
2066
+ try {
2067
+ cfb = CFB.parse(buffer);
2068
+ } catch {
2069
+ try {
2070
+ lenientCfb = parseLenientCfb(buffer);
2071
+ warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
2072
+ } catch {
2073
+ throw new KordocError("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
2074
+ }
2075
+ }
2076
+ const findStream = (path) => {
2077
+ if (cfb) {
2078
+ const entry = CFB.find(cfb, path);
2079
+ return entry?.content ? Buffer.from(entry.content) : null;
2080
+ }
2081
+ return lenientCfb.findStream(path);
2082
+ };
2083
+ const headerData = findStream("/FileHeader");
2084
+ if (!headerData) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
2085
+ const header = parseFileHeader(headerData);
1176
2086
  if (header.signature !== "HWP Document File") throw new KordocError("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
1177
2087
  if (header.flags & FLAG_ENCRYPTED) throw new KordocError("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1178
2088
  if (header.flags & FLAG_DRM) throw new KordocError("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1179
2089
  const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
2090
+ const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
1180
2091
  const metadata = {
1181
2092
  version: `${header.versionMajor}.x`
1182
2093
  };
1183
- extractHwp5Metadata(cfb, metadata);
1184
- const docInfo = parseDocInfoStream(cfb, compressed);
1185
- const warnings = [];
1186
- const sections = findSections(cfb);
2094
+ if (cfb) extractHwp5Metadata(cfb, metadata);
2095
+ const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
2096
+ const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
1187
2097
  if (sections.length === 0) throw new KordocError("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
1188
2098
  metadata.pageCount = sections.length;
1189
2099
  const pageFilter = options?.pages ? parsePageRange(options.pages, sections.length) : null;
@@ -1195,7 +2105,7 @@ function parseHwp5Document(buffer, options) {
1195
2105
  if (pageFilter && !pageFilter.has(si + 1)) continue;
1196
2106
  try {
1197
2107
  const sectionData = sections[si];
1198
- const data = compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
2108
+ const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
1199
2109
  totalDecompressed += data.length;
1200
2110
  if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new KordocError("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
1201
2111
  const records = readRecords(data);
@@ -1208,7 +2118,7 @@ function parseHwp5Document(buffer, options) {
1208
2118
  warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
1209
2119
  }
1210
2120
  }
1211
- const images = extractHwp5Images(cfb, blocks, compressed, warnings);
2121
+ const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
1212
2122
  if (docInfo) {
1213
2123
  detectHwp5Headings(blocks, docInfo);
1214
2124
  }
@@ -1227,6 +2137,15 @@ function parseDocInfoStream(cfb, compressed) {
1227
2137
  return null;
1228
2138
  }
1229
2139
  }
2140
/**
 * Parses an already-extracted DocInfo stream.
 *
 * @param {Buffer|null} raw - Raw DocInfo bytes, or a falsy value when the
 *   stream was not found.
 * @param {boolean} compressed - Whether `raw` must be decompressed first.
 * @returns {*} The result of `parseDocInfo`, or null when `raw` is falsy or
 *   any step throws.
 */
function parseDocInfoFromStream(raw, compressed) {
  if (raw) {
    try {
      const records = readRecords(compressed ? decompressStream(raw) : raw);
      return parseDocInfo(records);
    } catch {
      // Best effort: an unreadable DocInfo is treated as absent.
    }
  }
  return null;
}
1230
2149
  function detectHwp5Headings(blocks, docInfo) {
1231
2150
  let baseFontSize = 0;
1232
2151
  for (const style of docInfo.styles) {
@@ -1320,6 +2239,20 @@ function extractHwp5MetadataOnly(buffer) {
1320
2239
  metadata.pageCount = sections.length;
1321
2240
  return metadata;
1322
2241
  }
2242
/**
 * Collects the decrypted `/ViewText/SectionN` streams of a strictly parsed
 * container, for N = 0, 1, 2, ... up to MAX_SECTIONS.
 *
 * @param {*} cfb - Container object accepted by `CFB.find`.
 * @param {boolean} compressed - Forwarded to `decryptViewText`.
 * @returns {Buffer[]} Decrypted sections in index order; collection stops at
 *   the first missing section or the first one that fails to decrypt.
 */
function findViewTextSections(cfb, compressed) {
  const decoded = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const content = CFB.find(cfb, `/ViewText/Section${index}`)?.content;
    if (!content) break;
    try {
      decoded.push(decryptViewText(Buffer.from(content), compressed));
    } catch {
      // A section that cannot be decrypted ends the scan.
      break;
    }
    index += 1;
  }
  // Sections were appended in ascending index order, so no sort is needed.
  return decoded;
}
1323
2256
  function findSections(cfb) {
1324
2257
  const sections = [];
1325
2258
  for (let i = 0; i < MAX_SECTIONS; i++) {
@@ -1338,6 +2271,38 @@ function findSections(cfb) {
1338
2271
  }
1339
2272
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
1340
2273
  }
2274
/**
 * Collect body-text section streams from a leniently parsed container.
 *
 * Pass 1 probes `/BodyText/Section{N}` (falling back to a bare `Section{N}`
 * lookup) for N = 0, 1, ... and stops at the first miss. If that finds
 * nothing, pass 2 walks every entry whose name starts with "Section" and
 * takes its index from the numeric suffix.
 *
 * @param {{findStream: Function, entries: Function}} lcfb - Lenient container.
 * @param {boolean} compressed - When truthy each stream goes through
 *   `decompressStream`; a decompression error propagates to the caller.
 * @returns {Array} Section payloads ordered by section index.
 */
function findSectionsLenient(lcfb, compressed) {
  const unpack = (stream) => (compressed ? decompressStream(stream) : stream);
  const found = [];

  for (let n = 0; n < MAX_SECTIONS; n++) {
    const stream = lcfb.findStream(`/BodyText/Section${n}`) ?? lcfb.findStream(`Section${n}`);
    if (!stream) break;
    found.push({ idx: n, content: unpack(stream) });
  }

  if (found.length === 0) {
    const prefix = "Section";
    for (const entry of lcfb.entries()) {
      if (found.length >= MAX_SECTIONS) break;
      if (!entry.name.startsWith(prefix)) continue;
      // NOTE(review): a non-numeric suffix (e.g. "SectionX") is coerced to
      // index 0 here rather than being rejected — confirm this is intended.
      const idx = parseInt(entry.name.slice(prefix.length), 10) || 0;
      const stream = lcfb.findStream(entry.name);
      if (stream) found.push({ idx, content: unpack(stream) });
    }
  }

  found.sort((a, b) => a.idx - b.idx);
  return found.map(({ content }) => content);
}
2293
/**
 * Lenient-container counterpart of the ViewText section scan.
 *
 * Probes `/ViewText/Section{N}` (falling back to a bare `Section{N}` lookup)
 * for N = 0, 1, ... up to `MAX_SECTIONS`, decrypting each hit with
 * `decryptViewText`. The scan ends at the first missing stream or the first
 * decryption error.
 *
 * @param {{findStream: Function}} lcfb - Lenient container.
 * @param {boolean} compressed - Forwarded to `decryptViewText`.
 * @returns {Array} Decrypted section payloads in ascending section order.
 */
function findViewTextSectionsLenient(lcfb, compressed) {
  const decryptedSections = [];
  let sectionNo = 0;
  while (sectionNo < MAX_SECTIONS) {
    const stream = lcfb.findStream(`/ViewText/Section${sectionNo}`) ?? lcfb.findStream(`Section${sectionNo}`);
    if (!stream) break;
    let plain;
    try {
      plain = decryptViewText(stream, compressed);
    } catch {
      // Stop at the first undecryptable section; keep what was recovered.
      break;
    }
    // Visited in ascending index order, so no re-sorting is required.
    decryptedSections.push(plain);
    sectionNo++;
  }
  return decryptedSections;
}
1341
2306
  var TAG_SHAPE_COMPONENT = 74;
1342
2307
  function extractBinDataId(records, ctrlIdx) {
1343
2308
  const ctrlLevel = records[ctrlIdx].level;
@@ -1412,6 +2377,53 @@ function extractHwp5Images(cfb, blocks, compressed, warnings) {
1412
2377
  }
1413
2378
  return images;
1414
2379
  }
2380
/**
 * Resolve image blocks against `BIN####` streams of a leniently parsed container.
 *
 * Every entry whose name matches `BIN` + four digits is indexed by that
 * number. Each block of type "image" whose `text` parses as a number is then
 * looked up in that index:
 *   - hit with a recognised MIME type: the block's `text` becomes the
 *     generated file name, `imageData` is attached, and an image record is
 *     appended to the result;
 *   - miss, or unrecognised MIME type: the block is downgraded to a
 *     placeholder paragraph and a `SKIPPED_IMAGE` warning is pushed.
 *
 * Fix over the previous revision: the "not found" warning text contained
 * three U+FFFD replacement characters (mojibake); it now reads
 * "BinData <id> \uC5C6\uC74C" as intended.
 *
 * @param {{entries: Function, findStream: Function}} lcfb - Lenient container.
 * @param {Array<object>} blocks - Parsed blocks; image blocks are mutated in place.
 * @param {boolean} compressed - When truthy, streams are decompressed if possible.
 * @param {Array<object>} warnings - Sink that receives `{page, message, code}` items.
 * @returns {Array<{filename: string, data: Uint8Array, mimeType: string}>}
 *   Extracted images; empty when the container has no `BIN####` streams.
 */
function extractHwp5ImagesLenient(lcfb, blocks, compressed, warnings) {
  const binDataMap = new Map();
  const binRe = /^BIN(\d{4})/i;
  for (const entry of lcfb.entries()) {
    const match = entry.name.match(binRe);
    if (!match) continue;
    let raw = lcfb.findStream(entry.name);
    if (!raw) continue;
    if (compressed) {
      try {
        raw = decompressStream(raw);
      } catch {
        // Intentional fallback: if decompression fails, keep the bytes as
        // stored and let MIME sniffing decide whether they are usable.
      }
    }
    binDataMap.set(parseInt(match[1], 10), { data: raw, name: entry.name });
  }
  // With no BinData streams at all, image blocks are left untouched.
  if (binDataMap.size === 0) return [];

  // First matching MIME fragment wins; anything else gets a ".bin" extension.
  const extensionByMimeFragment = [
    ["jpeg", "jpg"],
    ["png", "png"],
    ["gif", "gif"],
    ["bmp", "bmp"],
  ];

  const images = [];
  let imageIndex = 0;
  for (const block of blocks) {
    if (block.type !== "image" || !block.text) continue;
    const binId = parseInt(block.text, 10);
    if (Number.isNaN(binId)) continue;

    const bin = binDataMap.get(binId);
    if (!bin) {
      // Korean for "BinData <id> not found" (was corrupted with U+FFFD).
      warnings.push({ page: block.pageNumber, message: `BinData ${binId} \uC5C6\uC74C`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: BinData ${binId}]`;
      continue;
    }

    const mime = detectImageMime(bin.data);
    if (!mime) {
      warnings.push({ page: block.pageNumber, message: `BinData ${binId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: ${bin.name}]`;
      continue;
    }

    imageIndex++;
    const known = extensionByMimeFragment.find(([fragment]) => mime.includes(fragment));
    const ext = known ? known[1] : "bin";
    const filename = `image_${String(imageIndex).padStart(3, "0")}.${ext}`;
    images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
    block.text = filename;
    // The block keeps the original stream name, not the generated file name.
    block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
  }
  return images;
}
1415
2427
  function parseSection(records, docInfo, warnings, sectionNum) {
1416
2428
  const blocks = [];
1417
2429
  let i = 0;
@@ -1448,12 +2460,61 @@ function parseSection(records, docInfo, warnings, sectionNum) {
1448
2460
  }
1449
2461
  } else if (ctrlId === " elo" || ctrlId === "ole ") {
1450
2462
  warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
2463
+ } else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
2464
+ const noteText = extractNoteText(records, i);
2465
+ if (noteText && blocks.length > 0) {
2466
+ const lastBlock = blocks[blocks.length - 1];
2467
+ if (lastBlock.type === "paragraph") {
2468
+ lastBlock.footnoteText = lastBlock.footnoteText ? lastBlock.footnoteText + "; " + noteText : noteText;
2469
+ }
2470
+ }
2471
+ } else if (ctrlId === "%tok" || ctrlId === "klnk") {
2472
+ const url = extractHyperlinkUrl(rec.data);
2473
+ if (url && blocks.length > 0) {
2474
+ const lastBlock = blocks[blocks.length - 1];
2475
+ if (lastBlock.type === "paragraph" && !lastBlock.href) {
2476
+ lastBlock.href = sanitizeHref(url) ?? void 0;
2477
+ }
2478
+ }
1451
2479
  }
1452
2480
  }
1453
2481
  i++;
1454
2482
  }
1455
2483
  return blocks;
1456
2484
  }
2485
/**
 * Gather the text of the records nested under a note control record.
 *
 * Scans forward from the record after `ctrlIdx`, stopping at the first record
 * whose level is not deeper than the control's own level, at the end of the
 * list, or once index `ctrlIdx + 100` is reached. Only `TAG_PARA_TEXT`
 * records contribute; blank text is skipped.
 *
 * @param {Array<{level: number, tagId: number, data: *}>} records - Record list.
 * @param {number} ctrlIdx - Index of the control record within `records`.
 * @returns {string|null} Trimmed texts joined by a single space, or `null`
 *   when nothing was collected.
 */
function extractNoteText(records, ctrlIdx) {
  const parentLevel = records[ctrlIdx].level;
  const scanEnd = Math.min(records.length, ctrlIdx + 100);
  const pieces = [];
  for (let pos = ctrlIdx + 1; pos < scanEnd; pos++) {
    const record = records[pos];
    // Leaving the control's subtree ends the note.
    if (record.level <= parentLevel) break;
    if (record.tagId !== TAG_PARA_TEXT) continue;
    const text = extractText(record.data).trim();
    if (text !== "") pieces.push(text);
  }
  return pieces.length === 0 ? null : pieces.join(" ");
}
2498
/**
 * Pull an http(s) URL out of a control record's raw bytes.
 *
 * Looks for the first occurrence of "http" encoded as UTF-16LE, then reads
 * 16-bit units until a zero unit or the end of the buffer. The decoded text
 * is returned only if it matches `http://` or `https://` followed by at
 * least one character and is shorter than 2000 characters.
 *
 * @param {Buffer} data - Raw record payload.
 * @returns {string|null} The URL, or `null` when none is found, validation
 *   fails, or reading the buffer throws.
 */
function extractHyperlinkUrl(data) {
  try {
    const start = data.indexOf(Buffer.from("http", "utf16le"));
    if (start < 0) return null;

    // Advance two bytes at a time until a NUL code unit or end of data.
    let stop = start;
    for (; stop + 1 < data.length; stop += 2) {
      if (data.readUInt16LE(stop) === 0) break;
    }

    const candidate = data.subarray(start, stop).toString("utf16le");
    const looksLikeUrl = /^https?:\/\/.+/.test(candidate);
    return looksLikeUrl && candidate.length < 2e3 ? candidate : null;
  } catch {
    // Malformed input is treated as "no link".
    return null;
  }
}
1457
2518
  function resolveCharStyle(charShapeIds, docInfo) {
1458
2519
  if (charShapeIds.length === 0 || docInfo.charShapes.length === 0) return void 0;
1459
2520
  const freq = /* @__PURE__ */ new Map();
@@ -4302,4 +5363,4 @@ export {
4302
5363
  extractFormFields,
4303
5364
  parse
4304
5365
  };
4305
- //# sourceMappingURL=chunk-MDRW3HYC.js.map
5366
+ //# sourceMappingURL=chunk-UUHAAZYN.js.map