kordoc 1.8.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ import {
6
6
  precheckZipSize,
7
7
  sanitizeHref,
8
8
  toArrayBuffer
9
- } from "./chunk-UUKFY5P5.js";
9
+ } from "./chunk-UMO6QQO5.js";
10
10
  import {
11
11
  parsePageRange
12
12
  } from "./chunk-MOL7MDBG.js";
@@ -50,34 +50,29 @@ async function detectZipFormat(buffer) {
50
50
  }
51
51
 
52
52
  // src/table/builder.ts
53
- var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
54
- function sanitizeHref2(href) {
55
- const trimmed = href.trim();
56
- if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
57
- return trimmed;
58
- }
59
53
  var MAX_COLS = 200;
60
54
  var MAX_ROWS = 1e4;
61
55
  function buildTable(rows) {
62
56
  if (rows.length > MAX_ROWS) rows = rows.slice(0, MAX_ROWS);
63
57
  const numRows = rows.length;
64
- const tempOccupied = /* @__PURE__ */ new Set();
58
+ const hasAddr = rows.some((row) => row.some((c) => c.colAddr !== void 0 && c.rowAddr !== void 0));
59
+ if (hasAddr) return buildTableDirect(rows, numRows);
65
60
  let maxCols = 0;
61
+ const tempOccupied = Array.from({ length: numRows }, () => []);
66
62
  for (let rowIdx = 0; rowIdx < numRows; rowIdx++) {
67
63
  let colIdx = 0;
68
64
  for (const cell of rows[rowIdx]) {
69
- while (colIdx < MAX_COLS && tempOccupied.has(rowIdx * MAX_COLS + colIdx)) colIdx++;
65
+ while (colIdx < MAX_COLS && tempOccupied[rowIdx][colIdx]) colIdx++;
70
66
  if (colIdx >= MAX_COLS) break;
71
67
  for (let r = rowIdx; r < Math.min(rowIdx + cell.rowSpan, numRows); r++) {
72
68
  for (let c = colIdx; c < Math.min(colIdx + cell.colSpan, MAX_COLS); c++) {
73
- tempOccupied.add(r * MAX_COLS + c);
69
+ tempOccupied[r][c] = true;
74
70
  }
75
71
  }
76
72
  colIdx += cell.colSpan;
77
73
  if (colIdx > maxCols) maxCols = colIdx;
78
74
  }
79
75
  }
80
- tempOccupied.clear();
81
76
  if (maxCols === 0) return { rows: 0, cols: 0, cells: [], hasHeader: false };
82
77
  const grid = Array.from(
83
78
  { length: numRows },
@@ -105,6 +100,40 @@ function buildTable(rows) {
105
100
  cellIdx++;
106
101
  }
107
102
  }
103
+ return trimAndReturn(grid, numRows, maxCols);
104
+ }
105
/**
 * Builds the table grid from cells that carry explicit addresses
 * (`colAddr` / `rowAddr`) instead of inferring positions from reading order.
 *
 * Cells without an address are placed at row/column 0 (the `?? 0` fallback).
 * The grid width is capped at MAX_COLS, the same limit buildTable applies, so
 * an oversized address in the input cannot force a huge allocation. Cells
 * whose anchor falls outside the grid (including negative addresses) are
 * ignored.
 *
 * @param {Array<Array<object>>} rows - parsed rows; each cell exposes `text`,
 *   `colSpan`, `rowSpan` and optionally `colAddr` / `rowAddr`.
 * @param {number} numRows - number of grid rows to allocate.
 * @returns {object} whatever trimAndReturn produces for the filled grid, or an
 *   empty table descriptor when no cell contributes a column.
 */
function buildTableDirect(rows, numRows) {
  let maxCols = 0;
  for (const row of rows) {
    for (const cell of row) {
      const end = (cell.colAddr ?? 0) + cell.colSpan;
      if (end > maxCols) maxCols = end;
    }
  }
  // Addresses come from the document; never allocate wider than buildTable would.
  if (maxCols > MAX_COLS) maxCols = MAX_COLS;
  if (maxCols === 0) return { rows: 0, cols: 0, cells: [], hasHeader: false };
  const grid = Array.from(
    { length: numRows },
    () => Array.from({ length: maxCols }, () => ({ text: "", colSpan: 1, rowSpan: 1 }))
  );
  for (const row of rows) {
    for (const cell of row) {
      const r = cell.rowAddr ?? 0;
      const c = cell.colAddr ?? 0;
      // A negative address would index grid[-1] and throw; skip it like any
      // other out-of-range anchor.
      if (r < 0 || c < 0 || r >= numRows || c >= maxCols) continue;
      grid[r][c] = { text: cell.text.trim(), colSpan: cell.colSpan, rowSpan: cell.rowSpan };
      // Clamp the covered area up front so an absurd span cannot spin the loops.
      const rowEnd = Math.min(r + cell.rowSpan, numRows);
      const colEnd = Math.min(c + cell.colSpan, maxCols);
      for (let rr = r; rr < rowEnd; rr++) {
        for (let cc = c; cc < colEnd; cc++) {
          if (rr === r && cc === c) continue;
          // Slots covered by the span are reset to empty placeholders.
          grid[rr][cc] = { text: "", colSpan: 1, rowSpan: 1 };
        }
      }
    }
  }
  return trimAndReturn(grid, numRows, maxCols);
}
136
+ function trimAndReturn(grid, numRows, maxCols) {
108
137
  let effectiveCols = maxCols;
109
138
  while (effectiveCols > 0) {
110
139
  const colEmpty = grid.every((row) => !row[effectiveCols - 1]?.text?.trim());
@@ -184,7 +213,7 @@ function blocksToMarkdown(blocks) {
184
213
  continue;
185
214
  }
186
215
  if (block.href) {
187
- const href = sanitizeHref2(block.href);
216
+ const href = sanitizeHref(block.href);
188
217
  if (href) text = `[${text}](${href})`;
189
218
  }
190
219
  if (block.footnoteText) {
@@ -220,12 +249,10 @@ function tableToMarkdown(table) {
220
249
  const display = Array.from({ length: numRows }, () => Array(numCols).fill(""));
221
250
  const skip = /* @__PURE__ */ new Set();
222
251
  for (let r = 0; r < numRows; r++) {
223
- let cellIdx = 0;
224
252
  for (let c = 0; c < numCols; c++) {
225
253
  if (skip.has(`${r},${c}`)) continue;
226
- const cell = cells[r]?.[cellIdx];
227
- if (!cell) break;
228
- cellIdx++;
254
+ const cell = cells[r]?.[c];
255
+ if (!cell) continue;
229
256
  display[r][c] = sanitizeText(cell.text).replace(/\n/g, "<br>");
230
257
  for (let dr = 0; dr < cell.rowSpan; dr++) {
231
258
  for (let dc = 0; dc < cell.colSpan; dc++) {
@@ -762,6 +789,14 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
762
789
  }
763
790
  }
764
791
  break;
792
+ case "cellAddr":
793
+ if (tableCtx?.cell) {
794
+ const ca = parseInt(el.getAttribute("colAddr") || "", 10);
795
+ const ra = parseInt(el.getAttribute("rowAddr") || "", 10);
796
+ if (!isNaN(ca)) tableCtx.cell.colAddr = ca;
797
+ if (!isNaN(ra)) tableCtx.cell.rowAddr = ra;
798
+ }
799
+ break;
765
800
  case "cellSpan":
766
801
  if (tableCtx?.cell) {
767
802
  const cs = parseInt(el.getAttribute("colSpan") || "1", 10);
@@ -981,13 +1016,16 @@ var TAG_TABLE = 77;
981
1016
  var TAG_DOC_CHAR_SHAPE = 55;
982
1017
  var TAG_DOC_STYLE = 58;
983
1018
  var CHAR_LINE = 0;
1019
+ var CHAR_SECTION_BREAK = 10;
984
1020
  var CHAR_PARA = 13;
985
1021
  var CHAR_TAB = 9;
986
1022
  var CHAR_HYPHEN = 30;
987
1023
  var CHAR_NBSP = 31;
988
1024
  var CHAR_FIXED_NBSP = 24;
1025
+ var CHAR_FIXED_WIDTH = 25;
989
1026
  var FLAG_COMPRESSED = 1 << 0;
990
1027
  var FLAG_ENCRYPTED = 1 << 1;
1028
+ var FLAG_DISTRIBUTION = 1 << 2;
991
1029
  var FLAG_DRM = 1 << 4;
992
1030
  var MAX_RECORDS = 5e5;
993
1031
  function readRecords(data) {
@@ -1082,27 +1120,41 @@ function extractText(data) {
1082
1120
  const ch = data.readUInt16LE(i);
1083
1121
  i += 2;
1084
1122
  switch (ch) {
1123
+ // ── char 타입 (2바이트만, 확장 데이터 없음) ──
1085
1124
  case CHAR_LINE:
1086
1125
  result += "\n";
1087
1126
  break;
1088
- case CHAR_PARA:
1089
- break;
1090
- case CHAR_TAB:
1091
- result += " ";
1127
+ case CHAR_SECTION_BREAK:
1128
+ result += "\n";
1092
1129
  if (i + 14 <= data.length) i += 14;
1093
1130
  break;
1131
+ case CHAR_PARA:
1132
+ break;
1133
+ // 문단 끝
1094
1134
  case CHAR_HYPHEN:
1095
1135
  result += "-";
1096
1136
  break;
1097
1137
  case CHAR_NBSP:
1138
+ result += " ";
1139
+ break;
1098
1140
  case CHAR_FIXED_NBSP:
1141
+ result += "\xA0";
1142
+ break;
1143
+ // 진짜 NBSP
1144
+ case CHAR_FIXED_WIDTH:
1099
1145
  result += " ";
1100
1146
  break;
1147
+ // 고정폭 공백
1148
+ // ── inline 타입 (2바이트 + 14바이트 확장) ──
1149
+ case CHAR_TAB:
1150
+ result += " ";
1151
+ if (i + 14 <= data.length) i += 14;
1152
+ break;
1101
1153
  default:
1102
1154
  if (ch >= 1 && ch <= 31) {
1103
- const isExt = ch >= 1 && ch <= 3 || ch >= 10 && ch <= 18 || ch >= 21 && ch <= 23;
1155
+ const isExtended = ch >= 1 && ch <= 3 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= 18 || ch >= 21 && ch <= 23;
1104
1156
  const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
1105
- if ((isExt || isInline) && i + 14 <= data.length) i += 14;
1157
+ if ((isExtended || isInline) && i + 14 <= data.length) i += 14;
1106
1158
  } else if (ch >= 32) {
1107
1159
  if (ch >= 55296 && ch <= 56319 && i + 1 < data.length) {
1108
1160
  const lo = data.readUInt16LE(i);
@@ -1121,6 +1173,886 @@ function extractText(data) {
1121
1173
  return result;
1122
1174
  }
1123
1175
 
1176
+ // src/hwp5/aes.ts
1177
+ var S_BOX = new Uint8Array([
1178
+ 99,
1179
+ 124,
1180
+ 119,
1181
+ 123,
1182
+ 242,
1183
+ 107,
1184
+ 111,
1185
+ 197,
1186
+ 48,
1187
+ 1,
1188
+ 103,
1189
+ 43,
1190
+ 254,
1191
+ 215,
1192
+ 171,
1193
+ 118,
1194
+ 202,
1195
+ 130,
1196
+ 201,
1197
+ 125,
1198
+ 250,
1199
+ 89,
1200
+ 71,
1201
+ 240,
1202
+ 173,
1203
+ 212,
1204
+ 162,
1205
+ 175,
1206
+ 156,
1207
+ 164,
1208
+ 114,
1209
+ 192,
1210
+ 183,
1211
+ 253,
1212
+ 147,
1213
+ 38,
1214
+ 54,
1215
+ 63,
1216
+ 247,
1217
+ 204,
1218
+ 52,
1219
+ 165,
1220
+ 229,
1221
+ 241,
1222
+ 113,
1223
+ 216,
1224
+ 49,
1225
+ 21,
1226
+ 4,
1227
+ 199,
1228
+ 35,
1229
+ 195,
1230
+ 24,
1231
+ 150,
1232
+ 5,
1233
+ 154,
1234
+ 7,
1235
+ 18,
1236
+ 128,
1237
+ 226,
1238
+ 235,
1239
+ 39,
1240
+ 178,
1241
+ 117,
1242
+ 9,
1243
+ 131,
1244
+ 44,
1245
+ 26,
1246
+ 27,
1247
+ 110,
1248
+ 90,
1249
+ 160,
1250
+ 82,
1251
+ 59,
1252
+ 214,
1253
+ 179,
1254
+ 41,
1255
+ 227,
1256
+ 47,
1257
+ 132,
1258
+ 83,
1259
+ 209,
1260
+ 0,
1261
+ 237,
1262
+ 32,
1263
+ 252,
1264
+ 177,
1265
+ 91,
1266
+ 106,
1267
+ 203,
1268
+ 190,
1269
+ 57,
1270
+ 74,
1271
+ 76,
1272
+ 88,
1273
+ 207,
1274
+ 208,
1275
+ 239,
1276
+ 170,
1277
+ 251,
1278
+ 67,
1279
+ 77,
1280
+ 51,
1281
+ 133,
1282
+ 69,
1283
+ 249,
1284
+ 2,
1285
+ 127,
1286
+ 80,
1287
+ 60,
1288
+ 159,
1289
+ 168,
1290
+ 81,
1291
+ 163,
1292
+ 64,
1293
+ 143,
1294
+ 146,
1295
+ 157,
1296
+ 56,
1297
+ 245,
1298
+ 188,
1299
+ 182,
1300
+ 218,
1301
+ 33,
1302
+ 16,
1303
+ 255,
1304
+ 243,
1305
+ 210,
1306
+ 205,
1307
+ 12,
1308
+ 19,
1309
+ 236,
1310
+ 95,
1311
+ 151,
1312
+ 68,
1313
+ 23,
1314
+ 196,
1315
+ 167,
1316
+ 126,
1317
+ 61,
1318
+ 100,
1319
+ 93,
1320
+ 25,
1321
+ 115,
1322
+ 96,
1323
+ 129,
1324
+ 79,
1325
+ 220,
1326
+ 34,
1327
+ 42,
1328
+ 144,
1329
+ 136,
1330
+ 70,
1331
+ 238,
1332
+ 184,
1333
+ 20,
1334
+ 222,
1335
+ 94,
1336
+ 11,
1337
+ 219,
1338
+ 224,
1339
+ 50,
1340
+ 58,
1341
+ 10,
1342
+ 73,
1343
+ 6,
1344
+ 36,
1345
+ 92,
1346
+ 194,
1347
+ 211,
1348
+ 172,
1349
+ 98,
1350
+ 145,
1351
+ 149,
1352
+ 228,
1353
+ 121,
1354
+ 231,
1355
+ 200,
1356
+ 55,
1357
+ 109,
1358
+ 141,
1359
+ 213,
1360
+ 78,
1361
+ 169,
1362
+ 108,
1363
+ 86,
1364
+ 244,
1365
+ 234,
1366
+ 101,
1367
+ 122,
1368
+ 174,
1369
+ 8,
1370
+ 186,
1371
+ 120,
1372
+ 37,
1373
+ 46,
1374
+ 28,
1375
+ 166,
1376
+ 180,
1377
+ 198,
1378
+ 232,
1379
+ 221,
1380
+ 116,
1381
+ 31,
1382
+ 75,
1383
+ 189,
1384
+ 139,
1385
+ 138,
1386
+ 112,
1387
+ 62,
1388
+ 181,
1389
+ 102,
1390
+ 72,
1391
+ 3,
1392
+ 246,
1393
+ 14,
1394
+ 97,
1395
+ 53,
1396
+ 87,
1397
+ 185,
1398
+ 134,
1399
+ 193,
1400
+ 29,
1401
+ 158,
1402
+ 225,
1403
+ 248,
1404
+ 152,
1405
+ 17,
1406
+ 105,
1407
+ 217,
1408
+ 142,
1409
+ 148,
1410
+ 155,
1411
+ 30,
1412
+ 135,
1413
+ 233,
1414
+ 206,
1415
+ 85,
1416
+ 40,
1417
+ 223,
1418
+ 140,
1419
+ 161,
1420
+ 137,
1421
+ 13,
1422
+ 191,
1423
+ 230,
1424
+ 66,
1425
+ 104,
1426
+ 65,
1427
+ 153,
1428
+ 45,
1429
+ 15,
1430
+ 176,
1431
+ 84,
1432
+ 187,
1433
+ 22
1434
+ ]);
1435
+ var INV_S_BOX = new Uint8Array([
1436
+ 82,
1437
+ 9,
1438
+ 106,
1439
+ 213,
1440
+ 48,
1441
+ 54,
1442
+ 165,
1443
+ 56,
1444
+ 191,
1445
+ 64,
1446
+ 163,
1447
+ 158,
1448
+ 129,
1449
+ 243,
1450
+ 215,
1451
+ 251,
1452
+ 124,
1453
+ 227,
1454
+ 57,
1455
+ 130,
1456
+ 155,
1457
+ 47,
1458
+ 255,
1459
+ 135,
1460
+ 52,
1461
+ 142,
1462
+ 67,
1463
+ 68,
1464
+ 196,
1465
+ 222,
1466
+ 233,
1467
+ 203,
1468
+ 84,
1469
+ 123,
1470
+ 148,
1471
+ 50,
1472
+ 166,
1473
+ 194,
1474
+ 35,
1475
+ 61,
1476
+ 238,
1477
+ 76,
1478
+ 149,
1479
+ 11,
1480
+ 66,
1481
+ 250,
1482
+ 195,
1483
+ 78,
1484
+ 8,
1485
+ 46,
1486
+ 161,
1487
+ 102,
1488
+ 40,
1489
+ 217,
1490
+ 36,
1491
+ 178,
1492
+ 118,
1493
+ 91,
1494
+ 162,
1495
+ 73,
1496
+ 109,
1497
+ 139,
1498
+ 209,
1499
+ 37,
1500
+ 114,
1501
+ 248,
1502
+ 246,
1503
+ 100,
1504
+ 134,
1505
+ 104,
1506
+ 152,
1507
+ 22,
1508
+ 212,
1509
+ 164,
1510
+ 92,
1511
+ 204,
1512
+ 93,
1513
+ 101,
1514
+ 182,
1515
+ 146,
1516
+ 108,
1517
+ 112,
1518
+ 72,
1519
+ 80,
1520
+ 253,
1521
+ 237,
1522
+ 185,
1523
+ 218,
1524
+ 94,
1525
+ 21,
1526
+ 70,
1527
+ 87,
1528
+ 167,
1529
+ 141,
1530
+ 157,
1531
+ 132,
1532
+ 144,
1533
+ 216,
1534
+ 171,
1535
+ 0,
1536
+ 140,
1537
+ 188,
1538
+ 211,
1539
+ 10,
1540
+ 247,
1541
+ 228,
1542
+ 88,
1543
+ 5,
1544
+ 184,
1545
+ 179,
1546
+ 69,
1547
+ 6,
1548
+ 208,
1549
+ 44,
1550
+ 30,
1551
+ 143,
1552
+ 202,
1553
+ 63,
1554
+ 15,
1555
+ 2,
1556
+ 193,
1557
+ 175,
1558
+ 189,
1559
+ 3,
1560
+ 1,
1561
+ 19,
1562
+ 138,
1563
+ 107,
1564
+ 58,
1565
+ 145,
1566
+ 17,
1567
+ 65,
1568
+ 79,
1569
+ 103,
1570
+ 220,
1571
+ 234,
1572
+ 151,
1573
+ 242,
1574
+ 207,
1575
+ 206,
1576
+ 240,
1577
+ 180,
1578
+ 230,
1579
+ 115,
1580
+ 150,
1581
+ 172,
1582
+ 116,
1583
+ 34,
1584
+ 231,
1585
+ 173,
1586
+ 53,
1587
+ 133,
1588
+ 226,
1589
+ 249,
1590
+ 55,
1591
+ 232,
1592
+ 28,
1593
+ 117,
1594
+ 223,
1595
+ 110,
1596
+ 71,
1597
+ 241,
1598
+ 26,
1599
+ 113,
1600
+ 29,
1601
+ 41,
1602
+ 197,
1603
+ 137,
1604
+ 111,
1605
+ 183,
1606
+ 98,
1607
+ 14,
1608
+ 170,
1609
+ 24,
1610
+ 190,
1611
+ 27,
1612
+ 252,
1613
+ 86,
1614
+ 62,
1615
+ 75,
1616
+ 198,
1617
+ 210,
1618
+ 121,
1619
+ 32,
1620
+ 154,
1621
+ 219,
1622
+ 192,
1623
+ 254,
1624
+ 120,
1625
+ 205,
1626
+ 90,
1627
+ 244,
1628
+ 31,
1629
+ 221,
1630
+ 168,
1631
+ 51,
1632
+ 136,
1633
+ 7,
1634
+ 199,
1635
+ 49,
1636
+ 177,
1637
+ 18,
1638
+ 16,
1639
+ 89,
1640
+ 39,
1641
+ 128,
1642
+ 236,
1643
+ 95,
1644
+ 96,
1645
+ 81,
1646
+ 127,
1647
+ 169,
1648
+ 25,
1649
+ 181,
1650
+ 74,
1651
+ 13,
1652
+ 45,
1653
+ 229,
1654
+ 122,
1655
+ 159,
1656
+ 147,
1657
+ 201,
1658
+ 156,
1659
+ 239,
1660
+ 160,
1661
+ 224,
1662
+ 59,
1663
+ 77,
1664
+ 174,
1665
+ 42,
1666
+ 245,
1667
+ 176,
1668
+ 200,
1669
+ 235,
1670
+ 187,
1671
+ 60,
1672
+ 131,
1673
+ 83,
1674
+ 153,
1675
+ 97,
1676
+ 23,
1677
+ 43,
1678
+ 4,
1679
+ 126,
1680
+ 186,
1681
+ 119,
1682
+ 214,
1683
+ 38,
1684
+ 225,
1685
+ 105,
1686
+ 20,
1687
+ 99,
1688
+ 85,
1689
+ 33,
1690
+ 12,
1691
+ 125
1692
+ ]);
1693
+ var RCON = new Uint8Array([1, 2, 4, 8, 16, 32, 64, 128, 27, 54]);
1694
+ function gmul(a, b) {
1695
+ let p = 0;
1696
+ for (let i = 0; i < 8; i++) {
1697
+ if (b & 1) p ^= a;
1698
+ const hi = a & 128;
1699
+ a = a << 1 & 255;
1700
+ if (hi) a ^= 27;
1701
+ b >>= 1;
1702
+ }
1703
+ return p;
1704
+ }
1705
+ function expandKey(key) {
1706
+ const w = new Uint32Array(44);
1707
+ for (let i = 0; i < 4; i++) {
1708
+ w[i] = key[4 * i] << 24 | key[4 * i + 1] << 16 | key[4 * i + 2] << 8 | key[4 * i + 3];
1709
+ }
1710
+ for (let i = 4; i < 44; i++) {
1711
+ let temp = w[i - 1];
1712
+ if (i % 4 === 0) {
1713
+ temp = (temp << 8 | temp >>> 24) >>> 0;
1714
+ temp = S_BOX[temp >>> 24 & 255] << 24 | S_BOX[temp >>> 16 & 255] << 16 | S_BOX[temp >>> 8 & 255] << 8 | S_BOX[temp & 255];
1715
+ temp = (temp ^ RCON[i / 4 - 1] << 24) >>> 0;
1716
+ }
1717
+ w[i] = (w[i - 4] ^ temp) >>> 0;
1718
+ }
1719
+ return w;
1720
+ }
1721
+ function decryptBlock(block, roundKeys) {
1722
+ const s = new Uint8Array(16);
1723
+ for (let i = 0; i < 16; i++) s[i] = block[i];
1724
+ addRoundKey(s, roundKeys, 10);
1725
+ for (let round = 9; round >= 1; round--) {
1726
+ invShiftRows(s);
1727
+ invSubBytes(s);
1728
+ addRoundKey(s, roundKeys, round);
1729
+ invMixColumns(s);
1730
+ }
1731
+ invShiftRows(s);
1732
+ invSubBytes(s);
1733
+ addRoundKey(s, roundKeys, 0);
1734
+ return s;
1735
+ }
1736
+ function addRoundKey(s, w, round) {
1737
+ const base = round * 4;
1738
+ for (let c = 0; c < 4; c++) {
1739
+ const k = w[base + c];
1740
+ s[c * 4] ^= k >>> 24 & 255;
1741
+ s[c * 4 + 1] ^= k >>> 16 & 255;
1742
+ s[c * 4 + 2] ^= k >>> 8 & 255;
1743
+ s[c * 4 + 3] ^= k & 255;
1744
+ }
1745
+ }
1746
+ function invSubBytes(s) {
1747
+ for (let i = 0; i < 16; i++) s[i] = INV_S_BOX[s[i]];
1748
+ }
1749
+ function invShiftRows(s) {
1750
+ let t = s[13];
1751
+ s[13] = s[9];
1752
+ s[9] = s[5];
1753
+ s[5] = s[1];
1754
+ s[1] = t;
1755
+ t = s[2];
1756
+ s[2] = s[10];
1757
+ s[10] = t;
1758
+ t = s[6];
1759
+ s[6] = s[14];
1760
+ s[14] = t;
1761
+ t = s[3];
1762
+ s[3] = s[7];
1763
+ s[7] = s[11];
1764
+ s[11] = s[15];
1765
+ s[15] = t;
1766
+ }
1767
+ function invMixColumns(s) {
1768
+ for (let c = 0; c < 4; c++) {
1769
+ const i = c * 4;
1770
+ const a0 = s[i], a1 = s[i + 1], a2 = s[i + 2], a3 = s[i + 3];
1771
+ s[i] = gmul(a0, 14) ^ gmul(a1, 11) ^ gmul(a2, 13) ^ gmul(a3, 9);
1772
+ s[i + 1] = gmul(a0, 9) ^ gmul(a1, 14) ^ gmul(a2, 11) ^ gmul(a3, 13);
1773
+ s[i + 2] = gmul(a0, 13) ^ gmul(a1, 9) ^ gmul(a2, 14) ^ gmul(a3, 11);
1774
+ s[i + 3] = gmul(a0, 11) ^ gmul(a1, 13) ^ gmul(a2, 9) ^ gmul(a3, 14);
1775
+ }
1776
+ }
1777
+ function aes128EcbDecrypt(data, key) {
1778
+ if (key.length !== 16) throw new Error("AES-128 \uD0A4\uB294 16\uBC14\uC774\uD2B8\uC5EC\uC57C \uD569\uB2C8\uB2E4");
1779
+ if (data.length % 16 !== 0) throw new Error("AES ECB \uC785\uB825\uC740 16\uBC14\uC774\uD2B8\uC758 \uBC30\uC218\uC5EC\uC57C \uD569\uB2C8\uB2E4");
1780
+ const roundKeys = expandKey(key);
1781
+ const out = new Uint8Array(data.length);
1782
+ for (let offset = 0; offset < data.length; offset += 16) {
1783
+ const block = data.subarray(offset, offset + 16);
1784
+ const decrypted = decryptBlock(block, roundKeys);
1785
+ out.set(decrypted, offset);
1786
+ }
1787
+ return out;
1788
+ }
1789
+
1790
+ // src/hwp5/crypto.ts
1791
/**
 * Linear congruential generator with multiplier 214013 and increment 2531011,
 * keeping its state as an unsigned 32-bit integer.
 */
var MsvcLcg = class {
  seed;
  /** @param {number} seed - initial state; coerced to an unsigned 32-bit value */
  constructor(seed) {
    this.seed = seed >>> 0;
  }
  /**
   * Advances the generator and returns a value in 0..0x7FFF (MSVC rand() compatible).
   * @returns {number}
   */
  rand() {
    const next = (Math.imul(this.seed, 214013) + 2531011) >>> 0;
    this.seed = next;
    // Bits 16..30 of the new state form the 15-bit result.
    return (next >>> 16) & 32767;
  }
};
1802
/**
 * De-obfuscates the first 256 bytes of a distribution-document payload.
 *
 * The first four bytes (little-endian) seed an MsvcLcg and are copied through
 * unchanged. From byte 4 onward the data is XORed in runs: each run draws a
 * key byte and then a run length of 1..16 from the generator.
 *
 * NOTE(review): descriptions of this scheme elsewhere appear to generate the
 * key stream for all 256 positions and merely skip the XOR for bytes 0..3;
 * here the stream starts at byte 4. Confirm the alignment against real files.
 *
 * @param {Uint8Array} payload - at least 256 bytes
 * @returns {Uint8Array} a new 256-byte array
 * @throws {Error} when the payload is shorter than 256 bytes
 */
function decryptDistributePayload(payload) {
  if (payload.length < 256) throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
  const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
  const keystream = new MsvcLcg(seed);
  const result = new Uint8Array(256);
  for (let k = 0; k < 4; k++) result[k] = payload[k];
  let pos = 4;
  while (pos < 256) {
    // Order matters: the key byte is drawn before the run length.
    const keyByte = keystream.rand() & 255;
    const runLength = (keystream.rand() & 15) + 1;
    const runEnd = Math.min(256, pos + runLength);
    for (; pos < runEnd; pos++) {
      result[pos] = payload[pos] ^ keyByte;
    }
  }
  return result;
}
1821
/**
 * Copies the 16-byte AES key out of a de-obfuscated payload. The key starts
 * at offset 4 plus the low nibble of the payload's first byte.
 *
 * @param {Uint8Array} decryptedPayload - output of decryptDistributePayload
 * @returns {Uint8Array} a 16-byte copy of the key
 * @throws {Error} when the key would extend past the end of the payload
 */
function extractAesKey(decryptedPayload) {
  const keyStart = (decryptedPayload[0] & 15) + 4;
  const keyEnd = keyStart + 16;
  if (keyEnd > decryptedPayload.length) {
    throw new Error("AES \uD0A4 \uCD94\uCD9C \uC2E4\uD328: \uC624\uD504\uC14B\uC774 payload \uBC94\uC704\uB97C \uCD08\uACFC\uD569\uB2C8\uB2E4");
  }
  return decryptedPayload.slice(keyStart, keyEnd);
}
1828
/**
 * Decodes the record header found at `offset`.
 *
 * The header is a little-endian 32-bit word: the low 10 bits are the tag id
 * and the top 12 bits are the size. A size field of 4095 means the real size
 * follows as another little-endian 32-bit word, making the header 8 bytes.
 *
 * @param {Uint8Array} data - record stream
 * @param {number} offset - byte position of the header
 * @returns {{tagId: number, size: number, headerSize: number}}
 * @throws {Error} when the header (or the extended size) runs past the data
 */
function parseRecordHeader(data, offset) {
  const readU32LE = (at) => (data[at] | data[at + 1] << 8 | data[at + 2] << 16 | data[at + 3] << 24) >>> 0;
  if (offset + 4 > data.length) throw new Error("\uB808\uCF54\uB4DC \uD5E4\uB354 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  const word = readU32LE(offset);
  const tagId = word & 1023;
  const inlineSize = word >>> 20 & 4095;
  if (inlineSize !== 4095) {
    return { tagId, size: inlineSize, headerSize: 4 };
  }
  if (offset + 8 > data.length) throw new Error("\uD655\uC7A5 \uB808\uCF54\uB4DC \uD06C\uAE30 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  return { tagId, size: readU32LE(offset + 4), headerSize: 8 };
}
1841
// Tag id expected on the first record of a distribution-document stream.
// Presumably the HWP tag base (16) plus 28 — confirm against the format spec.
var TAG_DISTRIBUTE_DOC_DATA = 16 + 28;
/**
 * Decrypts a ViewText stream of a distribution document.
 *
 * The stream must start with a DISTRIBUTE_DOC_DATA record carrying at least
 * 256 payload bytes. The AES key is recovered from that payload, and
 * everything after the record is AES-128-ECB decrypted; trailing bytes that do
 * not fill a 16-byte block are dropped.
 *
 * @param {Uint8Array} viewTextRaw - raw stream bytes
 * @param {boolean} compressed - when true, the plaintext is passed through
 *   decompressStream; if that throws, the undecompressed plaintext is returned
 * @returns {Buffer} decrypted (and, where possible, decompressed) data
 * @throws {Error} on a wrong first record, invalid payload, or missing body
 */
function decryptViewText(viewTextRaw, compressed) {
  const bytes = new Uint8Array(viewTextRaw);
  const { tagId, size, headerSize } = parseRecordHeader(bytes, 0);
  if (tagId !== TAG_DISTRIBUTE_DOC_DATA) {
    throw new Error(`\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC758 \uCCAB \uB808\uCF54\uB4DC\uAC00 DISTRIBUTE_DOC_DATA(${TAG_DISTRIBUTE_DOC_DATA})\uAC00 \uC544\uB2D9\uB2C8\uB2E4 (\uC2E4\uC81C: ${tagId})`);
  }
  const bodyStart = headerSize + size;
  if (bodyStart > bytes.length || size < 256) {
    throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 \uC720\uD6A8\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
  }
  // Only the first 256 payload bytes take part in key recovery.
  const keyMaterial = decryptDistributePayload(bytes.subarray(headerSize, headerSize + 256));
  const aesKey = extractAesKey(keyMaterial);
  const cipherText = bytes.subarray(bodyStart);
  if (cipherText.length === 0) {
    throw new Error("\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC5D0 \uC554\uD638\uD654\uB41C \uBCF8\uBB38 \uB370\uC774\uD130\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
  }
  const usableLength = cipherText.length - cipherText.length % 16;
  if (usableLength === 0) {
    throw new Error("\uC554\uD638\uD654\uB41C \uB370\uC774\uD130\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (16\uBC14\uC774\uD2B8 \uBBF8\uB9CC)");
  }
  const plain = aes128EcbDecrypt(cipherText.subarray(0, usableLength), aesKey);
  if (!compressed) return Buffer.from(plain);
  try {
    return decompressStream(Buffer.from(plain));
  } catch {
    // Best effort: hand back the raw plaintext when decompression fails.
    return Buffer.from(plain);
  }
}
1876
+
1877
+ // src/hwp5/cfb-lenient.ts
1878
// Signature bytes at the start of every compound file.
var CFB_MAGIC = Buffer.from([208, 207, 17, 224, 161, 177, 26, 225]);
// Sentinel sector ids used in FAT / DIFAT chains.
var END_OF_CHAIN = 4294967294;
var FREE_SECT = 4294967295;
// Safety limits applied while walking untrusted chains and directories.
var MAX_CHAIN_LENGTH = 1e6;
var MAX_DIR_ENTRIES = 1e5;
var MAX_STREAM_SIZE = 100 * 1024 * 1024;
/**
 * Best-effort reader for compound (CFB) containers that a strict parser
 * rejects. It reads the header, FAT, directory and mini stream directly and
 * tolerates missing or out-of-range sectors by treating them as empty.
 *
 * Stream lookup is by name only: the storage part of a path is not used to
 * disambiguate, so two streams with the same name in different storages
 * resolve to whichever comes first in the directory.
 *
 * @param {Buffer} data - the whole file
 * @returns {{findStream: function(string): (Buffer|null), entries: function(): Array}}
 *   `findStream` returns the stream bytes or null when absent/empty;
 *   `entries` lists the directory entries of type 2 (streams).
 * @throws {Error} when the file is shorter than 512 bytes, the signature does
 *   not match, or a sector-size shift is out of range. `findStream` throws
 *   when a stream claims more than MAX_STREAM_SIZE bytes.
 */
function parseLenientCfb(data) {
  if (data.length < 512) throw new Error("CFB \uD30C\uC77C\uC774 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 512\uBC14\uC774\uD2B8)");
  if (!data.subarray(0, 8).equals(CFB_MAGIC)) throw new Error("CFB \uB9E4\uC9C1 \uBC14\uC774\uD2B8 \uBD88\uC77C\uCE58");
  const sectorSizeShift = data.readUInt16LE(30);
  if (sectorSizeShift < 7 || sectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + sectorSizeShift);
  const sectorSize = 1 << sectorSizeShift;
  const miniSectorSizeShift = data.readUInt16LE(32);
  if (miniSectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uBBF8\uB2C8 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + miniSectorSizeShift);
  const miniSectorSize = 1 << miniSectorSizeShift;
  const fatSectorCount = data.readUInt32LE(44);
  const firstDirSector = data.readUInt32LE(48);
  const miniStreamCutoff = data.readUInt32LE(56);
  const firstMiniFatSector = data.readUInt32LE(60);
  const miniFatSectorCount = data.readUInt32LE(64);
  const firstDifatSector = data.readUInt32LE(68);
  const difatSectorCount = data.readUInt32LE(72);
  // The header occupies one whole sector: with 4096-byte sectors the 512
  // header bytes are padded, so sector 0 begins at 4096, not 512. For sector
  // sizes up to 512 this is the same 512-byte offset as before.
  const headerSpan = Math.max(512, sectorSize);
  function sectorOffset(id) {
    return headerSpan + id * sectorSize;
  }
  // Returns the sector's bytes, or an empty buffer when it lies (partly)
  // outside the file.
  function readSectorData(id) {
    const off = sectorOffset(id);
    if (off + sectorSize > data.length) return Buffer.alloc(0);
    return data.subarray(off, off + sectorSize);
  }
  // FAT sector list: first the 109 slots embedded in the header ...
  const fatSectors = [];
  for (let i = 0; i < 109 && fatSectors.length < fatSectorCount; i++) {
    const sid = data.readUInt32LE(76 + i * 4);
    if (sid === FREE_SECT || sid === END_OF_CHAIN) break;
    fatSectors.push(sid);
  }
  // ... then any additional DIFAT sectors, whose last slot links to the next one.
  const difatEntriesPerSector = sectorSize / 4 - 1;
  let difatSector = firstDifatSector;
  const visitedDifat = /* @__PURE__ */ new Set();
  for (let d = 0; d < difatSectorCount && difatSector !== END_OF_CHAIN && difatSector !== FREE_SECT; d++) {
    if (visitedDifat.has(difatSector)) break;
    visitedDifat.add(difatSector);
    const buf = readSectorData(difatSector);
    // A truncated or out-of-range DIFAT sector ends the list; reading from the
    // empty buffer would throw and defeat the lenient recovery.
    if (buf.length < sectorSize) break;
    for (let i = 0; i < difatEntriesPerSector && fatSectors.length < fatSectorCount; i++) {
      const sid = buf.readUInt32LE(i * 4);
      if (sid === FREE_SECT || sid === END_OF_CHAIN) continue;
      fatSectors.push(sid);
    }
    difatSector = buf.readUInt32LE(difatEntriesPerSector * 4);
  }
  // Flatten all FAT sectors into one table; unreadable slots count as free.
  const entriesPerFatSector = sectorSize / 4;
  const fatTable = new Uint32Array(fatSectors.length * entriesPerFatSector);
  for (let fi = 0; fi < fatSectors.length; fi++) {
    const buf = readSectorData(fatSectors[fi]);
    for (let i = 0; i < entriesPerFatSector; i++) {
      fatTable[fi * entriesPerFatSector + i] = i * 4 + 3 < buf.length ? buf.readUInt32LE(i * 4) : FREE_SECT;
    }
  }
  // Follows a FAT chain and returns at most maxBytes. Cycles and overlong
  // chains stop the walk; sectors outside the file contribute nothing.
  function readChain(startSector, maxBytes) {
    if (startSector === END_OF_CHAIN || startSector === FREE_SECT) return Buffer.alloc(0);
    if (maxBytes > MAX_STREAM_SIZE) throw new Error("\uC2A4\uD2B8\uB9BC\uC774 \uB108\uBB34 \uD07D\uB2C8\uB2E4");
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < maxBytes) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const buf = readSectorData(current);
      const remaining = maxBytes - totalRead;
      chunks.push(remaining < sectorSize ? buf.subarray(0, remaining) : buf);
      totalRead += Math.min(buf.length, remaining);
      current = current < fatTable.length ? fatTable[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }
  // Mini FAT, loaded on first use.
  let miniFatTable = null;
  function getMiniFatTable() {
    if (miniFatTable) return miniFatTable;
    if (miniFatSectorCount === 0 || firstMiniFatSector === END_OF_CHAIN) {
      miniFatTable = new Uint32Array(0);
      return miniFatTable;
    }
    const miniFatData = readChain(firstMiniFatSector, miniFatSectorCount * sectorSize);
    const entries = miniFatData.length / 4;
    miniFatTable = new Uint32Array(entries);
    for (let i = 0; i < entries; i++) {
      miniFatTable[i] = miniFatData.readUInt32LE(i * 4);
    }
    return miniFatTable;
  }
  // Directory: 128-byte entries. Entries with an implausible name length are
  // kept as blank placeholders so entry indices stay aligned.
  const dirData = readChain(firstDirSector, MAX_DIR_ENTRIES * 128);
  const dirEntries = [];
  for (let offset = 0; offset + 128 <= dirData.length && dirEntries.length < MAX_DIR_ENTRIES; offset += 128) {
    const nameLen = dirData.readUInt16LE(offset + 64);
    if (nameLen <= 0 || nameLen > 64) {
      dirEntries.push({ name: "", type: 0, startSector: 0, size: 0 });
      continue;
    }
    // nameLen includes the two-byte UTF-16 terminator.
    const nameBytes = nameLen - 2;
    const name = nameBytes > 0 ? dirData.subarray(offset, offset + nameBytes).toString("utf16le") : "";
    const type = dirData[offset + 66];
    const startSector = dirData.readUInt32LE(offset + 116);
    const size = dirData.readUInt32LE(offset + 120);
    dirEntries.push({ name, type, startSector, size });
  }
  // Mini stream container (the chain owned by the root entry, type 5),
  // loaded on first use.
  let miniStreamData = null;
  function getMiniStream() {
    if (miniStreamData) return miniStreamData;
    const root = dirEntries[0];
    if (!root || root.type !== 5) {
      miniStreamData = Buffer.alloc(0);
      return miniStreamData;
    }
    miniStreamData = readChain(root.startSector, root.size || MAX_STREAM_SIZE);
    return miniStreamData;
  }
  // Follows a mini-FAT chain inside the mini stream. Mini sectors that fall
  // outside the container are skipped but still count towards `size`.
  function readMiniStream(startSector, size) {
    const mft = getMiniFatTable();
    const ms = getMiniStream();
    if (mft.length === 0 || ms.length === 0) return Buffer.alloc(0);
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < size) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const off = current * miniSectorSize;
      const remaining = size - totalRead;
      const chunkSize = Math.min(miniSectorSize, remaining);
      if (off + chunkSize <= ms.length) {
        chunks.push(ms.subarray(off, off + chunkSize));
      }
      totalRead += chunkSize;
      current = current < mft.length ? mft[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }
  // Small streams are tried in the mini stream first; if that yields nothing
  // the regular FAT chain is used as a fallback.
  function readStreamData(entry) {
    if (entry.size === 0) return Buffer.alloc(0);
    if (entry.size < miniStreamCutoff) {
      const miniResult = readMiniStream(entry.startSector, entry.size);
      if (miniResult.length > 0) return miniResult;
    }
    return readChain(entry.startSector, entry.size);
  }
  // Name-based lookup. For "storage/stream" paths the storage segment is
  // ignored: first the remainder after the first segment is matched, then the
  // last segment alone.
  function findEntryByPath(path) {
    const parts = path.replace(/^\//, "").split("/");
    const isStreamNamed = (wanted) => (e) => e.type === 2 && e.name === wanted;
    if (parts.length === 1) {
      return dirEntries.find(isStreamNamed(parts[0])) ?? null;
    }
    const streamName = parts.slice(1).join("/");
    const exact = dirEntries.find(isStreamNamed(streamName));
    if (exact) return exact;
    const lastPart = parts[parts.length - 1];
    return dirEntries.find(isStreamNamed(lastPart)) ?? null;
  }
  return {
    findStream(path) {
      const normalized = path.replace(/^\//, "");
      const entry = findEntryByPath(normalized);
      if (!entry || entry.type !== 2) return null;
      const stream = readStreamData(entry);
      return stream.length > 0 ? stream : null;
    },
    entries() {
      return dirEntries.filter((e) => e.type === 2);
    }
  };
}
2055
+
1124
2056
  // src/hwp5/parser.ts
1125
2057
  import { createRequire } from "module";
1126
2058
  var require2 = createRequire(import.meta.url);
@@ -1128,21 +2060,40 @@ var CFB = require2("cfb");
1128
2060
  var MAX_SECTIONS = 100;
1129
2061
  var MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024;
1130
2062
  function parseHwp5Document(buffer, options) {
1131
- const cfb = CFB.parse(buffer);
1132
- const headerEntry = CFB.find(cfb, "/FileHeader");
1133
- if (!headerEntry?.content) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
1134
- const header = parseFileHeader(Buffer.from(headerEntry.content));
2063
+ let cfb = null;
2064
+ let lenientCfb = null;
2065
+ const warnings = [];
2066
+ try {
2067
+ cfb = CFB.parse(buffer);
2068
+ } catch {
2069
+ try {
2070
+ lenientCfb = parseLenientCfb(buffer);
2071
+ warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
2072
+ } catch {
2073
+ throw new KordocError("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
2074
+ }
2075
+ }
2076
+ const findStream = (path) => {
2077
+ if (cfb) {
2078
+ const entry = CFB.find(cfb, path);
2079
+ return entry?.content ? Buffer.from(entry.content) : null;
2080
+ }
2081
+ return lenientCfb.findStream(path);
2082
+ };
2083
+ const headerData = findStream("/FileHeader");
2084
+ if (!headerData) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
2085
+ const header = parseFileHeader(headerData);
1135
2086
  if (header.signature !== "HWP Document File") throw new KordocError("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
1136
2087
  if (header.flags & FLAG_ENCRYPTED) throw new KordocError("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1137
2088
  if (header.flags & FLAG_DRM) throw new KordocError("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1138
2089
  const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
2090
+ const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
1139
2091
  const metadata = {
1140
2092
  version: `${header.versionMajor}.x`
1141
2093
  };
1142
- extractHwp5Metadata(cfb, metadata);
1143
- const docInfo = parseDocInfoStream(cfb, compressed);
1144
- const warnings = [];
1145
- const sections = findSections(cfb);
2094
+ if (cfb) extractHwp5Metadata(cfb, metadata);
2095
+ const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
2096
+ const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
1146
2097
  if (sections.length === 0) throw new KordocError("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
1147
2098
  metadata.pageCount = sections.length;
1148
2099
  const pageFilter = options?.pages ? parsePageRange(options.pages, sections.length) : null;
@@ -1154,7 +2105,7 @@ function parseHwp5Document(buffer, options) {
1154
2105
  if (pageFilter && !pageFilter.has(si + 1)) continue;
1155
2106
  try {
1156
2107
  const sectionData = sections[si];
1157
- const data = compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
2108
+ const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
1158
2109
  totalDecompressed += data.length;
1159
2110
  if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new KordocError("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
1160
2111
  const records = readRecords(data);
@@ -1167,7 +2118,7 @@ function parseHwp5Document(buffer, options) {
1167
2118
  warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
1168
2119
  }
1169
2120
  }
1170
- const images = extractHwp5Images(cfb, blocks, compressed, warnings);
2121
+ const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
1171
2122
  if (docInfo) {
1172
2123
  detectHwp5Headings(blocks, docInfo);
1173
2124
  }
@@ -1186,6 +2137,15 @@ function parseDocInfoStream(cfb, compressed) {
1186
2137
  return null;
1187
2138
  }
1188
2139
  }
2140
/**
 * Builds the DocInfo model from an already-extracted DocInfo stream.
 *
 * @param {Buffer|null|undefined} raw - Stream bytes; a falsy value means the stream was not found.
 * @param {boolean} compressed - When true the bytes are passed through `decompressStream` first.
 * @returns {object|null} Whatever `parseDocInfo` returns, or `null` when the stream is
 *   missing or any step (inflate, record reading, parsing) throws.
 */
function parseDocInfoFromStream(raw, compressed) {
  if (raw) {
    try {
      const payload = compressed ? decompressStream(raw) : raw;
      const records = readRecords(payload);
      return parseDocInfo(records);
    } catch {
      // Best effort: an unreadable DocInfo is reported to the caller as "no DocInfo".
    }
  }
  return null;
}
1189
2149
  function detectHwp5Headings(blocks, docInfo) {
1190
2150
  let baseFontSize = 0;
1191
2151
  for (const style of docInfo.styles) {
@@ -1279,6 +2239,20 @@ function extractHwp5MetadataOnly(buffer) {
1279
2239
  metadata.pageCount = sections.length;
1280
2240
  return metadata;
1281
2241
  }
2242
/**
 * Collects the `/ViewText/SectionN` streams of a parsed CFB container and
 * runs each one through `decryptViewText`.
 *
 * Scanning starts at section 0 and stops at the first missing/empty stream,
 * at the first section whose decryption throws, or after `MAX_SECTIONS`.
 *
 * @param {object} cfb - Container handle accepted by `CFB.find`.
 * @param {boolean} compressed - Forwarded unchanged to `decryptViewText`.
 * @returns {Array} Decrypted section payloads ordered by section index.
 */
function findViewTextSections(cfb, compressed) {
  const found = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const stream = CFB.find(cfb, `/ViewText/Section${index}`);
    if (!stream?.content) break;
    let plain;
    try {
      plain = decryptViewText(Buffer.from(stream.content), compressed);
    } catch {
      // A section that cannot be decrypted ends the scan; later sections are not tried.
      break;
    }
    found.push({ idx: index, content: plain });
    index += 1;
  }
  found.sort((left, right) => left.idx - right.idx);
  return found.map(({ content }) => content);
}
1282
2256
  function findSections(cfb) {
1283
2257
  const sections = [];
1284
2258
  for (let i = 0; i < MAX_SECTIONS; i++) {
@@ -1297,6 +2271,38 @@ function findSections(cfb) {
1297
2271
  }
1298
2272
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
1299
2273
  }
2274
/**
 * Locates the body-text section streams through the lenient CFB reader.
 *
 * Lookup order:
 *   1. `/BodyText/SectionN`, falling back to a bare `SectionN`, for
 *      N = 0, 1, ... until the first miss or `MAX_SECTIONS`.
 *   2. If nothing was found, every directory entry whose name starts with
 *      "Section" is taken, ordered by the number in its name.
 *
 * The streams are returned RAW. `parseHwp5Document` already inflates every
 * non-distribution section itself when the compressed flag is set, so
 * inflating here as well made the lenient path decompress each section twice
 * and let an inflate error escape the caller's per-section error handling.
 *
 * @param {object} lcfb - Lenient reader exposing `findStream(name)` and `entries()`.
 * @param {boolean} compressed - Unused; retained so existing call sites keep working.
 * @returns {Array} Raw section stream contents ordered by section index.
 */
function findSectionsLenient(lcfb, compressed) {
  const sections = [];
  for (let i = 0; i < MAX_SECTIONS; i++) {
    const raw = lcfb.findStream(`/BodyText/Section${i}`) ?? lcfb.findStream(`Section${i}`);
    if (!raw) break;
    sections.push({ idx: i, content: raw });
  }
  if (sections.length === 0) {
    // Fallback for containers whose path lookup yields nothing: scan the directory by name.
    for (const entry of lcfb.entries()) {
      if (sections.length >= MAX_SECTIONS) break;
      if (!entry.name.startsWith("Section")) continue;
      // A non-numeric suffix parses to NaN and is treated as section 0.
      const idx = Number.parseInt(entry.name.replace("Section", ""), 10) || 0;
      const raw = lcfb.findStream(entry.name);
      if (raw) sections.push({ idx, content: raw });
    }
  }
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
}
2293
/**
 * Lenient-reader counterpart of the ViewText section lookup.
 *
 * For N = 0, 1, ... it reads `/ViewText/SectionN` (or a bare `SectionN` when
 * that is absent) and passes the bytes to `decryptViewText`. The scan ends at
 * the first missing stream, the first decryption error, or `MAX_SECTIONS`.
 *
 * @param {object} lcfb - Lenient reader exposing `findStream(name)`.
 * @param {boolean} compressed - Forwarded unchanged to `decryptViewText`.
 * @returns {Array} Decrypted section payloads ordered by section index.
 */
function findViewTextSectionsLenient(lcfb, compressed) {
  const collected = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const bytes = lcfb.findStream(`/ViewText/Section${index}`) ?? lcfb.findStream(`Section${index}`);
    if (!bytes) break;
    let plain;
    try {
      plain = decryptViewText(bytes, compressed);
    } catch {
      // Stop at the first section that cannot be decrypted.
      break;
    }
    collected.push({ idx: index, content: plain });
    index += 1;
  }
  collected.sort((left, right) => left.idx - right.idx);
  return collected.map(({ content }) => content);
}
1300
2306
  var TAG_SHAPE_COMPONENT = 74;
1301
2307
  function extractBinDataId(records, ctrlIdx) {
1302
2308
  const ctrlLevel = records[ctrlIdx].level;
@@ -1371,6 +2377,53 @@ function extractHwp5Images(cfb, blocks, compressed, warnings) {
1371
2377
  }
1372
2378
  return images;
1373
2379
  }
2380
/**
 * Resolves image blocks against the BinData streams of a leniently parsed
 * container.
 *
 * Every directory entry whose name starts with `BIN` followed by four decimal
 * digits is loaded and indexed by that number. Each block of type "image"
 * whose `text` parses as a number is then looked up in that index:
 *   - found and recognised by `detectImageMime`: an image record is appended
 *     to the result, `block.text` becomes the generated file name and
 *     `block.imageData` receives its own copy of the bytes;
 *   - missing, or of an unrecognised format: a SKIPPED_IMAGE warning is
 *     pushed and the block is turned into a placeholder paragraph.
 *
 * @param {object} lcfb - Lenient reader exposing `entries()` and `findStream(name)`.
 * @param {Array<object>} blocks - Parsed blocks; matching image blocks are mutated in place.
 * @param {boolean} compressed - When true each stream is inflated on a best-effort basis.
 * @param {Array<object>} warnings - Receives `{ page, message, code }` entries.
 * @returns {Array<{filename: string, data: Uint8Array, mimeType: string}>} Extracted images.
 */
function extractHwp5ImagesLenient(lcfb, blocks, compressed, warnings) {
  const binDataMap = /* @__PURE__ */ new Map();
  const binRe = /^BIN(\d{4})/i;
  for (const e of lcfb.entries()) {
    const match = e.name.match(binRe);
    if (!match) continue;
    const idx = Number.parseInt(match[1], 10);
    let raw = lcfb.findStream(e.name);
    if (!raw) continue;
    if (compressed) {
      try {
        raw = decompressStream(raw);
      } catch {
        // Deliberate best effort: if inflating fails the stored bytes are used as-is.
      }
    }
    binDataMap.set(idx, { data: raw, name: e.name });
  }
  if (binDataMap.size === 0) return [];
  const images = [];
  let imageIndex = 0;
  for (const block of blocks) {
    if (block.type !== "image" || !block.text) continue;
    const binId = Number.parseInt(block.text, 10);
    if (Number.isNaN(binId)) continue;
    const bin = binDataMap.get(binId);
    if (!bin) {
      // The message previously contained U+FFFD replacement characters (mojibake);
      // restored to the intended "BinData <id> not found" wording.
      warnings.push({ page: block.pageNumber, message: `BinData ${binId} \uC5C6\uC74C`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: BinData ${binId}]`;
      continue;
    }
    const mime = detectImageMime(bin.data);
    if (!mime) {
      warnings.push({ page: block.pageNumber, message: `BinData ${binId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: ${bin.name}]`;
      continue;
    }
    imageIndex++;
    const ext = mime.includes("jpeg") ? "jpg" : mime.includes("png") ? "png" : mime.includes("gif") ? "gif" : mime.includes("bmp") ? "bmp" : "bin";
    const filename = `image_${String(imageIndex).padStart(3, "0")}.${ext}`;
    // Two independent copies so that the result list and the block do not share a buffer.
    images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
    block.text = filename;
    block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
  }
  return images;
}
1374
2427
  function parseSection(records, docInfo, warnings, sectionNum) {
1375
2428
  const blocks = [];
1376
2429
  let i = 0;
@@ -1407,12 +2460,61 @@ function parseSection(records, docInfo, warnings, sectionNum) {
1407
2460
  }
1408
2461
  } else if (ctrlId === " elo" || ctrlId === "ole ") {
1409
2462
  warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
2463
+ } else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
2464
+ const noteText = extractNoteText(records, i);
2465
+ if (noteText && blocks.length > 0) {
2466
+ const lastBlock = blocks[blocks.length - 1];
2467
+ if (lastBlock.type === "paragraph") {
2468
+ lastBlock.footnoteText = lastBlock.footnoteText ? lastBlock.footnoteText + "; " + noteText : noteText;
2469
+ }
2470
+ }
2471
+ } else if (ctrlId === "%tok" || ctrlId === "klnk") {
2472
+ const url = extractHyperlinkUrl(rec.data);
2473
+ if (url && blocks.length > 0) {
2474
+ const lastBlock = blocks[blocks.length - 1];
2475
+ if (lastBlock.type === "paragraph" && !lastBlock.href) {
2476
+ lastBlock.href = sanitizeHref(url) ?? void 0;
2477
+ }
2478
+ }
1410
2479
  }
1411
2480
  }
1412
2481
  i++;
1413
2482
  }
1414
2483
  return blocks;
1415
2484
  }
2485
/**
 * Gathers the text of the records nested under a note control record.
 *
 * Walks forward from the record after `ctrlIdx`, looking at no more than the
 * following 99 records, and stops early at the first record whose level is
 * not deeper than the control's own level. Every `TAG_PARA_TEXT` record on
 * the way contributes its trimmed, non-empty text.
 *
 * @param {Array<object>} records - Flat record list with `level`, `tagId` and `data`.
 * @param {number} ctrlIdx - Index of the control record inside `records`.
 * @returns {string|null} The collected texts joined by single spaces, or `null` if none.
 */
function extractNoteText(records, ctrlIdx) {
  const parentLevel = records[ctrlIdx].level;
  const limit = Math.min(records.length, ctrlIdx + 100);
  const pieces = [];
  for (let pos = ctrlIdx + 1; pos < limit; pos += 1) {
    const record = records[pos];
    if (record.level <= parentLevel) break;
    if (record.tagId !== TAG_PARA_TEXT) continue;
    const text = extractText(record.data).trim();
    if (text) pieces.push(text);
  }
  return pieces.length === 0 ? null : pieces.join(" ");
}
2498
/**
 * Pulls an http(s) URL out of a control record payload.
 *
 * The payload is searched for the UTF-16LE byte sequence of "http". From the
 * first hit, 16-bit units are consumed up to a zero unit or the end of the
 * buffer, and the decoded text is accepted only if it starts with
 * `http://` or `https://` followed by at least one character and is shorter
 * than 2000 characters. Only the first "http" occurrence is considered.
 *
 * @param {Buffer} data - Raw record payload.
 * @returns {string|null} The URL, or `null` when none is found or reading fails.
 */
function extractHyperlinkUrl(data) {
  try {
    const start = data.indexOf(Buffer.from("http", "utf16le"));
    if (start < 0) return null;
    let stop = start;
    // Advance one UTF-16 code unit at a time until a NUL unit or the buffer end.
    for (; stop + 1 < data.length; stop += 2) {
      if (data.readUInt16LE(stop) === 0) break;
    }
    const candidate = data.subarray(start, stop).toString("utf16le");
    const looksLikeUrl = /^https?:\/\/.+/.test(candidate);
    if (looksLikeUrl && candidate.length < 2e3) return candidate;
  } catch {
    // Any failure while probing the payload simply means "no URL".
  }
  return null;
}
1416
2518
  function resolveCharStyle(charShapeIds, docInfo) {
1417
2519
  if (charShapeIds.length === 0 || docInfo.charShapes.length === 0) return void 0;
1418
2520
  const freq = /* @__PURE__ */ new Map();
@@ -4261,4 +5363,4 @@ export {
4261
5363
  extractFormFields,
4262
5364
  parse
4263
5365
  };
4264
- //# sourceMappingURL=chunk-QQ6PZADA.js.map
5366
+ //# sourceMappingURL=chunk-UUHAAZYN.js.map