kordoc 1.8.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -181,35 +181,106 @@ var import_jszip2 = __toESM(require("jszip"), 1);
181
181
  var import_zlib = require("zlib");
182
182
  var import_xmldom = require("@xmldom/xmldom");
183
183
 
184
- // src/table/builder.ts
184
+ // src/utils.ts
185
+ var VERSION = true ? "2.0.0" : "0.0.0-dev";
186
+ function toArrayBuffer(buf) {
187
+ if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
188
+ return buf.buffer;
189
+ }
190
+ return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
191
+ }
192
+ var KordocError = class extends Error {
193
+ constructor(message) {
194
+ super(message);
195
+ this.name = "KordocError";
196
+ }
197
+ };
198
+ function isPathTraversal(name) {
199
+ if (name.includes("\0")) return true;
200
+ const normalized = name.replace(/\\/g, "/");
201
+ return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
202
+ }
203
+ function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
204
+ try {
205
+ const data = new DataView(buffer);
206
+ const len = buffer.byteLength;
207
+ let eocdOffset = -1;
208
+ for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
209
+ if (data.getUint32(i, true) === 101010256) {
210
+ eocdOffset = i;
211
+ break;
212
+ }
213
+ }
214
+ if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
215
+ const entryCount = data.getUint16(eocdOffset + 10, true);
216
+ if (entryCount > maxEntries) {
217
+ throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
218
+ }
219
+ const cdSize = data.getUint32(eocdOffset + 12, true);
220
+ const cdOffset = data.getUint32(eocdOffset + 16, true);
221
+ if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
222
+ let totalUncompressed = 0;
223
+ let pos = cdOffset;
224
+ for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
225
+ if (data.getUint32(pos, true) !== 33639248) break;
226
+ totalUncompressed += data.getUint32(pos + 24, true);
227
+ const nameLen = data.getUint16(pos + 28, true);
228
+ const extraLen = data.getUint16(pos + 30, true);
229
+ const commentLen = data.getUint16(pos + 32, true);
230
+ pos += 46 + nameLen + extraLen + commentLen;
231
+ }
232
+ if (totalUncompressed > maxUncompressedSize) {
233
+ throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
234
+ }
235
+ return { totalUncompressed, entryCount };
236
+ } catch (err) {
237
+ if (err instanceof KordocError) throw err;
238
+ return { totalUncompressed: 0, entryCount: 0 };
239
+ }
240
+ }
185
241
  var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
186
242
  function sanitizeHref(href) {
187
243
  const trimmed = href.trim();
188
244
  if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
189
245
  return trimmed;
190
246
  }
247
+ function classifyError(err) {
248
+ if (!(err instanceof Error)) return "PARSE_ERROR";
249
+ const msg = err.message;
250
+ if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
251
+ if (msg.includes("DRM")) return "DRM_PROTECTED";
252
+ if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
253
+ if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
254
+ if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
255
+ if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
256
+ if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
257
+ return "PARSE_ERROR";
258
+ }
259
+
260
+ // src/table/builder.ts
191
261
  var MAX_COLS = 200;
192
262
  var MAX_ROWS = 1e4;
193
263
  function buildTable(rows) {
194
264
  if (rows.length > MAX_ROWS) rows = rows.slice(0, MAX_ROWS);
195
265
  const numRows = rows.length;
196
- const tempOccupied = /* @__PURE__ */ new Set();
266
+ const hasAddr = rows.some((row) => row.some((c) => c.colAddr !== void 0 && c.rowAddr !== void 0));
267
+ if (hasAddr) return buildTableDirect(rows, numRows);
197
268
  let maxCols = 0;
269
+ const tempOccupied = Array.from({ length: numRows }, () => []);
198
270
  for (let rowIdx = 0; rowIdx < numRows; rowIdx++) {
199
271
  let colIdx = 0;
200
272
  for (const cell of rows[rowIdx]) {
201
- while (colIdx < MAX_COLS && tempOccupied.has(rowIdx * MAX_COLS + colIdx)) colIdx++;
273
+ while (colIdx < MAX_COLS && tempOccupied[rowIdx][colIdx]) colIdx++;
202
274
  if (colIdx >= MAX_COLS) break;
203
275
  for (let r = rowIdx; r < Math.min(rowIdx + cell.rowSpan, numRows); r++) {
204
276
  for (let c = colIdx; c < Math.min(colIdx + cell.colSpan, MAX_COLS); c++) {
205
- tempOccupied.add(r * MAX_COLS + c);
277
+ tempOccupied[r][c] = true;
206
278
  }
207
279
  }
208
280
  colIdx += cell.colSpan;
209
281
  if (colIdx > maxCols) maxCols = colIdx;
210
282
  }
211
283
  }
212
- tempOccupied.clear();
213
284
  if (maxCols === 0) return { rows: 0, cols: 0, cells: [], hasHeader: false };
214
285
  const grid = Array.from(
215
286
  { length: numRows },
@@ -237,6 +308,40 @@ function buildTable(rows) {
237
308
  cellIdx++;
238
309
  }
239
310
  }
311
+ return trimAndReturn(grid, numRows, maxCols);
312
+ }
313
+ function buildTableDirect(rows, numRows) {
314
+ let maxCols = 0;
315
+ for (const row of rows) {
316
+ for (const cell of row) {
317
+ const end = (cell.colAddr ?? 0) + cell.colSpan;
318
+ if (end > maxCols) maxCols = end;
319
+ }
320
+ }
321
+ if (maxCols === 0) return { rows: 0, cols: 0, cells: [], hasHeader: false };
322
+ const grid = Array.from(
323
+ { length: numRows },
324
+ () => Array.from({ length: maxCols }, () => ({ text: "", colSpan: 1, rowSpan: 1 }))
325
+ );
326
+ for (const row of rows) {
327
+ for (const cell of row) {
328
+ const r = cell.rowAddr ?? 0;
329
+ const c = cell.colAddr ?? 0;
330
+ if (r >= numRows || c >= maxCols) continue;
331
+ grid[r][c] = { text: cell.text.trim(), colSpan: cell.colSpan, rowSpan: cell.rowSpan };
332
+ for (let dr = 0; dr < cell.rowSpan; dr++) {
333
+ for (let dc = 0; dc < cell.colSpan; dc++) {
334
+ if (dr === 0 && dc === 0) continue;
335
+ if (r + dr < numRows && c + dc < maxCols) {
336
+ grid[r + dr][c + dc] = { text: "", colSpan: 1, rowSpan: 1 };
337
+ }
338
+ }
339
+ }
340
+ }
341
+ }
342
+ return trimAndReturn(grid, numRows, maxCols);
343
+ }
344
+ function trimAndReturn(grid, numRows, maxCols) {
240
345
  let effectiveCols = maxCols;
241
346
  while (effectiveCols > 0) {
242
347
  const colEmpty = grid.every((row) => !row[effectiveCols - 1]?.text?.trim());
@@ -352,12 +457,10 @@ function tableToMarkdown(table) {
352
457
  const display = Array.from({ length: numRows }, () => Array(numCols).fill(""));
353
458
  const skip = /* @__PURE__ */ new Set();
354
459
  for (let r = 0; r < numRows; r++) {
355
- let cellIdx = 0;
356
460
  for (let c = 0; c < numCols; c++) {
357
461
  if (skip.has(`${r},${c}`)) continue;
358
- const cell = cells[r]?.[cellIdx];
359
- if (!cell) break;
360
- cellIdx++;
462
+ const cell = cells[r]?.[c];
463
+ if (!cell) continue;
361
464
  display[r][c] = sanitizeText(cell.text).replace(/\n/g, "<br>");
362
465
  for (let dr = 0; dr < cell.rowSpan; dr++) {
363
466
  for (let dc = 0; dc < cell.colSpan; dc++) {
@@ -405,82 +508,6 @@ var HEADING_RATIO_H1 = 1.5;
405
508
  var HEADING_RATIO_H2 = 1.3;
406
509
  var HEADING_RATIO_H3 = 1.15;
407
510
 
408
- // src/utils.ts
409
- var VERSION = true ? "1.8.0" : "0.0.0-dev";
410
- function toArrayBuffer(buf) {
411
- if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
412
- return buf.buffer;
413
- }
414
- return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
415
- }
416
- var KordocError = class extends Error {
417
- constructor(message) {
418
- super(message);
419
- this.name = "KordocError";
420
- }
421
- };
422
- function isPathTraversal(name) {
423
- if (name.includes("\0")) return true;
424
- const normalized = name.replace(/\\/g, "/");
425
- return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
426
- }
427
- function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
428
- try {
429
- const data = new DataView(buffer);
430
- const len = buffer.byteLength;
431
- let eocdOffset = -1;
432
- for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
433
- if (data.getUint32(i, true) === 101010256) {
434
- eocdOffset = i;
435
- break;
436
- }
437
- }
438
- if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
439
- const entryCount = data.getUint16(eocdOffset + 10, true);
440
- if (entryCount > maxEntries) {
441
- throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
442
- }
443
- const cdSize = data.getUint32(eocdOffset + 12, true);
444
- const cdOffset = data.getUint32(eocdOffset + 16, true);
445
- if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
446
- let totalUncompressed = 0;
447
- let pos = cdOffset;
448
- for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
449
- if (data.getUint32(pos, true) !== 33639248) break;
450
- totalUncompressed += data.getUint32(pos + 24, true);
451
- const nameLen = data.getUint16(pos + 28, true);
452
- const extraLen = data.getUint16(pos + 30, true);
453
- const commentLen = data.getUint16(pos + 32, true);
454
- pos += 46 + nameLen + extraLen + commentLen;
455
- }
456
- if (totalUncompressed > maxUncompressedSize) {
457
- throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
458
- }
459
- return { totalUncompressed, entryCount };
460
- } catch (err) {
461
- if (err instanceof KordocError) throw err;
462
- return { totalUncompressed: 0, entryCount: 0 };
463
- }
464
- }
465
- var SAFE_HREF_RE2 = /^(?:https?:|mailto:|tel:|#)/i;
466
- function sanitizeHref2(href) {
467
- const trimmed = href.trim();
468
- if (!trimmed || !SAFE_HREF_RE2.test(trimmed)) return null;
469
- return trimmed;
470
- }
471
- function classifyError(err) {
472
- if (!(err instanceof Error)) return "PARSE_ERROR";
473
- const msg = err.message;
474
- if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
475
- if (msg.includes("DRM")) return "DRM_PROTECTED";
476
- if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
477
- if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
478
- if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
479
- if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
480
- if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
481
- return "PARSE_ERROR";
482
- }
483
-
484
511
  // src/hwpx/parser.ts
485
512
  init_page_range();
486
513
  var MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024;
@@ -953,6 +980,14 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
953
980
  }
954
981
  }
955
982
  break;
983
+ case "cellAddr":
984
+ if (tableCtx?.cell) {
985
+ const ca = parseInt(el.getAttribute("colAddr") || "", 10);
986
+ const ra = parseInt(el.getAttribute("rowAddr") || "", 10);
987
+ if (!isNaN(ca)) tableCtx.cell.colAddr = ca;
988
+ if (!isNaN(ra)) tableCtx.cell.rowAddr = ra;
989
+ }
990
+ break;
956
991
  case "cellSpan":
957
992
  if (tableCtx?.cell) {
958
993
  const cs = parseInt(el.getAttribute("colSpan") || "1", 10);
@@ -1079,7 +1114,7 @@ function extractParagraphInfo(para, styleMap) {
1079
1114
  case "hyperlink": {
1080
1115
  const url = child.getAttribute("url") || child.getAttribute("href") || "";
1081
1116
  if (url) {
1082
- const safe = sanitizeHref2(url);
1117
+ const safe = sanitizeHref(url);
1083
1118
  if (safe) href = safe;
1084
1119
  }
1085
1120
  walk(child);
@@ -1172,13 +1207,16 @@ var TAG_TABLE = 77;
1172
1207
  var TAG_DOC_CHAR_SHAPE = 55;
1173
1208
  var TAG_DOC_STYLE = 58;
1174
1209
  var CHAR_LINE = 0;
1210
+ var CHAR_SECTION_BREAK = 10;
1175
1211
  var CHAR_PARA = 13;
1176
1212
  var CHAR_TAB = 9;
1177
1213
  var CHAR_HYPHEN = 30;
1178
1214
  var CHAR_NBSP = 31;
1179
1215
  var CHAR_FIXED_NBSP = 24;
1216
+ var CHAR_FIXED_WIDTH = 25;
1180
1217
  var FLAG_COMPRESSED = 1 << 0;
1181
1218
  var FLAG_ENCRYPTED = 1 << 1;
1219
+ var FLAG_DISTRIBUTION = 1 << 2;
1182
1220
  var FLAG_DRM = 1 << 4;
1183
1221
  var MAX_RECORDS = 5e5;
1184
1222
  function readRecords(data) {
@@ -1273,27 +1311,41 @@ function extractText(data) {
1273
1311
  const ch = data.readUInt16LE(i);
1274
1312
  i += 2;
1275
1313
  switch (ch) {
1314
+ // ── char 타입 (2바이트만, 확장 데이터 없음) ──
1276
1315
  case CHAR_LINE:
1277
1316
  result += "\n";
1278
1317
  break;
1279
- case CHAR_PARA:
1280
- break;
1281
- case CHAR_TAB:
1282
- result += " ";
1318
+ case CHAR_SECTION_BREAK:
1319
+ result += "\n";
1283
1320
  if (i + 14 <= data.length) i += 14;
1284
1321
  break;
1322
+ case CHAR_PARA:
1323
+ break;
1324
+ // 문단 끝
1285
1325
  case CHAR_HYPHEN:
1286
1326
  result += "-";
1287
1327
  break;
1288
1328
  case CHAR_NBSP:
1329
+ result += " ";
1330
+ break;
1289
1331
  case CHAR_FIXED_NBSP:
1332
+ result += "\xA0";
1333
+ break;
1334
+ // 진짜 NBSP
1335
+ case CHAR_FIXED_WIDTH:
1290
1336
  result += " ";
1291
1337
  break;
1338
+ // 고정폭 공백
1339
+ // ── inline 타입 (2바이트 + 14바이트 확장) ──
1340
+ case CHAR_TAB:
1341
+ result += " ";
1342
+ if (i + 14 <= data.length) i += 14;
1343
+ break;
1292
1344
  default:
1293
1345
  if (ch >= 1 && ch <= 31) {
1294
- const isExt = ch >= 1 && ch <= 3 || ch >= 10 && ch <= 18 || ch >= 21 && ch <= 23;
1346
+ const isExtended = ch >= 1 && ch <= 3 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= 18 || ch >= 21 && ch <= 23;
1295
1347
  const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
1296
- if ((isExt || isInline) && i + 14 <= data.length) i += 14;
1348
+ if ((isExtended || isInline) && i + 14 <= data.length) i += 14;
1297
1349
  } else if (ch >= 32) {
1298
1350
  if (ch >= 55296 && ch <= 56319 && i + 1 < data.length) {
1299
1351
  const lo = data.readUInt16LE(i);
@@ -1312,6 +1364,886 @@ function extractText(data) {
1312
1364
  return result;
1313
1365
  }
1314
1366
 
1367
+ // src/hwp5/aes.ts
1368
+ var S_BOX = new Uint8Array([
1369
+ 99,
1370
+ 124,
1371
+ 119,
1372
+ 123,
1373
+ 242,
1374
+ 107,
1375
+ 111,
1376
+ 197,
1377
+ 48,
1378
+ 1,
1379
+ 103,
1380
+ 43,
1381
+ 254,
1382
+ 215,
1383
+ 171,
1384
+ 118,
1385
+ 202,
1386
+ 130,
1387
+ 201,
1388
+ 125,
1389
+ 250,
1390
+ 89,
1391
+ 71,
1392
+ 240,
1393
+ 173,
1394
+ 212,
1395
+ 162,
1396
+ 175,
1397
+ 156,
1398
+ 164,
1399
+ 114,
1400
+ 192,
1401
+ 183,
1402
+ 253,
1403
+ 147,
1404
+ 38,
1405
+ 54,
1406
+ 63,
1407
+ 247,
1408
+ 204,
1409
+ 52,
1410
+ 165,
1411
+ 229,
1412
+ 241,
1413
+ 113,
1414
+ 216,
1415
+ 49,
1416
+ 21,
1417
+ 4,
1418
+ 199,
1419
+ 35,
1420
+ 195,
1421
+ 24,
1422
+ 150,
1423
+ 5,
1424
+ 154,
1425
+ 7,
1426
+ 18,
1427
+ 128,
1428
+ 226,
1429
+ 235,
1430
+ 39,
1431
+ 178,
1432
+ 117,
1433
+ 9,
1434
+ 131,
1435
+ 44,
1436
+ 26,
1437
+ 27,
1438
+ 110,
1439
+ 90,
1440
+ 160,
1441
+ 82,
1442
+ 59,
1443
+ 214,
1444
+ 179,
1445
+ 41,
1446
+ 227,
1447
+ 47,
1448
+ 132,
1449
+ 83,
1450
+ 209,
1451
+ 0,
1452
+ 237,
1453
+ 32,
1454
+ 252,
1455
+ 177,
1456
+ 91,
1457
+ 106,
1458
+ 203,
1459
+ 190,
1460
+ 57,
1461
+ 74,
1462
+ 76,
1463
+ 88,
1464
+ 207,
1465
+ 208,
1466
+ 239,
1467
+ 170,
1468
+ 251,
1469
+ 67,
1470
+ 77,
1471
+ 51,
1472
+ 133,
1473
+ 69,
1474
+ 249,
1475
+ 2,
1476
+ 127,
1477
+ 80,
1478
+ 60,
1479
+ 159,
1480
+ 168,
1481
+ 81,
1482
+ 163,
1483
+ 64,
1484
+ 143,
1485
+ 146,
1486
+ 157,
1487
+ 56,
1488
+ 245,
1489
+ 188,
1490
+ 182,
1491
+ 218,
1492
+ 33,
1493
+ 16,
1494
+ 255,
1495
+ 243,
1496
+ 210,
1497
+ 205,
1498
+ 12,
1499
+ 19,
1500
+ 236,
1501
+ 95,
1502
+ 151,
1503
+ 68,
1504
+ 23,
1505
+ 196,
1506
+ 167,
1507
+ 126,
1508
+ 61,
1509
+ 100,
1510
+ 93,
1511
+ 25,
1512
+ 115,
1513
+ 96,
1514
+ 129,
1515
+ 79,
1516
+ 220,
1517
+ 34,
1518
+ 42,
1519
+ 144,
1520
+ 136,
1521
+ 70,
1522
+ 238,
1523
+ 184,
1524
+ 20,
1525
+ 222,
1526
+ 94,
1527
+ 11,
1528
+ 219,
1529
+ 224,
1530
+ 50,
1531
+ 58,
1532
+ 10,
1533
+ 73,
1534
+ 6,
1535
+ 36,
1536
+ 92,
1537
+ 194,
1538
+ 211,
1539
+ 172,
1540
+ 98,
1541
+ 145,
1542
+ 149,
1543
+ 228,
1544
+ 121,
1545
+ 231,
1546
+ 200,
1547
+ 55,
1548
+ 109,
1549
+ 141,
1550
+ 213,
1551
+ 78,
1552
+ 169,
1553
+ 108,
1554
+ 86,
1555
+ 244,
1556
+ 234,
1557
+ 101,
1558
+ 122,
1559
+ 174,
1560
+ 8,
1561
+ 186,
1562
+ 120,
1563
+ 37,
1564
+ 46,
1565
+ 28,
1566
+ 166,
1567
+ 180,
1568
+ 198,
1569
+ 232,
1570
+ 221,
1571
+ 116,
1572
+ 31,
1573
+ 75,
1574
+ 189,
1575
+ 139,
1576
+ 138,
1577
+ 112,
1578
+ 62,
1579
+ 181,
1580
+ 102,
1581
+ 72,
1582
+ 3,
1583
+ 246,
1584
+ 14,
1585
+ 97,
1586
+ 53,
1587
+ 87,
1588
+ 185,
1589
+ 134,
1590
+ 193,
1591
+ 29,
1592
+ 158,
1593
+ 225,
1594
+ 248,
1595
+ 152,
1596
+ 17,
1597
+ 105,
1598
+ 217,
1599
+ 142,
1600
+ 148,
1601
+ 155,
1602
+ 30,
1603
+ 135,
1604
+ 233,
1605
+ 206,
1606
+ 85,
1607
+ 40,
1608
+ 223,
1609
+ 140,
1610
+ 161,
1611
+ 137,
1612
+ 13,
1613
+ 191,
1614
+ 230,
1615
+ 66,
1616
+ 104,
1617
+ 65,
1618
+ 153,
1619
+ 45,
1620
+ 15,
1621
+ 176,
1622
+ 84,
1623
+ 187,
1624
+ 22
1625
+ ]);
1626
+ var INV_S_BOX = new Uint8Array([
1627
+ 82,
1628
+ 9,
1629
+ 106,
1630
+ 213,
1631
+ 48,
1632
+ 54,
1633
+ 165,
1634
+ 56,
1635
+ 191,
1636
+ 64,
1637
+ 163,
1638
+ 158,
1639
+ 129,
1640
+ 243,
1641
+ 215,
1642
+ 251,
1643
+ 124,
1644
+ 227,
1645
+ 57,
1646
+ 130,
1647
+ 155,
1648
+ 47,
1649
+ 255,
1650
+ 135,
1651
+ 52,
1652
+ 142,
1653
+ 67,
1654
+ 68,
1655
+ 196,
1656
+ 222,
1657
+ 233,
1658
+ 203,
1659
+ 84,
1660
+ 123,
1661
+ 148,
1662
+ 50,
1663
+ 166,
1664
+ 194,
1665
+ 35,
1666
+ 61,
1667
+ 238,
1668
+ 76,
1669
+ 149,
1670
+ 11,
1671
+ 66,
1672
+ 250,
1673
+ 195,
1674
+ 78,
1675
+ 8,
1676
+ 46,
1677
+ 161,
1678
+ 102,
1679
+ 40,
1680
+ 217,
1681
+ 36,
1682
+ 178,
1683
+ 118,
1684
+ 91,
1685
+ 162,
1686
+ 73,
1687
+ 109,
1688
+ 139,
1689
+ 209,
1690
+ 37,
1691
+ 114,
1692
+ 248,
1693
+ 246,
1694
+ 100,
1695
+ 134,
1696
+ 104,
1697
+ 152,
1698
+ 22,
1699
+ 212,
1700
+ 164,
1701
+ 92,
1702
+ 204,
1703
+ 93,
1704
+ 101,
1705
+ 182,
1706
+ 146,
1707
+ 108,
1708
+ 112,
1709
+ 72,
1710
+ 80,
1711
+ 253,
1712
+ 237,
1713
+ 185,
1714
+ 218,
1715
+ 94,
1716
+ 21,
1717
+ 70,
1718
+ 87,
1719
+ 167,
1720
+ 141,
1721
+ 157,
1722
+ 132,
1723
+ 144,
1724
+ 216,
1725
+ 171,
1726
+ 0,
1727
+ 140,
1728
+ 188,
1729
+ 211,
1730
+ 10,
1731
+ 247,
1732
+ 228,
1733
+ 88,
1734
+ 5,
1735
+ 184,
1736
+ 179,
1737
+ 69,
1738
+ 6,
1739
+ 208,
1740
+ 44,
1741
+ 30,
1742
+ 143,
1743
+ 202,
1744
+ 63,
1745
+ 15,
1746
+ 2,
1747
+ 193,
1748
+ 175,
1749
+ 189,
1750
+ 3,
1751
+ 1,
1752
+ 19,
1753
+ 138,
1754
+ 107,
1755
+ 58,
1756
+ 145,
1757
+ 17,
1758
+ 65,
1759
+ 79,
1760
+ 103,
1761
+ 220,
1762
+ 234,
1763
+ 151,
1764
+ 242,
1765
+ 207,
1766
+ 206,
1767
+ 240,
1768
+ 180,
1769
+ 230,
1770
+ 115,
1771
+ 150,
1772
+ 172,
1773
+ 116,
1774
+ 34,
1775
+ 231,
1776
+ 173,
1777
+ 53,
1778
+ 133,
1779
+ 226,
1780
+ 249,
1781
+ 55,
1782
+ 232,
1783
+ 28,
1784
+ 117,
1785
+ 223,
1786
+ 110,
1787
+ 71,
1788
+ 241,
1789
+ 26,
1790
+ 113,
1791
+ 29,
1792
+ 41,
1793
+ 197,
1794
+ 137,
1795
+ 111,
1796
+ 183,
1797
+ 98,
1798
+ 14,
1799
+ 170,
1800
+ 24,
1801
+ 190,
1802
+ 27,
1803
+ 252,
1804
+ 86,
1805
+ 62,
1806
+ 75,
1807
+ 198,
1808
+ 210,
1809
+ 121,
1810
+ 32,
1811
+ 154,
1812
+ 219,
1813
+ 192,
1814
+ 254,
1815
+ 120,
1816
+ 205,
1817
+ 90,
1818
+ 244,
1819
+ 31,
1820
+ 221,
1821
+ 168,
1822
+ 51,
1823
+ 136,
1824
+ 7,
1825
+ 199,
1826
+ 49,
1827
+ 177,
1828
+ 18,
1829
+ 16,
1830
+ 89,
1831
+ 39,
1832
+ 128,
1833
+ 236,
1834
+ 95,
1835
+ 96,
1836
+ 81,
1837
+ 127,
1838
+ 169,
1839
+ 25,
1840
+ 181,
1841
+ 74,
1842
+ 13,
1843
+ 45,
1844
+ 229,
1845
+ 122,
1846
+ 159,
1847
+ 147,
1848
+ 201,
1849
+ 156,
1850
+ 239,
1851
+ 160,
1852
+ 224,
1853
+ 59,
1854
+ 77,
1855
+ 174,
1856
+ 42,
1857
+ 245,
1858
+ 176,
1859
+ 200,
1860
+ 235,
1861
+ 187,
1862
+ 60,
1863
+ 131,
1864
+ 83,
1865
+ 153,
1866
+ 97,
1867
+ 23,
1868
+ 43,
1869
+ 4,
1870
+ 126,
1871
+ 186,
1872
+ 119,
1873
+ 214,
1874
+ 38,
1875
+ 225,
1876
+ 105,
1877
+ 20,
1878
+ 99,
1879
+ 85,
1880
+ 33,
1881
+ 12,
1882
+ 125
1883
+ ]);
1884
+ var RCON = new Uint8Array([1, 2, 4, 8, 16, 32, 64, 128, 27, 54]);
1885
+ function gmul(a, b) {
1886
+ let p = 0;
1887
+ for (let i = 0; i < 8; i++) {
1888
+ if (b & 1) p ^= a;
1889
+ const hi = a & 128;
1890
+ a = a << 1 & 255;
1891
+ if (hi) a ^= 27;
1892
+ b >>= 1;
1893
+ }
1894
+ return p;
1895
+ }
1896
+ function expandKey(key) {
1897
+ const w = new Uint32Array(44);
1898
+ for (let i = 0; i < 4; i++) {
1899
+ w[i] = key[4 * i] << 24 | key[4 * i + 1] << 16 | key[4 * i + 2] << 8 | key[4 * i + 3];
1900
+ }
1901
+ for (let i = 4; i < 44; i++) {
1902
+ let temp = w[i - 1];
1903
+ if (i % 4 === 0) {
1904
+ temp = (temp << 8 | temp >>> 24) >>> 0;
1905
+ temp = S_BOX[temp >>> 24 & 255] << 24 | S_BOX[temp >>> 16 & 255] << 16 | S_BOX[temp >>> 8 & 255] << 8 | S_BOX[temp & 255];
1906
+ temp = (temp ^ RCON[i / 4 - 1] << 24) >>> 0;
1907
+ }
1908
+ w[i] = (w[i - 4] ^ temp) >>> 0;
1909
+ }
1910
+ return w;
1911
+ }
1912
+ function decryptBlock(block, roundKeys) {
1913
+ const s = new Uint8Array(16);
1914
+ for (let i = 0; i < 16; i++) s[i] = block[i];
1915
+ addRoundKey(s, roundKeys, 10);
1916
+ for (let round = 9; round >= 1; round--) {
1917
+ invShiftRows(s);
1918
+ invSubBytes(s);
1919
+ addRoundKey(s, roundKeys, round);
1920
+ invMixColumns(s);
1921
+ }
1922
+ invShiftRows(s);
1923
+ invSubBytes(s);
1924
+ addRoundKey(s, roundKeys, 0);
1925
+ return s;
1926
+ }
1927
+ function addRoundKey(s, w, round) {
1928
+ const base = round * 4;
1929
+ for (let c = 0; c < 4; c++) {
1930
+ const k = w[base + c];
1931
+ s[c * 4] ^= k >>> 24 & 255;
1932
+ s[c * 4 + 1] ^= k >>> 16 & 255;
1933
+ s[c * 4 + 2] ^= k >>> 8 & 255;
1934
+ s[c * 4 + 3] ^= k & 255;
1935
+ }
1936
+ }
1937
+ function invSubBytes(s) {
1938
+ for (let i = 0; i < 16; i++) s[i] = INV_S_BOX[s[i]];
1939
+ }
1940
+ function invShiftRows(s) {
1941
+ let t = s[13];
1942
+ s[13] = s[9];
1943
+ s[9] = s[5];
1944
+ s[5] = s[1];
1945
+ s[1] = t;
1946
+ t = s[2];
1947
+ s[2] = s[10];
1948
+ s[10] = t;
1949
+ t = s[6];
1950
+ s[6] = s[14];
1951
+ s[14] = t;
1952
+ t = s[3];
1953
+ s[3] = s[7];
1954
+ s[7] = s[11];
1955
+ s[11] = s[15];
1956
+ s[15] = t;
1957
+ }
1958
+ function invMixColumns(s) {
1959
+ for (let c = 0; c < 4; c++) {
1960
+ const i = c * 4;
1961
+ const a0 = s[i], a1 = s[i + 1], a2 = s[i + 2], a3 = s[i + 3];
1962
+ s[i] = gmul(a0, 14) ^ gmul(a1, 11) ^ gmul(a2, 13) ^ gmul(a3, 9);
1963
+ s[i + 1] = gmul(a0, 9) ^ gmul(a1, 14) ^ gmul(a2, 11) ^ gmul(a3, 13);
1964
+ s[i + 2] = gmul(a0, 13) ^ gmul(a1, 9) ^ gmul(a2, 14) ^ gmul(a3, 11);
1965
+ s[i + 3] = gmul(a0, 11) ^ gmul(a1, 13) ^ gmul(a2, 9) ^ gmul(a3, 14);
1966
+ }
1967
+ }
1968
+ function aes128EcbDecrypt(data, key) {
1969
+ if (key.length !== 16) throw new Error("AES-128 \uD0A4\uB294 16\uBC14\uC774\uD2B8\uC5EC\uC57C \uD569\uB2C8\uB2E4");
1970
+ if (data.length % 16 !== 0) throw new Error("AES ECB \uC785\uB825\uC740 16\uBC14\uC774\uD2B8\uC758 \uBC30\uC218\uC5EC\uC57C \uD569\uB2C8\uB2E4");
1971
+ const roundKeys = expandKey(key);
1972
+ const out = new Uint8Array(data.length);
1973
+ for (let offset = 0; offset < data.length; offset += 16) {
1974
+ const block = data.subarray(offset, offset + 16);
1975
+ const decrypted = decryptBlock(block, roundKeys);
1976
+ out.set(decrypted, offset);
1977
+ }
1978
+ return out;
1979
+ }
1980
+
1981
+ // src/hwp5/crypto.ts
1982
+ var MsvcLcg = class {
1983
+ seed;
1984
+ constructor(seed) {
1985
+ this.seed = seed >>> 0;
1986
+ }
1987
+ /** 0 ~ 0x7FFF 범위 난수 반환 (MSVC rand() 호환) */
1988
+ rand() {
1989
+ this.seed = Math.imul(this.seed, 214013) + 2531011 >>> 0;
1990
+ return this.seed >>> 16 & 32767;
1991
+ }
1992
+ };
1993
+ function decryptDistributePayload(payload) {
1994
+ if (payload.length < 256) throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
1995
+ const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
1996
+ const lcg = new MsvcLcg(seed);
1997
+ const result = new Uint8Array(256);
1998
+ result[0] = payload[0];
1999
+ result[1] = payload[1];
2000
+ result[2] = payload[2];
2001
+ result[3] = payload[3];
2002
+ let i = 4;
2003
+ while (i < 256) {
2004
+ const keyByte = lcg.rand() & 255;
2005
+ const n = (lcg.rand() & 15) + 1;
2006
+ for (let j = 0; j < n && i < 256; j++, i++) {
2007
+ result[i] = payload[i] ^ keyByte;
2008
+ }
2009
+ }
2010
+ return result;
2011
+ }
2012
+ function extractAesKey(decryptedPayload) {
2013
+ const offset = 4 + (decryptedPayload[0] & 15);
2014
+ if (offset + 16 > decryptedPayload.length) {
2015
+ throw new Error("AES \uD0A4 \uCD94\uCD9C \uC2E4\uD328: \uC624\uD504\uC14B\uC774 payload \uBC94\uC704\uB97C \uCD08\uACFC\uD569\uB2C8\uB2E4");
2016
+ }
2017
+ return decryptedPayload.slice(offset, offset + 16);
2018
+ }
2019
+ function parseRecordHeader(data, offset) {
2020
+ if (offset + 4 > data.length) throw new Error("\uB808\uCF54\uB4DC \uD5E4\uB354 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
2021
+ const header = (data[offset] | data[offset + 1] << 8 | data[offset + 2] << 16 | data[offset + 3] << 24) >>> 0;
2022
+ const tagId = header & 1023;
2023
+ let size = header >>> 20 & 4095;
2024
+ let headerSize = 4;
2025
+ if (size === 4095) {
2026
+ if (offset + 8 > data.length) throw new Error("\uD655\uC7A5 \uB808\uCF54\uB4DC \uD06C\uAE30 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
2027
+ size = (data[offset + 4] | data[offset + 5] << 8 | data[offset + 6] << 16 | data[offset + 7] << 24) >>> 0;
2028
+ headerSize = 8;
2029
+ }
2030
+ return { tagId, size, headerSize };
2031
+ }
2032
+ var TAG_DISTRIBUTE_DOC_DATA = 16 + 28;
2033
+ function decryptViewText(viewTextRaw, compressed) {
2034
+ const data = new Uint8Array(viewTextRaw);
2035
+ const rec = parseRecordHeader(data, 0);
2036
+ if (rec.tagId !== TAG_DISTRIBUTE_DOC_DATA) {
2037
+ throw new Error(`\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC758 \uCCAB \uB808\uCF54\uB4DC\uAC00 DISTRIBUTE_DOC_DATA(${TAG_DISTRIBUTE_DOC_DATA})\uAC00 \uC544\uB2D9\uB2C8\uB2E4 (\uC2E4\uC81C: ${rec.tagId})`);
2038
+ }
2039
+ const payloadStart = rec.headerSize;
2040
+ const payloadEnd = payloadStart + rec.size;
2041
+ if (payloadEnd > data.length || rec.size < 256) {
2042
+ throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 \uC720\uD6A8\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
2043
+ }
2044
+ const payload = data.subarray(payloadStart, payloadStart + 256);
2045
+ const decryptedPayload = decryptDistributePayload(payload);
2046
+ const aesKey = extractAesKey(decryptedPayload);
2047
+ const encryptedStart = payloadEnd;
2048
+ const encryptedData = data.subarray(encryptedStart);
2049
+ if (encryptedData.length === 0) {
2050
+ throw new Error("\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC5D0 \uC554\uD638\uD654\uB41C \uBCF8\uBB38 \uB370\uC774\uD130\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
2051
+ }
2052
+ const alignedLen = encryptedData.length - encryptedData.length % 16;
2053
+ if (alignedLen === 0) {
2054
+ throw new Error("\uC554\uD638\uD654\uB41C \uB370\uC774\uD130\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (16\uBC14\uC774\uD2B8 \uBBF8\uB9CC)");
2055
+ }
2056
+ const alignedData = encryptedData.subarray(0, alignedLen);
2057
+ const decrypted = aes128EcbDecrypt(alignedData, aesKey);
2058
+ if (compressed) {
2059
+ try {
2060
+ return decompressStream(Buffer.from(decrypted));
2061
+ } catch {
2062
+ return Buffer.from(decrypted);
2063
+ }
2064
+ }
2065
+ return Buffer.from(decrypted);
2066
+ }
2067
+
2068
+ // src/hwp5/cfb-lenient.ts
2069
+ var CFB_MAGIC = Buffer.from([208, 207, 17, 224, 161, 177, 26, 225]);
2070
+ var END_OF_CHAIN = 4294967294;
2071
+ var FREE_SECT = 4294967295;
2072
+ var MAX_CHAIN_LENGTH = 1e6;
2073
+ var MAX_DIR_ENTRIES = 1e5;
2074
+ var MAX_STREAM_SIZE = 100 * 1024 * 1024;
2075
+ function parseLenientCfb(data) {
2076
+ if (data.length < 512) throw new Error("CFB \uD30C\uC77C\uC774 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 512\uBC14\uC774\uD2B8)");
2077
+ if (!data.subarray(0, 8).equals(CFB_MAGIC)) throw new Error("CFB \uB9E4\uC9C1 \uBC14\uC774\uD2B8 \uBD88\uC77C\uCE58");
2078
+ const sectorSizeShift = data.readUInt16LE(30);
2079
+ if (sectorSizeShift < 7 || sectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + sectorSizeShift);
2080
+ const sectorSize = 1 << sectorSizeShift;
2081
+ const miniSectorSizeShift = data.readUInt16LE(32);
2082
+ if (miniSectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uBBF8\uB2C8 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + miniSectorSizeShift);
2083
+ const miniSectorSize = 1 << miniSectorSizeShift;
2084
+ const fatSectorCount = data.readUInt32LE(44);
2085
+ const firstDirSector = data.readUInt32LE(48);
2086
+ const miniStreamCutoff = data.readUInt32LE(56);
2087
+ const firstMiniFatSector = data.readUInt32LE(60);
2088
+ const miniFatSectorCount = data.readUInt32LE(64);
2089
+ const firstDifatSector = data.readUInt32LE(68);
2090
+ const difatSectorCount = data.readUInt32LE(72);
2091
+ function sectorOffset(id) {
2092
+ return 512 + id * sectorSize;
2093
+ }
2094
+ function readSectorData(id) {
2095
+ const off = sectorOffset(id);
2096
+ if (off + sectorSize > data.length) return Buffer.alloc(0);
2097
+ return data.subarray(off, off + sectorSize);
2098
+ }
2099
+ const fatSectors = [];
2100
+ for (let i = 0; i < 109 && fatSectors.length < fatSectorCount; i++) {
2101
+ const sid = data.readUInt32LE(76 + i * 4);
2102
+ if (sid === FREE_SECT || sid === END_OF_CHAIN) break;
2103
+ fatSectors.push(sid);
2104
+ }
2105
+ let difatSector = firstDifatSector;
2106
+ const visitedDifat = /* @__PURE__ */ new Set();
2107
+ for (let d = 0; d < difatSectorCount && difatSector !== END_OF_CHAIN && difatSector !== FREE_SECT; d++) {
2108
+ if (visitedDifat.has(difatSector)) break;
2109
+ visitedDifat.add(difatSector);
2110
+ const buf = readSectorData(difatSector);
2111
+ const entriesPerSector = sectorSize / 4 - 1;
2112
+ for (let i = 0; i < entriesPerSector && fatSectors.length < fatSectorCount; i++) {
2113
+ const sid = buf.readUInt32LE(i * 4);
2114
+ if (sid === FREE_SECT || sid === END_OF_CHAIN) continue;
2115
+ fatSectors.push(sid);
2116
+ }
2117
+ difatSector = buf.readUInt32LE(entriesPerSector * 4);
2118
+ }
2119
+ const entriesPerFatSector = sectorSize / 4;
2120
+ const fatTable = new Uint32Array(fatSectors.length * entriesPerFatSector);
2121
+ for (let fi = 0; fi < fatSectors.length; fi++) {
2122
+ const buf = readSectorData(fatSectors[fi]);
2123
+ for (let i = 0; i < entriesPerFatSector; i++) {
2124
+ fatTable[fi * entriesPerFatSector + i] = i * 4 + 3 < buf.length ? buf.readUInt32LE(i * 4) : FREE_SECT;
2125
+ }
2126
+ }
2127
/**
 * Follow a FAT chain and concatenate the sectors it visits.
 *
 * Reading stops at an end-of-chain/free marker, once `maxBytes` have been
 * gathered, when a sector repeats (loop), or when the number of visited
 * sectors exceeds MAX_CHAIN_LENGTH. A sector id outside the FAT table ends
 * the chain after that sector has been read.
 *
 * @param {number} startSector - First sector of the chain.
 * @param {number} maxBytes - Upper bound on the number of bytes returned.
 * @returns {Buffer} The gathered bytes (possibly empty).
 * @throws {Error} When `maxBytes` exceeds MAX_STREAM_SIZE.
 */
function readChain(startSector, maxBytes) {
  const endsChain = (sid) => sid === END_OF_CHAIN || sid === FREE_SECT;
  if (endsChain(startSector)) return Buffer.alloc(0);
  if (maxBytes > MAX_STREAM_SIZE) throw new Error("\uC2A4\uD2B8\uB9BC\uC774 \uB108\uBB34 \uD07D\uB2C8\uB2E4");
  const pieces = [];
  const seen = /* @__PURE__ */ new Set();
  let bytesRead = 0;
  let sid = startSector;
  while (!endsChain(sid) && bytesRead < maxBytes) {
    if (seen.has(sid) || seen.size > MAX_CHAIN_LENGTH) break;
    seen.add(sid);
    const sector = readSectorData(sid);
    const budget = maxBytes - bytesRead;
    // Trim the final sector so the result never exceeds maxBytes.
    pieces.push(budget < sectorSize ? sector.subarray(0, budget) : sector);
    bytesRead += Math.min(sector.length, budget);
    sid = sid < fatTable.length ? fatTable[sid] : END_OF_CHAIN;
  }
  return Buffer.concat(pieces);
}
2146
// Cache for the decoded mini FAT; filled on first use by getMiniFatTable().
let miniFatTable = null;
/**
 * Load the mini FAT on first call and return the cached table afterwards.
 *
 * @returns {Uint32Array} One 32-bit entry per mini sector; empty when the
 *   header declares no mini FAT sectors or its first sector is END_OF_CHAIN.
 */
function getMiniFatTable() {
  if (miniFatTable) return miniFatTable;
  const hasMiniFat = miniFatSectorCount !== 0 && firstMiniFatSector !== END_OF_CHAIN;
  if (!hasMiniFat) {
    miniFatTable = new Uint32Array(0);
    return miniFatTable;
  }
  const raw = readChain(firstMiniFatSector, miniFatSectorCount * sectorSize);
  const count = raw.length / 4;
  miniFatTable = new Uint32Array(count);
  for (let slot = 0; slot < count; slot += 1) {
    miniFatTable[slot] = raw.readUInt32LE(slot * 4);
  }
  return miniFatTable;
}
2161
// Read the directory chain and decode it as consecutive 128-byte records,
// capped at MAX_DIR_ENTRIES. Each record contributes one element so array
// positions keep matching record positions.
const dirData = readChain(firstDirSector, MAX_DIR_ENTRIES * 128);
const dirEntries = [];
for (let base = 0; base + 128 <= dirData.length && dirEntries.length < MAX_DIR_ENTRIES; base += 128) {
  const nameLen = dirData.readUInt16LE(base + 64);
  const hasUsableName = nameLen > 0 && nameLen <= 64;
  if (!hasUsableName) {
    // Keep a placeholder for records whose name length is 0 or above 64.
    dirEntries.push({ name: "", type: 0, startSector: 0, size: 0 });
    continue;
  }
  // The stored length is reduced by two bytes before decoding the UTF-16LE
  // name (presumably the trailing NUL terminator - confirm against the spec).
  const nameBytes = nameLen - 2;
  let name = "";
  if (nameBytes > 0) {
    name = dirData.subarray(base, base + nameBytes).toString("utf16le");
  }
  dirEntries.push({
    name,
    type: dirData[base + 66],
    startSector: dirData.readUInt32LE(base + 116),
    size: dirData.readUInt32LE(base + 120)
  });
}
2176
// Cache for the mini stream container; filled on first use by getMiniStream().
let miniStreamData = null;
/**
 * Load the mini stream container on first call and cache it.
 *
 * The container is the chain that starts at the first directory entry, which
 * must have type 5. When that entry declares size 0 the read is bounded by
 * MAX_STREAM_SIZE instead.
 *
 * @returns {Buffer} The mini stream bytes, or an empty buffer when the first
 *   directory entry is missing or is not of type 5.
 */
function getMiniStream() {
  if (miniStreamData) return miniStreamData;
  const root = dirEntries[0];
  const hasRootStorage = Boolean(root) && root.type === 5;
  miniStreamData = hasRootStorage
    ? readChain(root.startSector, root.size || MAX_STREAM_SIZE)
    : Buffer.alloc(0);
  return miniStreamData;
}
2187
/**
 * Read a stream stored in the mini stream by following its mini FAT chain.
 *
 * Stops at an end-of-chain/free marker, after `size` bytes, on a repeated
 * sector, or when the visited count exceeds MAX_CHAIN_LENGTH. A mini sector
 * that lies outside the container is skipped but still counted toward
 * `size`, so the result can be shorter than requested.
 *
 * @param {number} startSector - First mini sector of the stream.
 * @param {number} size - Number of bytes the stream declares.
 * @returns {Buffer} The gathered bytes; empty when there is no mini FAT or
 *   no mini stream container.
 */
function readMiniStream(startSector, size) {
  const miniFat = getMiniFatTable();
  const container = getMiniStream();
  if (miniFat.length === 0 || container.length === 0) return Buffer.alloc(0);
  const pieces = [];
  const seen = /* @__PURE__ */ new Set();
  let consumed = 0;
  let sid = startSector;
  while (sid !== END_OF_CHAIN && sid !== FREE_SECT && consumed < size) {
    if (seen.has(sid) || seen.size > MAX_CHAIN_LENGTH) break;
    seen.add(sid);
    const begin = sid * miniSectorSize;
    const want = Math.min(miniSectorSize, size - consumed);
    const finish = begin + want;
    if (finish <= container.length) {
      pieces.push(container.subarray(begin, finish));
    }
    consumed += want;
    sid = sid < miniFat.length ? miniFat[sid] : END_OF_CHAIN;
  }
  return Buffer.concat(pieces);
}
2210
/**
 * Read the bytes of one directory entry.
 *
 * Entries smaller than `miniStreamCutoff` are looked up in the mini stream
 * first; if that yields nothing, or the entry is larger, the regular FAT
 * chain is read instead.
 *
 * @param {{startSector: number, size: number}} entry - Directory entry.
 * @returns {Buffer} Stream contents; empty for a zero-sized entry.
 */
function readStreamData(entry) {
  const { size, startSector } = entry;
  if (size === 0) return Buffer.alloc(0);
  if (size < miniStreamCutoff) {
    const fromMini = readMiniStream(startSector, size);
    if (fromMini.length > 0) return fromMini;
  }
  return readChain(startSector, size);
}
2218
/**
 * Resolve a container path such as "/BodyText/Section0" to a stream entry.
 *
 * The directory is searched as a flat list: the storage components of the
 * path are NOT used to disambiguate, so streams with the same name in
 * different storages resolve to whichever appears first in `dirEntries`.
 * Only entries of type 2 are returned.
 *
 * For a multi-part path the remainder after the first component is tried as
 * a literal name first, then the last component alone.
 *
 * @param {string} path - Stream path, with or without a leading "/".
 * @returns {object|null} The first matching entry, or null when none exists.
 */
function findEntryByPath(path) {
  const parts = path.replace(/^\//, "").split("/");
  const findStreamNamed = (name) => dirEntries.find((e) => e.type === 2 && e.name === name) ?? null;
  if (parts.length === 1) {
    return findStreamNamed(parts[0]);
  }
  return findStreamNamed(parts.slice(1).join("/")) ?? findStreamNamed(parts[parts.length - 1]);
}
2233
+ return {
2234
+ findStream(path) {
2235
+ const normalized = path.replace(/^\//, "");
2236
+ const entry = findEntryByPath(normalized);
2237
+ if (!entry || entry.type !== 2) return null;
2238
+ const stream = readStreamData(entry);
2239
+ return stream.length > 0 ? stream : null;
2240
+ },
2241
+ entries() {
2242
+ return dirEntries.filter((e) => e.type === 2);
2243
+ }
2244
+ };
2245
+ }
2246
+
1315
2247
  // src/hwp5/parser.ts
1316
2248
  init_page_range();
1317
2249
  var import_module = require("module");
@@ -1321,21 +2253,40 @@ var CFB = require2("cfb");
1321
2253
  var MAX_SECTIONS = 100;
1322
2254
  var MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024;
1323
2255
  function parseHwp5Document(buffer, options) {
1324
- const cfb = CFB.parse(buffer);
1325
- const headerEntry = CFB.find(cfb, "/FileHeader");
1326
- if (!headerEntry?.content) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
1327
- const header = parseFileHeader(Buffer.from(headerEntry.content));
2256
+ let cfb = null;
2257
+ let lenientCfb = null;
2258
+ const warnings = [];
2259
+ try {
2260
+ cfb = CFB.parse(buffer);
2261
+ } catch {
2262
+ try {
2263
+ lenientCfb = parseLenientCfb(buffer);
2264
+ warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
2265
+ } catch {
2266
+ throw new KordocError("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
2267
+ }
2268
+ }
2269
+ const findStream = (path) => {
2270
+ if (cfb) {
2271
+ const entry = CFB.find(cfb, path);
2272
+ return entry?.content ? Buffer.from(entry.content) : null;
2273
+ }
2274
+ return lenientCfb.findStream(path);
2275
+ };
2276
+ const headerData = findStream("/FileHeader");
2277
+ if (!headerData) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
2278
+ const header = parseFileHeader(headerData);
1328
2279
  if (header.signature !== "HWP Document File") throw new KordocError("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
1329
2280
  if (header.flags & FLAG_ENCRYPTED) throw new KordocError("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1330
2281
  if (header.flags & FLAG_DRM) throw new KordocError("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
1331
2282
  const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
2283
+ const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
1332
2284
  const metadata = {
1333
2285
  version: `${header.versionMajor}.x`
1334
2286
  };
1335
- extractHwp5Metadata(cfb, metadata);
1336
- const docInfo = parseDocInfoStream(cfb, compressed);
1337
- const warnings = [];
1338
- const sections = findSections(cfb);
2287
+ if (cfb) extractHwp5Metadata(cfb, metadata);
2288
+ const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
2289
+ const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
1339
2290
  if (sections.length === 0) throw new KordocError("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
1340
2291
  metadata.pageCount = sections.length;
1341
2292
  const pageFilter = options?.pages ? parsePageRange(options.pages, sections.length) : null;
@@ -1347,7 +2298,7 @@ function parseHwp5Document(buffer, options) {
1347
2298
  if (pageFilter && !pageFilter.has(si + 1)) continue;
1348
2299
  try {
1349
2300
  const sectionData = sections[si];
1350
- const data = compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
2301
+ const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
1351
2302
  totalDecompressed += data.length;
1352
2303
  if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new KordocError("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
1353
2304
  const records = readRecords(data);
@@ -1360,7 +2311,7 @@ function parseHwp5Document(buffer, options) {
1360
2311
  warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
1361
2312
  }
1362
2313
  }
1363
- const images = extractHwp5Images(cfb, blocks, compressed, warnings);
2314
+ const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
1364
2315
  if (docInfo) {
1365
2316
  detectHwp5Headings(blocks, docInfo);
1366
2317
  }
@@ -1379,6 +2330,15 @@ function parseDocInfoStream(cfb, compressed) {
1379
2330
  return null;
1380
2331
  }
1381
2332
  }
2333
/**
 * Parse an already-extracted DocInfo stream.
 *
 * Best effort: any failure while decompressing or parsing yields null so the
 * caller can continue without document info.
 *
 * @param {Buffer|null} raw - DocInfo stream bytes, or null when absent.
 * @param {boolean} compressed - Whether the stream must be decompressed first.
 * @returns {object|null} Result of parseDocInfo, or null when `raw` is
 *   missing or parsing failed.
 */
function parseDocInfoFromStream(raw, compressed) {
  if (!raw) return null;
  let docInfo = null;
  try {
    const payload = compressed ? decompressStream(raw) : raw;
    docInfo = parseDocInfo(readRecords(payload));
  } catch {
    docInfo = null;
  }
  return docInfo;
}
1382
2342
  function detectHwp5Headings(blocks, docInfo) {
1383
2343
  let baseFontSize = 0;
1384
2344
  for (const style of docInfo.styles) {
@@ -1458,6 +2418,20 @@ function extractHwp5Metadata(cfb, metadata) {
1458
2418
  } catch {
1459
2419
  }
1460
2420
  }
2421
/**
 * Collect and decrypt the "/ViewText/SectionN" streams of a strictly parsed
 * container, in section order.
 *
 * Enumeration stops at the first missing section, at the first section that
 * fails to decrypt, or after MAX_SECTIONS sections.
 *
 * @param {object} cfb - Container returned by CFB.parse.
 * @param {boolean} compressed - Forwarded to decryptViewText.
 * @returns {Buffer[]} Decrypted section contents.
 */
function findViewTextSections(cfb, compressed) {
  const found = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const entry = CFB.find(cfb, `/ViewText/Section${index}`);
    if (!entry?.content) break;
    let plain;
    try {
      plain = decryptViewText(Buffer.from(entry.content), compressed);
    } catch {
      break;
    }
    found.push({ idx: index, content: plain });
    index += 1;
  }
  found.sort((a, b) => a.idx - b.idx);
  return found.map((section) => section.content);
}
1461
2435
  function findSections(cfb) {
1462
2436
  const sections = [];
1463
2437
  for (let i = 0; i < MAX_SECTIONS; i++) {
@@ -1476,6 +2450,38 @@ function findSections(cfb) {
1476
2450
  }
1477
2451
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
1478
2452
  }
2453
/**
 * Collect the body-text section streams from a leniently parsed container.
 *
 * Sections are returned RAW (still compressed when the document is
 * compressed). parseHwp5Document inflates every non-distribution section
 * itself, so inflating here as well would decompress the data twice and make
 * every section of a compressed document fail in lenient mode.
 *
 * Lookup is by "/BodyText/SectionN" then "SectionN", stopping at the first
 * missing index. If nothing is found that way, every stream whose name
 * starts with "Section" is taken and ordered by its numeric suffix.
 *
 * @param {{findStream: Function, entries: Function}} lcfb - Lenient container.
 * @param {boolean} compressed - Unused; kept so existing callers keep working.
 * @returns {Buffer[]} Raw section streams in section order.
 */
function findSectionsLenient(lcfb, compressed) {
  const sections = [];
  for (let i = 0; i < MAX_SECTIONS; i++) {
    const raw = lcfb.findStream(`/BodyText/Section${i}`) ?? lcfb.findStream(`Section${i}`);
    if (!raw) break;
    sections.push({ idx: i, content: raw });
  }
  if (sections.length === 0) {
    // Fallback for containers where Section0 is missing or unreadable.
    for (const e of lcfb.entries()) {
      if (sections.length >= MAX_SECTIONS) break;
      if (!e.name.startsWith("Section")) continue;
      const idx = Number.parseInt(e.name.slice("Section".length), 10) || 0;
      const raw = lcfb.findStream(e.name);
      if (raw) sections.push({ idx, content: raw });
    }
  }
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
}
2472
/**
 * Collect and decrypt the view-text section streams from a leniently parsed
 * container, in section order.
 *
 * Each section is looked up as "/ViewText/SectionN" then "SectionN".
 * Enumeration stops at the first missing section, at the first section that
 * fails to decrypt, or after MAX_SECTIONS sections.
 *
 * @param {{findStream: Function}} lcfb - Lenient container.
 * @param {boolean} compressed - Forwarded to decryptViewText.
 * @returns {Buffer[]} Decrypted section contents.
 */
function findViewTextSectionsLenient(lcfb, compressed) {
  const collected = [];
  for (let n = 0; n < MAX_SECTIONS; n += 1) {
    const encrypted = lcfb.findStream(`/ViewText/Section${n}`) ?? lcfb.findStream(`Section${n}`);
    if (!encrypted) break;
    let content;
    try {
      content = decryptViewText(encrypted, compressed);
    } catch {
      break;
    }
    collected.push({ idx: n, content });
  }
  collected.sort((a, b) => a.idx - b.idx);
  return collected.map((section) => section.content);
}
1479
2485
  var TAG_SHAPE_COMPONENT = 74;
1480
2486
  function extractBinDataId(records, ctrlIdx) {
1481
2487
  const ctrlLevel = records[ctrlIdx].level;
@@ -1550,6 +2556,53 @@ function extractHwp5Images(cfb, blocks, compressed, warnings) {
1550
2556
  }
1551
2557
  return images;
1552
2558
  }
2559
/**
 * Resolve image blocks against the BINnnnn streams of a leniently parsed
 * container.
 *
 * Every stream whose name starts with "BIN" followed by four digits is
 * indexed by that number. Each image block (whose `text` holds the BinData
 * id) is then either turned into an extracted image, or downgraded to a
 * placeholder paragraph with a SKIPPED_IMAGE warning when the data is
 * missing or its format is not recognised. Blocks are mutated in place.
 *
 * @param {{findStream: Function, entries: Function}} lcfb - Lenient container.
 * @param {object[]} blocks - Parsed blocks; image blocks are updated in place.
 * @param {boolean} compressed - Whether BinData streams should be inflated.
 * @param {object[]} warnings - Receives one warning per skipped image.
 * @returns {object[]} Extracted images as `{ filename, data, mimeType }`.
 */
function extractHwp5ImagesLenient(lcfb, blocks, compressed, warnings) {
  const binDataMap = /* @__PURE__ */ new Map();
  const binRe = /^BIN(\d{4})/i;
  for (const e of lcfb.entries()) {
    const match = e.name.match(binRe);
    if (!match) continue;
    const idx = Number.parseInt(match[1], 10);
    let raw = lcfb.findStream(e.name);
    if (!raw) continue;
    if (compressed) {
      try {
        raw = decompressStream(raw);
      } catch {
        // Deliberate best effort: if inflating fails, keep the stored bytes
        // and let MIME detection below decide whether they are usable.
      }
    }
    binDataMap.set(idx, { data: raw, name: e.name });
  }
  if (binDataMap.size === 0) return [];
  const images = [];
  let imageIndex = 0;
  for (const block of blocks) {
    if (block.type !== "image" || !block.text) continue;
    const binId = Number.parseInt(block.text, 10);
    if (Number.isNaN(binId)) continue;
    const bin = binDataMap.get(binId);
    if (!bin) {
      // The message previously contained U+FFFD replacement characters from
      // an encoding mishap; restored to the intended wording.
      warnings.push({ page: block.pageNumber, message: `BinData ${binId} \uC5C6\uC74C`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: BinData ${binId}]`;
      continue;
    }
    const mime = detectImageMime(bin.data);
    if (!mime) {
      warnings.push({ page: block.pageNumber, message: `BinData ${binId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: ${bin.name}]`;
      continue;
    }
    imageIndex++;
    const ext = mime.includes("jpeg") ? "jpg" : mime.includes("png") ? "png" : mime.includes("gif") ? "gif" : mime.includes("bmp") ? "bmp" : "bin";
    const filename = `image_${String(imageIndex).padStart(3, "0")}.${ext}`;
    images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
    block.text = filename;
    block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
  }
  return images;
}
1553
2606
  function parseSection(records, docInfo, warnings, sectionNum) {
1554
2607
  const blocks = [];
1555
2608
  let i = 0;
@@ -1586,12 +2639,61 @@ function parseSection(records, docInfo, warnings, sectionNum) {
1586
2639
  }
1587
2640
  } else if (ctrlId === " elo" || ctrlId === "ole ") {
1588
2641
  warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
2642
+ } else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
2643
+ const noteText = extractNoteText(records, i);
2644
+ if (noteText && blocks.length > 0) {
2645
+ const lastBlock = blocks[blocks.length - 1];
2646
+ if (lastBlock.type === "paragraph") {
2647
+ lastBlock.footnoteText = lastBlock.footnoteText ? lastBlock.footnoteText + "; " + noteText : noteText;
2648
+ }
2649
+ }
2650
+ } else if (ctrlId === "%tok" || ctrlId === "klnk") {
2651
+ const url = extractHyperlinkUrl(rec.data);
2652
+ if (url && blocks.length > 0) {
2653
+ const lastBlock = blocks[blocks.length - 1];
2654
+ if (lastBlock.type === "paragraph" && !lastBlock.href) {
2655
+ lastBlock.href = sanitizeHref(url) ?? void 0;
2656
+ }
2657
+ }
1589
2658
  }
1590
2659
  }
1591
2660
  i++;
1592
2661
  }
1593
2662
  return blocks;
1594
2663
  }
2664
/**
 * Gather the text of a footnote/endnote control.
 *
 * Scans the records that follow the control while they are nested deeper
 * than it (at most 99 records ahead), and joins the trimmed, non-empty text
 * of every TAG_PARA_TEXT record with single spaces.
 *
 * @param {object[]} records - Section records.
 * @param {number} ctrlIdx - Index of the note control record.
 * @returns {string|null} Joined note text, or null when none was found.
 */
function extractNoteText(records, ctrlIdx) {
  const ctrlLevel = records[ctrlIdx].level;
  const limit = Math.min(records.length, ctrlIdx + 100);
  const pieces = [];
  for (let pos = ctrlIdx + 1; pos < limit; pos += 1) {
    const rec = records[pos];
    if (rec.level <= ctrlLevel) break;
    if (rec.tagId !== TAG_PARA_TEXT) continue;
    const text = extractText(rec.data).trim();
    if (text) pieces.push(text);
  }
  return pieces.length > 0 ? pieces.join(" ") : null;
}
2677
/**
 * Pull an http(s) URL out of a hyperlink control record.
 *
 * Searches the raw bytes for "http" encoded as UTF-16LE, reads 16-bit units
 * up to the next NUL unit (or the end of the data), and accepts the result
 * only if it looks like an http/https URL shorter than 2000 characters.
 * Any error while reading is treated as "no URL".
 *
 * @param {Buffer} data - Raw control record payload.
 * @returns {string|null} The URL, or null when none could be extracted.
 */
function extractHyperlinkUrl(data) {
  try {
    const start = data.indexOf(Buffer.from("http", "utf16le"));
    if (start < 0) return null;
    let stop = start;
    for (; stop + 1 < data.length; stop += 2) {
      if (data.readUInt16LE(stop) === 0) break;
    }
    const candidate = data.subarray(start, stop).toString("utf16le");
    const looksLikeUrl = /^https?:\/\/.+/.test(candidate);
    return looksLikeUrl && candidate.length < 2e3 ? candidate : null;
  } catch {
    return null;
  }
}
1595
2697
  function resolveCharStyle(charShapeIds, docInfo) {
1596
2698
  if (charShapeIds.length === 0 || docInfo.charShapes.length === 0) return void 0;
1597
2699
  const freq = /* @__PURE__ */ new Map();