kordoc 1.9.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -3
- package/dist/{chunk-AHW56LNX.js → chunk-UMO6QQO5.js} +2 -2
- package/dist/{chunk-MDRW3HYC.js → chunk-UUHAAZYN.js} +1086 -25
- package/dist/chunk-UUHAAZYN.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/index.cjs +1155 -94
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1155 -94
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-VU6Z7HNR.js → utils-K23YMTIM.js} +2 -2
- package/dist/{watch-5IOZWFDD.js → watch-CGG7CCHJ.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-MDRW3HYC.js.map +0 -1
- /package/dist/{chunk-AHW56LNX.js.map → chunk-UMO6QQO5.js.map} +0 -0
- /package/dist/{utils-VU6Z7HNR.js.map → utils-K23YMTIM.js.map} +0 -0
- /package/dist/{watch-5IOZWFDD.js.map → watch-CGG7CCHJ.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -181,13 +181,83 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
181
181
|
var import_zlib = require("zlib");
|
|
182
182
|
var import_xmldom = require("@xmldom/xmldom");
|
|
183
183
|
|
|
184
|
-
// src/
|
|
184
|
+
// src/utils.ts
|
|
185
|
+
var VERSION = true ? "2.0.0" : "0.0.0-dev";
|
|
186
|
+
function toArrayBuffer(buf) {
|
|
187
|
+
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
188
|
+
return buf.buffer;
|
|
189
|
+
}
|
|
190
|
+
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
191
|
+
}
|
|
192
|
+
var KordocError = class extends Error {
|
|
193
|
+
constructor(message) {
|
|
194
|
+
super(message);
|
|
195
|
+
this.name = "KordocError";
|
|
196
|
+
}
|
|
197
|
+
};
|
|
198
|
+
function isPathTraversal(name) {
|
|
199
|
+
if (name.includes("\0")) return true;
|
|
200
|
+
const normalized = name.replace(/\\/g, "/");
|
|
201
|
+
return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
|
|
202
|
+
}
|
|
203
|
+
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
|
|
204
|
+
try {
|
|
205
|
+
const data = new DataView(buffer);
|
|
206
|
+
const len = buffer.byteLength;
|
|
207
|
+
let eocdOffset = -1;
|
|
208
|
+
for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
|
|
209
|
+
if (data.getUint32(i, true) === 101010256) {
|
|
210
|
+
eocdOffset = i;
|
|
211
|
+
break;
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
|
|
215
|
+
const entryCount = data.getUint16(eocdOffset + 10, true);
|
|
216
|
+
if (entryCount > maxEntries) {
|
|
217
|
+
throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
|
|
218
|
+
}
|
|
219
|
+
const cdSize = data.getUint32(eocdOffset + 12, true);
|
|
220
|
+
const cdOffset = data.getUint32(eocdOffset + 16, true);
|
|
221
|
+
if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
|
|
222
|
+
let totalUncompressed = 0;
|
|
223
|
+
let pos = cdOffset;
|
|
224
|
+
for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
|
|
225
|
+
if (data.getUint32(pos, true) !== 33639248) break;
|
|
226
|
+
totalUncompressed += data.getUint32(pos + 24, true);
|
|
227
|
+
const nameLen = data.getUint16(pos + 28, true);
|
|
228
|
+
const extraLen = data.getUint16(pos + 30, true);
|
|
229
|
+
const commentLen = data.getUint16(pos + 32, true);
|
|
230
|
+
pos += 46 + nameLen + extraLen + commentLen;
|
|
231
|
+
}
|
|
232
|
+
if (totalUncompressed > maxUncompressedSize) {
|
|
233
|
+
throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
|
|
234
|
+
}
|
|
235
|
+
return { totalUncompressed, entryCount };
|
|
236
|
+
} catch (err) {
|
|
237
|
+
if (err instanceof KordocError) throw err;
|
|
238
|
+
return { totalUncompressed: 0, entryCount: 0 };
|
|
239
|
+
}
|
|
240
|
+
}
|
|
185
241
|
var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
|
|
186
242
|
function sanitizeHref(href) {
|
|
187
243
|
const trimmed = href.trim();
|
|
188
244
|
if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
|
|
189
245
|
return trimmed;
|
|
190
246
|
}
|
|
247
|
+
function classifyError(err) {
|
|
248
|
+
if (!(err instanceof Error)) return "PARSE_ERROR";
|
|
249
|
+
const msg = err.message;
|
|
250
|
+
if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
|
|
251
|
+
if (msg.includes("DRM")) return "DRM_PROTECTED";
|
|
252
|
+
if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
|
|
253
|
+
if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
|
|
254
|
+
if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
|
|
255
|
+
if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
|
|
256
|
+
if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
|
|
257
|
+
return "PARSE_ERROR";
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
// src/table/builder.ts
|
|
191
261
|
var MAX_COLS = 200;
|
|
192
262
|
var MAX_ROWS = 1e4;
|
|
193
263
|
function buildTable(rows) {
|
|
@@ -438,82 +508,6 @@ var HEADING_RATIO_H1 = 1.5;
|
|
|
438
508
|
var HEADING_RATIO_H2 = 1.3;
|
|
439
509
|
var HEADING_RATIO_H3 = 1.15;
|
|
440
510
|
|
|
441
|
-
// src/utils.ts
|
|
442
|
-
var VERSION = true ? "1.9.0" : "0.0.0-dev";
|
|
443
|
-
function toArrayBuffer(buf) {
|
|
444
|
-
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
445
|
-
return buf.buffer;
|
|
446
|
-
}
|
|
447
|
-
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
448
|
-
}
|
|
449
|
-
var KordocError = class extends Error {
|
|
450
|
-
constructor(message) {
|
|
451
|
-
super(message);
|
|
452
|
-
this.name = "KordocError";
|
|
453
|
-
}
|
|
454
|
-
};
|
|
455
|
-
function isPathTraversal(name) {
|
|
456
|
-
if (name.includes("\0")) return true;
|
|
457
|
-
const normalized = name.replace(/\\/g, "/");
|
|
458
|
-
return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
|
|
459
|
-
}
|
|
460
|
-
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
|
|
461
|
-
try {
|
|
462
|
-
const data = new DataView(buffer);
|
|
463
|
-
const len = buffer.byteLength;
|
|
464
|
-
let eocdOffset = -1;
|
|
465
|
-
for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
|
|
466
|
-
if (data.getUint32(i, true) === 101010256) {
|
|
467
|
-
eocdOffset = i;
|
|
468
|
-
break;
|
|
469
|
-
}
|
|
470
|
-
}
|
|
471
|
-
if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
|
|
472
|
-
const entryCount = data.getUint16(eocdOffset + 10, true);
|
|
473
|
-
if (entryCount > maxEntries) {
|
|
474
|
-
throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
|
|
475
|
-
}
|
|
476
|
-
const cdSize = data.getUint32(eocdOffset + 12, true);
|
|
477
|
-
const cdOffset = data.getUint32(eocdOffset + 16, true);
|
|
478
|
-
if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
|
|
479
|
-
let totalUncompressed = 0;
|
|
480
|
-
let pos = cdOffset;
|
|
481
|
-
for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
|
|
482
|
-
if (data.getUint32(pos, true) !== 33639248) break;
|
|
483
|
-
totalUncompressed += data.getUint32(pos + 24, true);
|
|
484
|
-
const nameLen = data.getUint16(pos + 28, true);
|
|
485
|
-
const extraLen = data.getUint16(pos + 30, true);
|
|
486
|
-
const commentLen = data.getUint16(pos + 32, true);
|
|
487
|
-
pos += 46 + nameLen + extraLen + commentLen;
|
|
488
|
-
}
|
|
489
|
-
if (totalUncompressed > maxUncompressedSize) {
|
|
490
|
-
throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
|
|
491
|
-
}
|
|
492
|
-
return { totalUncompressed, entryCount };
|
|
493
|
-
} catch (err) {
|
|
494
|
-
if (err instanceof KordocError) throw err;
|
|
495
|
-
return { totalUncompressed: 0, entryCount: 0 };
|
|
496
|
-
}
|
|
497
|
-
}
|
|
498
|
-
var SAFE_HREF_RE2 = /^(?:https?:|mailto:|tel:|#)/i;
|
|
499
|
-
function sanitizeHref2(href) {
|
|
500
|
-
const trimmed = href.trim();
|
|
501
|
-
if (!trimmed || !SAFE_HREF_RE2.test(trimmed)) return null;
|
|
502
|
-
return trimmed;
|
|
503
|
-
}
|
|
504
|
-
function classifyError(err) {
|
|
505
|
-
if (!(err instanceof Error)) return "PARSE_ERROR";
|
|
506
|
-
const msg = err.message;
|
|
507
|
-
if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
|
|
508
|
-
if (msg.includes("DRM")) return "DRM_PROTECTED";
|
|
509
|
-
if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
|
|
510
|
-
if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
|
|
511
|
-
if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
|
|
512
|
-
if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
|
|
513
|
-
if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
|
|
514
|
-
return "PARSE_ERROR";
|
|
515
|
-
}
|
|
516
|
-
|
|
517
511
|
// src/hwpx/parser.ts
|
|
518
512
|
init_page_range();
|
|
519
513
|
var MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024;
|
|
@@ -1120,7 +1114,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1120
1114
|
case "hyperlink": {
|
|
1121
1115
|
const url = child.getAttribute("url") || child.getAttribute("href") || "";
|
|
1122
1116
|
if (url) {
|
|
1123
|
-
const safe =
|
|
1117
|
+
const safe = sanitizeHref(url);
|
|
1124
1118
|
if (safe) href = safe;
|
|
1125
1119
|
}
|
|
1126
1120
|
walk(child);
|
|
@@ -1213,13 +1207,16 @@ var TAG_TABLE = 77;
|
|
|
1213
1207
|
var TAG_DOC_CHAR_SHAPE = 55;
|
|
1214
1208
|
var TAG_DOC_STYLE = 58;
|
|
1215
1209
|
var CHAR_LINE = 0;
|
|
1210
|
+
var CHAR_SECTION_BREAK = 10;
|
|
1216
1211
|
var CHAR_PARA = 13;
|
|
1217
1212
|
var CHAR_TAB = 9;
|
|
1218
1213
|
var CHAR_HYPHEN = 30;
|
|
1219
1214
|
var CHAR_NBSP = 31;
|
|
1220
1215
|
var CHAR_FIXED_NBSP = 24;
|
|
1216
|
+
var CHAR_FIXED_WIDTH = 25;
|
|
1221
1217
|
var FLAG_COMPRESSED = 1 << 0;
|
|
1222
1218
|
var FLAG_ENCRYPTED = 1 << 1;
|
|
1219
|
+
var FLAG_DISTRIBUTION = 1 << 2;
|
|
1223
1220
|
var FLAG_DRM = 1 << 4;
|
|
1224
1221
|
var MAX_RECORDS = 5e5;
|
|
1225
1222
|
function readRecords(data) {
|
|
@@ -1314,27 +1311,41 @@ function extractText(data) {
|
|
|
1314
1311
|
const ch = data.readUInt16LE(i);
|
|
1315
1312
|
i += 2;
|
|
1316
1313
|
switch (ch) {
|
|
1314
|
+
// ── char 타입 (2바이트만, 확장 데이터 없음) ──
|
|
1317
1315
|
case CHAR_LINE:
|
|
1318
1316
|
result += "\n";
|
|
1319
1317
|
break;
|
|
1320
|
-
case
|
|
1321
|
-
|
|
1322
|
-
case CHAR_TAB:
|
|
1323
|
-
result += " ";
|
|
1318
|
+
case CHAR_SECTION_BREAK:
|
|
1319
|
+
result += "\n";
|
|
1324
1320
|
if (i + 14 <= data.length) i += 14;
|
|
1325
1321
|
break;
|
|
1322
|
+
case CHAR_PARA:
|
|
1323
|
+
break;
|
|
1324
|
+
// 문단 끝
|
|
1326
1325
|
case CHAR_HYPHEN:
|
|
1327
1326
|
result += "-";
|
|
1328
1327
|
break;
|
|
1329
1328
|
case CHAR_NBSP:
|
|
1329
|
+
result += " ";
|
|
1330
|
+
break;
|
|
1330
1331
|
case CHAR_FIXED_NBSP:
|
|
1332
|
+
result += "\xA0";
|
|
1333
|
+
break;
|
|
1334
|
+
// 진짜 NBSP
|
|
1335
|
+
case CHAR_FIXED_WIDTH:
|
|
1331
1336
|
result += " ";
|
|
1332
1337
|
break;
|
|
1338
|
+
// 고정폭 공백
|
|
1339
|
+
// ── inline 타입 (2바이트 + 14바이트 확장) ──
|
|
1340
|
+
case CHAR_TAB:
|
|
1341
|
+
result += " ";
|
|
1342
|
+
if (i + 14 <= data.length) i += 14;
|
|
1343
|
+
break;
|
|
1333
1344
|
default:
|
|
1334
1345
|
if (ch >= 1 && ch <= 31) {
|
|
1335
|
-
const
|
|
1346
|
+
const isExtended = ch >= 1 && ch <= 3 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= 18 || ch >= 21 && ch <= 23;
|
|
1336
1347
|
const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
|
|
1337
|
-
if ((
|
|
1348
|
+
if ((isExtended || isInline) && i + 14 <= data.length) i += 14;
|
|
1338
1349
|
} else if (ch >= 32) {
|
|
1339
1350
|
if (ch >= 55296 && ch <= 56319 && i + 1 < data.length) {
|
|
1340
1351
|
const lo = data.readUInt16LE(i);
|
|
@@ -1353,6 +1364,886 @@ function extractText(data) {
|
|
|
1353
1364
|
return result;
|
|
1354
1365
|
}
|
|
1355
1366
|
|
|
1367
|
+
// src/hwp5/aes.ts
|
|
1368
|
+
var S_BOX = new Uint8Array([
|
|
1369
|
+
99,
|
|
1370
|
+
124,
|
|
1371
|
+
119,
|
|
1372
|
+
123,
|
|
1373
|
+
242,
|
|
1374
|
+
107,
|
|
1375
|
+
111,
|
|
1376
|
+
197,
|
|
1377
|
+
48,
|
|
1378
|
+
1,
|
|
1379
|
+
103,
|
|
1380
|
+
43,
|
|
1381
|
+
254,
|
|
1382
|
+
215,
|
|
1383
|
+
171,
|
|
1384
|
+
118,
|
|
1385
|
+
202,
|
|
1386
|
+
130,
|
|
1387
|
+
201,
|
|
1388
|
+
125,
|
|
1389
|
+
250,
|
|
1390
|
+
89,
|
|
1391
|
+
71,
|
|
1392
|
+
240,
|
|
1393
|
+
173,
|
|
1394
|
+
212,
|
|
1395
|
+
162,
|
|
1396
|
+
175,
|
|
1397
|
+
156,
|
|
1398
|
+
164,
|
|
1399
|
+
114,
|
|
1400
|
+
192,
|
|
1401
|
+
183,
|
|
1402
|
+
253,
|
|
1403
|
+
147,
|
|
1404
|
+
38,
|
|
1405
|
+
54,
|
|
1406
|
+
63,
|
|
1407
|
+
247,
|
|
1408
|
+
204,
|
|
1409
|
+
52,
|
|
1410
|
+
165,
|
|
1411
|
+
229,
|
|
1412
|
+
241,
|
|
1413
|
+
113,
|
|
1414
|
+
216,
|
|
1415
|
+
49,
|
|
1416
|
+
21,
|
|
1417
|
+
4,
|
|
1418
|
+
199,
|
|
1419
|
+
35,
|
|
1420
|
+
195,
|
|
1421
|
+
24,
|
|
1422
|
+
150,
|
|
1423
|
+
5,
|
|
1424
|
+
154,
|
|
1425
|
+
7,
|
|
1426
|
+
18,
|
|
1427
|
+
128,
|
|
1428
|
+
226,
|
|
1429
|
+
235,
|
|
1430
|
+
39,
|
|
1431
|
+
178,
|
|
1432
|
+
117,
|
|
1433
|
+
9,
|
|
1434
|
+
131,
|
|
1435
|
+
44,
|
|
1436
|
+
26,
|
|
1437
|
+
27,
|
|
1438
|
+
110,
|
|
1439
|
+
90,
|
|
1440
|
+
160,
|
|
1441
|
+
82,
|
|
1442
|
+
59,
|
|
1443
|
+
214,
|
|
1444
|
+
179,
|
|
1445
|
+
41,
|
|
1446
|
+
227,
|
|
1447
|
+
47,
|
|
1448
|
+
132,
|
|
1449
|
+
83,
|
|
1450
|
+
209,
|
|
1451
|
+
0,
|
|
1452
|
+
237,
|
|
1453
|
+
32,
|
|
1454
|
+
252,
|
|
1455
|
+
177,
|
|
1456
|
+
91,
|
|
1457
|
+
106,
|
|
1458
|
+
203,
|
|
1459
|
+
190,
|
|
1460
|
+
57,
|
|
1461
|
+
74,
|
|
1462
|
+
76,
|
|
1463
|
+
88,
|
|
1464
|
+
207,
|
|
1465
|
+
208,
|
|
1466
|
+
239,
|
|
1467
|
+
170,
|
|
1468
|
+
251,
|
|
1469
|
+
67,
|
|
1470
|
+
77,
|
|
1471
|
+
51,
|
|
1472
|
+
133,
|
|
1473
|
+
69,
|
|
1474
|
+
249,
|
|
1475
|
+
2,
|
|
1476
|
+
127,
|
|
1477
|
+
80,
|
|
1478
|
+
60,
|
|
1479
|
+
159,
|
|
1480
|
+
168,
|
|
1481
|
+
81,
|
|
1482
|
+
163,
|
|
1483
|
+
64,
|
|
1484
|
+
143,
|
|
1485
|
+
146,
|
|
1486
|
+
157,
|
|
1487
|
+
56,
|
|
1488
|
+
245,
|
|
1489
|
+
188,
|
|
1490
|
+
182,
|
|
1491
|
+
218,
|
|
1492
|
+
33,
|
|
1493
|
+
16,
|
|
1494
|
+
255,
|
|
1495
|
+
243,
|
|
1496
|
+
210,
|
|
1497
|
+
205,
|
|
1498
|
+
12,
|
|
1499
|
+
19,
|
|
1500
|
+
236,
|
|
1501
|
+
95,
|
|
1502
|
+
151,
|
|
1503
|
+
68,
|
|
1504
|
+
23,
|
|
1505
|
+
196,
|
|
1506
|
+
167,
|
|
1507
|
+
126,
|
|
1508
|
+
61,
|
|
1509
|
+
100,
|
|
1510
|
+
93,
|
|
1511
|
+
25,
|
|
1512
|
+
115,
|
|
1513
|
+
96,
|
|
1514
|
+
129,
|
|
1515
|
+
79,
|
|
1516
|
+
220,
|
|
1517
|
+
34,
|
|
1518
|
+
42,
|
|
1519
|
+
144,
|
|
1520
|
+
136,
|
|
1521
|
+
70,
|
|
1522
|
+
238,
|
|
1523
|
+
184,
|
|
1524
|
+
20,
|
|
1525
|
+
222,
|
|
1526
|
+
94,
|
|
1527
|
+
11,
|
|
1528
|
+
219,
|
|
1529
|
+
224,
|
|
1530
|
+
50,
|
|
1531
|
+
58,
|
|
1532
|
+
10,
|
|
1533
|
+
73,
|
|
1534
|
+
6,
|
|
1535
|
+
36,
|
|
1536
|
+
92,
|
|
1537
|
+
194,
|
|
1538
|
+
211,
|
|
1539
|
+
172,
|
|
1540
|
+
98,
|
|
1541
|
+
145,
|
|
1542
|
+
149,
|
|
1543
|
+
228,
|
|
1544
|
+
121,
|
|
1545
|
+
231,
|
|
1546
|
+
200,
|
|
1547
|
+
55,
|
|
1548
|
+
109,
|
|
1549
|
+
141,
|
|
1550
|
+
213,
|
|
1551
|
+
78,
|
|
1552
|
+
169,
|
|
1553
|
+
108,
|
|
1554
|
+
86,
|
|
1555
|
+
244,
|
|
1556
|
+
234,
|
|
1557
|
+
101,
|
|
1558
|
+
122,
|
|
1559
|
+
174,
|
|
1560
|
+
8,
|
|
1561
|
+
186,
|
|
1562
|
+
120,
|
|
1563
|
+
37,
|
|
1564
|
+
46,
|
|
1565
|
+
28,
|
|
1566
|
+
166,
|
|
1567
|
+
180,
|
|
1568
|
+
198,
|
|
1569
|
+
232,
|
|
1570
|
+
221,
|
|
1571
|
+
116,
|
|
1572
|
+
31,
|
|
1573
|
+
75,
|
|
1574
|
+
189,
|
|
1575
|
+
139,
|
|
1576
|
+
138,
|
|
1577
|
+
112,
|
|
1578
|
+
62,
|
|
1579
|
+
181,
|
|
1580
|
+
102,
|
|
1581
|
+
72,
|
|
1582
|
+
3,
|
|
1583
|
+
246,
|
|
1584
|
+
14,
|
|
1585
|
+
97,
|
|
1586
|
+
53,
|
|
1587
|
+
87,
|
|
1588
|
+
185,
|
|
1589
|
+
134,
|
|
1590
|
+
193,
|
|
1591
|
+
29,
|
|
1592
|
+
158,
|
|
1593
|
+
225,
|
|
1594
|
+
248,
|
|
1595
|
+
152,
|
|
1596
|
+
17,
|
|
1597
|
+
105,
|
|
1598
|
+
217,
|
|
1599
|
+
142,
|
|
1600
|
+
148,
|
|
1601
|
+
155,
|
|
1602
|
+
30,
|
|
1603
|
+
135,
|
|
1604
|
+
233,
|
|
1605
|
+
206,
|
|
1606
|
+
85,
|
|
1607
|
+
40,
|
|
1608
|
+
223,
|
|
1609
|
+
140,
|
|
1610
|
+
161,
|
|
1611
|
+
137,
|
|
1612
|
+
13,
|
|
1613
|
+
191,
|
|
1614
|
+
230,
|
|
1615
|
+
66,
|
|
1616
|
+
104,
|
|
1617
|
+
65,
|
|
1618
|
+
153,
|
|
1619
|
+
45,
|
|
1620
|
+
15,
|
|
1621
|
+
176,
|
|
1622
|
+
84,
|
|
1623
|
+
187,
|
|
1624
|
+
22
|
|
1625
|
+
]);
|
|
1626
|
+
var INV_S_BOX = new Uint8Array([
|
|
1627
|
+
82,
|
|
1628
|
+
9,
|
|
1629
|
+
106,
|
|
1630
|
+
213,
|
|
1631
|
+
48,
|
|
1632
|
+
54,
|
|
1633
|
+
165,
|
|
1634
|
+
56,
|
|
1635
|
+
191,
|
|
1636
|
+
64,
|
|
1637
|
+
163,
|
|
1638
|
+
158,
|
|
1639
|
+
129,
|
|
1640
|
+
243,
|
|
1641
|
+
215,
|
|
1642
|
+
251,
|
|
1643
|
+
124,
|
|
1644
|
+
227,
|
|
1645
|
+
57,
|
|
1646
|
+
130,
|
|
1647
|
+
155,
|
|
1648
|
+
47,
|
|
1649
|
+
255,
|
|
1650
|
+
135,
|
|
1651
|
+
52,
|
|
1652
|
+
142,
|
|
1653
|
+
67,
|
|
1654
|
+
68,
|
|
1655
|
+
196,
|
|
1656
|
+
222,
|
|
1657
|
+
233,
|
|
1658
|
+
203,
|
|
1659
|
+
84,
|
|
1660
|
+
123,
|
|
1661
|
+
148,
|
|
1662
|
+
50,
|
|
1663
|
+
166,
|
|
1664
|
+
194,
|
|
1665
|
+
35,
|
|
1666
|
+
61,
|
|
1667
|
+
238,
|
|
1668
|
+
76,
|
|
1669
|
+
149,
|
|
1670
|
+
11,
|
|
1671
|
+
66,
|
|
1672
|
+
250,
|
|
1673
|
+
195,
|
|
1674
|
+
78,
|
|
1675
|
+
8,
|
|
1676
|
+
46,
|
|
1677
|
+
161,
|
|
1678
|
+
102,
|
|
1679
|
+
40,
|
|
1680
|
+
217,
|
|
1681
|
+
36,
|
|
1682
|
+
178,
|
|
1683
|
+
118,
|
|
1684
|
+
91,
|
|
1685
|
+
162,
|
|
1686
|
+
73,
|
|
1687
|
+
109,
|
|
1688
|
+
139,
|
|
1689
|
+
209,
|
|
1690
|
+
37,
|
|
1691
|
+
114,
|
|
1692
|
+
248,
|
|
1693
|
+
246,
|
|
1694
|
+
100,
|
|
1695
|
+
134,
|
|
1696
|
+
104,
|
|
1697
|
+
152,
|
|
1698
|
+
22,
|
|
1699
|
+
212,
|
|
1700
|
+
164,
|
|
1701
|
+
92,
|
|
1702
|
+
204,
|
|
1703
|
+
93,
|
|
1704
|
+
101,
|
|
1705
|
+
182,
|
|
1706
|
+
146,
|
|
1707
|
+
108,
|
|
1708
|
+
112,
|
|
1709
|
+
72,
|
|
1710
|
+
80,
|
|
1711
|
+
253,
|
|
1712
|
+
237,
|
|
1713
|
+
185,
|
|
1714
|
+
218,
|
|
1715
|
+
94,
|
|
1716
|
+
21,
|
|
1717
|
+
70,
|
|
1718
|
+
87,
|
|
1719
|
+
167,
|
|
1720
|
+
141,
|
|
1721
|
+
157,
|
|
1722
|
+
132,
|
|
1723
|
+
144,
|
|
1724
|
+
216,
|
|
1725
|
+
171,
|
|
1726
|
+
0,
|
|
1727
|
+
140,
|
|
1728
|
+
188,
|
|
1729
|
+
211,
|
|
1730
|
+
10,
|
|
1731
|
+
247,
|
|
1732
|
+
228,
|
|
1733
|
+
88,
|
|
1734
|
+
5,
|
|
1735
|
+
184,
|
|
1736
|
+
179,
|
|
1737
|
+
69,
|
|
1738
|
+
6,
|
|
1739
|
+
208,
|
|
1740
|
+
44,
|
|
1741
|
+
30,
|
|
1742
|
+
143,
|
|
1743
|
+
202,
|
|
1744
|
+
63,
|
|
1745
|
+
15,
|
|
1746
|
+
2,
|
|
1747
|
+
193,
|
|
1748
|
+
175,
|
|
1749
|
+
189,
|
|
1750
|
+
3,
|
|
1751
|
+
1,
|
|
1752
|
+
19,
|
|
1753
|
+
138,
|
|
1754
|
+
107,
|
|
1755
|
+
58,
|
|
1756
|
+
145,
|
|
1757
|
+
17,
|
|
1758
|
+
65,
|
|
1759
|
+
79,
|
|
1760
|
+
103,
|
|
1761
|
+
220,
|
|
1762
|
+
234,
|
|
1763
|
+
151,
|
|
1764
|
+
242,
|
|
1765
|
+
207,
|
|
1766
|
+
206,
|
|
1767
|
+
240,
|
|
1768
|
+
180,
|
|
1769
|
+
230,
|
|
1770
|
+
115,
|
|
1771
|
+
150,
|
|
1772
|
+
172,
|
|
1773
|
+
116,
|
|
1774
|
+
34,
|
|
1775
|
+
231,
|
|
1776
|
+
173,
|
|
1777
|
+
53,
|
|
1778
|
+
133,
|
|
1779
|
+
226,
|
|
1780
|
+
249,
|
|
1781
|
+
55,
|
|
1782
|
+
232,
|
|
1783
|
+
28,
|
|
1784
|
+
117,
|
|
1785
|
+
223,
|
|
1786
|
+
110,
|
|
1787
|
+
71,
|
|
1788
|
+
241,
|
|
1789
|
+
26,
|
|
1790
|
+
113,
|
|
1791
|
+
29,
|
|
1792
|
+
41,
|
|
1793
|
+
197,
|
|
1794
|
+
137,
|
|
1795
|
+
111,
|
|
1796
|
+
183,
|
|
1797
|
+
98,
|
|
1798
|
+
14,
|
|
1799
|
+
170,
|
|
1800
|
+
24,
|
|
1801
|
+
190,
|
|
1802
|
+
27,
|
|
1803
|
+
252,
|
|
1804
|
+
86,
|
|
1805
|
+
62,
|
|
1806
|
+
75,
|
|
1807
|
+
198,
|
|
1808
|
+
210,
|
|
1809
|
+
121,
|
|
1810
|
+
32,
|
|
1811
|
+
154,
|
|
1812
|
+
219,
|
|
1813
|
+
192,
|
|
1814
|
+
254,
|
|
1815
|
+
120,
|
|
1816
|
+
205,
|
|
1817
|
+
90,
|
|
1818
|
+
244,
|
|
1819
|
+
31,
|
|
1820
|
+
221,
|
|
1821
|
+
168,
|
|
1822
|
+
51,
|
|
1823
|
+
136,
|
|
1824
|
+
7,
|
|
1825
|
+
199,
|
|
1826
|
+
49,
|
|
1827
|
+
177,
|
|
1828
|
+
18,
|
|
1829
|
+
16,
|
|
1830
|
+
89,
|
|
1831
|
+
39,
|
|
1832
|
+
128,
|
|
1833
|
+
236,
|
|
1834
|
+
95,
|
|
1835
|
+
96,
|
|
1836
|
+
81,
|
|
1837
|
+
127,
|
|
1838
|
+
169,
|
|
1839
|
+
25,
|
|
1840
|
+
181,
|
|
1841
|
+
74,
|
|
1842
|
+
13,
|
|
1843
|
+
45,
|
|
1844
|
+
229,
|
|
1845
|
+
122,
|
|
1846
|
+
159,
|
|
1847
|
+
147,
|
|
1848
|
+
201,
|
|
1849
|
+
156,
|
|
1850
|
+
239,
|
|
1851
|
+
160,
|
|
1852
|
+
224,
|
|
1853
|
+
59,
|
|
1854
|
+
77,
|
|
1855
|
+
174,
|
|
1856
|
+
42,
|
|
1857
|
+
245,
|
|
1858
|
+
176,
|
|
1859
|
+
200,
|
|
1860
|
+
235,
|
|
1861
|
+
187,
|
|
1862
|
+
60,
|
|
1863
|
+
131,
|
|
1864
|
+
83,
|
|
1865
|
+
153,
|
|
1866
|
+
97,
|
|
1867
|
+
23,
|
|
1868
|
+
43,
|
|
1869
|
+
4,
|
|
1870
|
+
126,
|
|
1871
|
+
186,
|
|
1872
|
+
119,
|
|
1873
|
+
214,
|
|
1874
|
+
38,
|
|
1875
|
+
225,
|
|
1876
|
+
105,
|
|
1877
|
+
20,
|
|
1878
|
+
99,
|
|
1879
|
+
85,
|
|
1880
|
+
33,
|
|
1881
|
+
12,
|
|
1882
|
+
125
|
|
1883
|
+
]);
|
|
1884
|
+
var RCON = new Uint8Array([1, 2, 4, 8, 16, 32, 64, 128, 27, 54]);
|
|
1885
|
+
function gmul(a, b) {
|
|
1886
|
+
let p = 0;
|
|
1887
|
+
for (let i = 0; i < 8; i++) {
|
|
1888
|
+
if (b & 1) p ^= a;
|
|
1889
|
+
const hi = a & 128;
|
|
1890
|
+
a = a << 1 & 255;
|
|
1891
|
+
if (hi) a ^= 27;
|
|
1892
|
+
b >>= 1;
|
|
1893
|
+
}
|
|
1894
|
+
return p;
|
|
1895
|
+
}
|
|
1896
|
+
function expandKey(key) {
|
|
1897
|
+
const w = new Uint32Array(44);
|
|
1898
|
+
for (let i = 0; i < 4; i++) {
|
|
1899
|
+
w[i] = key[4 * i] << 24 | key[4 * i + 1] << 16 | key[4 * i + 2] << 8 | key[4 * i + 3];
|
|
1900
|
+
}
|
|
1901
|
+
for (let i = 4; i < 44; i++) {
|
|
1902
|
+
let temp = w[i - 1];
|
|
1903
|
+
if (i % 4 === 0) {
|
|
1904
|
+
temp = (temp << 8 | temp >>> 24) >>> 0;
|
|
1905
|
+
temp = S_BOX[temp >>> 24 & 255] << 24 | S_BOX[temp >>> 16 & 255] << 16 | S_BOX[temp >>> 8 & 255] << 8 | S_BOX[temp & 255];
|
|
1906
|
+
temp = (temp ^ RCON[i / 4 - 1] << 24) >>> 0;
|
|
1907
|
+
}
|
|
1908
|
+
w[i] = (w[i - 4] ^ temp) >>> 0;
|
|
1909
|
+
}
|
|
1910
|
+
return w;
|
|
1911
|
+
}
|
|
1912
|
+
function decryptBlock(block, roundKeys) {
|
|
1913
|
+
const s = new Uint8Array(16);
|
|
1914
|
+
for (let i = 0; i < 16; i++) s[i] = block[i];
|
|
1915
|
+
addRoundKey(s, roundKeys, 10);
|
|
1916
|
+
for (let round = 9; round >= 1; round--) {
|
|
1917
|
+
invShiftRows(s);
|
|
1918
|
+
invSubBytes(s);
|
|
1919
|
+
addRoundKey(s, roundKeys, round);
|
|
1920
|
+
invMixColumns(s);
|
|
1921
|
+
}
|
|
1922
|
+
invShiftRows(s);
|
|
1923
|
+
invSubBytes(s);
|
|
1924
|
+
addRoundKey(s, roundKeys, 0);
|
|
1925
|
+
return s;
|
|
1926
|
+
}
|
|
1927
|
+
function addRoundKey(s, w, round) {
|
|
1928
|
+
const base = round * 4;
|
|
1929
|
+
for (let c = 0; c < 4; c++) {
|
|
1930
|
+
const k = w[base + c];
|
|
1931
|
+
s[c * 4] ^= k >>> 24 & 255;
|
|
1932
|
+
s[c * 4 + 1] ^= k >>> 16 & 255;
|
|
1933
|
+
s[c * 4 + 2] ^= k >>> 8 & 255;
|
|
1934
|
+
s[c * 4 + 3] ^= k & 255;
|
|
1935
|
+
}
|
|
1936
|
+
}
|
|
1937
|
+
function invSubBytes(s) {
|
|
1938
|
+
for (let i = 0; i < 16; i++) s[i] = INV_S_BOX[s[i]];
|
|
1939
|
+
}
|
|
1940
|
+
function invShiftRows(s) {
|
|
1941
|
+
let t = s[13];
|
|
1942
|
+
s[13] = s[9];
|
|
1943
|
+
s[9] = s[5];
|
|
1944
|
+
s[5] = s[1];
|
|
1945
|
+
s[1] = t;
|
|
1946
|
+
t = s[2];
|
|
1947
|
+
s[2] = s[10];
|
|
1948
|
+
s[10] = t;
|
|
1949
|
+
t = s[6];
|
|
1950
|
+
s[6] = s[14];
|
|
1951
|
+
s[14] = t;
|
|
1952
|
+
t = s[3];
|
|
1953
|
+
s[3] = s[7];
|
|
1954
|
+
s[7] = s[11];
|
|
1955
|
+
s[11] = s[15];
|
|
1956
|
+
s[15] = t;
|
|
1957
|
+
}
|
|
1958
|
+
function invMixColumns(s) {
|
|
1959
|
+
for (let c = 0; c < 4; c++) {
|
|
1960
|
+
const i = c * 4;
|
|
1961
|
+
const a0 = s[i], a1 = s[i + 1], a2 = s[i + 2], a3 = s[i + 3];
|
|
1962
|
+
s[i] = gmul(a0, 14) ^ gmul(a1, 11) ^ gmul(a2, 13) ^ gmul(a3, 9);
|
|
1963
|
+
s[i + 1] = gmul(a0, 9) ^ gmul(a1, 14) ^ gmul(a2, 11) ^ gmul(a3, 13);
|
|
1964
|
+
s[i + 2] = gmul(a0, 13) ^ gmul(a1, 9) ^ gmul(a2, 14) ^ gmul(a3, 11);
|
|
1965
|
+
s[i + 3] = gmul(a0, 11) ^ gmul(a1, 13) ^ gmul(a2, 9) ^ gmul(a3, 14);
|
|
1966
|
+
}
|
|
1967
|
+
}
|
|
1968
|
+
function aes128EcbDecrypt(data, key) {
|
|
1969
|
+
if (key.length !== 16) throw new Error("AES-128 \uD0A4\uB294 16\uBC14\uC774\uD2B8\uC5EC\uC57C \uD569\uB2C8\uB2E4");
|
|
1970
|
+
if (data.length % 16 !== 0) throw new Error("AES ECB \uC785\uB825\uC740 16\uBC14\uC774\uD2B8\uC758 \uBC30\uC218\uC5EC\uC57C \uD569\uB2C8\uB2E4");
|
|
1971
|
+
const roundKeys = expandKey(key);
|
|
1972
|
+
const out = new Uint8Array(data.length);
|
|
1973
|
+
for (let offset = 0; offset < data.length; offset += 16) {
|
|
1974
|
+
const block = data.subarray(offset, offset + 16);
|
|
1975
|
+
const decrypted = decryptBlock(block, roundKeys);
|
|
1976
|
+
out.set(decrypted, offset);
|
|
1977
|
+
}
|
|
1978
|
+
return out;
|
|
1979
|
+
}
|
|
1980
|
+
|
|
1981
|
+
// src/hwp5/crypto.ts
|
|
1982
|
+
var MsvcLcg = class {
|
|
1983
|
+
seed;
|
|
1984
|
+
constructor(seed) {
|
|
1985
|
+
this.seed = seed >>> 0;
|
|
1986
|
+
}
|
|
1987
|
+
/** 0 ~ 0x7FFF 범위 난수 반환 (MSVC rand() 호환) */
|
|
1988
|
+
rand() {
|
|
1989
|
+
this.seed = Math.imul(this.seed, 214013) + 2531011 >>> 0;
|
|
1990
|
+
return this.seed >>> 16 & 32767;
|
|
1991
|
+
}
|
|
1992
|
+
};
|
|
1993
|
+
function decryptDistributePayload(payload) {
|
|
1994
|
+
if (payload.length < 256) throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
|
|
1995
|
+
const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
|
|
1996
|
+
const lcg = new MsvcLcg(seed);
|
|
1997
|
+
const result = new Uint8Array(256);
|
|
1998
|
+
result[0] = payload[0];
|
|
1999
|
+
result[1] = payload[1];
|
|
2000
|
+
result[2] = payload[2];
|
|
2001
|
+
result[3] = payload[3];
|
|
2002
|
+
let i = 4;
|
|
2003
|
+
while (i < 256) {
|
|
2004
|
+
const keyByte = lcg.rand() & 255;
|
|
2005
|
+
const n = (lcg.rand() & 15) + 1;
|
|
2006
|
+
for (let j = 0; j < n && i < 256; j++, i++) {
|
|
2007
|
+
result[i] = payload[i] ^ keyByte;
|
|
2008
|
+
}
|
|
2009
|
+
}
|
|
2010
|
+
return result;
|
|
2011
|
+
}
|
|
2012
|
+
function extractAesKey(decryptedPayload) {
|
|
2013
|
+
const offset = 4 + (decryptedPayload[0] & 15);
|
|
2014
|
+
if (offset + 16 > decryptedPayload.length) {
|
|
2015
|
+
throw new Error("AES \uD0A4 \uCD94\uCD9C \uC2E4\uD328: \uC624\uD504\uC14B\uC774 payload \uBC94\uC704\uB97C \uCD08\uACFC\uD569\uB2C8\uB2E4");
|
|
2016
|
+
}
|
|
2017
|
+
return decryptedPayload.slice(offset, offset + 16);
|
|
2018
|
+
}
|
|
2019
|
+
function parseRecordHeader(data, offset) {
|
|
2020
|
+
if (offset + 4 > data.length) throw new Error("\uB808\uCF54\uB4DC \uD5E4\uB354 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
|
|
2021
|
+
const header = (data[offset] | data[offset + 1] << 8 | data[offset + 2] << 16 | data[offset + 3] << 24) >>> 0;
|
|
2022
|
+
const tagId = header & 1023;
|
|
2023
|
+
let size = header >>> 20 & 4095;
|
|
2024
|
+
let headerSize = 4;
|
|
2025
|
+
if (size === 4095) {
|
|
2026
|
+
if (offset + 8 > data.length) throw new Error("\uD655\uC7A5 \uB808\uCF54\uB4DC \uD06C\uAE30 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
|
|
2027
|
+
size = (data[offset + 4] | data[offset + 5] << 8 | data[offset + 6] << 16 | data[offset + 7] << 24) >>> 0;
|
|
2028
|
+
headerSize = 8;
|
|
2029
|
+
}
|
|
2030
|
+
return { tagId, size, headerSize };
|
|
2031
|
+
}
|
|
2032
|
+
var TAG_DISTRIBUTE_DOC_DATA = 16 + 28;
|
|
2033
|
+
function decryptViewText(viewTextRaw, compressed) {
|
|
2034
|
+
const data = new Uint8Array(viewTextRaw);
|
|
2035
|
+
const rec = parseRecordHeader(data, 0);
|
|
2036
|
+
if (rec.tagId !== TAG_DISTRIBUTE_DOC_DATA) {
|
|
2037
|
+
throw new Error(`\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC758 \uCCAB \uB808\uCF54\uB4DC\uAC00 DISTRIBUTE_DOC_DATA(${TAG_DISTRIBUTE_DOC_DATA})\uAC00 \uC544\uB2D9\uB2C8\uB2E4 (\uC2E4\uC81C: ${rec.tagId})`);
|
|
2038
|
+
}
|
|
2039
|
+
const payloadStart = rec.headerSize;
|
|
2040
|
+
const payloadEnd = payloadStart + rec.size;
|
|
2041
|
+
if (payloadEnd > data.length || rec.size < 256) {
|
|
2042
|
+
throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 \uC720\uD6A8\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2043
|
+
}
|
|
2044
|
+
const payload = data.subarray(payloadStart, payloadStart + 256);
|
|
2045
|
+
const decryptedPayload = decryptDistributePayload(payload);
|
|
2046
|
+
const aesKey = extractAesKey(decryptedPayload);
|
|
2047
|
+
const encryptedStart = payloadEnd;
|
|
2048
|
+
const encryptedData = data.subarray(encryptedStart);
|
|
2049
|
+
if (encryptedData.length === 0) {
|
|
2050
|
+
throw new Error("\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC5D0 \uC554\uD638\uD654\uB41C \uBCF8\uBB38 \uB370\uC774\uD130\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2051
|
+
}
|
|
2052
|
+
const alignedLen = encryptedData.length - encryptedData.length % 16;
|
|
2053
|
+
if (alignedLen === 0) {
|
|
2054
|
+
throw new Error("\uC554\uD638\uD654\uB41C \uB370\uC774\uD130\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (16\uBC14\uC774\uD2B8 \uBBF8\uB9CC)");
|
|
2055
|
+
}
|
|
2056
|
+
const alignedData = encryptedData.subarray(0, alignedLen);
|
|
2057
|
+
const decrypted = aes128EcbDecrypt(alignedData, aesKey);
|
|
2058
|
+
if (compressed) {
|
|
2059
|
+
try {
|
|
2060
|
+
return decompressStream(Buffer.from(decrypted));
|
|
2061
|
+
} catch {
|
|
2062
|
+
return Buffer.from(decrypted);
|
|
2063
|
+
}
|
|
2064
|
+
}
|
|
2065
|
+
return Buffer.from(decrypted);
|
|
2066
|
+
}
|
|
2067
|
+
|
|
2068
|
+
// src/hwp5/cfb-lenient.ts
|
|
2069
|
+
/** Eight-byte signature expected at the very start of a CFB container. */
var CFB_MAGIC = Buffer.from([208, 207, 17, 224, 161, 177, 26, 225]);
/** Sector id that terminates a sector chain (0xFFFFFFFE). */
var END_OF_CHAIN = 4294967294;
/** Sector id that marks an unused slot (0xFFFFFFFF). */
var FREE_SECT = 4294967295;
/** Maximum number of sectors followed in a single chain before giving up. */
var MAX_CHAIN_LENGTH = 1e6;
/** Maximum number of 128-byte directory entries that are parsed. */
var MAX_DIR_ENTRIES = 1e5;
/** Largest stream (in bytes) that readChain agrees to assemble. */
var MAX_STREAM_SIZE = 100 * 1024 * 1024;

/**
 * Best-effort parser for Compound File Binary containers.
 *
 * Instead of rejecting damaged files, out-of-range sectors are treated as
 * empty, chain loops are cut, and stream lookup is done by name only (the
 * storage hierarchy is not walked).
 *
 * @param {Buffer} data - Raw bytes of the container.
 * @returns {{ findStream(path: string): (Buffer|null), entries(): Array<{name: string, type: number, startSector: number, size: number}> }}
 *   `findStream` returns the bytes of the first stream entry whose name matches
 *   the path (or null when missing/empty); `entries` lists all stream entries.
 * @throws {Error} When the buffer is shorter than 512 bytes, the magic bytes do
 *   not match, a sector-size shift is out of range, or a requested stream
 *   exceeds MAX_STREAM_SIZE.
 */
function parseLenientCfb(data) {
  if (data.length < 512) throw new Error("CFB \uD30C\uC77C\uC774 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 512\uBC14\uC774\uD2B8)");
  if (!data.subarray(0, 8).equals(CFB_MAGIC)) throw new Error("CFB \uB9E4\uC9C1 \uBC14\uC774\uD2B8 \uBD88\uC77C\uCE58");
  const sectorSizeShift = data.readUInt16LE(30);
  if (sectorSizeShift < 7 || sectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + sectorSizeShift);
  const sectorSize = 1 << sectorSizeShift;
  const miniSectorSizeShift = data.readUInt16LE(32);
  if (miniSectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uBBF8\uB2C8 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + miniSectorSizeShift);
  const miniSectorSize = 1 << miniSectorSizeShift;
  const fatSectorCount = data.readUInt32LE(44);
  const firstDirSector = data.readUInt32LE(48);
  const miniStreamCutoff = data.readUInt32LE(56);
  const firstMiniFatSector = data.readUInt32LE(60);
  const miniFatSectorCount = data.readUInt32LE(64);
  const firstDifatSector = data.readUInt32LE(68);
  const difatSectorCount = data.readUInt32LE(72);

  // The header occupies one full sector. With 4096-byte sectors it is padded
  // to 4096 bytes, so sector 0 starts at `sectorSize`, not at byte 512.
  // For sectors of 512 bytes or less the previous 512-byte base is kept.
  const headerSize = Math.max(512, sectorSize);

  /** Byte offset of sector `id` inside `data`. */
  function sectorOffset(id) {
    return headerSize + id * sectorSize;
  }

  /** Returns the bytes of sector `id`, or an empty buffer when it lies past EOF. */
  function readSectorData(id) {
    const off = sectorOffset(id);
    if (off + sectorSize > data.length) return Buffer.alloc(0);
    return data.subarray(off, off + sectorSize);
  }

  // FAT sector ids: first the (up to) 109 slots stored in the header...
  const fatSectors = [];
  for (let i = 0; i < 109 && fatSectors.length < fatSectorCount; i++) {
    const sid = data.readUInt32LE(76 + i * 4);
    if (sid === FREE_SECT || sid === END_OF_CHAIN) break;
    fatSectors.push(sid);
  }

  // ...then the DIFAT sector chain. The last slot of each DIFAT sector points
  // to the next DIFAT sector.
  const difatEntriesPerSector = sectorSize / 4 - 1;
  const visitedDifat = /* @__PURE__ */ new Set();
  let difatSector = firstDifatSector;
  for (let d = 0; d < difatSectorCount && difatSector !== END_OF_CHAIN && difatSector !== FREE_SECT; d++) {
    if (visitedDifat.has(difatSector)) break;
    visitedDifat.add(difatSector);
    const buf = readSectorData(difatSector);
    // A DIFAT pointer past EOF yields an empty buffer; stop here instead of
    // letting readUInt32LE throw and abort the whole lenient parse.
    if (buf.length < sectorSize) break;
    for (let i = 0; i < difatEntriesPerSector && fatSectors.length < fatSectorCount; i++) {
      const sid = buf.readUInt32LE(i * 4);
      if (sid === FREE_SECT || sid === END_OF_CHAIN) continue;
      fatSectors.push(sid);
    }
    difatSector = buf.readUInt32LE(difatEntriesPerSector * 4);
  }

  // Flatten all FAT sectors into one lookup table; unreadable slots count as free.
  const entriesPerFatSector = sectorSize / 4;
  const fatTable = new Uint32Array(fatSectors.length * entriesPerFatSector);
  for (let fi = 0; fi < fatSectors.length; fi++) {
    const buf = readSectorData(fatSectors[fi]);
    for (let i = 0; i < entriesPerFatSector; i++) {
      fatTable[fi * entriesPerFatSector + i] = i * 4 + 3 < buf.length ? buf.readUInt32LE(i * 4) : FREE_SECT;
    }
  }

  /**
   * Follows a FAT chain from `startSector` and returns at most `maxBytes`
   * bytes. Stops on loops, on chains longer than MAX_CHAIN_LENGTH, and on
   * sector ids outside the FAT table.
   */
  function readChain(startSector, maxBytes) {
    if (startSector === END_OF_CHAIN || startSector === FREE_SECT) return Buffer.alloc(0);
    if (maxBytes > MAX_STREAM_SIZE) throw new Error("\uC2A4\uD2B8\uB9BC\uC774 \uB108\uBB34 \uD07D\uB2C8\uB2E4");
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < maxBytes) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const buf = readSectorData(current);
      const remaining = maxBytes - totalRead;
      chunks.push(remaining < sectorSize ? buf.subarray(0, remaining) : buf);
      totalRead += Math.min(buf.length, remaining);
      current = current < fatTable.length ? fatTable[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }

  let miniFatTable = null;
  /** Lazily loads the mini FAT; an absent mini FAT is an empty table. */
  function getMiniFatTable() {
    if (miniFatTable) return miniFatTable;
    if (miniFatSectorCount === 0 || firstMiniFatSector === END_OF_CHAIN) {
      miniFatTable = new Uint32Array(0);
      return miniFatTable;
    }
    const miniFatData = readChain(firstMiniFatSector, miniFatSectorCount * sectorSize);
    const entries = Math.floor(miniFatData.length / 4);
    miniFatTable = new Uint32Array(entries);
    for (let i = 0; i < entries; i++) {
      miniFatTable[i] = miniFatData.readUInt32LE(i * 4);
    }
    return miniFatTable;
  }

  // Directory: fixed 128-byte records. Records with an implausible name length
  // are kept as blank placeholders so that indices stay aligned.
  const dirData = readChain(firstDirSector, MAX_DIR_ENTRIES * 128);
  const dirEntries = [];
  for (let offset = 0; offset + 128 <= dirData.length && dirEntries.length < MAX_DIR_ENTRIES; offset += 128) {
    const nameLen = dirData.readUInt16LE(offset + 64);
    if (nameLen <= 0 || nameLen > 64) {
      dirEntries.push({ name: "", type: 0, startSector: 0, size: 0 });
      continue;
    }
    // nameLen includes the 2-byte UTF-16 terminator.
    const nameBytes = nameLen - 2;
    const name = nameBytes > 0 ? dirData.subarray(offset, offset + nameBytes).toString("utf16le") : "";
    const type = dirData[offset + 66];
    const startSector = dirData.readUInt32LE(offset + 116);
    const size = dirData.readUInt32LE(offset + 120);
    dirEntries.push({ name, type, startSector, size });
  }

  let miniStreamData = null;
  /** Lazily loads the mini stream, which is stored as the root entry's (type 5) chain. */
  function getMiniStream() {
    if (miniStreamData) return miniStreamData;
    const root = dirEntries[0];
    if (!root || root.type !== 5) {
      miniStreamData = Buffer.alloc(0);
      return miniStreamData;
    }
    miniStreamData = readChain(root.startSector, root.size || MAX_STREAM_SIZE);
    return miniStreamData;
  }

  /** Reads `size` bytes from the mini stream by following the mini FAT. */
  function readMiniStream(startSector, size) {
    const mft = getMiniFatTable();
    const ms = getMiniStream();
    if (mft.length === 0 || ms.length === 0) return Buffer.alloc(0);
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < size) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const off = current * miniSectorSize;
      const remaining = size - totalRead;
      const chunkSize = Math.min(miniSectorSize, remaining);
      // Mini sectors outside the mini stream are skipped, not fatal.
      if (off + chunkSize <= ms.length) {
        chunks.push(ms.subarray(off, off + chunkSize));
      }
      totalRead += chunkSize;
      current = current < mft.length ? mft[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }

  /** Reads a stream entry, trying the mini stream first for small streams. */
  function readStreamData(entry) {
    if (entry.size === 0) return Buffer.alloc(0);
    if (entry.size < miniStreamCutoff) {
      const miniResult = readMiniStream(entry.startSector, entry.size);
      if (miniResult.length > 0) return miniResult;
    }
    // Fall back to the regular FAT chain when the mini stream yields nothing.
    return readChain(entry.startSector, entry.size);
  }

  /**
   * Finds a stream entry (type 2) by name. The storage part of the path is
   * ignored: for "Storage/Name" the remainder after the first segment is
   * matched first, then the last segment alone.
   */
  function findEntryByPath(path) {
    const parts = path.replace(/^\//, "").split("/");
    const streamNamed = (wanted) => dirEntries.find((e) => e.type === 2 && e.name === wanted);
    if (parts.length === 1) {
      return streamNamed(parts[0]) ?? null;
    }
    return streamNamed(parts.slice(1).join("/")) ?? streamNamed(parts[parts.length - 1]) ?? null;
  }

  return {
    findStream(path) {
      const normalized = path.replace(/^\//, "");
      const entry = findEntryByPath(normalized);
      if (!entry || entry.type !== 2) return null;
      const stream = readStreamData(entry);
      return stream.length > 0 ? stream : null;
    },
    entries() {
      return dirEntries.filter((e) => e.type === 2);
    }
  };
}
|
|
2246
|
+
|
|
1356
2247
|
// src/hwp5/parser.ts
|
|
1357
2248
|
init_page_range();
|
|
1358
2249
|
var import_module = require("module");
|
|
@@ -1362,21 +2253,40 @@ var CFB = require2("cfb");
|
|
|
1362
2253
|
var MAX_SECTIONS = 100;
|
|
1363
2254
|
var MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024;
|
|
1364
2255
|
function parseHwp5Document(buffer, options) {
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
2256
|
+
let cfb = null;
|
|
2257
|
+
let lenientCfb = null;
|
|
2258
|
+
const warnings = [];
|
|
2259
|
+
try {
|
|
2260
|
+
cfb = CFB.parse(buffer);
|
|
2261
|
+
} catch {
|
|
2262
|
+
try {
|
|
2263
|
+
lenientCfb = parseLenientCfb(buffer);
|
|
2264
|
+
warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
|
|
2265
|
+
} catch {
|
|
2266
|
+
throw new KordocError("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
|
|
2267
|
+
}
|
|
2268
|
+
}
|
|
2269
|
+
const findStream = (path) => {
|
|
2270
|
+
if (cfb) {
|
|
2271
|
+
const entry = CFB.find(cfb, path);
|
|
2272
|
+
return entry?.content ? Buffer.from(entry.content) : null;
|
|
2273
|
+
}
|
|
2274
|
+
return lenientCfb.findStream(path);
|
|
2275
|
+
};
|
|
2276
|
+
const headerData = findStream("/FileHeader");
|
|
2277
|
+
if (!headerData) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
|
|
2278
|
+
const header = parseFileHeader(headerData);
|
|
1369
2279
|
if (header.signature !== "HWP Document File") throw new KordocError("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
|
|
1370
2280
|
if (header.flags & FLAG_ENCRYPTED) throw new KordocError("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
1371
2281
|
if (header.flags & FLAG_DRM) throw new KordocError("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
1372
2282
|
const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
|
|
2283
|
+
const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
|
|
1373
2284
|
const metadata = {
|
|
1374
2285
|
version: `${header.versionMajor}.x`
|
|
1375
2286
|
};
|
|
1376
|
-
extractHwp5Metadata(cfb, metadata);
|
|
1377
|
-
const docInfo = parseDocInfoStream(cfb, compressed);
|
|
1378
|
-
const
|
|
1379
|
-
const sections = findSections(cfb);
|
|
2287
|
+
if (cfb) extractHwp5Metadata(cfb, metadata);
|
|
2288
|
+
const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
|
|
2289
|
+
const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
|
|
1380
2290
|
if (sections.length === 0) throw new KordocError("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
1381
2291
|
metadata.pageCount = sections.length;
|
|
1382
2292
|
const pageFilter = options?.pages ? parsePageRange(options.pages, sections.length) : null;
|
|
@@ -1388,7 +2298,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
1388
2298
|
if (pageFilter && !pageFilter.has(si + 1)) continue;
|
|
1389
2299
|
try {
|
|
1390
2300
|
const sectionData = sections[si];
|
|
1391
|
-
const data = compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
2301
|
+
const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
1392
2302
|
totalDecompressed += data.length;
|
|
1393
2303
|
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new KordocError("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
1394
2304
|
const records = readRecords(data);
|
|
@@ -1401,7 +2311,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
1401
2311
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
1402
2312
|
}
|
|
1403
2313
|
}
|
|
1404
|
-
const images = extractHwp5Images(cfb, blocks, compressed, warnings);
|
|
2314
|
+
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
1405
2315
|
if (docInfo) {
|
|
1406
2316
|
detectHwp5Headings(blocks, docInfo);
|
|
1407
2317
|
}
|
|
@@ -1420,6 +2330,15 @@ function parseDocInfoStream(cfb, compressed) {
|
|
|
1420
2330
|
return null;
|
|
1421
2331
|
}
|
|
1422
2332
|
}
|
|
2333
|
+
/**
 * Builds the DocInfo model from an already-extracted DocInfo stream.
 *
 * @param {Buffer|null} raw - Stream bytes, or a falsy value when the stream is absent.
 * @param {boolean} compressed - When true the bytes are passed through decompressStream first.
 * @returns {*} Whatever parseDocInfo produces, or null when the stream is
 *   absent or any step throws (DocInfo is treated as optional).
 */
function parseDocInfoFromStream(raw, compressed) {
  if (raw) {
    try {
      const payload = compressed ? decompressStream(raw) : raw;
      const records = readRecords(payload);
      return parseDocInfo(records);
    } catch {
      // Deliberate best effort: an unreadable DocInfo falls through to null.
    }
  }
  return null;
}
|
|
1423
2342
|
function detectHwp5Headings(blocks, docInfo) {
|
|
1424
2343
|
let baseFontSize = 0;
|
|
1425
2344
|
for (const style of docInfo.styles) {
|
|
@@ -1499,6 +2418,20 @@ function extractHwp5Metadata(cfb, metadata) {
|
|
|
1499
2418
|
} catch {
|
|
1500
2419
|
}
|
|
1501
2420
|
}
|
|
2421
|
+
/**
 * Collects the decrypted `/ViewText/Section{N}` streams of a strictly parsed
 * container, in section order.
 *
 * Scanning stops at the first index that has no content or whose decryption
 * throws, and never goes beyond MAX_SECTIONS.
 *
 * @param {*} cfb - Container object handed to CFB.find.
 * @param {boolean} compressed - Forwarded to decryptViewText.
 * @returns {Array} Results of decryptViewText, ordered by section index.
 */
function findViewTextSections(cfb, compressed) {
  const found = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const entry = CFB.find(cfb, `/ViewText/Section${index}`);
    if (!entry?.content) break;
    let plain;
    try {
      plain = decryptViewText(Buffer.from(entry.content), compressed);
    } catch {
      // An undecryptable section ends the scan; earlier sections are kept.
      break;
    }
    found.push({ idx: index, content: plain });
    index += 1;
  }
  found.sort((left, right) => left.idx - right.idx);
  return found.map((section) => section.content);
}
|
|
1502
2435
|
function findSections(cfb) {
|
|
1503
2436
|
const sections = [];
|
|
1504
2437
|
for (let i = 0; i < MAX_SECTIONS; i++) {
|
|
@@ -1517,6 +2450,38 @@ function findSections(cfb) {
|
|
|
1517
2450
|
}
|
|
1518
2451
|
return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
|
|
1519
2452
|
}
|
|
2453
|
+
/**
 * Collects the raw body-text section streams from a leniently parsed container.
 *
 * Sections are first looked up as `/BodyText/Section{N}` (then plain
 * `Section{N}`) for N = 0, 1, ... until one is missing. If none are found, all
 * stream entries whose name starts with "Section" are scanned instead.
 *
 * The streams are returned exactly as stored. parseHwp5Document inflates
 * non-distribution sections itself (and records a per-section warning on
 * failure), so inflating here as well would decompress the data twice and
 * would turn one corrupt section into a failure of the whole document.
 *
 * @param {{ findStream(path: string): (Buffer|null), entries(): Array<{name: string}> }} lcfb
 *   Lenient container as returned by parseLenientCfb.
 * @param {boolean} compressed - Unused; kept so existing callers keep working.
 * @returns {Buffer[]} Raw section streams ordered by section index
 *   (at most MAX_SECTIONS).
 */
function findSectionsLenient(lcfb, compressed) {
  const sections = [];
  for (let i = 0; i < MAX_SECTIONS; i++) {
    const raw = lcfb.findStream(`/BodyText/Section${i}`) ?? lcfb.findStream(`Section${i}`);
    if (!raw) break;
    sections.push({ idx: i, content: raw });
  }
  if (sections.length === 0) {
    // Fallback for damaged directories: accept any stream named "Section...".
    for (const e of lcfb.entries()) {
      if (sections.length >= MAX_SECTIONS) break;
      if (!e.name.startsWith("Section")) continue;
      // A non-numeric suffix sorts as section 0.
      const idx = Number.parseInt(e.name.slice("Section".length), 10) || 0;
      const raw = lcfb.findStream(e.name);
      if (raw) sections.push({ idx, content: raw });
    }
  }
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
}
|
|
2472
|
+
/**
 * Lenient-container counterpart of the ViewText section lookup: fetches
 * `/ViewText/Section{N}` (falling back to plain `Section{N}`) and decrypts
 * each stream with decryptViewText.
 *
 * Scanning ends at the first missing stream or the first decryption failure,
 * and never goes beyond MAX_SECTIONS.
 *
 * @param {{ findStream(path: string): (Buffer|null) }} lcfb - Lenient container.
 * @param {boolean} compressed - Forwarded to decryptViewText.
 * @returns {Array} Results of decryptViewText, ordered by section index.
 */
function findViewTextSectionsLenient(lcfb, compressed) {
  const found = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const stream = lcfb.findStream(`/ViewText/Section${index}`) ?? lcfb.findStream(`Section${index}`);
    if (!stream) break;
    let plain;
    try {
      plain = decryptViewText(stream, compressed);
    } catch {
      // An undecryptable section ends the scan; earlier sections are kept.
      break;
    }
    found.push({ idx: index, content: plain });
    index += 1;
  }
  found.sort((left, right) => left.idx - right.idx);
  return found.map((section) => section.content);
}
|
|
1520
2485
|
var TAG_SHAPE_COMPONENT = 74;
|
|
1521
2486
|
function extractBinDataId(records, ctrlIdx) {
|
|
1522
2487
|
const ctrlLevel = records[ctrlIdx].level;
|
|
@@ -1591,6 +2556,53 @@ function extractHwp5Images(cfb, blocks, compressed, warnings) {
|
|
|
1591
2556
|
}
|
|
1592
2557
|
return images;
|
|
1593
2558
|
}
|
|
2559
|
+
/**
 * Resolves image blocks against the `BINxxxx` streams of a leniently parsed
 * container.
 *
 * Every block of type "image" whose text is a numeric BinData id is either
 * - rewritten to point at a generated file name (`image_001.png`, ...) and
 *   given an `imageData` payload, or
 * - downgraded to a placeholder paragraph with a SKIPPED_IMAGE warning when
 *   the BinData stream is missing or its format is not recognised.
 *
 * Blocks and `warnings` are mutated in place. When the container holds no
 * `BINxxxx` stream at all, nothing is touched and an empty list is returned.
 *
 * @param {{ findStream(path: string): (Buffer|null), entries(): Array<{name: string}> }} lcfb
 *   Lenient container as returned by parseLenientCfb.
 * @param {Array<object>} blocks - Parsed blocks; image blocks carry the BinData id in `text`.
 * @param {boolean} compressed - When true each stream is passed through decompressStream.
 * @param {Array<object>} warnings - Receives SKIPPED_IMAGE warnings.
 * @returns {Array<{filename: string, data: Uint8Array, mimeType: string}>} Extracted images.
 */
function extractHwp5ImagesLenient(lcfb, blocks, compressed, warnings) {
  const binDataMap = /* @__PURE__ */ new Map();
  const binRe = /^BIN(\d{4})/i;
  for (const e of lcfb.entries()) {
    const match = e.name.match(binRe);
    if (!match) continue;
    const idx = Number.parseInt(match[1], 10);
    let raw = lcfb.findStream(e.name);
    if (!raw) continue;
    if (compressed) {
      try {
        raw = decompressStream(raw);
      } catch {
        // Best effort: keep the stored bytes when they cannot be inflated.
      }
    }
    binDataMap.set(idx, { data: raw, name: e.name });
  }
  if (binDataMap.size === 0) return [];
  const images = [];
  let imageIndex = 0;
  for (const block of blocks) {
    if (block.type !== "image" || !block.text) continue;
    const binId = Number.parseInt(block.text, 10);
    if (Number.isNaN(binId)) continue;
    const bin = binDataMap.get(binId);
    if (!bin) {
      // The message previously contained three U+FFFD characters where the
      // first syllable had been mangled; it now reads "BinData <id> 없음".
      warnings.push({ page: block.pageNumber, message: `BinData ${binId} \uC5C6\uC74C`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: BinData ${binId}]`;
      continue;
    }
    const mime = detectImageMime(bin.data);
    if (!mime) {
      warnings.push({ page: block.pageNumber, message: `BinData ${binId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: ${bin.name}]`;
      continue;
    }
    imageIndex++;
    const ext = mime.includes("jpeg") ? "jpg" : mime.includes("png") ? "png" : mime.includes("gif") ? "gif" : mime.includes("bmp") ? "bmp" : "bin";
    const filename = `image_${String(imageIndex).padStart(3, "0")}.${ext}`;
    images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
    block.text = filename;
    block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
  }
  return images;
}
|
|
1594
2606
|
function parseSection(records, docInfo, warnings, sectionNum) {
|
|
1595
2607
|
const blocks = [];
|
|
1596
2608
|
let i = 0;
|
|
@@ -1627,12 +2639,61 @@ function parseSection(records, docInfo, warnings, sectionNum) {
|
|
|
1627
2639
|
}
|
|
1628
2640
|
} else if (ctrlId === " elo" || ctrlId === "ole ") {
|
|
1629
2641
|
warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
|
|
2642
|
+
} else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
|
|
2643
|
+
const noteText = extractNoteText(records, i);
|
|
2644
|
+
if (noteText && blocks.length > 0) {
|
|
2645
|
+
const lastBlock = blocks[blocks.length - 1];
|
|
2646
|
+
if (lastBlock.type === "paragraph") {
|
|
2647
|
+
lastBlock.footnoteText = lastBlock.footnoteText ? lastBlock.footnoteText + "; " + noteText : noteText;
|
|
2648
|
+
}
|
|
2649
|
+
}
|
|
2650
|
+
} else if (ctrlId === "%tok" || ctrlId === "klnk") {
|
|
2651
|
+
const url = extractHyperlinkUrl(rec.data);
|
|
2652
|
+
if (url && blocks.length > 0) {
|
|
2653
|
+
const lastBlock = blocks[blocks.length - 1];
|
|
2654
|
+
if (lastBlock.type === "paragraph" && !lastBlock.href) {
|
|
2655
|
+
lastBlock.href = sanitizeHref(url) ?? void 0;
|
|
2656
|
+
}
|
|
2657
|
+
}
|
|
1630
2658
|
}
|
|
1631
2659
|
}
|
|
1632
2660
|
i++;
|
|
1633
2661
|
}
|
|
1634
2662
|
return blocks;
|
|
1635
2663
|
}
|
|
2664
|
+
/**
 * Gathers the text of the paragraphs nested under a control record.
 *
 * Starting right after `ctrlIdx`, records are read while their level stays
 * deeper than the control's level, looking at no more than 99 following
 * records. Each TAG_PARA_TEXT record contributes its trimmed, non-empty text.
 *
 * @param {Array<{level: number, tagId: number, data: *}>} records - Section records.
 * @param {number} ctrlIdx - Index of the control record within `records`.
 * @returns {string|null} Collected texts joined by a single space, or null
 *   when nothing was collected.
 */
function extractNoteText(records, ctrlIdx) {
  const baseLevel = records[ctrlIdx].level;
  const limit = Math.min(records.length, ctrlIdx + 100);
  const pieces = [];
  for (let pos = ctrlIdx + 1; pos < limit; pos++) {
    const rec = records[pos];
    if (rec.level <= baseLevel) break;
    if (rec.tagId !== TAG_PARA_TEXT) continue;
    const text = extractText(rec.data).trim();
    if (text) pieces.push(text);
  }
  return pieces.length === 0 ? null : pieces.join(" ");
}
|
|
2677
|
+
/**
 * Pulls an http(s) URL out of a control record payload.
 *
 * The payload is searched for the UTF-16LE byte sequence "http"; from there
 * 16-bit units are consumed up to the first NUL unit or the end of the data.
 *
 * @param {Buffer} data - Raw record payload.
 * @returns {string|null} The URL when it matches `http(s)://...` and is shorter
 *   than 2000 characters; null otherwise, including when reading fails.
 */
function extractHyperlinkUrl(data) {
  try {
    const start = data.indexOf(Buffer.from("http", "utf16le"));
    if (start < 0) return null;
    let stop = start;
    for (; stop + 1 < data.length; stop += 2) {
      if (data.readUInt16LE(stop) === 0) break;
    }
    const candidate = data.subarray(start, stop).toString("utf16le");
    const looksLikeUrl = /^https?:\/\/.+/.test(candidate);
    return looksLikeUrl && candidate.length < 2e3 ? candidate : null;
  } catch {
    // Unreadable payloads are treated as "no link".
    return null;
  }
}
|
|
1636
2697
|
function resolveCharStyle(charShapeIds, docInfo) {
|
|
1637
2698
|
if (charShapeIds.length === 0 || docInfo.charShapes.length === 0) return void 0;
|
|
1638
2699
|
const freq = /* @__PURE__ */ new Map();
|