kordoc 1.8.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -3
- package/dist/{chunk-UUKFY5P5.js → chunk-UMO6QQO5.js} +2 -2
- package/dist/{chunk-QQ6PZADA.js → chunk-UUHAAZYN.js} +1135 -33
- package/dist/chunk-UUHAAZYN.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/index.cjs +1204 -102
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1204 -102
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-OTCR2KMY.js → utils-K23YMTIM.js} +2 -2
- package/dist/{watch-JFDOENIO.js → watch-CGG7CCHJ.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-QQ6PZADA.js.map +0 -1
- /package/dist/{chunk-UUKFY5P5.js.map → chunk-UMO6QQO5.js.map} +0 -0
- /package/dist/{utils-OTCR2KMY.js.map → utils-K23YMTIM.js.map} +0 -0
- /package/dist/{watch-JFDOENIO.js.map → watch-CGG7CCHJ.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -181,35 +181,106 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
181
181
|
var import_zlib = require("zlib");
|
|
182
182
|
var import_xmldom = require("@xmldom/xmldom");
|
|
183
183
|
|
|
184
|
-
// src/
|
|
184
|
+
// src/utils.ts
|
|
185
|
+
// Package version string. The condition is the literal `true`, so this always
// evaluates to "2.0.0"; the "0.0.0-dev" branch is never taken in this build.
var VERSION = true ? "2.0.0" : "0.0.0-dev";
|
|
186
|
+
/**
 * Returns an ArrayBuffer containing exactly the bytes visible through `buf`.
 *
 * When the view covers its entire backing buffer, that buffer is returned
 * as-is (no copy). Otherwise the viewed byte range is copied out with `slice`.
 *
 * @param buf A view exposing `buffer`, `byteOffset` and `byteLength`.
 * @returns The backing buffer itself, or a copy of the viewed range.
 */
function toArrayBuffer(buf) {
  const { buffer: backing, byteOffset: start, byteLength: size } = buf;
  const coversWholeBuffer = start === 0 && size === backing.byteLength;
  return coversWholeBuffer ? backing : backing.slice(start, start + size);
}
|
|
192
|
+
/**
 * Error type thrown by this module's own validation checks.
 *
 * The `name` property is always "KordocError". The optional second argument
 * is forwarded to the built-in `Error` constructor so callers can attach
 * `{ cause }` when wrapping a lower-level failure; omitting it behaves
 * exactly like the single-argument form.
 */
var KordocError = class extends Error {
  /**
   * @param message Human-readable description of the failure.
   * @param options Optional `Error` options (e.g. `{ cause }`).
   */
  constructor(message, options) {
    super(message, options);
    this.name = "KordocError";
  }
};
|
|
198
|
+
/**
 * Reports whether an archive entry name looks unsafe to use as a relative path.
 *
 * A name is rejected when it contains a NUL character, or when — after
 * backslashes are normalised to forward slashes — it contains "..", starts
 * with "/", or starts with a drive-letter prefix such as "C:".
 * The ".." test is a plain substring match, so a name like "a..b" is also rejected.
 *
 * @param name Entry name to inspect.
 * @returns `true` when the name should be treated as a traversal attempt.
 */
function isPathTraversal(name) {
  if (name.includes("\0")) return true;
  const unified = name.replace(/\\/g, "/");
  const checks = [
    () => unified.includes(".."),
    () => unified.startsWith("/"),
    () => /^[A-Za-z]:/.test(unified)
  ];
  return checks.some((check) => check());
}
|
|
203
|
+
/**
 * Cheap pre-flight check of a ZIP archive's central directory, done before
 * any decompression, to reject archives that declare too many entries or too
 * much uncompressed data.
 *
 * The central directory is walked by header signature rather than by the
 * entry count declared in the end-of-central-directory (EOCD) record. That
 * count is attacker-controlled: an archive declaring fewer entries than it
 * really holds would otherwise be under-counted for both limits.
 *
 * This is best-effort: if the EOCD cannot be found, the directory range is
 * out of bounds, or reading fails, zeros are returned instead of throwing.
 *
 * @param buffer ArrayBuffer holding the whole archive.
 * @param maxUncompressedSize Upper bound for the summed uncompressed sizes, in bytes.
 * @param maxEntries Upper bound for the number of central-directory entries.
 * @returns `{ totalUncompressed, entryCount }`; `entryCount` is the larger of
 *          the declared count and the number of entries actually walked.
 * @throws {KordocError} When either limit is exceeded.
 */
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
  const EOCD_SIGNATURE = 0x06054b50; // "PK\x05\x06", read little-endian
  const CD_HEADER_SIGNATURE = 0x02014b50; // "PK\x01\x02", read little-endian
  const EOCD_MIN_SIZE = 22;
  const EOCD_MAX_SIZE = 65557; // 22 fixed bytes + up to 65535 comment bytes
  const CD_HEADER_SIZE = 46;
  const failEntryCount = (count) => {
    throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${count} (\uCD5C\uB300 ${maxEntries})`);
  };
  try {
    const data = new DataView(buffer);
    const len = buffer.byteLength;

    // Scan backwards for the EOCD record; it sits at the very end, possibly
    // followed by a trailing comment.
    let eocdOffset = -1;
    const scanFloor = Math.max(0, len - EOCD_MAX_SIZE);
    for (let i = len - EOCD_MIN_SIZE; i >= scanFloor; i--) {
      if (data.getUint32(i, true) === EOCD_SIGNATURE) {
        eocdOffset = i;
        break;
      }
    }
    if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };

    const declaredCount = data.getUint16(eocdOffset + 10, true);
    if (declaredCount > maxEntries) failEntryCount(declaredCount);

    const cdSize = data.getUint32(eocdOffset + 12, true);
    const cdOffset = data.getUint32(eocdOffset + 16, true);
    const cdEnd = cdOffset + cdSize;
    if (cdEnd > len) return { totalUncompressed: 0, entryCount: declaredCount };

    // Walk every header present in the directory range, not just the first
    // `declaredCount`, so a forged count cannot hide entries.
    let totalUncompressed = 0;
    let walkedCount = 0;
    let pos = cdOffset;
    while (pos + CD_HEADER_SIZE <= cdEnd && data.getUint32(pos, true) === CD_HEADER_SIGNATURE) {
      walkedCount++;
      if (walkedCount > maxEntries) failEntryCount(walkedCount);
      totalUncompressed += data.getUint32(pos + 24, true);
      const nameLen = data.getUint16(pos + 28, true);
      const extraLen = data.getUint16(pos + 30, true);
      const commentLen = data.getUint16(pos + 32, true);
      pos += CD_HEADER_SIZE + nameLen + extraLen + commentLen;
    }

    if (totalUncompressed > maxUncompressedSize) {
      throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
    }
    return { totalUncompressed, entryCount: Math.max(declaredCount, walkedCount) };
  } catch (err) {
    // Limit violations must reach the caller; any other failure (e.g. a
    // non-ArrayBuffer input) degrades to "nothing measured".
    if (err instanceof KordocError) throw err;
    return { totalUncompressed: 0, entryCount: 0 };
  }
}
|
|
185
241
|
// Allow-list of link prefixes: http(s), mailto, tel, and in-document fragments.
var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;

/**
 * Validates a link target against the allow-list of safe prefixes.
 *
 * @param href Raw link target.
 * @returns The trimmed target when it is non-empty and starts with an
 *          allowed prefix (case-insensitive); otherwise `null`.
 */
function sanitizeHref(href) {
  const candidate = href.trim();
  const allowed = candidate !== "" && SAFE_HREF_RE.test(candidate);
  return allowed ? candidate : null;
}
|
|
247
|
+
/**
 * Maps a thrown value to a coarse error code by looking for known fragments
 * in its message.
 *
 * Rules are evaluated in order and the first match wins, so the ZIP-specific
 * checks run before the more general "bomb" / size-exceeded checks.
 * Anything that is not an `Error`, or matches no rule, yields "PARSE_ERROR".
 *
 * @param err Value caught from a parser.
 * @returns One of "ENCRYPTED", "DRM_PROTECTED", "ZIP_BOMB",
 *          "DECOMPRESSION_BOMB", "IMAGE_BASED_PDF", "NO_SECTIONS",
 *          "CORRUPTED" or "PARSE_ERROR".
 */
function classifyError(err) {
  if (!(err instanceof Error)) return "PARSE_ERROR";
  const text = err.message;
  const has = (fragment) => text.includes(fragment);
  const rules = [
    ["ENCRYPTED", () => has("\uC554\uD638\uD654")],
    ["DRM_PROTECTED", () => has("DRM")],
    ["ZIP_BOMB", () => has("ZIP bomb") || has("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || has("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")],
    ["DECOMPRESSION_BOMB", () => has("bomb") || has("\uD06C\uAE30 \uCD08\uACFC") || has("\uC555\uCD95 \uD574\uC81C")],
    ["IMAGE_BASED_PDF", () => has("\uC774\uBBF8\uC9C0 \uAE30\uBC18")],
    ["NO_SECTIONS", () => has("\uC139\uC158") && (has("\uCC3E\uC744 \uC218 \uC5C6") || has("\uC5C6\uC74C"))],
    ["CORRUPTED", () => has("\uC2DC\uADF8\uB2C8\uCC98") || has("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")]
  ];
  for (const [code, matches] of rules) {
    if (matches()) return code;
  }
  return "PARSE_ERROR";
}
|
|
259
|
+
|
|
260
|
+
// src/table/builder.ts
|
|
191
261
|
var MAX_COLS = 200;
|
|
192
262
|
var MAX_ROWS = 1e4;
|
|
193
263
|
function buildTable(rows) {
|
|
194
264
|
if (rows.length > MAX_ROWS) rows = rows.slice(0, MAX_ROWS);
|
|
195
265
|
const numRows = rows.length;
|
|
196
|
-
const
|
|
266
|
+
const hasAddr = rows.some((row) => row.some((c) => c.colAddr !== void 0 && c.rowAddr !== void 0));
|
|
267
|
+
if (hasAddr) return buildTableDirect(rows, numRows);
|
|
197
268
|
let maxCols = 0;
|
|
269
|
+
const tempOccupied = Array.from({ length: numRows }, () => []);
|
|
198
270
|
for (let rowIdx = 0; rowIdx < numRows; rowIdx++) {
|
|
199
271
|
let colIdx = 0;
|
|
200
272
|
for (const cell of rows[rowIdx]) {
|
|
201
|
-
while (colIdx < MAX_COLS && tempOccupied
|
|
273
|
+
while (colIdx < MAX_COLS && tempOccupied[rowIdx][colIdx]) colIdx++;
|
|
202
274
|
if (colIdx >= MAX_COLS) break;
|
|
203
275
|
for (let r = rowIdx; r < Math.min(rowIdx + cell.rowSpan, numRows); r++) {
|
|
204
276
|
for (let c = colIdx; c < Math.min(colIdx + cell.colSpan, MAX_COLS); c++) {
|
|
205
|
-
tempOccupied
|
|
277
|
+
tempOccupied[r][c] = true;
|
|
206
278
|
}
|
|
207
279
|
}
|
|
208
280
|
colIdx += cell.colSpan;
|
|
209
281
|
if (colIdx > maxCols) maxCols = colIdx;
|
|
210
282
|
}
|
|
211
283
|
}
|
|
212
|
-
tempOccupied.clear();
|
|
213
284
|
if (maxCols === 0) return { rows: 0, cols: 0, cells: [], hasHeader: false };
|
|
214
285
|
const grid = Array.from(
|
|
215
286
|
{ length: numRows },
|
|
@@ -237,6 +308,40 @@ function buildTable(rows) {
|
|
|
237
308
|
cellIdx++;
|
|
238
309
|
}
|
|
239
310
|
}
|
|
311
|
+
return trimAndReturn(grid, numRows, maxCols);
|
|
312
|
+
}
|
|
313
|
+
/**
 * Builds the table grid by placing each cell at the position given by its
 * own `rowAddr` / `colAddr` (missing addresses default to 0).
 *
 * Addresses and spans come from the parsed document, so they are treated as
 * untrusted:
 *  - the grid width is capped at MAX_COLS, matching the sequential layout
 *    path in buildTable, so a huge `colAddr` cannot force a huge allocation;
 *  - cells whose address is negative or outside the grid are skipped rather
 *    than indexing outside the grid;
 *  - span filling is clamped to the grid bounds, so an oversized span costs
 *    no more work than the grid itself.
 *
 * @param rows Rows of cells; each cell has `text`, `colSpan`, `rowSpan` and
 *             optionally `colAddr` / `rowAddr`.
 * @param numRows Number of grid rows to allocate.
 * @returns The result of `trimAndReturn(grid, numRows, maxCols)`, or an
 *          empty-table object when no cell contributes any width.
 */
function buildTableDirect(rows, numRows) {
  // Grid width = right-most column edge claimed by any cell.
  let maxCols = 0;
  for (const row of rows) {
    for (const cell of row) {
      const end = (cell.colAddr ?? 0) + cell.colSpan;
      if (end > maxCols) maxCols = end;
    }
  }
  maxCols = Math.min(maxCols, MAX_COLS);
  if (maxCols === 0) return { rows: 0, cols: 0, cells: [], hasHeader: false };

  const grid = Array.from(
    { length: numRows },
    () => Array.from({ length: maxCols }, () => ({ text: "", colSpan: 1, rowSpan: 1 }))
  );

  for (const row of rows) {
    for (const cell of row) {
      const r = cell.rowAddr ?? 0;
      const c = cell.colAddr ?? 0;
      if (r < 0 || c < 0 || r >= numRows || c >= maxCols) continue;

      // The anchor keeps the cell's text and its declared spans.
      grid[r][c] = { text: cell.text.trim(), colSpan: cell.colSpan, rowSpan: cell.rowSpan };

      // Every other position covered by the span becomes an empty 1x1 placeholder.
      const rowEnd = Math.min(r + cell.rowSpan, numRows);
      const colEnd = Math.min(c + cell.colSpan, maxCols);
      for (let rr = r; rr < rowEnd; rr++) {
        for (let cc = c; cc < colEnd; cc++) {
          if (rr === r && cc === c) continue;
          grid[rr][cc] = { text: "", colSpan: 1, rowSpan: 1 };
        }
      }
    }
  }
  return trimAndReturn(grid, numRows, maxCols);
}
|
|
344
|
+
function trimAndReturn(grid, numRows, maxCols) {
|
|
240
345
|
let effectiveCols = maxCols;
|
|
241
346
|
while (effectiveCols > 0) {
|
|
242
347
|
const colEmpty = grid.every((row) => !row[effectiveCols - 1]?.text?.trim());
|
|
@@ -352,12 +457,10 @@ function tableToMarkdown(table) {
|
|
|
352
457
|
const display = Array.from({ length: numRows }, () => Array(numCols).fill(""));
|
|
353
458
|
const skip = /* @__PURE__ */ new Set();
|
|
354
459
|
for (let r = 0; r < numRows; r++) {
|
|
355
|
-
let cellIdx = 0;
|
|
356
460
|
for (let c = 0; c < numCols; c++) {
|
|
357
461
|
if (skip.has(`${r},${c}`)) continue;
|
|
358
|
-
const cell = cells[r]?.[
|
|
359
|
-
if (!cell)
|
|
360
|
-
cellIdx++;
|
|
462
|
+
const cell = cells[r]?.[c];
|
|
463
|
+
if (!cell) continue;
|
|
361
464
|
display[r][c] = sanitizeText(cell.text).replace(/\n/g, "<br>");
|
|
362
465
|
for (let dr = 0; dr < cell.rowSpan; dr++) {
|
|
363
466
|
for (let dc = 0; dc < cell.colSpan; dc++) {
|
|
@@ -405,82 +508,6 @@ var HEADING_RATIO_H1 = 1.5;
|
|
|
405
508
|
var HEADING_RATIO_H2 = 1.3;
|
|
406
509
|
var HEADING_RATIO_H3 = 1.15;
|
|
407
510
|
|
|
408
|
-
// src/utils.ts
|
|
409
|
-
var VERSION = true ? "1.8.0" : "0.0.0-dev";
|
|
410
|
-
function toArrayBuffer(buf) {
|
|
411
|
-
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
412
|
-
return buf.buffer;
|
|
413
|
-
}
|
|
414
|
-
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
415
|
-
}
|
|
416
|
-
var KordocError = class extends Error {
|
|
417
|
-
constructor(message) {
|
|
418
|
-
super(message);
|
|
419
|
-
this.name = "KordocError";
|
|
420
|
-
}
|
|
421
|
-
};
|
|
422
|
-
function isPathTraversal(name) {
|
|
423
|
-
if (name.includes("\0")) return true;
|
|
424
|
-
const normalized = name.replace(/\\/g, "/");
|
|
425
|
-
return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
|
|
426
|
-
}
|
|
427
|
-
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
|
|
428
|
-
try {
|
|
429
|
-
const data = new DataView(buffer);
|
|
430
|
-
const len = buffer.byteLength;
|
|
431
|
-
let eocdOffset = -1;
|
|
432
|
-
for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
|
|
433
|
-
if (data.getUint32(i, true) === 101010256) {
|
|
434
|
-
eocdOffset = i;
|
|
435
|
-
break;
|
|
436
|
-
}
|
|
437
|
-
}
|
|
438
|
-
if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
|
|
439
|
-
const entryCount = data.getUint16(eocdOffset + 10, true);
|
|
440
|
-
if (entryCount > maxEntries) {
|
|
441
|
-
throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
|
|
442
|
-
}
|
|
443
|
-
const cdSize = data.getUint32(eocdOffset + 12, true);
|
|
444
|
-
const cdOffset = data.getUint32(eocdOffset + 16, true);
|
|
445
|
-
if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
|
|
446
|
-
let totalUncompressed = 0;
|
|
447
|
-
let pos = cdOffset;
|
|
448
|
-
for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
|
|
449
|
-
if (data.getUint32(pos, true) !== 33639248) break;
|
|
450
|
-
totalUncompressed += data.getUint32(pos + 24, true);
|
|
451
|
-
const nameLen = data.getUint16(pos + 28, true);
|
|
452
|
-
const extraLen = data.getUint16(pos + 30, true);
|
|
453
|
-
const commentLen = data.getUint16(pos + 32, true);
|
|
454
|
-
pos += 46 + nameLen + extraLen + commentLen;
|
|
455
|
-
}
|
|
456
|
-
if (totalUncompressed > maxUncompressedSize) {
|
|
457
|
-
throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
|
|
458
|
-
}
|
|
459
|
-
return { totalUncompressed, entryCount };
|
|
460
|
-
} catch (err) {
|
|
461
|
-
if (err instanceof KordocError) throw err;
|
|
462
|
-
return { totalUncompressed: 0, entryCount: 0 };
|
|
463
|
-
}
|
|
464
|
-
}
|
|
465
|
-
var SAFE_HREF_RE2 = /^(?:https?:|mailto:|tel:|#)/i;
|
|
466
|
-
function sanitizeHref2(href) {
|
|
467
|
-
const trimmed = href.trim();
|
|
468
|
-
if (!trimmed || !SAFE_HREF_RE2.test(trimmed)) return null;
|
|
469
|
-
return trimmed;
|
|
470
|
-
}
|
|
471
|
-
function classifyError(err) {
|
|
472
|
-
if (!(err instanceof Error)) return "PARSE_ERROR";
|
|
473
|
-
const msg = err.message;
|
|
474
|
-
if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
|
|
475
|
-
if (msg.includes("DRM")) return "DRM_PROTECTED";
|
|
476
|
-
if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
|
|
477
|
-
if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
|
|
478
|
-
if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
|
|
479
|
-
if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
|
|
480
|
-
if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
|
|
481
|
-
return "PARSE_ERROR";
|
|
482
|
-
}
|
|
483
|
-
|
|
484
511
|
// src/hwpx/parser.ts
|
|
485
512
|
init_page_range();
|
|
486
513
|
var MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024;
|
|
@@ -953,6 +980,14 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
|
|
|
953
980
|
}
|
|
954
981
|
}
|
|
955
982
|
break;
|
|
983
|
+
case "cellAddr":
|
|
984
|
+
if (tableCtx?.cell) {
|
|
985
|
+
const ca = parseInt(el.getAttribute("colAddr") || "", 10);
|
|
986
|
+
const ra = parseInt(el.getAttribute("rowAddr") || "", 10);
|
|
987
|
+
if (!isNaN(ca)) tableCtx.cell.colAddr = ca;
|
|
988
|
+
if (!isNaN(ra)) tableCtx.cell.rowAddr = ra;
|
|
989
|
+
}
|
|
990
|
+
break;
|
|
956
991
|
case "cellSpan":
|
|
957
992
|
if (tableCtx?.cell) {
|
|
958
993
|
const cs = parseInt(el.getAttribute("colSpan") || "1", 10);
|
|
@@ -1079,7 +1114,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1079
1114
|
case "hyperlink": {
|
|
1080
1115
|
const url = child.getAttribute("url") || child.getAttribute("href") || "";
|
|
1081
1116
|
if (url) {
|
|
1082
|
-
const safe =
|
|
1117
|
+
const safe = sanitizeHref(url);
|
|
1083
1118
|
if (safe) href = safe;
|
|
1084
1119
|
}
|
|
1085
1120
|
walk(child);
|
|
@@ -1172,13 +1207,16 @@ var TAG_TABLE = 77;
|
|
|
1172
1207
|
var TAG_DOC_CHAR_SHAPE = 55;
|
|
1173
1208
|
var TAG_DOC_STYLE = 58;
|
|
1174
1209
|
var CHAR_LINE = 0;
|
|
1210
|
+
var CHAR_SECTION_BREAK = 10;
|
|
1175
1211
|
var CHAR_PARA = 13;
|
|
1176
1212
|
var CHAR_TAB = 9;
|
|
1177
1213
|
var CHAR_HYPHEN = 30;
|
|
1178
1214
|
var CHAR_NBSP = 31;
|
|
1179
1215
|
var CHAR_FIXED_NBSP = 24;
|
|
1216
|
+
var CHAR_FIXED_WIDTH = 25;
|
|
1180
1217
|
var FLAG_COMPRESSED = 1 << 0;
|
|
1181
1218
|
var FLAG_ENCRYPTED = 1 << 1;
|
|
1219
|
+
var FLAG_DISTRIBUTION = 1 << 2;
|
|
1182
1220
|
var FLAG_DRM = 1 << 4;
|
|
1183
1221
|
var MAX_RECORDS = 5e5;
|
|
1184
1222
|
function readRecords(data) {
|
|
@@ -1273,27 +1311,41 @@ function extractText(data) {
|
|
|
1273
1311
|
const ch = data.readUInt16LE(i);
|
|
1274
1312
|
i += 2;
|
|
1275
1313
|
switch (ch) {
|
|
1314
|
+
// ── char type (2 bytes only, no extended data) ──
|
|
1276
1315
|
case CHAR_LINE:
|
|
1277
1316
|
result += "\n";
|
|
1278
1317
|
break;
|
|
1279
|
-
case
|
|
1280
|
-
|
|
1281
|
-
case CHAR_TAB:
|
|
1282
|
-
result += " ";
|
|
1318
|
+
case CHAR_SECTION_BREAK:
|
|
1319
|
+
result += "\n";
|
|
1283
1320
|
if (i + 14 <= data.length) i += 14;
|
|
1284
1321
|
break;
|
|
1322
|
+
case CHAR_PARA:
|
|
1323
|
+
break;
|
|
1324
|
+
// end of paragraph
|
|
1285
1325
|
case CHAR_HYPHEN:
|
|
1286
1326
|
result += "-";
|
|
1287
1327
|
break;
|
|
1288
1328
|
case CHAR_NBSP:
|
|
1329
|
+
result += " ";
|
|
1330
|
+
break;
|
|
1289
1331
|
case CHAR_FIXED_NBSP:
|
|
1332
|
+
result += "\xA0";
|
|
1333
|
+
break;
|
|
1334
|
+
// a true NBSP
|
|
1335
|
+
case CHAR_FIXED_WIDTH:
|
|
1290
1336
|
result += " ";
|
|
1291
1337
|
break;
|
|
1338
|
+
// fixed-width space
|
|
1339
|
+
// ── inline type (2 bytes + 14-byte extension) ──
|
|
1340
|
+
case CHAR_TAB:
|
|
1341
|
+
result += " ";
|
|
1342
|
+
if (i + 14 <= data.length) i += 14;
|
|
1343
|
+
break;
|
|
1292
1344
|
default:
|
|
1293
1345
|
if (ch >= 1 && ch <= 31) {
|
|
1294
|
-
const
|
|
1346
|
+
const isExtended = ch >= 1 && ch <= 3 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= 18 || ch >= 21 && ch <= 23;
|
|
1295
1347
|
const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
|
|
1296
|
-
if ((
|
|
1348
|
+
if ((isExtended || isInline) && i + 14 <= data.length) i += 14;
|
|
1297
1349
|
} else if (ch >= 32) {
|
|
1298
1350
|
if (ch >= 55296 && ch <= 56319 && i + 1 < data.length) {
|
|
1299
1351
|
const lo = data.readUInt16LE(i);
|
|
@@ -1312,6 +1364,886 @@ function extractText(data) {
|
|
|
1312
1364
|
return result;
|
|
1313
1365
|
}
|
|
1314
1366
|
|
|
1367
|
+
// src/hwp5/aes.ts
|
|
1368
|
+
// AES forward substitution box: S_BOX[x] is the substituted value of byte x.
// Laid out 16 entries per row; the row comment is the index of the first entry.
var S_BOX = new Uint8Array([
  /* 0x00 */ 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118,
  /* 0x10 */ 202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192,
  /* 0x20 */ 183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21,
  /* 0x30 */ 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117,
  /* 0x40 */ 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132,
  /* 0x50 */ 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207,
  /* 0x60 */ 208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168,
  /* 0x70 */ 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210,
  /* 0x80 */ 205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115,
  /* 0x90 */ 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219,
  /* 0xa0 */ 224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121,
  /* 0xb0 */ 231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8,
  /* 0xc0 */ 186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138,
  /* 0xd0 */ 112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158,
  /* 0xe0 */ 225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223,
  /* 0xf0 */ 140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22
]);
|
|
1626
|
+
// AES inverse substitution box: INV_S_BOX[S_BOX[x]] === x for every byte x.
// Laid out 16 entries per row; the row comment is the index of the first entry.
var INV_S_BOX = new Uint8Array([
  /* 0x00 */ 82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251,
  /* 0x10 */ 124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203,
  /* 0x20 */ 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78,
  /* 0x30 */ 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37,
  /* 0x40 */ 114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146,
  /* 0x50 */ 108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132,
  /* 0x60 */ 144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6,
  /* 0x70 */ 208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107,
  /* 0x80 */ 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115,
  /* 0x90 */ 150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110,
  /* 0xa0 */ 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27,
  /* 0xb0 */ 252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244,
  /* 0xc0 */ 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95,
  /* 0xd0 */ 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239,
  /* 0xe0 */ 160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97,
  /* 0xf0 */ 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125
]);
|
|
1884
|
+
// Round constants for the key schedule. expandKey reads entry `i / 4 - 1`
// for every fourth word and XORs it into the top byte of that word.
var RCON = new Uint8Array([1, 2, 4, 8, 16, 32, 64, 128, 27, 54]);
|
|
1885
|
+
/**
 * Multiplies two bytes using shift-and-add over eight steps, reducing with
 * the constant 27 (0x1b) whenever the shifted operand overflows 8 bits.
 *
 * @param a First factor (byte).
 * @param b Second factor (byte).
 * @returns The product as a byte.
 */
function gmul(a, b) {
  let multiplicand = a;
  let multiplier = b;
  let product = 0;
  let stepsLeft = 8;
  while (stepsLeft-- > 0) {
    // Add (XOR) the current multiple when the low bit of the multiplier is set.
    if ((multiplier & 1) !== 0) product ^= multiplicand;
    // Double the multiplicand, reducing if the top bit was shifted out.
    const overflowed = (multiplicand & 128) !== 0;
    multiplicand = (multiplicand << 1) & 255;
    if (overflowed) multiplicand ^= 27;
    multiplier >>= 1;
  }
  return product;
}
|
|
1896
|
+
/**
 * Expands a 16-byte key into 44 round-key words (11 round keys of 4 words).
 *
 * Words 0-3 are the key bytes packed big-endian. Each later word is the word
 * four positions back XORed with the previous word; every fourth word first
 * has the previous word rotated left by one byte, substituted through S_BOX,
 * and XORed with a round constant in its top byte.
 *
 * @param key 16 key bytes.
 * @returns Uint32Array of 44 words.
 */
function expandKey(key) {
  const words = new Uint32Array(44);
  for (let i = 0; i < 4; i++) {
    const at = 4 * i;
    words[i] = key[at] << 24 | key[at + 1] << 16 | key[at + 2] << 8 | key[at + 3];
  }
  // Substitute each of the four bytes of a word through the S-box.
  const subWord = (word) => S_BOX[word >>> 24 & 255] << 24 | S_BOX[word >>> 16 & 255] << 16 | S_BOX[word >>> 8 & 255] << 8 | S_BOX[word & 255];
  for (let i = 4; i < 44; i++) {
    let mixed = words[i - 1];
    if (i % 4 === 0) {
      const rotated = (mixed << 8 | mixed >>> 24) >>> 0;
      mixed = (subWord(rotated) ^ RCON[i / 4 - 1] << 24) >>> 0;
    }
    words[i] = (words[i - 4] ^ mixed) >>> 0;
  }
  return words;
}
|
|
1912
|
+
/**
 * Decrypts one 16-byte block with an expanded 11-round key schedule.
 *
 * Works on a private copy of the input: round key 10 is applied first, then
 * rounds 9 down to 1 each run inverse ShiftRows, inverse SubBytes,
 * AddRoundKey and inverse MixColumns; the final round omits MixColumns and
 * uses round key 0.
 *
 * @param block Source of the 16 input bytes (not modified).
 * @param roundKeys Round-key words as produced by expandKey.
 * @returns A new Uint8Array(16) with the decrypted bytes.
 */
function decryptBlock(block, roundKeys) {
  const state = Uint8Array.from({ length: 16 }, (_, idx) => block[idx]);
  addRoundKey(state, roundKeys, 10);
  let round = 9;
  while (round >= 1) {
    invShiftRows(state);
    invSubBytes(state);
    addRoundKey(state, roundKeys, round);
    invMixColumns(state);
    round -= 1;
  }
  invShiftRows(state);
  invSubBytes(state);
  addRoundKey(state, roundKeys, 0);
  return state;
}
|
|
1927
|
+
/**
 * XORs one round key into the 16-byte state, in place.
 *
 * The round key is the four words `w[round * 4 .. round * 4 + 3]`; word `c`
 * covers state bytes `4c .. 4c + 3`, most significant byte first.
 *
 * @param s 16-byte state, modified in place.
 * @param w Expanded key words.
 * @param round Round index selecting which four words to use.
 */
function addRoundKey(s, w, round) {
  const firstWord = round * 4;
  for (let col = 0; col < 4; col++) {
    const word = w[firstWord + col];
    const at = col * 4;
    for (let byte = 0; byte < 4; byte++) {
      s[at + byte] ^= word >>> (24 - 8 * byte) & 255;
    }
  }
}
|
|
1937
|
+
/**
 * Replaces each of the 16 state bytes with its INV_S_BOX lookup, in place.
 *
 * @param s 16-byte state, modified in place.
 */
function invSubBytes(s) {
  let idx = 0;
  while (idx < 16) {
    s[idx] = INV_S_BOX[s[idx]];
    idx += 1;
  }
}
|
|
1940
|
+
/**
 * Applies the inverse ShiftRows permutation to the state, in place.
 *
 * With the state stored column by column (byte `4 * col + row`), row 0 stays
 * put and rows 1, 2 and 3 are rotated right by 1, 2 and 3 columns.
 *
 * @param s 16-byte state, modified in place.
 */
function invShiftRows(s) {
  // SOURCE_INDEX[i] is the old position whose byte ends up at position i.
  const SOURCE_INDEX = [0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3];
  const before = Array.from({ length: 16 }, (_, i) => s[i]);
  for (let i = 0; i < 16; i++) {
    if (i % 4 === 0) continue; // row 0 is not moved
    s[i] = before[SOURCE_INDEX[i]];
  }
}
|
|
1958
|
+
/**
 * Applies the inverse MixColumns transform to each 4-byte column, in place.
 *
 * Each output byte is the XOR of the four input bytes of its column, each
 * multiplied (via gmul) by a coefficient from the rotating row 14, 11, 13, 9.
 *
 * @param s 16-byte state, modified in place.
 */
function invMixColumns(s) {
  const COEFF = [14, 11, 13, 9];
  for (let col = 0; col < 4; col++) {
    const base = col * 4;
    // Snapshot the column first: every output byte depends on all four inputs.
    const column = [s[base], s[base + 1], s[base + 2], s[base + 3]];
    for (let row = 0; row < 4; row++) {
      let acc = 0;
      for (let k = 0; k < 4; k++) {
        // Row `row` of the matrix is COEFF rotated right by `row` places.
        acc ^= gmul(column[k], COEFF[(k - row + 4) % 4]);
      }
      s[base + row] = acc;
    }
  }
}
|
|
1968
|
+
/**
 * Decrypts `data` with AES-128 in ECB mode: each 16-byte block is decrypted
 * independently with the same expanded key. No padding is removed.
 *
 * @param data Ciphertext; its length must be a multiple of 16.
 * @param key Exactly 16 key bytes.
 * @returns A new Uint8Array of the same length holding the plaintext.
 * @throws {Error} When the key is not 16 bytes or the data length is not a
 *         multiple of 16.
 */
function aes128EcbDecrypt(data, key) {
  if (key.length !== 16) throw new Error("AES-128 \uD0A4\uB294 16\uBC14\uC774\uD2B8\uC5EC\uC57C \uD569\uB2C8\uB2E4");
  if (data.length % 16 !== 0) throw new Error("AES ECB \uC785\uB825\uC740 16\uBC14\uC774\uD2B8\uC758 \uBC30\uC218\uC5EC\uC57C \uD569\uB2C8\uB2E4");
  const schedule = expandKey(key);
  const plain = new Uint8Array(data.length);
  const blockCount = data.length / 16;
  for (let b = 0; b < blockCount; b++) {
    const start = b * 16;
    plain.set(decryptBlock(data.subarray(start, start + 16), schedule), start);
  }
  return plain;
}
|
|
1980
|
+
|
|
1981
|
+
// src/hwp5/crypto.ts
|
|
1982
|
+
/**
 * Linear congruential generator: state <- state * 214013 + 2531011 (mod 2^32).
 * Each draw returns bits 16-30 of the new state.
 */
var MsvcLcg = class {
  seed;
  /** @param seed Initial state; coerced to an unsigned 32-bit integer. */
  constructor(seed) {
    this.seed = seed >>> 0;
  }
  /** Returns a pseudo-random number in the range 0..0x7FFF (compatible with MSVC rand()). */
  rand() {
    const next = (Math.imul(this.seed, 0x343fd) + 0x269ec3) >>> 0;
    this.seed = next;
    return (next >>> 16) & 0x7fff;
  }
};
|
|
1993
|
+
/**
 * De-obfuscates the first 256 bytes of a distribution-document payload.
 *
 * The first four bytes are read as a little-endian seed and copied through
 * unchanged. From byte 4 on, the data is XORed in runs: for each run the
 * generator supplies a key byte (`rand() & 255`) and a run length
 * (`(rand() & 15) + 1`, i.e. 1-16 bytes).
 *
 * NOTE(review): the first run starts at byte 4 here. Some reference decoders
 * appear to start the run counter at byte 0 (consuming the first run over the
 * seed bytes without XORing them) — confirm the alignment against real
 * distribution documents.
 *
 * @param payload At least 256 bytes; only the first 256 are used.
 * @returns A new Uint8Array(256) with the de-obfuscated bytes.
 * @throws {Error} When the payload is shorter than 256 bytes.
 */
function decryptDistributePayload(payload) {
  if (payload.length < 256) throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
  const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
  const lcg = new MsvcLcg(seed);
  const decoded = new Uint8Array(256);
  for (let k = 0; k < 4; k++) decoded[k] = payload[k];
  let cursor = 4;
  while (cursor < 256) {
    const keyByte = lcg.rand() & 255;
    const runLength = (lcg.rand() & 15) + 1;
    const runEnd = Math.min(cursor + runLength, 256);
    for (; cursor < runEnd; cursor++) {
      decoded[cursor] = payload[cursor] ^ keyByte;
    }
  }
  return decoded;
}
|
|
2012
|
+
/**
 * Extracts the 16-byte AES key from a de-obfuscated payload.
 *
 * The key starts at offset `4 + (low nibble of byte 0)`, i.e. somewhere
 * between offsets 4 and 19.
 *
 * @param decryptedPayload De-obfuscated payload bytes.
 * @returns A copy of the 16 key bytes.
 * @throws {Error} When the key range would run past the end of the payload.
 */
function extractAesKey(decryptedPayload) {
  const start = (decryptedPayload[0] & 15) + 4;
  const end = start + 16;
  if (end > decryptedPayload.length) {
    throw new Error("AES \uD0A4 \uCD94\uCD9C \uC2E4\uD328: \uC624\uD504\uC14B\uC774 payload \uBC94\uC704\uB97C \uCD08\uACFC\uD569\uB2C8\uB2E4");
  }
  return decryptedPayload.slice(start, end);
}
|
|
2019
|
+
/**
 * Decodes a record header located at `offset` in `data`.
 *
 * The 32-bit little-endian header word carries the tag id in bits 0..9 and the
 * payload size in bits 20..31. A size field of 0xFFF means the real size is
 * stored in the following 32-bit little-endian word (8-byte header).
 *
 * @param {Uint8Array} data Byte source.
 * @param {number} offset Position of the header within `data`.
 * @returns {{tagId: number, size: number, headerSize: number}}
 * @throws {Error} When `data` is too short for the (extended) header.
 */
function parseRecordHeader(data, offset) {
  const EXTENDED_SIZE_MARKER = 0xfff;
  const readU32 = (at) => (data[at] | data[at + 1] << 8 | data[at + 2] << 16 | data[at + 3] << 24) >>> 0;
  if (offset + 4 > data.length) {
    throw new Error("\uB808\uCF54\uB4DC \uD5E4\uB354 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  }
  const word = readU32(offset);
  const tagId = word & 0x3ff;
  const inlineSize = (word >>> 20) & EXTENDED_SIZE_MARKER;
  if (inlineSize !== EXTENDED_SIZE_MARKER) {
    return { tagId, size: inlineSize, headerSize: 4 };
  }
  if (offset + 8 > data.length) {
    throw new Error("\uD655\uC7A5 \uB808\uCF54\uB4DC \uD06C\uAE30 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  }
  return { tagId, size: readU32(offset + 4), headerSize: 8 };
}
|
|
2032
|
+
// Tag id expected on the first record of a ViewText section (16 + 28 = 44).
var TAG_DISTRIBUTE_DOC_DATA = 16 + 28;
/**
 * Decrypts one ViewText section stream of a distribution document.
 *
 * Steps: verify that the first record carries TAG_DISTRIBUTE_DOC_DATA, decode
 * its first 256 payload bytes, take the AES key from the decoded payload, then
 * AES-128-ECB decrypt everything after that record (truncated down to a
 * multiple of 16 bytes). When `compressed` is set the plaintext is passed
 * through decompressStream; if that throws, the plaintext is returned as-is.
 *
 * @param {Uint8Array} viewTextRaw Raw ViewText stream bytes.
 * @param {boolean} compressed Whether to attempt decompression after decrypting.
 * @returns {Buffer} Decrypted (and, when possible, decompressed) section data.
 * @throws {Error} On a wrong first record, an invalid payload, or missing /
 *   too-short encrypted data.
 */
function decryptViewText(viewTextRaw, compressed) {
  const bytes = new Uint8Array(viewTextRaw);
  const head = parseRecordHeader(bytes, 0);
  if (head.tagId !== TAG_DISTRIBUTE_DOC_DATA) {
    throw new Error(`\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC758 \uCCAB \uB808\uCF54\uB4DC\uAC00 DISTRIBUTE_DOC_DATA(${TAG_DISTRIBUTE_DOC_DATA})\uAC00 \uC544\uB2D9\uB2C8\uB2E4 (\uC2E4\uC81C: ${head.tagId})`);
  }
  const payloadStart = head.headerSize;
  const payloadEnd = payloadStart + head.size;
  if (head.size < 256 || payloadEnd > bytes.length) {
    throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 \uC720\uD6A8\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
  }
  // Only the first 256 payload bytes take part in key derivation.
  const decodedPayload = decryptDistributePayload(bytes.subarray(payloadStart, payloadStart + 256));
  const aesKey = extractAesKey(decodedPayload);
  // Everything after the first record is the encrypted body.
  const cipher = bytes.subarray(payloadEnd);
  if (cipher.length === 0) {
    throw new Error("\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC5D0 \uC554\uD638\uD654\uB41C \uBCF8\uBB38 \uB370\uC774\uD130\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
  }
  // ECB works on whole 16-byte blocks; a trailing partial block is dropped.
  const usableLength = cipher.length - cipher.length % 16;
  if (usableLength === 0) {
    throw new Error("\uC554\uD638\uD654\uB41C \uB370\uC774\uD130\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (16\uBC14\uC774\uD2B8 \uBBF8\uB9CC)");
  }
  const plain = Buffer.from(aes128EcbDecrypt(cipher.subarray(0, usableLength), aesKey));
  if (!compressed) {
    return plain;
  }
  try {
    return decompressStream(plain);
  } catch {
    // Best effort: fall back to the decrypted bytes when inflation fails.
    return plain;
  }
}
|
|
2067
|
+
|
|
2068
|
+
// src/hwp5/cfb-lenient.ts
|
|
2069
|
+
// Signature bytes at the start of every compound file.
var CFB_MAGIC = Buffer.from([208, 207, 17, 224, 161, 177, 26, 225]);
// Sentinel sector ids used in FAT / DIFAT / mini-FAT chains.
var END_OF_CHAIN = 4294967294;
var FREE_SECT = 4294967295;
// Hard limits that keep a damaged or hostile file from exhausting memory/CPU.
var MAX_CHAIN_LENGTH = 1e6;
var MAX_DIR_ENTRIES = 1e5;
var MAX_STREAM_SIZE = 100 * 1024 * 1024;
/**
 * Best-effort parser for a compound file (CFB) container that tolerates
 * truncated or partially corrupted files.
 *
 * Reads the header, the FAT (header DIFAT plus chained DIFAT sectors), the
 * directory chain and, lazily, the mini FAT / mini stream. Sectors that lie
 * outside the file read as empty instead of raising.
 *
 * @param {Buffer} data Whole file contents.
 * @returns {{findStream: (path: string) => (Buffer|null),
 *            entries: () => Array<{name: string, type: number, startSector: number, size: number}>}}
 *   `findStream` returns the stream bytes or null when the stream is missing or
 *   empty; `entries` lists all stream (type 2) directory entries.
 * @throws {Error} When the file is shorter than 512 bytes, the magic bytes do
 *   not match, or a sector-size shift in the header is out of range.
 */
function parseLenientCfb(data) {
  if (data.length < 512) throw new Error("CFB \uD30C\uC77C\uC774 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 512\uBC14\uC774\uD2B8)");
  if (!data.subarray(0, 8).equals(CFB_MAGIC)) throw new Error("CFB \uB9E4\uC9C1 \uBC14\uC774\uD2B8 \uBD88\uC77C\uCE58");
  const sectorSizeShift = data.readUInt16LE(30);
  if (sectorSizeShift < 7 || sectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + sectorSizeShift);
  const sectorSize = 1 << sectorSizeShift;
  const miniSectorSizeShift = data.readUInt16LE(32);
  if (miniSectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uBBF8\uB2C8 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + miniSectorSizeShift);
  const miniSectorSize = 1 << miniSectorSizeShift;
  const fatSectorCount = data.readUInt32LE(44);
  const firstDirSector = data.readUInt32LE(48);
  const miniStreamCutoff = data.readUInt32LE(56);
  const firstMiniFatSector = data.readUInt32LE(60);
  const miniFatSectorCount = data.readUInt32LE(64);
  const firstDifatSector = data.readUInt32LE(68);
  const difatSectorCount = data.readUInt32LE(72);
  // Directory-tree link value meaning "no entry".
  const NO_STREAM = 4294967295;
  // The header occupies one whole sector: 512 bytes for sectors up to 512 bytes,
  // otherwise a full sector (e.g. 4096 bytes for 4096-byte sectors), so sector 0
  // begins right after it.
  const headerSize = Math.max(512, sectorSize);
  const entriesPerFatSector = sectorSize / 4;
  // Bound the FAT by what the file can actually hold so a forged header/DIFAT
  // cannot force a huge allocation. The 109 header slots are always honoured.
  const sectorsInFile = Math.max(0, Math.ceil((data.length - headerSize) / sectorSize));
  const fatSectorLimit = Math.min(fatSectorCount, Math.max(109, sectorsInFile));

  /** Byte offset of sector `id` within the file. */
  function sectorOffset(id) {
    return headerSize + id * sectorSize;
  }
  /** Returns the bytes of sector `id`, or an empty buffer when it is not fully inside the file. */
  function readSectorData(id) {
    const off = sectorOffset(id);
    if (off + sectorSize > data.length) return Buffer.alloc(0);
    return data.subarray(off, off + sectorSize);
  }

  // FAT sector ids: first the 109 slots stored in the header ...
  const fatSectors = [];
  for (let i = 0; i < 109 && fatSectors.length < fatSectorLimit; i++) {
    const sid = data.readUInt32LE(76 + i * 4);
    if (sid === FREE_SECT || sid === END_OF_CHAIN) break;
    fatSectors.push(sid);
  }
  // ... then the chained DIFAT sectors (last slot of each links to the next one).
  let difatSector = firstDifatSector;
  const visitedDifat = /* @__PURE__ */ new Set();
  const entriesPerDifatSector = sectorSize / 4 - 1;
  for (let d = 0; d < difatSectorCount && difatSector !== END_OF_CHAIN && difatSector !== FREE_SECT; d++) {
    if (visitedDifat.has(difatSector)) break;
    visitedDifat.add(difatSector);
    const buf = readSectorData(difatSector);
    // A DIFAT sector outside the file ends the walk instead of throwing on a short read.
    if (buf.length < sectorSize) break;
    for (let i = 0; i < entriesPerDifatSector && fatSectors.length < fatSectorLimit; i++) {
      const sid = buf.readUInt32LE(i * 4);
      if (sid === FREE_SECT || sid === END_OF_CHAIN) continue;
      fatSectors.push(sid);
    }
    difatSector = buf.readUInt32LE(entriesPerDifatSector * 4);
  }

  // Flatten all FAT sectors into one lookup table; unreadable sectors yield FREE_SECT.
  const fatTable = new Uint32Array(fatSectors.length * entriesPerFatSector);
  for (let fi = 0; fi < fatSectors.length; fi++) {
    const buf = readSectorData(fatSectors[fi]);
    for (let i = 0; i < entriesPerFatSector; i++) {
      fatTable[fi * entriesPerFatSector + i] = i * 4 + 3 < buf.length ? buf.readUInt32LE(i * 4) : FREE_SECT;
    }
  }

  /**
   * Follows a FAT chain from `startSector` and returns at most `maxBytes` bytes.
   * Stops on loops, over-long chains and sector ids the FAT does not cover.
   */
  function readChain(startSector, maxBytes) {
    if (startSector === END_OF_CHAIN || startSector === FREE_SECT) return Buffer.alloc(0);
    if (maxBytes > MAX_STREAM_SIZE) throw new Error("\uC2A4\uD2B8\uB9BC\uC774 \uB108\uBB34 \uD07D\uB2C8\uB2E4");
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < maxBytes) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const buf = readSectorData(current);
      const remaining = maxBytes - totalRead;
      chunks.push(remaining < sectorSize ? buf.subarray(0, remaining) : buf);
      totalRead += Math.min(buf.length, remaining);
      current = current < fatTable.length ? fatTable[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }

  let miniFatTable = null;
  /** Lazily loads the mini FAT; an absent mini FAT yields an empty table. */
  function getMiniFatTable() {
    if (miniFatTable) return miniFatTable;
    if (miniFatSectorCount === 0 || firstMiniFatSector === END_OF_CHAIN) {
      miniFatTable = new Uint32Array(0);
      return miniFatTable;
    }
    // Clamp so a forged sector count degrades to a partial table instead of throwing lazily.
    const miniFatBytes = Math.min(miniFatSectorCount * sectorSize, MAX_STREAM_SIZE);
    const miniFatData = readChain(firstMiniFatSector, miniFatBytes);
    const entries = Math.floor(miniFatData.length / 4);
    miniFatTable = new Uint32Array(entries);
    for (let i = 0; i < entries; i++) {
      miniFatTable[i] = miniFatData.readUInt32LE(i * 4);
    }
    return miniFatTable;
  }

  // Directory: fixed 128-byte entries. Entries with an invalid name length are
  // kept as empty placeholders so that entry indices stay aligned.
  const dirData = readChain(firstDirSector, MAX_DIR_ENTRIES * 128);
  const dirEntries = [];
  // Sibling/child links, kept in a parallel array so entry objects keep their shape.
  const dirLinks = [];
  for (let offset = 0; offset + 128 <= dirData.length && dirEntries.length < MAX_DIR_ENTRIES; offset += 128) {
    const nameLen = dirData.readUInt16LE(offset + 64);
    if (nameLen <= 0 || nameLen > 64) {
      dirEntries.push({ name: "", type: 0, startSector: 0, size: 0 });
      dirLinks.push({ left: NO_STREAM, right: NO_STREAM, child: NO_STREAM });
      continue;
    }
    // nameLen includes the UTF-16 terminator.
    const nameBytes = nameLen - 2;
    const name = nameBytes > 0 ? dirData.subarray(offset, offset + nameBytes).toString("utf16le") : "";
    const type = dirData[offset + 66];
    const startSector = dirData.readUInt32LE(offset + 116);
    const size = dirData.readUInt32LE(offset + 120);
    dirEntries.push({ name, type, startSector, size });
    dirLinks.push({
      left: dirData.readUInt32LE(offset + 68),
      right: dirData.readUInt32LE(offset + 72),
      child: dirData.readUInt32LE(offset + 76)
    });
  }

  let miniStreamData = null;
  /** Lazily loads the mini stream, which is stored as the root entry's stream. */
  function getMiniStream() {
    if (miniStreamData) return miniStreamData;
    const root = dirEntries[0];
    if (!root || root.type !== 5) {
      miniStreamData = Buffer.alloc(0);
      return miniStreamData;
    }
    // Unknown (0) or oversized root sizes are clamped to the global stream limit.
    miniStreamData = readChain(root.startSector, Math.min(root.size || MAX_STREAM_SIZE, MAX_STREAM_SIZE));
    return miniStreamData;
  }

  /** Reads `size` bytes of a mini-stream chain; chunks outside the mini stream are skipped. */
  function readMiniStream(startSector, size) {
    const mft = getMiniFatTable();
    const ms = getMiniStream();
    if (mft.length === 0 || ms.length === 0) return Buffer.alloc(0);
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < size) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const off = current * miniSectorSize;
      const remaining = size - totalRead;
      const chunkSize = Math.min(miniSectorSize, remaining);
      if (off + chunkSize <= ms.length) {
        chunks.push(ms.subarray(off, off + chunkSize));
      }
      totalRead += chunkSize;
      current = current < mft.length ? mft[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }

  /** Reads an entry's bytes, trying the mini stream first for entries below the cutoff. */
  function readStreamData(entry) {
    if (entry.size === 0) return Buffer.alloc(0);
    if (entry.size < miniStreamCutoff) {
      const miniResult = readMiniStream(entry.startSector, entry.size);
      if (miniResult.length > 0) return miniResult;
    }
    return readChain(entry.startSector, entry.size);
  }

  /** Collects the directory indices reachable through a storage's child/sibling links. */
  function listChildren(storageIdx) {
    const found = [];
    const seen = /* @__PURE__ */ new Set();
    const pending = [dirLinks[storageIdx].child];
    while (pending.length > 0) {
      const id = pending.pop();
      // NO_STREAM and corrupt ids fall outside the table; `seen` guards against link loops.
      if (id >= dirEntries.length || seen.has(id)) continue;
      seen.add(id);
      found.push(id);
      pending.push(dirLinks[id].left, dirLinks[id].right);
    }
    return found;
  }

  /** Resolves `parts` by walking storages from the root entry; null when any step fails. */
  function findEntryInTree(parts) {
    const root = dirEntries[0];
    if (!root || root.type !== 5) return null;
    let storageIdx = 0;
    for (let p = 0; p < parts.length; p++) {
      const isLeaf = p === parts.length - 1;
      const wantedType = isLeaf ? 2 : 1;
      const match = listChildren(storageIdx).find((id) => dirEntries[id].name === parts[p] && dirEntries[id].type === wantedType);
      if (match === void 0) return null;
      if (isLeaf) return dirEntries[match];
      storageIdx = match;
    }
    return null;
  }

  /**
   * Finds a stream entry. Single-component paths search all streams by name.
   * Multi-component paths are resolved through the directory tree so that
   * equally named streams in different storages are told apart, and fall back
   * to a flat name search when the tree cannot be followed.
   */
  function findEntryByPath(path) {
    const parts = path.replace(/^\//, "").split("/");
    if (parts.length === 1) {
      return dirEntries.find((e) => e.name === parts[0] && e.type === 2) ?? null;
    }
    const viaTree = findEntryInTree(parts);
    if (viaTree) return viaTree;
    const streamName = parts.slice(1).join("/");
    const lastPart = parts[parts.length - 1];
    return dirEntries.find((e) => e.type === 2 && e.name === streamName) ?? dirEntries.find((e) => e.type === 2 && e.name === lastPart) ?? null;
  }

  return {
    findStream(path) {
      const normalized = path.replace(/^\//, "");
      const entry = findEntryByPath(normalized);
      if (!entry || entry.type !== 2) return null;
      const stream = readStreamData(entry);
      return stream.length > 0 ? stream : null;
    },
    entries() {
      return dirEntries.filter((e) => e.type === 2);
    }
  };
}
|
|
2246
|
+
|
|
1315
2247
|
// src/hwp5/parser.ts
|
|
1316
2248
|
init_page_range();
|
|
1317
2249
|
var import_module = require("module");
|
|
@@ -1321,21 +2253,40 @@ var CFB = require2("cfb");
|
|
|
1321
2253
|
var MAX_SECTIONS = 100;
|
|
1322
2254
|
var MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024;
|
|
1323
2255
|
function parseHwp5Document(buffer, options) {
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
2256
|
+
let cfb = null;
|
|
2257
|
+
let lenientCfb = null;
|
|
2258
|
+
const warnings = [];
|
|
2259
|
+
try {
|
|
2260
|
+
cfb = CFB.parse(buffer);
|
|
2261
|
+
} catch {
|
|
2262
|
+
try {
|
|
2263
|
+
lenientCfb = parseLenientCfb(buffer);
|
|
2264
|
+
warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
|
|
2265
|
+
} catch {
|
|
2266
|
+
throw new KordocError("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
|
|
2267
|
+
}
|
|
2268
|
+
}
|
|
2269
|
+
const findStream = (path) => {
|
|
2270
|
+
if (cfb) {
|
|
2271
|
+
const entry = CFB.find(cfb, path);
|
|
2272
|
+
return entry?.content ? Buffer.from(entry.content) : null;
|
|
2273
|
+
}
|
|
2274
|
+
return lenientCfb.findStream(path);
|
|
2275
|
+
};
|
|
2276
|
+
const headerData = findStream("/FileHeader");
|
|
2277
|
+
if (!headerData) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
|
|
2278
|
+
const header = parseFileHeader(headerData);
|
|
1328
2279
|
if (header.signature !== "HWP Document File") throw new KordocError("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
|
|
1329
2280
|
if (header.flags & FLAG_ENCRYPTED) throw new KordocError("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
1330
2281
|
if (header.flags & FLAG_DRM) throw new KordocError("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
1331
2282
|
const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
|
|
2283
|
+
const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
|
|
1332
2284
|
const metadata = {
|
|
1333
2285
|
version: `${header.versionMajor}.x`
|
|
1334
2286
|
};
|
|
1335
|
-
extractHwp5Metadata(cfb, metadata);
|
|
1336
|
-
const docInfo = parseDocInfoStream(cfb, compressed);
|
|
1337
|
-
const
|
|
1338
|
-
const sections = findSections(cfb);
|
|
2287
|
+
if (cfb) extractHwp5Metadata(cfb, metadata);
|
|
2288
|
+
const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
|
|
2289
|
+
const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
|
|
1339
2290
|
if (sections.length === 0) throw new KordocError("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
1340
2291
|
metadata.pageCount = sections.length;
|
|
1341
2292
|
const pageFilter = options?.pages ? parsePageRange(options.pages, sections.length) : null;
|
|
@@ -1347,7 +2298,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
1347
2298
|
if (pageFilter && !pageFilter.has(si + 1)) continue;
|
|
1348
2299
|
try {
|
|
1349
2300
|
const sectionData = sections[si];
|
|
1350
|
-
const data = compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
2301
|
+
const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
1351
2302
|
totalDecompressed += data.length;
|
|
1352
2303
|
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new KordocError("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
1353
2304
|
const records = readRecords(data);
|
|
@@ -1360,7 +2311,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
1360
2311
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
1361
2312
|
}
|
|
1362
2313
|
}
|
|
1363
|
-
const images = extractHwp5Images(cfb, blocks, compressed, warnings);
|
|
2314
|
+
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
1364
2315
|
if (docInfo) {
|
|
1365
2316
|
detectHwp5Headings(blocks, docInfo);
|
|
1366
2317
|
}
|
|
@@ -1379,6 +2330,15 @@ function parseDocInfoStream(cfb, compressed) {
|
|
|
1379
2330
|
return null;
|
|
1380
2331
|
}
|
|
1381
2332
|
}
|
|
2333
|
+
/**
 * Parses an already-extracted DocInfo stream.
 *
 * @param {Buffer|null} raw DocInfo stream bytes, or null when the stream is missing.
 * @param {boolean} compressed Whether `raw` must be decompressed first.
 * @returns {object|null} Result of parseDocInfo, or null when `raw` is missing
 *   or any step (decompression, record reading, parsing) throws.
 */
function parseDocInfoFromStream(raw, compressed) {
  if (raw) {
    try {
      const body = compressed ? decompressStream(raw) : raw;
      const records = readRecords(body);
      return parseDocInfo(records);
    } catch {
      // DocInfo is optional for callers: any failure is reported as "no DocInfo".
    }
  }
  return null;
}
|
|
1382
2342
|
function detectHwp5Headings(blocks, docInfo) {
|
|
1383
2343
|
let baseFontSize = 0;
|
|
1384
2344
|
for (const style of docInfo.styles) {
|
|
@@ -1458,6 +2418,20 @@ function extractHwp5Metadata(cfb, metadata) {
|
|
|
1458
2418
|
} catch {
|
|
1459
2419
|
}
|
|
1460
2420
|
}
|
|
2421
|
+
/**
 * Collects and decrypts the `/ViewText/Section{i}` streams of a distribution
 * document from a strictly parsed container.
 *
 * Scanning starts at section 0 and stops at the first missing section, at the
 * first section that fails to decrypt, or after MAX_SECTIONS sections.
 *
 * @param {object} cfb Container as returned by CFB.parse.
 * @param {boolean} compressed Forwarded to decryptViewText.
 * @returns {Buffer[]} Decrypted section data in section order.
 */
function findViewTextSections(cfb, compressed) {
  const decryptedSections = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const found = CFB.find(cfb, `/ViewText/Section${index}`);
    if (!found?.content) break;
    let plain;
    try {
      plain = decryptViewText(Buffer.from(found.content), compressed);
    } catch {
      // A section that cannot be decrypted ends the scan.
      break;
    }
    // Sections are appended in ascending index order, so no sorting is needed.
    decryptedSections.push(plain);
    index++;
  }
  return decryptedSections;
}
|
|
1461
2435
|
function findSections(cfb) {
|
|
1462
2436
|
const sections = [];
|
|
1463
2437
|
for (let i = 0; i < MAX_SECTIONS; i++) {
|
|
@@ -1476,6 +2450,38 @@ function findSections(cfb) {
|
|
|
1476
2450
|
}
|
|
1477
2451
|
return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
|
|
1478
2452
|
}
|
|
2453
|
+
/**
 * Collects the body-text section streams from a leniently parsed container.
 *
 * First looks up `/BodyText/Section{i}` (falling back to a bare `Section{i}`
 * lookup) for i = 0.. until a section is missing. If nothing is found that way,
 * every stream whose name starts with "Section" is taken and ordered by the
 * number in its name.
 *
 * The streams are returned RAW. parseHwp5Document decompresses each
 * non-distribution section itself, so decompressing here as well would inflate
 * the data twice, and an inflate failure here would bypass the caller's
 * per-section error handling.
 *
 * @param {{findStream: Function, entries: Function}} lcfb Lenient container.
 * @param {boolean} compressed Unused; kept so existing call sites stay valid.
 * @returns {Buffer[]} Raw section streams in section order.
 */
function findSectionsLenient(lcfb, compressed) {
  const sections = [];
  for (let i = 0; i < MAX_SECTIONS; i++) {
    const raw = lcfb.findStream(`/BodyText/Section${i}`) ?? lcfb.findStream(`Section${i}`);
    if (!raw) break;
    sections.push({ idx: i, content: raw });
  }
  if (sections.length === 0) {
    // Fallback for damaged directories: accept any stream named "Section…".
    for (const e of lcfb.entries()) {
      if (sections.length >= MAX_SECTIONS) break;
      if (!e.name.startsWith("Section")) continue;
      // A non-numeric suffix sorts as section 0.
      const idx = Number.parseInt(e.name.replace("Section", ""), 10) || 0;
      const raw = lcfb.findStream(e.name);
      if (raw) sections.push({ idx, content: raw });
    }
  }
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
}
|
|
2472
|
+
/**
 * Collects and decrypts the ViewText sections of a distribution document from
 * a leniently parsed container.
 *
 * For i = 0.. it looks up `/ViewText/Section{i}` and, when that yields nothing,
 * a bare `Section{i}`. The scan ends at the first missing section, at the first
 * section that fails to decrypt, or after MAX_SECTIONS sections.
 *
 * @param {{findStream: Function}} lcfb Lenient container.
 * @param {boolean} compressed Forwarded to decryptViewText.
 * @returns {Buffer[]} Decrypted section data in section order.
 */
function findViewTextSectionsLenient(lcfb, compressed) {
  const decryptedSections = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const stream = lcfb.findStream(`/ViewText/Section${index}`) ?? lcfb.findStream(`Section${index}`);
    if (!stream) break;
    let plain;
    try {
      plain = decryptViewText(stream, compressed);
    } catch {
      // A section that cannot be decrypted ends the scan.
      break;
    }
    // Sections are appended in ascending index order, so no sorting is needed.
    decryptedSections.push(plain);
    index++;
  }
  return decryptedSections;
}
|
|
1479
2485
|
var TAG_SHAPE_COMPONENT = 74;
|
|
1480
2486
|
function extractBinDataId(records, ctrlIdx) {
|
|
1481
2487
|
const ctrlLevel = records[ctrlIdx].level;
|
|
@@ -1550,6 +2556,53 @@ function extractHwp5Images(cfb, blocks, compressed, warnings) {
|
|
|
1550
2556
|
}
|
|
1551
2557
|
return images;
|
|
1552
2558
|
}
|
|
2559
|
+
/**
 * Resolves image blocks against the `BINxxxx` streams of a leniently parsed
 * container.
 *
 * Every stream whose name starts with "BIN" plus four digits is loaded (and
 * decompressed when `compressed` is set; streams that fail to decompress are
 * used as stored). Each block of type "image" whose `text` is a numeric BinData
 * id is then either
 *  - turned into an extracted image: `block.text` becomes the generated file
 *    name and `block.imageData` is attached, or
 *  - downgraded to a placeholder paragraph, with a SKIPPED_IMAGE warning, when
 *    the BinData stream is missing or its format is not recognised.
 *
 * @param {{findStream: Function, entries: Function}} lcfb Lenient container.
 * @param {Array<object>} blocks Parsed blocks; image blocks are mutated in place.
 * @param {boolean} compressed Whether BinData streams are expected to be compressed.
 * @param {Array<object>} warnings Receives SKIPPED_IMAGE warnings.
 * @returns {Array<{filename: string, data: Uint8Array, mimeType: string}>} Extracted images.
 */
function extractHwp5ImagesLenient(lcfb, blocks, compressed, warnings) {
  const binDataMap = /* @__PURE__ */ new Map();
  const binRe = /^BIN(\d{4})/i;
  for (const e of lcfb.entries()) {
    const match = e.name.match(binRe);
    if (!match) continue;
    const idx = Number.parseInt(match[1], 10);
    let raw = lcfb.findStream(e.name);
    if (!raw) continue;
    if (compressed) {
      try {
        raw = decompressStream(raw);
      } catch {
        // Some BinData streams are stored uncompressed even in compressed
        // documents; keep the stored bytes and let MIME detection decide.
      }
    }
    binDataMap.set(idx, { data: raw, name: e.name });
  }
  if (binDataMap.size === 0) return [];
  const images = [];
  let imageIndex = 0;
  for (const block of blocks) {
    if (block.type !== "image" || !block.text) continue;
    const binId = Number.parseInt(block.text, 10);
    if (Number.isNaN(binId)) continue;
    const bin = binDataMap.get(binId);
    if (!bin) {
      // The message previously carried U+FFFD replacement characters in place
      // of the first syllable; restored to the intended wording.
      warnings.push({ page: block.pageNumber, message: `BinData ${binId} \uC5C6\uC74C`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: BinData ${binId}]`;
      continue;
    }
    const mime = detectImageMime(bin.data);
    if (!mime) {
      warnings.push({ page: block.pageNumber, message: `BinData ${binId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: ${bin.name}]`;
      continue;
    }
    imageIndex++;
    const ext = mime.includes("jpeg") ? "jpg" : mime.includes("png") ? "png" : mime.includes("gif") ? "gif" : mime.includes("bmp") ? "bmp" : "bin";
    const filename = `image_${String(imageIndex).padStart(3, "0")}.${ext}`;
    images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
    block.text = filename;
    block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
  }
  return images;
}
|
|
1553
2606
|
function parseSection(records, docInfo, warnings, sectionNum) {
|
|
1554
2607
|
const blocks = [];
|
|
1555
2608
|
let i = 0;
|
|
@@ -1586,12 +2639,61 @@ function parseSection(records, docInfo, warnings, sectionNum) {
|
|
|
1586
2639
|
}
|
|
1587
2640
|
} else if (ctrlId === " elo" || ctrlId === "ole ") {
|
|
1588
2641
|
warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
|
|
2642
|
+
} else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
|
|
2643
|
+
const noteText = extractNoteText(records, i);
|
|
2644
|
+
if (noteText && blocks.length > 0) {
|
|
2645
|
+
const lastBlock = blocks[blocks.length - 1];
|
|
2646
|
+
if (lastBlock.type === "paragraph") {
|
|
2647
|
+
lastBlock.footnoteText = lastBlock.footnoteText ? lastBlock.footnoteText + "; " + noteText : noteText;
|
|
2648
|
+
}
|
|
2649
|
+
}
|
|
2650
|
+
} else if (ctrlId === "%tok" || ctrlId === "klnk") {
|
|
2651
|
+
const url = extractHyperlinkUrl(rec.data);
|
|
2652
|
+
if (url && blocks.length > 0) {
|
|
2653
|
+
const lastBlock = blocks[blocks.length - 1];
|
|
2654
|
+
if (lastBlock.type === "paragraph" && !lastBlock.href) {
|
|
2655
|
+
lastBlock.href = sanitizeHref(url) ?? void 0;
|
|
2656
|
+
}
|
|
2657
|
+
}
|
|
1589
2658
|
}
|
|
1590
2659
|
}
|
|
1591
2660
|
i++;
|
|
1592
2661
|
}
|
|
1593
2662
|
return blocks;
|
|
1594
2663
|
}
|
|
2664
|
+
/**
 * Gathers the text of a note control from the records nested beneath it.
 *
 * Walks the records that follow `ctrlIdx` (at most 99 of them) while their
 * level stays deeper than the control's level, and joins the trimmed,
 * non-empty text of every TAG_PARA_TEXT record with single spaces.
 *
 * @param {Array<{level: number, tagId: number, data: *}>} records Section records.
 * @param {number} ctrlIdx Index of the control record.
 * @returns {string|null} Joined note text, or null when no text was found.
 */
function extractNoteText(records, ctrlIdx) {
  const parentLevel = records[ctrlIdx].level;
  const scanEnd = Math.min(records.length, ctrlIdx + 100);
  const pieces = [];
  for (let k = ctrlIdx + 1; k < scanEnd; k++) {
    const rec = records[k];
    // A record at the control's level (or above) is no longer part of the note.
    if (rec.level <= parentLevel) break;
    if (rec.tagId !== TAG_PARA_TEXT) continue;
    const text = extractText(rec.data).trim();
    if (text) pieces.push(text);
  }
  return pieces.length > 0 ? pieces.join(" ") : null;
}
|
|
2677
|
+
/**
 * Extracts an http(s) URL stored as UTF-16LE text inside a control record.
 *
 * Finds the first occurrence of "http" (UTF-16LE) and reads 16-bit units from
 * there up to the first NUL unit or the end of the buffer.
 *
 * @param {Buffer} data Raw control record data.
 * @returns {string|null} The URL when it starts with http:// or https://, has
 *   at least one character after the scheme and is shorter than 2000
 *   characters; otherwise null. Any error while reading also yields null.
 */
function extractHyperlinkUrl(data) {
  try {
    const start = data.indexOf(Buffer.from("http", "utf16le"));
    if (start < 0) return null;
    let stop = start;
    // Advance one UTF-16 code unit at a time until the terminator.
    for (; stop + 1 < data.length; stop += 2) {
      if (data.readUInt16LE(stop) === 0) break;
    }
    const candidate = data.subarray(start, stop).toString("utf16le");
    const looksLikeUrl = /^https?:\/\/.+/.test(candidate);
    return looksLikeUrl && candidate.length < 2e3 ? candidate : null;
  } catch {
    // Malformed input is treated as "no hyperlink".
    return null;
  }
}
|
|
1595
2697
|
function resolveCharStyle(charShapeIds, docInfo) {
|
|
1596
2698
|
if (charShapeIds.length === 0 || docInfo.charShapes.length === 0) return void 0;
|
|
1597
2699
|
const freq = /* @__PURE__ */ new Map();
|