kordoc 1.8.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -3
- package/dist/{chunk-UUKFY5P5.js → chunk-UMO6QQO5.js} +2 -2
- package/dist/{chunk-QQ6PZADA.js → chunk-UUHAAZYN.js} +1135 -33
- package/dist/chunk-UUHAAZYN.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/index.cjs +1204 -102
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1204 -102
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-OTCR2KMY.js → utils-K23YMTIM.js} +2 -2
- package/dist/{watch-JFDOENIO.js → watch-CGG7CCHJ.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-QQ6PZADA.js.map +0 -1
- /package/dist/{chunk-UUKFY5P5.js.map → chunk-UMO6QQO5.js.map} +0 -0
- /package/dist/{utils-OTCR2KMY.js.map → utils-K23YMTIM.js.map} +0 -0
- /package/dist/{watch-JFDOENIO.js.map → watch-CGG7CCHJ.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -137,35 +137,106 @@ import JSZip2 from "jszip";
|
|
|
137
137
|
import { inflateRawSync } from "zlib";
|
|
138
138
|
import { DOMParser } from "@xmldom/xmldom";
|
|
139
139
|
|
|
140
|
-
// src/
|
|
140
|
+
// src/utils.ts
|
|
141
|
+
var VERSION = true ? "2.0.0" : "0.0.0-dev";
|
|
142
|
+
function toArrayBuffer(buf) {
|
|
143
|
+
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
144
|
+
return buf.buffer;
|
|
145
|
+
}
|
|
146
|
+
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
147
|
+
}
|
|
148
|
+
var KordocError = class extends Error {
|
|
149
|
+
constructor(message) {
|
|
150
|
+
super(message);
|
|
151
|
+
this.name = "KordocError";
|
|
152
|
+
}
|
|
153
|
+
};
|
|
154
|
+
function isPathTraversal(name) {
|
|
155
|
+
if (name.includes("\0")) return true;
|
|
156
|
+
const normalized = name.replace(/\\/g, "/");
|
|
157
|
+
return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
|
|
158
|
+
}
|
|
159
|
+
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
|
|
160
|
+
try {
|
|
161
|
+
const data = new DataView(buffer);
|
|
162
|
+
const len = buffer.byteLength;
|
|
163
|
+
let eocdOffset = -1;
|
|
164
|
+
for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
|
|
165
|
+
if (data.getUint32(i, true) === 101010256) {
|
|
166
|
+
eocdOffset = i;
|
|
167
|
+
break;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
|
|
171
|
+
const entryCount = data.getUint16(eocdOffset + 10, true);
|
|
172
|
+
if (entryCount > maxEntries) {
|
|
173
|
+
throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
|
|
174
|
+
}
|
|
175
|
+
const cdSize = data.getUint32(eocdOffset + 12, true);
|
|
176
|
+
const cdOffset = data.getUint32(eocdOffset + 16, true);
|
|
177
|
+
if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
|
|
178
|
+
let totalUncompressed = 0;
|
|
179
|
+
let pos = cdOffset;
|
|
180
|
+
for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
|
|
181
|
+
if (data.getUint32(pos, true) !== 33639248) break;
|
|
182
|
+
totalUncompressed += data.getUint32(pos + 24, true);
|
|
183
|
+
const nameLen = data.getUint16(pos + 28, true);
|
|
184
|
+
const extraLen = data.getUint16(pos + 30, true);
|
|
185
|
+
const commentLen = data.getUint16(pos + 32, true);
|
|
186
|
+
pos += 46 + nameLen + extraLen + commentLen;
|
|
187
|
+
}
|
|
188
|
+
if (totalUncompressed > maxUncompressedSize) {
|
|
189
|
+
throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
|
|
190
|
+
}
|
|
191
|
+
return { totalUncompressed, entryCount };
|
|
192
|
+
} catch (err) {
|
|
193
|
+
if (err instanceof KordocError) throw err;
|
|
194
|
+
return { totalUncompressed: 0, entryCount: 0 };
|
|
195
|
+
}
|
|
196
|
+
}
|
|
141
197
|
var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
|
|
142
198
|
function sanitizeHref(href) {
|
|
143
199
|
const trimmed = href.trim();
|
|
144
200
|
if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
|
|
145
201
|
return trimmed;
|
|
146
202
|
}
|
|
203
|
+
function classifyError(err) {
|
|
204
|
+
if (!(err instanceof Error)) return "PARSE_ERROR";
|
|
205
|
+
const msg = err.message;
|
|
206
|
+
if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
|
|
207
|
+
if (msg.includes("DRM")) return "DRM_PROTECTED";
|
|
208
|
+
if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
|
|
209
|
+
if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
|
|
210
|
+
if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
|
|
211
|
+
if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
|
|
212
|
+
if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
|
|
213
|
+
return "PARSE_ERROR";
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
// src/table/builder.ts
|
|
147
217
|
var MAX_COLS = 200;
|
|
148
218
|
var MAX_ROWS = 1e4;
|
|
149
219
|
function buildTable(rows) {
|
|
150
220
|
if (rows.length > MAX_ROWS) rows = rows.slice(0, MAX_ROWS);
|
|
151
221
|
const numRows = rows.length;
|
|
152
|
-
const
|
|
222
|
+
const hasAddr = rows.some((row) => row.some((c) => c.colAddr !== void 0 && c.rowAddr !== void 0));
|
|
223
|
+
if (hasAddr) return buildTableDirect(rows, numRows);
|
|
153
224
|
let maxCols = 0;
|
|
225
|
+
const tempOccupied = Array.from({ length: numRows }, () => []);
|
|
154
226
|
for (let rowIdx = 0; rowIdx < numRows; rowIdx++) {
|
|
155
227
|
let colIdx = 0;
|
|
156
228
|
for (const cell of rows[rowIdx]) {
|
|
157
|
-
while (colIdx < MAX_COLS && tempOccupied
|
|
229
|
+
while (colIdx < MAX_COLS && tempOccupied[rowIdx][colIdx]) colIdx++;
|
|
158
230
|
if (colIdx >= MAX_COLS) break;
|
|
159
231
|
for (let r = rowIdx; r < Math.min(rowIdx + cell.rowSpan, numRows); r++) {
|
|
160
232
|
for (let c = colIdx; c < Math.min(colIdx + cell.colSpan, MAX_COLS); c++) {
|
|
161
|
-
tempOccupied
|
|
233
|
+
tempOccupied[r][c] = true;
|
|
162
234
|
}
|
|
163
235
|
}
|
|
164
236
|
colIdx += cell.colSpan;
|
|
165
237
|
if (colIdx > maxCols) maxCols = colIdx;
|
|
166
238
|
}
|
|
167
239
|
}
|
|
168
|
-
tempOccupied.clear();
|
|
169
240
|
if (maxCols === 0) return { rows: 0, cols: 0, cells: [], hasHeader: false };
|
|
170
241
|
const grid = Array.from(
|
|
171
242
|
{ length: numRows },
|
|
@@ -193,6 +264,40 @@ function buildTable(rows) {
|
|
|
193
264
|
cellIdx++;
|
|
194
265
|
}
|
|
195
266
|
}
|
|
267
|
+
return trimAndReturn(grid, numRows, maxCols);
|
|
268
|
+
}
|
|
269
|
+
function buildTableDirect(rows, numRows) {
|
|
270
|
+
let maxCols = 0;
|
|
271
|
+
for (const row of rows) {
|
|
272
|
+
for (const cell of row) {
|
|
273
|
+
const end = (cell.colAddr ?? 0) + cell.colSpan;
|
|
274
|
+
if (end > maxCols) maxCols = end;
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
if (maxCols === 0) return { rows: 0, cols: 0, cells: [], hasHeader: false };
|
|
278
|
+
const grid = Array.from(
|
|
279
|
+
{ length: numRows },
|
|
280
|
+
() => Array.from({ length: maxCols }, () => ({ text: "", colSpan: 1, rowSpan: 1 }))
|
|
281
|
+
);
|
|
282
|
+
for (const row of rows) {
|
|
283
|
+
for (const cell of row) {
|
|
284
|
+
const r = cell.rowAddr ?? 0;
|
|
285
|
+
const c = cell.colAddr ?? 0;
|
|
286
|
+
if (r >= numRows || c >= maxCols) continue;
|
|
287
|
+
grid[r][c] = { text: cell.text.trim(), colSpan: cell.colSpan, rowSpan: cell.rowSpan };
|
|
288
|
+
for (let dr = 0; dr < cell.rowSpan; dr++) {
|
|
289
|
+
for (let dc = 0; dc < cell.colSpan; dc++) {
|
|
290
|
+
if (dr === 0 && dc === 0) continue;
|
|
291
|
+
if (r + dr < numRows && c + dc < maxCols) {
|
|
292
|
+
grid[r + dr][c + dc] = { text: "", colSpan: 1, rowSpan: 1 };
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
return trimAndReturn(grid, numRows, maxCols);
|
|
299
|
+
}
|
|
300
|
+
function trimAndReturn(grid, numRows, maxCols) {
|
|
196
301
|
let effectiveCols = maxCols;
|
|
197
302
|
while (effectiveCols > 0) {
|
|
198
303
|
const colEmpty = grid.every((row) => !row[effectiveCols - 1]?.text?.trim());
|
|
@@ -308,12 +413,10 @@ function tableToMarkdown(table) {
|
|
|
308
413
|
const display = Array.from({ length: numRows }, () => Array(numCols).fill(""));
|
|
309
414
|
const skip = /* @__PURE__ */ new Set();
|
|
310
415
|
for (let r = 0; r < numRows; r++) {
|
|
311
|
-
let cellIdx = 0;
|
|
312
416
|
for (let c = 0; c < numCols; c++) {
|
|
313
417
|
if (skip.has(`${r},${c}`)) continue;
|
|
314
|
-
const cell = cells[r]?.[
|
|
315
|
-
if (!cell)
|
|
316
|
-
cellIdx++;
|
|
418
|
+
const cell = cells[r]?.[c];
|
|
419
|
+
if (!cell) continue;
|
|
317
420
|
display[r][c] = sanitizeText(cell.text).replace(/\n/g, "<br>");
|
|
318
421
|
for (let dr = 0; dr < cell.rowSpan; dr++) {
|
|
319
422
|
for (let dc = 0; dc < cell.colSpan; dc++) {
|
|
@@ -361,82 +464,6 @@ var HEADING_RATIO_H1 = 1.5;
|
|
|
361
464
|
var HEADING_RATIO_H2 = 1.3;
|
|
362
465
|
var HEADING_RATIO_H3 = 1.15;
|
|
363
466
|
|
|
364
|
-
// src/utils.ts
|
|
365
|
-
var VERSION = true ? "1.8.0" : "0.0.0-dev";
|
|
366
|
-
function toArrayBuffer(buf) {
|
|
367
|
-
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
368
|
-
return buf.buffer;
|
|
369
|
-
}
|
|
370
|
-
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
371
|
-
}
|
|
372
|
-
var KordocError = class extends Error {
|
|
373
|
-
constructor(message) {
|
|
374
|
-
super(message);
|
|
375
|
-
this.name = "KordocError";
|
|
376
|
-
}
|
|
377
|
-
};
|
|
378
|
-
function isPathTraversal(name) {
|
|
379
|
-
if (name.includes("\0")) return true;
|
|
380
|
-
const normalized = name.replace(/\\/g, "/");
|
|
381
|
-
return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
|
|
382
|
-
}
|
|
383
|
-
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
|
|
384
|
-
try {
|
|
385
|
-
const data = new DataView(buffer);
|
|
386
|
-
const len = buffer.byteLength;
|
|
387
|
-
let eocdOffset = -1;
|
|
388
|
-
for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
|
|
389
|
-
if (data.getUint32(i, true) === 101010256) {
|
|
390
|
-
eocdOffset = i;
|
|
391
|
-
break;
|
|
392
|
-
}
|
|
393
|
-
}
|
|
394
|
-
if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
|
|
395
|
-
const entryCount = data.getUint16(eocdOffset + 10, true);
|
|
396
|
-
if (entryCount > maxEntries) {
|
|
397
|
-
throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
|
|
398
|
-
}
|
|
399
|
-
const cdSize = data.getUint32(eocdOffset + 12, true);
|
|
400
|
-
const cdOffset = data.getUint32(eocdOffset + 16, true);
|
|
401
|
-
if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
|
|
402
|
-
let totalUncompressed = 0;
|
|
403
|
-
let pos = cdOffset;
|
|
404
|
-
for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
|
|
405
|
-
if (data.getUint32(pos, true) !== 33639248) break;
|
|
406
|
-
totalUncompressed += data.getUint32(pos + 24, true);
|
|
407
|
-
const nameLen = data.getUint16(pos + 28, true);
|
|
408
|
-
const extraLen = data.getUint16(pos + 30, true);
|
|
409
|
-
const commentLen = data.getUint16(pos + 32, true);
|
|
410
|
-
pos += 46 + nameLen + extraLen + commentLen;
|
|
411
|
-
}
|
|
412
|
-
if (totalUncompressed > maxUncompressedSize) {
|
|
413
|
-
throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
|
|
414
|
-
}
|
|
415
|
-
return { totalUncompressed, entryCount };
|
|
416
|
-
} catch (err) {
|
|
417
|
-
if (err instanceof KordocError) throw err;
|
|
418
|
-
return { totalUncompressed: 0, entryCount: 0 };
|
|
419
|
-
}
|
|
420
|
-
}
|
|
421
|
-
var SAFE_HREF_RE2 = /^(?:https?:|mailto:|tel:|#)/i;
|
|
422
|
-
function sanitizeHref2(href) {
|
|
423
|
-
const trimmed = href.trim();
|
|
424
|
-
if (!trimmed || !SAFE_HREF_RE2.test(trimmed)) return null;
|
|
425
|
-
return trimmed;
|
|
426
|
-
}
|
|
427
|
-
function classifyError(err) {
|
|
428
|
-
if (!(err instanceof Error)) return "PARSE_ERROR";
|
|
429
|
-
const msg = err.message;
|
|
430
|
-
if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
|
|
431
|
-
if (msg.includes("DRM")) return "DRM_PROTECTED";
|
|
432
|
-
if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
|
|
433
|
-
if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
|
|
434
|
-
if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
|
|
435
|
-
if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
|
|
436
|
-
if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
|
|
437
|
-
return "PARSE_ERROR";
|
|
438
|
-
}
|
|
439
|
-
|
|
440
467
|
// src/hwpx/parser.ts
|
|
441
468
|
init_page_range();
|
|
442
469
|
var MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024;
|
|
@@ -909,6 +936,14 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
|
|
|
909
936
|
}
|
|
910
937
|
}
|
|
911
938
|
break;
|
|
939
|
+
case "cellAddr":
|
|
940
|
+
if (tableCtx?.cell) {
|
|
941
|
+
const ca = parseInt(el.getAttribute("colAddr") || "", 10);
|
|
942
|
+
const ra = parseInt(el.getAttribute("rowAddr") || "", 10);
|
|
943
|
+
if (!isNaN(ca)) tableCtx.cell.colAddr = ca;
|
|
944
|
+
if (!isNaN(ra)) tableCtx.cell.rowAddr = ra;
|
|
945
|
+
}
|
|
946
|
+
break;
|
|
912
947
|
case "cellSpan":
|
|
913
948
|
if (tableCtx?.cell) {
|
|
914
949
|
const cs = parseInt(el.getAttribute("colSpan") || "1", 10);
|
|
@@ -1035,7 +1070,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1035
1070
|
case "hyperlink": {
|
|
1036
1071
|
const url = child.getAttribute("url") || child.getAttribute("href") || "";
|
|
1037
1072
|
if (url) {
|
|
1038
|
-
const safe =
|
|
1073
|
+
const safe = sanitizeHref(url);
|
|
1039
1074
|
if (safe) href = safe;
|
|
1040
1075
|
}
|
|
1041
1076
|
walk(child);
|
|
@@ -1128,13 +1163,16 @@ var TAG_TABLE = 77;
|
|
|
1128
1163
|
var TAG_DOC_CHAR_SHAPE = 55;
|
|
1129
1164
|
var TAG_DOC_STYLE = 58;
|
|
1130
1165
|
var CHAR_LINE = 0;
|
|
1166
|
+
var CHAR_SECTION_BREAK = 10;
|
|
1131
1167
|
var CHAR_PARA = 13;
|
|
1132
1168
|
var CHAR_TAB = 9;
|
|
1133
1169
|
var CHAR_HYPHEN = 30;
|
|
1134
1170
|
var CHAR_NBSP = 31;
|
|
1135
1171
|
var CHAR_FIXED_NBSP = 24;
|
|
1172
|
+
var CHAR_FIXED_WIDTH = 25;
|
|
1136
1173
|
var FLAG_COMPRESSED = 1 << 0;
|
|
1137
1174
|
var FLAG_ENCRYPTED = 1 << 1;
|
|
1175
|
+
var FLAG_DISTRIBUTION = 1 << 2;
|
|
1138
1176
|
var FLAG_DRM = 1 << 4;
|
|
1139
1177
|
var MAX_RECORDS = 5e5;
|
|
1140
1178
|
function readRecords(data) {
|
|
@@ -1229,27 +1267,41 @@ function extractText(data) {
|
|
|
1229
1267
|
const ch = data.readUInt16LE(i);
|
|
1230
1268
|
i += 2;
|
|
1231
1269
|
switch (ch) {
|
|
1270
|
+
// ── char 타입 (2바이트만, 확장 데이터 없음) ──
|
|
1232
1271
|
case CHAR_LINE:
|
|
1233
1272
|
result += "\n";
|
|
1234
1273
|
break;
|
|
1235
|
-
case
|
|
1236
|
-
|
|
1237
|
-
case CHAR_TAB:
|
|
1238
|
-
result += " ";
|
|
1274
|
+
case CHAR_SECTION_BREAK:
|
|
1275
|
+
result += "\n";
|
|
1239
1276
|
if (i + 14 <= data.length) i += 14;
|
|
1240
1277
|
break;
|
|
1278
|
+
case CHAR_PARA:
|
|
1279
|
+
break;
|
|
1280
|
+
// 문단 끝
|
|
1241
1281
|
case CHAR_HYPHEN:
|
|
1242
1282
|
result += "-";
|
|
1243
1283
|
break;
|
|
1244
1284
|
case CHAR_NBSP:
|
|
1285
|
+
result += " ";
|
|
1286
|
+
break;
|
|
1245
1287
|
case CHAR_FIXED_NBSP:
|
|
1288
|
+
result += "\xA0";
|
|
1289
|
+
break;
|
|
1290
|
+
// 진짜 NBSP
|
|
1291
|
+
case CHAR_FIXED_WIDTH:
|
|
1246
1292
|
result += " ";
|
|
1247
1293
|
break;
|
|
1294
|
+
// 고정폭 공백
|
|
1295
|
+
// ── inline 타입 (2바이트 + 14바이트 확장) ──
|
|
1296
|
+
case CHAR_TAB:
|
|
1297
|
+
result += " ";
|
|
1298
|
+
if (i + 14 <= data.length) i += 14;
|
|
1299
|
+
break;
|
|
1248
1300
|
default:
|
|
1249
1301
|
if (ch >= 1 && ch <= 31) {
|
|
1250
|
-
const
|
|
1302
|
+
const isExtended = ch >= 1 && ch <= 3 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= 18 || ch >= 21 && ch <= 23;
|
|
1251
1303
|
const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
|
|
1252
|
-
if ((
|
|
1304
|
+
if ((isExtended || isInline) && i + 14 <= data.length) i += 14;
|
|
1253
1305
|
} else if (ch >= 32) {
|
|
1254
1306
|
if (ch >= 55296 && ch <= 56319 && i + 1 < data.length) {
|
|
1255
1307
|
const lo = data.readUInt16LE(i);
|
|
@@ -1268,6 +1320,886 @@ function extractText(data) {
|
|
|
1268
1320
|
return result;
|
|
1269
1321
|
}
|
|
1270
1322
|
|
|
1323
|
+
// src/hwp5/aes.ts
|
|
1324
|
+
var S_BOX = new Uint8Array([
|
|
1325
|
+
99,
|
|
1326
|
+
124,
|
|
1327
|
+
119,
|
|
1328
|
+
123,
|
|
1329
|
+
242,
|
|
1330
|
+
107,
|
|
1331
|
+
111,
|
|
1332
|
+
197,
|
|
1333
|
+
48,
|
|
1334
|
+
1,
|
|
1335
|
+
103,
|
|
1336
|
+
43,
|
|
1337
|
+
254,
|
|
1338
|
+
215,
|
|
1339
|
+
171,
|
|
1340
|
+
118,
|
|
1341
|
+
202,
|
|
1342
|
+
130,
|
|
1343
|
+
201,
|
|
1344
|
+
125,
|
|
1345
|
+
250,
|
|
1346
|
+
89,
|
|
1347
|
+
71,
|
|
1348
|
+
240,
|
|
1349
|
+
173,
|
|
1350
|
+
212,
|
|
1351
|
+
162,
|
|
1352
|
+
175,
|
|
1353
|
+
156,
|
|
1354
|
+
164,
|
|
1355
|
+
114,
|
|
1356
|
+
192,
|
|
1357
|
+
183,
|
|
1358
|
+
253,
|
|
1359
|
+
147,
|
|
1360
|
+
38,
|
|
1361
|
+
54,
|
|
1362
|
+
63,
|
|
1363
|
+
247,
|
|
1364
|
+
204,
|
|
1365
|
+
52,
|
|
1366
|
+
165,
|
|
1367
|
+
229,
|
|
1368
|
+
241,
|
|
1369
|
+
113,
|
|
1370
|
+
216,
|
|
1371
|
+
49,
|
|
1372
|
+
21,
|
|
1373
|
+
4,
|
|
1374
|
+
199,
|
|
1375
|
+
35,
|
|
1376
|
+
195,
|
|
1377
|
+
24,
|
|
1378
|
+
150,
|
|
1379
|
+
5,
|
|
1380
|
+
154,
|
|
1381
|
+
7,
|
|
1382
|
+
18,
|
|
1383
|
+
128,
|
|
1384
|
+
226,
|
|
1385
|
+
235,
|
|
1386
|
+
39,
|
|
1387
|
+
178,
|
|
1388
|
+
117,
|
|
1389
|
+
9,
|
|
1390
|
+
131,
|
|
1391
|
+
44,
|
|
1392
|
+
26,
|
|
1393
|
+
27,
|
|
1394
|
+
110,
|
|
1395
|
+
90,
|
|
1396
|
+
160,
|
|
1397
|
+
82,
|
|
1398
|
+
59,
|
|
1399
|
+
214,
|
|
1400
|
+
179,
|
|
1401
|
+
41,
|
|
1402
|
+
227,
|
|
1403
|
+
47,
|
|
1404
|
+
132,
|
|
1405
|
+
83,
|
|
1406
|
+
209,
|
|
1407
|
+
0,
|
|
1408
|
+
237,
|
|
1409
|
+
32,
|
|
1410
|
+
252,
|
|
1411
|
+
177,
|
|
1412
|
+
91,
|
|
1413
|
+
106,
|
|
1414
|
+
203,
|
|
1415
|
+
190,
|
|
1416
|
+
57,
|
|
1417
|
+
74,
|
|
1418
|
+
76,
|
|
1419
|
+
88,
|
|
1420
|
+
207,
|
|
1421
|
+
208,
|
|
1422
|
+
239,
|
|
1423
|
+
170,
|
|
1424
|
+
251,
|
|
1425
|
+
67,
|
|
1426
|
+
77,
|
|
1427
|
+
51,
|
|
1428
|
+
133,
|
|
1429
|
+
69,
|
|
1430
|
+
249,
|
|
1431
|
+
2,
|
|
1432
|
+
127,
|
|
1433
|
+
80,
|
|
1434
|
+
60,
|
|
1435
|
+
159,
|
|
1436
|
+
168,
|
|
1437
|
+
81,
|
|
1438
|
+
163,
|
|
1439
|
+
64,
|
|
1440
|
+
143,
|
|
1441
|
+
146,
|
|
1442
|
+
157,
|
|
1443
|
+
56,
|
|
1444
|
+
245,
|
|
1445
|
+
188,
|
|
1446
|
+
182,
|
|
1447
|
+
218,
|
|
1448
|
+
33,
|
|
1449
|
+
16,
|
|
1450
|
+
255,
|
|
1451
|
+
243,
|
|
1452
|
+
210,
|
|
1453
|
+
205,
|
|
1454
|
+
12,
|
|
1455
|
+
19,
|
|
1456
|
+
236,
|
|
1457
|
+
95,
|
|
1458
|
+
151,
|
|
1459
|
+
68,
|
|
1460
|
+
23,
|
|
1461
|
+
196,
|
|
1462
|
+
167,
|
|
1463
|
+
126,
|
|
1464
|
+
61,
|
|
1465
|
+
100,
|
|
1466
|
+
93,
|
|
1467
|
+
25,
|
|
1468
|
+
115,
|
|
1469
|
+
96,
|
|
1470
|
+
129,
|
|
1471
|
+
79,
|
|
1472
|
+
220,
|
|
1473
|
+
34,
|
|
1474
|
+
42,
|
|
1475
|
+
144,
|
|
1476
|
+
136,
|
|
1477
|
+
70,
|
|
1478
|
+
238,
|
|
1479
|
+
184,
|
|
1480
|
+
20,
|
|
1481
|
+
222,
|
|
1482
|
+
94,
|
|
1483
|
+
11,
|
|
1484
|
+
219,
|
|
1485
|
+
224,
|
|
1486
|
+
50,
|
|
1487
|
+
58,
|
|
1488
|
+
10,
|
|
1489
|
+
73,
|
|
1490
|
+
6,
|
|
1491
|
+
36,
|
|
1492
|
+
92,
|
|
1493
|
+
194,
|
|
1494
|
+
211,
|
|
1495
|
+
172,
|
|
1496
|
+
98,
|
|
1497
|
+
145,
|
|
1498
|
+
149,
|
|
1499
|
+
228,
|
|
1500
|
+
121,
|
|
1501
|
+
231,
|
|
1502
|
+
200,
|
|
1503
|
+
55,
|
|
1504
|
+
109,
|
|
1505
|
+
141,
|
|
1506
|
+
213,
|
|
1507
|
+
78,
|
|
1508
|
+
169,
|
|
1509
|
+
108,
|
|
1510
|
+
86,
|
|
1511
|
+
244,
|
|
1512
|
+
234,
|
|
1513
|
+
101,
|
|
1514
|
+
122,
|
|
1515
|
+
174,
|
|
1516
|
+
8,
|
|
1517
|
+
186,
|
|
1518
|
+
120,
|
|
1519
|
+
37,
|
|
1520
|
+
46,
|
|
1521
|
+
28,
|
|
1522
|
+
166,
|
|
1523
|
+
180,
|
|
1524
|
+
198,
|
|
1525
|
+
232,
|
|
1526
|
+
221,
|
|
1527
|
+
116,
|
|
1528
|
+
31,
|
|
1529
|
+
75,
|
|
1530
|
+
189,
|
|
1531
|
+
139,
|
|
1532
|
+
138,
|
|
1533
|
+
112,
|
|
1534
|
+
62,
|
|
1535
|
+
181,
|
|
1536
|
+
102,
|
|
1537
|
+
72,
|
|
1538
|
+
3,
|
|
1539
|
+
246,
|
|
1540
|
+
14,
|
|
1541
|
+
97,
|
|
1542
|
+
53,
|
|
1543
|
+
87,
|
|
1544
|
+
185,
|
|
1545
|
+
134,
|
|
1546
|
+
193,
|
|
1547
|
+
29,
|
|
1548
|
+
158,
|
|
1549
|
+
225,
|
|
1550
|
+
248,
|
|
1551
|
+
152,
|
|
1552
|
+
17,
|
|
1553
|
+
105,
|
|
1554
|
+
217,
|
|
1555
|
+
142,
|
|
1556
|
+
148,
|
|
1557
|
+
155,
|
|
1558
|
+
30,
|
|
1559
|
+
135,
|
|
1560
|
+
233,
|
|
1561
|
+
206,
|
|
1562
|
+
85,
|
|
1563
|
+
40,
|
|
1564
|
+
223,
|
|
1565
|
+
140,
|
|
1566
|
+
161,
|
|
1567
|
+
137,
|
|
1568
|
+
13,
|
|
1569
|
+
191,
|
|
1570
|
+
230,
|
|
1571
|
+
66,
|
|
1572
|
+
104,
|
|
1573
|
+
65,
|
|
1574
|
+
153,
|
|
1575
|
+
45,
|
|
1576
|
+
15,
|
|
1577
|
+
176,
|
|
1578
|
+
84,
|
|
1579
|
+
187,
|
|
1580
|
+
22
|
|
1581
|
+
]);
|
|
1582
|
+
var INV_S_BOX = new Uint8Array([
|
|
1583
|
+
82,
|
|
1584
|
+
9,
|
|
1585
|
+
106,
|
|
1586
|
+
213,
|
|
1587
|
+
48,
|
|
1588
|
+
54,
|
|
1589
|
+
165,
|
|
1590
|
+
56,
|
|
1591
|
+
191,
|
|
1592
|
+
64,
|
|
1593
|
+
163,
|
|
1594
|
+
158,
|
|
1595
|
+
129,
|
|
1596
|
+
243,
|
|
1597
|
+
215,
|
|
1598
|
+
251,
|
|
1599
|
+
124,
|
|
1600
|
+
227,
|
|
1601
|
+
57,
|
|
1602
|
+
130,
|
|
1603
|
+
155,
|
|
1604
|
+
47,
|
|
1605
|
+
255,
|
|
1606
|
+
135,
|
|
1607
|
+
52,
|
|
1608
|
+
142,
|
|
1609
|
+
67,
|
|
1610
|
+
68,
|
|
1611
|
+
196,
|
|
1612
|
+
222,
|
|
1613
|
+
233,
|
|
1614
|
+
203,
|
|
1615
|
+
84,
|
|
1616
|
+
123,
|
|
1617
|
+
148,
|
|
1618
|
+
50,
|
|
1619
|
+
166,
|
|
1620
|
+
194,
|
|
1621
|
+
35,
|
|
1622
|
+
61,
|
|
1623
|
+
238,
|
|
1624
|
+
76,
|
|
1625
|
+
149,
|
|
1626
|
+
11,
|
|
1627
|
+
66,
|
|
1628
|
+
250,
|
|
1629
|
+
195,
|
|
1630
|
+
78,
|
|
1631
|
+
8,
|
|
1632
|
+
46,
|
|
1633
|
+
161,
|
|
1634
|
+
102,
|
|
1635
|
+
40,
|
|
1636
|
+
217,
|
|
1637
|
+
36,
|
|
1638
|
+
178,
|
|
1639
|
+
118,
|
|
1640
|
+
91,
|
|
1641
|
+
162,
|
|
1642
|
+
73,
|
|
1643
|
+
109,
|
|
1644
|
+
139,
|
|
1645
|
+
209,
|
|
1646
|
+
37,
|
|
1647
|
+
114,
|
|
1648
|
+
248,
|
|
1649
|
+
246,
|
|
1650
|
+
100,
|
|
1651
|
+
134,
|
|
1652
|
+
104,
|
|
1653
|
+
152,
|
|
1654
|
+
22,
|
|
1655
|
+
212,
|
|
1656
|
+
164,
|
|
1657
|
+
92,
|
|
1658
|
+
204,
|
|
1659
|
+
93,
|
|
1660
|
+
101,
|
|
1661
|
+
182,
|
|
1662
|
+
146,
|
|
1663
|
+
108,
|
|
1664
|
+
112,
|
|
1665
|
+
72,
|
|
1666
|
+
80,
|
|
1667
|
+
253,
|
|
1668
|
+
237,
|
|
1669
|
+
185,
|
|
1670
|
+
218,
|
|
1671
|
+
94,
|
|
1672
|
+
21,
|
|
1673
|
+
70,
|
|
1674
|
+
87,
|
|
1675
|
+
167,
|
|
1676
|
+
141,
|
|
1677
|
+
157,
|
|
1678
|
+
132,
|
|
1679
|
+
144,
|
|
1680
|
+
216,
|
|
1681
|
+
171,
|
|
1682
|
+
0,
|
|
1683
|
+
140,
|
|
1684
|
+
188,
|
|
1685
|
+
211,
|
|
1686
|
+
10,
|
|
1687
|
+
247,
|
|
1688
|
+
228,
|
|
1689
|
+
88,
|
|
1690
|
+
5,
|
|
1691
|
+
184,
|
|
1692
|
+
179,
|
|
1693
|
+
69,
|
|
1694
|
+
6,
|
|
1695
|
+
208,
|
|
1696
|
+
44,
|
|
1697
|
+
30,
|
|
1698
|
+
143,
|
|
1699
|
+
202,
|
|
1700
|
+
63,
|
|
1701
|
+
15,
|
|
1702
|
+
2,
|
|
1703
|
+
193,
|
|
1704
|
+
175,
|
|
1705
|
+
189,
|
|
1706
|
+
3,
|
|
1707
|
+
1,
|
|
1708
|
+
19,
|
|
1709
|
+
138,
|
|
1710
|
+
107,
|
|
1711
|
+
58,
|
|
1712
|
+
145,
|
|
1713
|
+
17,
|
|
1714
|
+
65,
|
|
1715
|
+
79,
|
|
1716
|
+
103,
|
|
1717
|
+
220,
|
|
1718
|
+
234,
|
|
1719
|
+
151,
|
|
1720
|
+
242,
|
|
1721
|
+
207,
|
|
1722
|
+
206,
|
|
1723
|
+
240,
|
|
1724
|
+
180,
|
|
1725
|
+
230,
|
|
1726
|
+
115,
|
|
1727
|
+
150,
|
|
1728
|
+
172,
|
|
1729
|
+
116,
|
|
1730
|
+
34,
|
|
1731
|
+
231,
|
|
1732
|
+
173,
|
|
1733
|
+
53,
|
|
1734
|
+
133,
|
|
1735
|
+
226,
|
|
1736
|
+
249,
|
|
1737
|
+
55,
|
|
1738
|
+
232,
|
|
1739
|
+
28,
|
|
1740
|
+
117,
|
|
1741
|
+
223,
|
|
1742
|
+
110,
|
|
1743
|
+
71,
|
|
1744
|
+
241,
|
|
1745
|
+
26,
|
|
1746
|
+
113,
|
|
1747
|
+
29,
|
|
1748
|
+
41,
|
|
1749
|
+
197,
|
|
1750
|
+
137,
|
|
1751
|
+
111,
|
|
1752
|
+
183,
|
|
1753
|
+
98,
|
|
1754
|
+
14,
|
|
1755
|
+
170,
|
|
1756
|
+
24,
|
|
1757
|
+
190,
|
|
1758
|
+
27,
|
|
1759
|
+
252,
|
|
1760
|
+
86,
|
|
1761
|
+
62,
|
|
1762
|
+
75,
|
|
1763
|
+
198,
|
|
1764
|
+
210,
|
|
1765
|
+
121,
|
|
1766
|
+
32,
|
|
1767
|
+
154,
|
|
1768
|
+
219,
|
|
1769
|
+
192,
|
|
1770
|
+
254,
|
|
1771
|
+
120,
|
|
1772
|
+
205,
|
|
1773
|
+
90,
|
|
1774
|
+
244,
|
|
1775
|
+
31,
|
|
1776
|
+
221,
|
|
1777
|
+
168,
|
|
1778
|
+
51,
|
|
1779
|
+
136,
|
|
1780
|
+
7,
|
|
1781
|
+
199,
|
|
1782
|
+
49,
|
|
1783
|
+
177,
|
|
1784
|
+
18,
|
|
1785
|
+
16,
|
|
1786
|
+
89,
|
|
1787
|
+
39,
|
|
1788
|
+
128,
|
|
1789
|
+
236,
|
|
1790
|
+
95,
|
|
1791
|
+
96,
|
|
1792
|
+
81,
|
|
1793
|
+
127,
|
|
1794
|
+
169,
|
|
1795
|
+
25,
|
|
1796
|
+
181,
|
|
1797
|
+
74,
|
|
1798
|
+
13,
|
|
1799
|
+
45,
|
|
1800
|
+
229,
|
|
1801
|
+
122,
|
|
1802
|
+
159,
|
|
1803
|
+
147,
|
|
1804
|
+
201,
|
|
1805
|
+
156,
|
|
1806
|
+
239,
|
|
1807
|
+
160,
|
|
1808
|
+
224,
|
|
1809
|
+
59,
|
|
1810
|
+
77,
|
|
1811
|
+
174,
|
|
1812
|
+
42,
|
|
1813
|
+
245,
|
|
1814
|
+
176,
|
|
1815
|
+
200,
|
|
1816
|
+
235,
|
|
1817
|
+
187,
|
|
1818
|
+
60,
|
|
1819
|
+
131,
|
|
1820
|
+
83,
|
|
1821
|
+
153,
|
|
1822
|
+
97,
|
|
1823
|
+
23,
|
|
1824
|
+
43,
|
|
1825
|
+
4,
|
|
1826
|
+
126,
|
|
1827
|
+
186,
|
|
1828
|
+
119,
|
|
1829
|
+
214,
|
|
1830
|
+
38,
|
|
1831
|
+
225,
|
|
1832
|
+
105,
|
|
1833
|
+
20,
|
|
1834
|
+
99,
|
|
1835
|
+
85,
|
|
1836
|
+
33,
|
|
1837
|
+
12,
|
|
1838
|
+
125
|
|
1839
|
+
]);
|
|
1840
|
+
var RCON = new Uint8Array([1, 2, 4, 8, 16, 32, 64, 128, 27, 54]);
|
|
1841
|
+
function gmul(a, b) {
|
|
1842
|
+
let p = 0;
|
|
1843
|
+
for (let i = 0; i < 8; i++) {
|
|
1844
|
+
if (b & 1) p ^= a;
|
|
1845
|
+
const hi = a & 128;
|
|
1846
|
+
a = a << 1 & 255;
|
|
1847
|
+
if (hi) a ^= 27;
|
|
1848
|
+
b >>= 1;
|
|
1849
|
+
}
|
|
1850
|
+
return p;
|
|
1851
|
+
}
|
|
1852
|
+
function expandKey(key) {
|
|
1853
|
+
const w = new Uint32Array(44);
|
|
1854
|
+
for (let i = 0; i < 4; i++) {
|
|
1855
|
+
w[i] = key[4 * i] << 24 | key[4 * i + 1] << 16 | key[4 * i + 2] << 8 | key[4 * i + 3];
|
|
1856
|
+
}
|
|
1857
|
+
for (let i = 4; i < 44; i++) {
|
|
1858
|
+
let temp = w[i - 1];
|
|
1859
|
+
if (i % 4 === 0) {
|
|
1860
|
+
temp = (temp << 8 | temp >>> 24) >>> 0;
|
|
1861
|
+
temp = S_BOX[temp >>> 24 & 255] << 24 | S_BOX[temp >>> 16 & 255] << 16 | S_BOX[temp >>> 8 & 255] << 8 | S_BOX[temp & 255];
|
|
1862
|
+
temp = (temp ^ RCON[i / 4 - 1] << 24) >>> 0;
|
|
1863
|
+
}
|
|
1864
|
+
w[i] = (w[i - 4] ^ temp) >>> 0;
|
|
1865
|
+
}
|
|
1866
|
+
return w;
|
|
1867
|
+
}
|
|
1868
|
+
function decryptBlock(block, roundKeys) {
|
|
1869
|
+
const s = new Uint8Array(16);
|
|
1870
|
+
for (let i = 0; i < 16; i++) s[i] = block[i];
|
|
1871
|
+
addRoundKey(s, roundKeys, 10);
|
|
1872
|
+
for (let round = 9; round >= 1; round--) {
|
|
1873
|
+
invShiftRows(s);
|
|
1874
|
+
invSubBytes(s);
|
|
1875
|
+
addRoundKey(s, roundKeys, round);
|
|
1876
|
+
invMixColumns(s);
|
|
1877
|
+
}
|
|
1878
|
+
invShiftRows(s);
|
|
1879
|
+
invSubBytes(s);
|
|
1880
|
+
addRoundKey(s, roundKeys, 0);
|
|
1881
|
+
return s;
|
|
1882
|
+
}
|
|
1883
|
+
function addRoundKey(s, w, round) {
|
|
1884
|
+
const base = round * 4;
|
|
1885
|
+
for (let c = 0; c < 4; c++) {
|
|
1886
|
+
const k = w[base + c];
|
|
1887
|
+
s[c * 4] ^= k >>> 24 & 255;
|
|
1888
|
+
s[c * 4 + 1] ^= k >>> 16 & 255;
|
|
1889
|
+
s[c * 4 + 2] ^= k >>> 8 & 255;
|
|
1890
|
+
s[c * 4 + 3] ^= k & 255;
|
|
1891
|
+
}
|
|
1892
|
+
}
|
|
1893
|
+
function invSubBytes(s) {
|
|
1894
|
+
for (let i = 0; i < 16; i++) s[i] = INV_S_BOX[s[i]];
|
|
1895
|
+
}
|
|
1896
|
+
function invShiftRows(s) {
|
|
1897
|
+
let t = s[13];
|
|
1898
|
+
s[13] = s[9];
|
|
1899
|
+
s[9] = s[5];
|
|
1900
|
+
s[5] = s[1];
|
|
1901
|
+
s[1] = t;
|
|
1902
|
+
t = s[2];
|
|
1903
|
+
s[2] = s[10];
|
|
1904
|
+
s[10] = t;
|
|
1905
|
+
t = s[6];
|
|
1906
|
+
s[6] = s[14];
|
|
1907
|
+
s[14] = t;
|
|
1908
|
+
t = s[3];
|
|
1909
|
+
s[3] = s[7];
|
|
1910
|
+
s[7] = s[11];
|
|
1911
|
+
s[11] = s[15];
|
|
1912
|
+
s[15] = t;
|
|
1913
|
+
}
|
|
1914
|
+
function invMixColumns(s) {
|
|
1915
|
+
for (let c = 0; c < 4; c++) {
|
|
1916
|
+
const i = c * 4;
|
|
1917
|
+
const a0 = s[i], a1 = s[i + 1], a2 = s[i + 2], a3 = s[i + 3];
|
|
1918
|
+
s[i] = gmul(a0, 14) ^ gmul(a1, 11) ^ gmul(a2, 13) ^ gmul(a3, 9);
|
|
1919
|
+
s[i + 1] = gmul(a0, 9) ^ gmul(a1, 14) ^ gmul(a2, 11) ^ gmul(a3, 13);
|
|
1920
|
+
s[i + 2] = gmul(a0, 13) ^ gmul(a1, 9) ^ gmul(a2, 14) ^ gmul(a3, 11);
|
|
1921
|
+
s[i + 3] = gmul(a0, 11) ^ gmul(a1, 13) ^ gmul(a2, 9) ^ gmul(a3, 14);
|
|
1922
|
+
}
|
|
1923
|
+
}
|
|
1924
|
+
function aes128EcbDecrypt(data, key) {
|
|
1925
|
+
if (key.length !== 16) throw new Error("AES-128 \uD0A4\uB294 16\uBC14\uC774\uD2B8\uC5EC\uC57C \uD569\uB2C8\uB2E4");
|
|
1926
|
+
if (data.length % 16 !== 0) throw new Error("AES ECB \uC785\uB825\uC740 16\uBC14\uC774\uD2B8\uC758 \uBC30\uC218\uC5EC\uC57C \uD569\uB2C8\uB2E4");
|
|
1927
|
+
const roundKeys = expandKey(key);
|
|
1928
|
+
const out = new Uint8Array(data.length);
|
|
1929
|
+
for (let offset = 0; offset < data.length; offset += 16) {
|
|
1930
|
+
const block = data.subarray(offset, offset + 16);
|
|
1931
|
+
const decrypted = decryptBlock(block, roundKeys);
|
|
1932
|
+
out.set(decrypted, offset);
|
|
1933
|
+
}
|
|
1934
|
+
return out;
|
|
1935
|
+
}
|
|
1936
|
+
|
|
1937
|
+
// src/hwp5/crypto.ts
|
|
1938
|
+
var MsvcLcg = class {
|
|
1939
|
+
seed;
|
|
1940
|
+
constructor(seed) {
|
|
1941
|
+
this.seed = seed >>> 0;
|
|
1942
|
+
}
|
|
1943
|
+
/** 0 ~ 0x7FFF 범위 난수 반환 (MSVC rand() 호환) */
|
|
1944
|
+
rand() {
|
|
1945
|
+
this.seed = Math.imul(this.seed, 214013) + 2531011 >>> 0;
|
|
1946
|
+
return this.seed >>> 16 & 32767;
|
|
1947
|
+
}
|
|
1948
|
+
};
|
|
1949
|
+
function decryptDistributePayload(payload) {
|
|
1950
|
+
if (payload.length < 256) throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 256\uBC14\uC774\uD2B8 \uBBF8\uB9CC\uC785\uB2C8\uB2E4");
|
|
1951
|
+
const seed = (payload[0] | payload[1] << 8 | payload[2] << 16 | payload[3] << 24) >>> 0;
|
|
1952
|
+
const lcg = new MsvcLcg(seed);
|
|
1953
|
+
const result = new Uint8Array(256);
|
|
1954
|
+
result[0] = payload[0];
|
|
1955
|
+
result[1] = payload[1];
|
|
1956
|
+
result[2] = payload[2];
|
|
1957
|
+
result[3] = payload[3];
|
|
1958
|
+
let i = 4;
|
|
1959
|
+
while (i < 256) {
|
|
1960
|
+
const keyByte = lcg.rand() & 255;
|
|
1961
|
+
const n = (lcg.rand() & 15) + 1;
|
|
1962
|
+
for (let j = 0; j < n && i < 256; j++, i++) {
|
|
1963
|
+
result[i] = payload[i] ^ keyByte;
|
|
1964
|
+
}
|
|
1965
|
+
}
|
|
1966
|
+
return result;
|
|
1967
|
+
}
|
|
1968
|
+
/**
 * Pulls the 16-byte AES key out of a decoded distribution payload.
 *
 * The key starts at `4 + (low nibble of byte 0)`.
 *
 * @param {Uint8Array} decryptedPayload Decoded payload bytes.
 * @returns {Uint8Array} A 16-byte copy of the key material.
 * @throws {Error} If the key window extends past the end of the payload.
 */
function extractAesKey(decryptedPayload) {
  const keyStart = (decryptedPayload[0] & 15) + 4;
  const keyEnd = keyStart + 16;
  if (keyEnd > decryptedPayload.length) {
    throw new Error("AES \uD0A4 \uCD94\uCD9C \uC2E4\uD328: \uC624\uD504\uC14B\uC774 payload \uBC94\uC704\uB97C \uCD08\uACFC\uD569\uB2C8\uB2E4");
  }
  return decryptedPayload.slice(keyStart, keyEnd);
}
|
|
1975
|
+
/**
 * Decodes a record header located at `offset`.
 *
 * The 32-bit little-endian header word carries the tag id in bits 0-9 and the
 * payload size in bits 20-31. A size field of 4095 means the real size follows
 * as an extra 32-bit little-endian word, making the header 8 bytes long.
 *
 * @param {Uint8Array} data Bytes containing the record.
 * @param {number} offset Position of the header inside `data`.
 * @returns {{tagId: number, size: number, headerSize: number}}
 * @throws {Error} If the (extended) header does not fit in `data`.
 */
function parseRecordHeader(data, offset) {
  const readU32 = (pos) => (data[pos] | data[pos + 1] << 8 | data[pos + 2] << 16 | data[pos + 3] << 24) >>> 0;
  if (offset + 4 > data.length) throw new Error("\uB808\uCF54\uB4DC \uD5E4\uB354 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  const word = readU32(offset);
  const tagId = word & 1023;
  const inlineSize = (word >>> 20) & 4095;
  if (inlineSize !== 4095) {
    return { tagId, size: inlineSize, headerSize: 4 };
  }
  if (offset + 8 > data.length) throw new Error("\uD655\uC7A5 \uB808\uCF54\uB4DC \uD06C\uAE30 \uD30C\uC2F1 \uC2E4\uD328: \uB370\uC774\uD130 \uBD80\uC871");
  return { tagId, size: readU32(offset + 4), headerSize: 8 };
}
|
|
1988
|
+
// Tag id expected on the first record of a distribution-document section.
var TAG_DISTRIBUTE_DOC_DATA = 16 + 28;
/**
 * Decrypts one ViewText section stream of a distribution document.
 *
 * Steps: parse the leading record header, require it to be the
 * DISTRIBUTE_DOC_DATA record with at least 256 payload bytes, decode that
 * payload to obtain the AES key, AES-128-ECB decrypt everything after the
 * record (truncated down to a multiple of 16 bytes), and optionally inflate.
 *
 * @param {Uint8Array} viewTextRaw Raw section stream bytes (copied on entry).
 * @param {boolean} compressed Whether the decrypted body should be inflated.
 * @returns {Buffer} Decrypted (and, when possible, decompressed) section data.
 * @throws {Error} If the first record is not the expected tag, the payload is
 *   out of range or shorter than 256 bytes, or no full cipher block follows.
 */
function decryptViewText(viewTextRaw, compressed) {
  const bytes = new Uint8Array(viewTextRaw);
  const { tagId, size: recordSize, headerSize } = parseRecordHeader(bytes, 0);
  if (tagId !== TAG_DISTRIBUTE_DOC_DATA) {
    throw new Error(`\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC758 \uCCAB \uB808\uCF54\uB4DC\uAC00 DISTRIBUTE_DOC_DATA(${TAG_DISTRIBUTE_DOC_DATA})\uAC00 \uC544\uB2D9\uB2C8\uB2E4 (\uC2E4\uC81C: ${tagId})`);
  }
  const bodyStart = headerSize + recordSize;
  if (bodyStart > bytes.length || recordSize < 256) {
    throw new Error("\uBC30\uD3EC\uC6A9 payload\uAC00 \uC720\uD6A8\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
  }
  // Only the first 256 payload bytes take part in key derivation.
  const seedBlock = bytes.subarray(headerSize, headerSize + 256);
  const aesKey = extractAesKey(decryptDistributePayload(seedBlock));
  const cipherText = bytes.subarray(bodyStart);
  if (cipherText.length === 0) {
    throw new Error("\uBC30\uD3EC\uC6A9 \uBB38\uC11C\uC5D0 \uC554\uD638\uD654\uB41C \uBCF8\uBB38 \uB370\uC774\uD130\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
  }
  // ECB works on whole 16-byte blocks; any trailing partial block is dropped.
  const usableLength = cipherText.length - cipherText.length % 16;
  if (usableLength === 0) {
    throw new Error("\uC554\uD638\uD654\uB41C \uB370\uC774\uD130\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (16\uBC14\uC774\uD2B8 \uBBF8\uB9CC)");
  }
  const plain = aes128EcbDecrypt(cipherText.subarray(0, usableLength), aesKey);
  if (!compressed) {
    return Buffer.from(plain);
  }
  try {
    return decompressStream(Buffer.from(plain));
  } catch {
    // Best effort: if inflation fails, hand back the decrypted bytes as they are.
    return Buffer.from(plain);
  }
}
|
|
2023
|
+
|
|
2024
|
+
// src/hwp5/cfb-lenient.ts
|
|
2025
|
+
// Compound File Binary signature (first 8 bytes of the file).
var CFB_MAGIC = Buffer.from([208, 207, 17, 224, 161, 177, 26, 225]);
// FAT marker: last sector of a chain.
var END_OF_CHAIN = 4294967294;
// FAT marker: unallocated sector. The same value (0xFFFFFFFF) marks "no entry"
// in the directory sibling/child fields.
var FREE_SECT = 4294967295;
// Hard limits that keep a malformed file from exhausting time or memory.
var MAX_CHAIN_LENGTH = 1e6;
var MAX_DIR_ENTRIES = 1e5;
var MAX_STREAM_SIZE = 100 * 1024 * 1024;
/**
 * Tolerant Compound File Binary reader used when the strict parser rejects a
 * container. It reads the header, FAT (including DIFAT sectors), directory and
 * mini stream directly from `data`, treating out-of-range sectors as empty
 * instead of failing.
 *
 * @param {Buffer} data Whole file contents.
 * @returns {{findStream(path: string): (Buffer|null), entries(): Array<object>}}
 *   `findStream` resolves a stream by path: it first walks the directory tree
 *   from the root (so equally named streams in different storages are told
 *   apart) and falls back to the first stream whose name equals the last path
 *   segment. `entries` lists all stream-type directory entries.
 * @throws {Error} If the file is shorter than 512 bytes, the signature does
 *   not match, or a sector-size shift is outside the accepted range.
 */
function parseLenientCfb(data) {
  if (data.length < 512) throw new Error("CFB \uD30C\uC77C\uC774 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 512\uBC14\uC774\uD2B8)");
  if (!data.subarray(0, 8).equals(CFB_MAGIC)) throw new Error("CFB \uB9E4\uC9C1 \uBC14\uC774\uD2B8 \uBD88\uC77C\uCE58");
  const sectorSizeShift = data.readUInt16LE(30);
  if (sectorSizeShift < 7 || sectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + sectorSizeShift);
  const sectorSize = 1 << sectorSizeShift;
  const miniSectorSizeShift = data.readUInt16LE(32);
  if (miniSectorSizeShift > 16) throw new Error("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uBBF8\uB2C8 \uC139\uD130 \uD06C\uAE30 \uC2DC\uD504\uD2B8: " + miniSectorSizeShift);
  const miniSectorSize = 1 << miniSectorSizeShift;
  const fatSectorCount = data.readUInt32LE(44);
  const firstDirSector = data.readUInt32LE(48);
  const miniStreamCutoff = data.readUInt32LE(56);
  const firstMiniFatSector = data.readUInt32LE(60);
  const miniFatSectorCount = data.readUInt32LE(64);
  const firstDifatSector = data.readUInt32LE(68);
  const difatSectorCount = data.readUInt32LE(72);
  // The header occupies one whole sector: 512 bytes for sectors up to 512
  // bytes, and the full (padded) sector for larger sector sizes such as 4096.
  const firstSectorOffset = Math.max(512, sectorSize);
  function sectorOffset(id) {
    return firstSectorOffset + id * sectorSize;
  }
  // Returns the sector's bytes, or an empty buffer when it lies outside the file.
  function readSectorData(id) {
    const off = sectorOffset(id);
    if (off + sectorSize > data.length) return Buffer.alloc(0);
    return data.subarray(off, off + sectorSize);
  }
  // FAT sector locations: first the 109 slots stored in the header ...
  const fatSectors = [];
  for (let i = 0; i < 109 && fatSectors.length < fatSectorCount; i++) {
    const sid = data.readUInt32LE(76 + i * 4);
    if (sid === FREE_SECT || sid === END_OF_CHAIN) break;
    fatSectors.push(sid);
  }
  // ... then any additional DIFAT sectors, each ending with a link to the next.
  let difatSector = firstDifatSector;
  const visitedDifat = /* @__PURE__ */ new Set();
  const entriesPerDifatSector = sectorSize / 4 - 1;
  for (let d = 0; d < difatSectorCount && difatSector !== END_OF_CHAIN && difatSector !== FREE_SECT; d++) {
    if (visitedDifat.has(difatSector)) break;
    visitedDifat.add(difatSector);
    const buf = readSectorData(difatSector);
    // A DIFAT sector outside the file cannot be read; stop following the
    // chain instead of letting readUInt32LE throw on an empty buffer.
    if (buf.length < sectorSize) break;
    for (let i = 0; i < entriesPerDifatSector && fatSectors.length < fatSectorCount; i++) {
      const sid = buf.readUInt32LE(i * 4);
      if (sid === FREE_SECT || sid === END_OF_CHAIN) continue;
      fatSectors.push(sid);
    }
    difatSector = buf.readUInt32LE(entriesPerDifatSector * 4);
  }
  const entriesPerFatSector = sectorSize / 4;
  const fatTable = new Uint32Array(fatSectors.length * entriesPerFatSector);
  for (let fi = 0; fi < fatSectors.length; fi++) {
    const buf = readSectorData(fatSectors[fi]);
    for (let i = 0; i < entriesPerFatSector; i++) {
      // Unreadable FAT sectors contribute FREE_SECT entries.
      fatTable[fi * entriesPerFatSector + i] = i * 4 + 3 < buf.length ? buf.readUInt32LE(i * 4) : FREE_SECT;
    }
  }
  // Follows a FAT chain and concatenates up to maxBytes of sector data.
  function readChain(startSector, maxBytes) {
    if (startSector === END_OF_CHAIN || startSector === FREE_SECT) return Buffer.alloc(0);
    if (maxBytes > MAX_STREAM_SIZE) throw new Error("\uC2A4\uD2B8\uB9BC\uC774 \uB108\uBB34 \uD07D\uB2C8\uB2E4");
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < maxBytes) {
      if (visited.has(current)) break; // cycle in the chain
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const buf = readSectorData(current);
      const remaining = maxBytes - totalRead;
      chunks.push(remaining < sectorSize ? buf.subarray(0, remaining) : buf);
      totalRead += Math.min(buf.length, remaining);
      current = current < fatTable.length ? fatTable[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }
  let miniFatTable = null;
  // Lazily loads the mini FAT; the result is cached.
  function getMiniFatTable() {
    if (miniFatTable) return miniFatTable;
    if (miniFatSectorCount === 0 || firstMiniFatSector === END_OF_CHAIN) {
      miniFatTable = new Uint32Array(0);
      return miniFatTable;
    }
    // The sector count comes straight from the header; clamp it so a bogus
    // value cannot make readChain throw on every small-stream lookup.
    const miniFatBytes = Math.min(miniFatSectorCount * sectorSize, MAX_STREAM_SIZE);
    const miniFatData = readChain(firstMiniFatSector, miniFatBytes);
    const entries = Math.floor(miniFatData.length / 4);
    miniFatTable = new Uint32Array(entries);
    for (let i = 0; i < entries; i++) {
      miniFatTable[i] = miniFatData.readUInt32LE(i * 4);
    }
    return miniFatTable;
  }
  // Directory: fixed 128-byte entries. Unusable entries are kept as blank
  // placeholders so that entry indices stay aligned with sibling/child ids.
  const dirData = readChain(firstDirSector, MAX_DIR_ENTRIES * 128);
  const dirEntries = [];
  for (let offset = 0; offset + 128 <= dirData.length && dirEntries.length < MAX_DIR_ENTRIES; offset += 128) {
    const nameLen = dirData.readUInt16LE(offset + 64);
    if (nameLen <= 0 || nameLen > 64) {
      dirEntries.push({ name: "", type: 0, startSector: 0, size: 0, left: FREE_SECT, right: FREE_SECT, child: FREE_SECT });
      continue;
    }
    // nameLen counts bytes including the 2-byte UTF-16 terminator.
    const nameBytes = nameLen - 2;
    const name = nameBytes > 0 ? dirData.subarray(offset, offset + nameBytes).toString("utf16le") : "";
    const type = dirData[offset + 66];
    const left = dirData.readUInt32LE(offset + 68);
    const right = dirData.readUInt32LE(offset + 72);
    const child = dirData.readUInt32LE(offset + 76);
    const startSector = dirData.readUInt32LE(offset + 116);
    const size = dirData.readUInt32LE(offset + 120);
    dirEntries.push({ name, type, startSector, size, left, right, child });
  }
  let miniStreamData = null;
  // Lazily loads the mini stream container referenced by the root entry (type 5).
  function getMiniStream() {
    if (miniStreamData) return miniStreamData;
    const root = dirEntries[0];
    if (!root || root.type !== 5) {
      miniStreamData = Buffer.alloc(0);
      return miniStreamData;
    }
    miniStreamData = readChain(root.startSector, root.size || MAX_STREAM_SIZE);
    return miniStreamData;
  }
  // Reads a small stream by following the mini FAT inside the mini stream.
  function readMiniStream(startSector, size) {
    const mft = getMiniFatTable();
    const ms = getMiniStream();
    if (mft.length === 0 || ms.length === 0) return Buffer.alloc(0);
    const chunks = [];
    let current = startSector;
    let totalRead = 0;
    const visited = /* @__PURE__ */ new Set();
    while (current !== END_OF_CHAIN && current !== FREE_SECT && totalRead < size) {
      if (visited.has(current)) break;
      if (visited.size > MAX_CHAIN_LENGTH) break;
      visited.add(current);
      const off = current * miniSectorSize;
      const remaining = size - totalRead;
      const chunkSize = Math.min(miniSectorSize, remaining);
      if (off + chunkSize <= ms.length) {
        chunks.push(ms.subarray(off, off + chunkSize));
      }
      totalRead += chunkSize;
      current = current < mft.length ? mft[current] : END_OF_CHAIN;
    }
    return Buffer.concat(chunks);
  }
  // Streams below the cutoff live in the mini stream; if that yields nothing,
  // fall back to reading the start sector as a regular FAT chain.
  function readStreamData(entry) {
    if (entry.size === 0) return Buffer.alloc(0);
    if (entry.size < miniStreamCutoff) {
      const miniResult = readMiniStream(entry.startSector, entry.size);
      if (miniResult.length > 0) return miniResult;
    }
    return readChain(entry.startSector, entry.size);
  }
  // Collects the directory ids of all direct children of a storage by walking
  // its sibling tree; guarded against cycles and out-of-range ids.
  function childIdsOf(parentId) {
    const ids = [];
    const parent = dirEntries[parentId];
    if (!parent) return ids;
    const seen = /* @__PURE__ */ new Set();
    const pending = [parent.child];
    while (pending.length > 0) {
      const id = pending.pop();
      if (id === FREE_SECT || id >= dirEntries.length || seen.has(id)) continue;
      seen.add(id);
      ids.push(id);
      pending.push(dirEntries[id].left, dirEntries[id].right);
    }
    return ids;
  }
  // Resolves path segments through storages (type 1) down to a stream (type 2).
  function findEntryInTree(parts) {
    const root = dirEntries[0];
    if (!root || root.type !== 5) return null;
    let scope = 0;
    for (let p = 0; p < parts.length; p++) {
      const wantedType = p === parts.length - 1 ? 2 : 1;
      const hit = childIdsOf(scope).find((id) => dirEntries[id].type === wantedType && dirEntries[id].name === parts[p]);
      if (hit === void 0) return null;
      scope = hit;
    }
    return dirEntries[scope];
  }
  function findEntryByPath(path) {
    const parts = path.replace(/^\//, "").split("/");
    const viaTree = findEntryInTree(parts);
    if (viaTree) return viaTree;
    // Damaged directory trees: fall back to the first stream carrying the name.
    const lastPart = parts[parts.length - 1];
    return dirEntries.find((e) => e.type === 2 && e.name === lastPart) ?? null;
  }
  return {
    findStream(path) {
      const normalized = path.replace(/^\//, "");
      const entry = findEntryByPath(normalized);
      if (!entry || entry.type !== 2) return null;
      const stream = readStreamData(entry);
      return stream.length > 0 ? stream : null;
    },
    entries() {
      return dirEntries.filter((e) => e.type === 2);
    }
  };
}
|
|
2202
|
+
|
|
1271
2203
|
// src/hwp5/parser.ts
|
|
1272
2204
|
init_page_range();
|
|
1273
2205
|
import { createRequire } from "module";
|
|
@@ -1276,21 +2208,40 @@ var CFB = require2("cfb");
|
|
|
1276
2208
|
var MAX_SECTIONS = 100;
|
|
1277
2209
|
var MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024;
|
|
1278
2210
|
function parseHwp5Document(buffer, options) {
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
2211
|
+
let cfb = null;
|
|
2212
|
+
let lenientCfb = null;
|
|
2213
|
+
const warnings = [];
|
|
2214
|
+
try {
|
|
2215
|
+
cfb = CFB.parse(buffer);
|
|
2216
|
+
} catch {
|
|
2217
|
+
try {
|
|
2218
|
+
lenientCfb = parseLenientCfb(buffer);
|
|
2219
|
+
warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
|
|
2220
|
+
} catch {
|
|
2221
|
+
throw new KordocError("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
|
|
2222
|
+
}
|
|
2223
|
+
}
|
|
2224
|
+
const findStream = (path) => {
|
|
2225
|
+
if (cfb) {
|
|
2226
|
+
const entry = CFB.find(cfb, path);
|
|
2227
|
+
return entry?.content ? Buffer.from(entry.content) : null;
|
|
2228
|
+
}
|
|
2229
|
+
return lenientCfb.findStream(path);
|
|
2230
|
+
};
|
|
2231
|
+
const headerData = findStream("/FileHeader");
|
|
2232
|
+
if (!headerData) throw new KordocError("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
|
|
2233
|
+
const header = parseFileHeader(headerData);
|
|
1283
2234
|
if (header.signature !== "HWP Document File") throw new KordocError("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
|
|
1284
2235
|
if (header.flags & FLAG_ENCRYPTED) throw new KordocError("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
1285
2236
|
if (header.flags & FLAG_DRM) throw new KordocError("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
1286
2237
|
const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
|
|
2238
|
+
const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
|
|
1287
2239
|
const metadata = {
|
|
1288
2240
|
version: `${header.versionMajor}.x`
|
|
1289
2241
|
};
|
|
1290
|
-
extractHwp5Metadata(cfb, metadata);
|
|
1291
|
-
const docInfo = parseDocInfoStream(cfb, compressed);
|
|
1292
|
-
const
|
|
1293
|
-
const sections = findSections(cfb);
|
|
2242
|
+
if (cfb) extractHwp5Metadata(cfb, metadata);
|
|
2243
|
+
const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
|
|
2244
|
+
const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
|
|
1294
2245
|
if (sections.length === 0) throw new KordocError("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
1295
2246
|
metadata.pageCount = sections.length;
|
|
1296
2247
|
const pageFilter = options?.pages ? parsePageRange(options.pages, sections.length) : null;
|
|
@@ -1302,7 +2253,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
1302
2253
|
if (pageFilter && !pageFilter.has(si + 1)) continue;
|
|
1303
2254
|
try {
|
|
1304
2255
|
const sectionData = sections[si];
|
|
1305
|
-
const data = compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
2256
|
+
const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
1306
2257
|
totalDecompressed += data.length;
|
|
1307
2258
|
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new KordocError("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
1308
2259
|
const records = readRecords(data);
|
|
@@ -1315,7 +2266,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
1315
2266
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
1316
2267
|
}
|
|
1317
2268
|
}
|
|
1318
|
-
const images = extractHwp5Images(cfb, blocks, compressed, warnings);
|
|
2269
|
+
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
1319
2270
|
if (docInfo) {
|
|
1320
2271
|
detectHwp5Headings(blocks, docInfo);
|
|
1321
2272
|
}
|
|
@@ -1334,6 +2285,15 @@ function parseDocInfoStream(cfb, compressed) {
|
|
|
1334
2285
|
return null;
|
|
1335
2286
|
}
|
|
1336
2287
|
}
|
|
2288
|
+
/**
 * Builds the DocInfo structure from an already extracted DocInfo stream.
 *
 * @param {Buffer|null} raw Stream bytes, or a falsy value when the stream is missing.
 * @param {boolean} compressed Whether `raw` must be decompressed first.
 * @returns {object|null} Result of `parseDocInfo`, or `null` when the stream is
 *   missing or any step (decompression, record reading, parsing) throws.
 */
function parseDocInfoFromStream(raw, compressed) {
  if (!raw) {
    return null;
  }
  let docInfo = null;
  try {
    const payload = compressed ? decompressStream(raw) : raw;
    const records = readRecords(payload);
    docInfo = parseDocInfo(records);
  } catch {
    // DocInfo is optional for the caller; any failure degrades to "no DocInfo".
    docInfo = null;
  }
  return docInfo;
}
|
|
1337
2297
|
function detectHwp5Headings(blocks, docInfo) {
|
|
1338
2298
|
let baseFontSize = 0;
|
|
1339
2299
|
for (const style of docInfo.styles) {
|
|
@@ -1413,6 +2373,20 @@ function extractHwp5Metadata(cfb, metadata) {
|
|
|
1413
2373
|
} catch {
|
|
1414
2374
|
}
|
|
1415
2375
|
}
|
|
2376
|
+
/**
 * Collects and decrypts the `/ViewText/Section<N>` streams of a distribution
 * document from a strictly parsed container.
 *
 * Sections are probed in order starting at 0. Probing stops at the first
 * missing/empty section, at the first section whose decryption throws, or
 * after MAX_SECTIONS sections.
 *
 * @param {object} cfb Container handle accepted by `CFB.find`.
 * @param {boolean} compressed Forwarded to `decryptViewText`.
 * @returns {Array<Buffer>} Decrypted section buffers in section order.
 */
function findViewTextSections(cfb, compressed) {
  const found = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const entry = CFB.find(cfb, `/ViewText/Section${index}`);
    if (!entry?.content) break;
    let plain;
    try {
      plain = decryptViewText(Buffer.from(entry.content), compressed);
    } catch {
      // An undecryptable section ends the scan; earlier sections are kept.
      break;
    }
    found.push({ idx: index, content: plain });
    index += 1;
  }
  found.sort((a, b) => a.idx - b.idx);
  return found.map((section) => section.content);
}
|
|
1416
2390
|
function findSections(cfb) {
|
|
1417
2391
|
const sections = [];
|
|
1418
2392
|
for (let i = 0; i < MAX_SECTIONS; i++) {
|
|
@@ -1431,6 +2405,38 @@ function findSections(cfb) {
|
|
|
1431
2405
|
}
|
|
1432
2406
|
return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
|
|
1433
2407
|
}
|
|
2408
|
+
/**
 * Collects the body-text section streams from a leniently parsed container.
 *
 * First probes `/BodyText/Section<N>` (then the bare `Section<N>` name) for
 * N = 0, 1, ... until one is missing. If nothing was found that way, falls
 * back to every stream whose name starts with "Section", ordered by the
 * number in its name.
 *
 * The streams are returned exactly as stored. The caller decompresses each
 * non-distribution section itself and reports failures per section, so
 * inflating here as well would decompress the data twice and let a single bad
 * section abort the whole parse.
 *
 * @param {{findStream(path: string): (Buffer|null), entries(): Array<{name: string}>}} lcfb
 *   Lenient container reader.
 * @param {boolean} compressed Unused; kept so existing call sites stay valid.
 * @returns {Array<Buffer>} Raw section streams in section order.
 */
function findSectionsLenient(lcfb, compressed) {
  const sections = [];
  for (let i = 0; i < MAX_SECTIONS; i++) {
    const raw = lcfb.findStream(`/BodyText/Section${i}`) ?? lcfb.findStream(`Section${i}`);
    if (!raw) break;
    sections.push({ idx: i, content: raw });
  }
  if (sections.length === 0) {
    for (const e of lcfb.entries()) {
      if (sections.length >= MAX_SECTIONS) break;
      if (!e.name.startsWith("Section")) continue;
      // Names without a parsable number sort as section 0.
      const idx = Number.parseInt(e.name.replace("Section", ""), 10) || 0;
      const raw = lcfb.findStream(e.name);
      if (raw) sections.push({ idx, content: raw });
    }
  }
  return sections.sort((a, b) => a.idx - b.idx).map((s) => s.content);
}
|
|
2427
|
+
/**
 * Collects and decrypts the distribution-document sections from a leniently
 * parsed container.
 *
 * Each section is looked up as `/ViewText/Section<N>` and, failing that, as
 * the bare `Section<N>` name. The scan ends at the first missing section, at
 * the first section whose decryption throws, or after MAX_SECTIONS sections.
 *
 * @param {{findStream(path: string): (Buffer|null)}} lcfb Lenient container reader.
 * @param {boolean} compressed Forwarded to `decryptViewText`.
 * @returns {Array<Buffer>} Decrypted section buffers in section order.
 */
function findViewTextSectionsLenient(lcfb, compressed) {
  const found = [];
  let index = 0;
  while (index < MAX_SECTIONS) {
    const stream = lcfb.findStream(`/ViewText/Section${index}`) ?? lcfb.findStream(`Section${index}`);
    if (!stream) break;
    let plain;
    try {
      plain = decryptViewText(stream, compressed);
    } catch {
      // An undecryptable section ends the scan; earlier sections are kept.
      break;
    }
    found.push({ idx: index, content: plain });
    index += 1;
  }
  found.sort((a, b) => a.idx - b.idx);
  return found.map((section) => section.content);
}
|
|
1434
2440
|
var TAG_SHAPE_COMPONENT = 74;
|
|
1435
2441
|
function extractBinDataId(records, ctrlIdx) {
|
|
1436
2442
|
const ctrlLevel = records[ctrlIdx].level;
|
|
@@ -1505,6 +2511,53 @@ function extractHwp5Images(cfb, blocks, compressed, warnings) {
|
|
|
1505
2511
|
}
|
|
1506
2512
|
return images;
|
|
1507
2513
|
}
|
|
2514
|
+
/**
 * Resolves image blocks against the `BIN<nnnn>` streams of a leniently parsed
 * container.
 *
 * Every stream whose name starts with "BIN" followed by four digits is indexed
 * by that number. For each block of type "image" whose `text` parses as a
 * number, the matching stream is looked up:
 *  - found with a recognised image type: the block's `text` becomes the
 *    generated file name, `imageData` is attached, and the image is returned;
 *  - missing or unrecognised: the block is turned into a placeholder
 *    paragraph and a SKIPPED_IMAGE warning is appended.
 *
 * @param {{findStream(path: string): (Buffer|null), entries(): Array<{name: string}>}} lcfb
 *   Lenient container reader.
 * @param {Array<object>} blocks Parsed blocks; image blocks are mutated in place.
 * @param {boolean} compressed Whether bin streams should be decompressed.
 * @param {Array<object>} warnings Warning list; entries are appended.
 * @returns {Array<{filename: string, data: Uint8Array, mimeType: string}>}
 */
function extractHwp5ImagesLenient(lcfb, blocks, compressed, warnings) {
  const binDataMap = /* @__PURE__ */ new Map();
  const binRe = /^BIN(\d{4})/i;
  for (const e of lcfb.entries()) {
    const match = e.name.match(binRe);
    if (!match) continue;
    const idx = Number.parseInt(match[1], 10);
    let raw = lcfb.findStream(e.name);
    if (!raw) continue;
    if (compressed) {
      try {
        raw = decompressStream(raw);
      } catch {
        // Best effort: if decompression fails, keep the stream bytes as stored
        // and let the image-type detection below decide.
      }
    }
    binDataMap.set(idx, { data: raw, name: e.name });
  }
  if (binDataMap.size === 0) return [];
  const images = [];
  let imageIndex = 0;
  for (const block of blocks) {
    if (block.type !== "image" || !block.text) continue;
    const binId = Number.parseInt(block.text, 10);
    if (Number.isNaN(binId)) continue;
    const bin = binDataMap.get(binId);
    if (!bin) {
      // Message text repaired: it previously contained U+FFFD replacement characters.
      warnings.push({ page: block.pageNumber, message: `BinData ${binId} \uC5C6\uC74C`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: BinData ${binId}]`;
      continue;
    }
    const mime = detectImageMime(bin.data);
    if (!mime) {
      warnings.push({ page: block.pageNumber, message: `BinData ${binId}: \uC54C \uC218 \uC5C6\uB294 \uC774\uBBF8\uC9C0 \uD615\uC2DD`, code: "SKIPPED_IMAGE" });
      block.type = "paragraph";
      block.text = `[\uC774\uBBF8\uC9C0: ${bin.name}]`;
      continue;
    }
    imageIndex++;
    const ext = mime.includes("jpeg") ? "jpg" : mime.includes("png") ? "png" : mime.includes("gif") ? "gif" : mime.includes("bmp") ? "bmp" : "bin";
    const filename = `image_${String(imageIndex).padStart(3, "0")}.${ext}`;
    // Separate copies so the returned image and the block do not share memory.
    images.push({ filename, data: new Uint8Array(bin.data), mimeType: mime });
    block.text = filename;
    block.imageData = { data: new Uint8Array(bin.data), mimeType: mime, filename: bin.name };
  }
  return images;
}
|
|
1508
2561
|
function parseSection(records, docInfo, warnings, sectionNum) {
|
|
1509
2562
|
const blocks = [];
|
|
1510
2563
|
let i = 0;
|
|
@@ -1541,12 +2594,61 @@ function parseSection(records, docInfo, warnings, sectionNum) {
|
|
|
1541
2594
|
}
|
|
1542
2595
|
} else if (ctrlId === " elo" || ctrlId === "ole ") {
|
|
1543
2596
|
warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
|
|
2597
|
+
} else if (ctrlId === "fn " || ctrlId === " nf " || ctrlId === "en " || ctrlId === " ne ") {
|
|
2598
|
+
const noteText = extractNoteText(records, i);
|
|
2599
|
+
if (noteText && blocks.length > 0) {
|
|
2600
|
+
const lastBlock = blocks[blocks.length - 1];
|
|
2601
|
+
if (lastBlock.type === "paragraph") {
|
|
2602
|
+
lastBlock.footnoteText = lastBlock.footnoteText ? lastBlock.footnoteText + "; " + noteText : noteText;
|
|
2603
|
+
}
|
|
2604
|
+
}
|
|
2605
|
+
} else if (ctrlId === "%tok" || ctrlId === "klnk") {
|
|
2606
|
+
const url = extractHyperlinkUrl(rec.data);
|
|
2607
|
+
if (url && blocks.length > 0) {
|
|
2608
|
+
const lastBlock = blocks[blocks.length - 1];
|
|
2609
|
+
if (lastBlock.type === "paragraph" && !lastBlock.href) {
|
|
2610
|
+
lastBlock.href = sanitizeHref(url) ?? void 0;
|
|
2611
|
+
}
|
|
2612
|
+
}
|
|
1544
2613
|
}
|
|
1545
2614
|
}
|
|
1546
2615
|
i++;
|
|
1547
2616
|
}
|
|
1548
2617
|
return blocks;
|
|
1549
2618
|
}
|
|
2619
|
+
/**
 * Gathers the text of a footnote/endnote control.
 *
 * Scans the records that follow the control (at most 99 of them) while their
 * level is deeper than the control's own level, and joins the trimmed,
 * non-empty text of every PARA_TEXT record with single spaces.
 *
 * @param {Array<{level: number, tagId: number, data: *}>} records Section records.
 * @param {number} ctrlIdx Index of the note control record.
 * @returns {string|null} The combined note text, or `null` if none was found.
 */
function extractNoteText(records, ctrlIdx) {
  const parentLevel = records[ctrlIdx].level;
  const scanEnd = Math.min(records.length, ctrlIdx + 100);
  const pieces = [];
  for (let pos = ctrlIdx + 1; pos < scanEnd; pos++) {
    const child = records[pos];
    // A record at the control's level or above ends the note's subtree.
    if (child.level <= parentLevel) break;
    if (child.tagId !== TAG_PARA_TEXT) continue;
    const text = extractText(child.data).trim();
    if (text) pieces.push(text);
  }
  return pieces.length === 0 ? null : pieces.join(" ");
}
|
|
2632
|
+
/**
 * Looks for an http/https URL stored as UTF-16LE text inside a control record.
 *
 * The text from the first occurrence of "http" up to the next 16-bit zero
 * (or the end of the data) is accepted only if it starts with `http://` or
 * `https://`, has at least one more character, and is shorter than 2000
 * characters.
 *
 * @param {Buffer} data Raw record data.
 * @returns {string|null} The URL, or `null` when none is found or reading fails.
 */
function extractHyperlinkUrl(data) {
  try {
    const start = data.indexOf(Buffer.from("http", "utf16le"));
    if (start < 0) return null;
    // Advance one UTF-16 code unit at a time until the terminator.
    let stop = start;
    for (; stop + 1 < data.length; stop += 2) {
      if (data.readUInt16LE(stop) === 0) break;
    }
    const candidate = data.subarray(start, stop).toString("utf16le");
    const looksLikeUrl = /^https?:\/\/.+/.test(candidate);
    return looksLikeUrl && candidate.length < 2e3 ? candidate : null;
  } catch {
    // Unreadable data is treated the same as "no URL present".
    return null;
  }
}
|
|
1550
2652
|
function resolveCharStyle(charShapeIds, docInfo) {
|
|
1551
2653
|
if (charShapeIds.length === 0 || docInfo.charShapes.length === 0) return void 0;
|
|
1552
2654
|
const freq = /* @__PURE__ */ new Map();
|