kordoc 2.4.1 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -1
- package/dist/{chunk-IVC5CB2Q.cjs → chunk-5HWLDNT5.cjs} +2 -2
- package/dist/{chunk-IVC5CB2Q.cjs.map → chunk-5HWLDNT5.cjs.map} +1 -1
- package/dist/{chunk-JFPF7B5L.js → chunk-JU7NRDCV.js} +78 -8
- package/dist/chunk-JU7NRDCV.js.map +1 -0
- package/dist/{chunk-T65PPCNU.js → chunk-UOBENOSJ.js} +2 -2
- package/dist/{chunk-VYFIAYCW.js → chunk-V6STPG3I.js} +2 -2
- package/dist/cli.js +3 -3
- package/dist/index.cjs +162 -92
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +77 -7
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +3 -3
- package/dist/{parser-ZORW4RSC.cjs → parser-BYHUJ5GP.cjs} +16 -15
- package/dist/{parser-ZORW4RSC.cjs.map → parser-BYHUJ5GP.cjs.map} +1 -1
- package/dist/{parser-VXUBNDG4.js → parser-W4P5VX7T.js} +3 -2
- package/dist/parser-W4P5VX7T.js.map +1 -0
- package/dist/{parser-UHUCMAA7.js → parser-YHW6R62S.js} +3 -2
- package/dist/parser-YHW6R62S.js.map +1 -0
- package/dist/{watch-SSENKOE2.js → watch-Z2YSFSQ3.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-JFPF7B5L.js.map +0 -1
- package/dist/parser-UHUCMAA7.js.map +0 -1
- package/dist/parser-VXUBNDG4.js.map +0 -1
- /package/dist/{chunk-T65PPCNU.js.map → chunk-UOBENOSJ.js.map} +0 -0
- /package/dist/{chunk-VYFIAYCW.js.map → chunk-V6STPG3I.js.map} +0 -0
- /package/dist/{watch-SSENKOE2.js.map → watch-Z2YSFSQ3.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
var
|
|
19
|
+
var _chunk5HWLDNT5cjs = require('./chunk-5HWLDNT5.cjs');
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -193,7 +193,7 @@ var MAX_XML_DEPTH = 200;
|
|
|
193
193
|
function createXmlParser(warnings) {
|
|
194
194
|
return new (0, _xmldom.DOMParser)({
|
|
195
195
|
onError(level, msg) {
|
|
196
|
-
if (level === "fatalError") throw new (0,
|
|
196
|
+
if (level === "fatalError") throw new (0, _chunk5HWLDNT5cjs.KordocError)(`XML \uD30C\uC2F1 \uC2E4\uD328: ${msg}`);
|
|
197
197
|
_optionalChain([warnings, 'optionalAccess', _2 => _2.push, 'call', _3 => _3({ code: "MALFORMED_XML", message: `XML ${level === "warn" ? "\uACBD\uACE0" : "\uC624\uB958"}: ${msg}` })]);
|
|
198
198
|
}
|
|
199
199
|
});
|
|
@@ -212,10 +212,10 @@ async function extractHwpxStyles(zip, decompressed) {
|
|
|
212
212
|
const xml = await file.async("text");
|
|
213
213
|
if (decompressed) {
|
|
214
214
|
decompressed.total += xml.length * 2;
|
|
215
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
215
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunk5HWLDNT5cjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
216
216
|
}
|
|
217
217
|
const parser = createXmlParser();
|
|
218
|
-
const doc = parser.parseFromString(
|
|
218
|
+
const doc = parser.parseFromString(_chunk5HWLDNT5cjs.stripDtd.call(void 0, xml), "text/xml");
|
|
219
219
|
if (!doc.documentElement) continue;
|
|
220
220
|
parseCharProperties(doc, result.charProperties);
|
|
221
221
|
parseStyleElements(doc, result.styles);
|
|
@@ -277,7 +277,7 @@ function parseStyleElements(doc, map) {
|
|
|
277
277
|
}
|
|
278
278
|
}
|
|
279
279
|
async function parseHwpxDocument(buffer, options) {
|
|
280
|
-
|
|
280
|
+
_chunk5HWLDNT5cjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
|
|
281
281
|
let zip;
|
|
282
282
|
try {
|
|
283
283
|
zip = await _jszip2.default.loadAsync(buffer);
|
|
@@ -286,7 +286,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
286
286
|
}
|
|
287
287
|
const actualEntryCount = Object.keys(zip.files).length;
|
|
288
288
|
if (actualEntryCount > MAX_ZIP_ENTRIES) {
|
|
289
|
-
throw new (0,
|
|
289
|
+
throw new (0, _chunk5HWLDNT5cjs.KordocError)("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
290
290
|
}
|
|
291
291
|
const manifestFile = zip.file("META-INF/manifest.xml");
|
|
292
292
|
if (manifestFile) {
|
|
@@ -298,7 +298,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
298
298
|
return comResultToParseResult(pages, pageCount, warnings2);
|
|
299
299
|
}
|
|
300
300
|
}
|
|
301
|
-
throw new (0,
|
|
301
|
+
throw new (0, _chunk5HWLDNT5cjs.KordocError)("DRM \uC554\uD638\uD654\uB41C HWPX \uD30C\uC77C\uC785\uB2C8\uB2E4. Windows + \uD55C\uCEF4 \uC624\uD53C\uC2A4 \uC124\uCE58 \uC2DC \uC790\uB3D9 \uCD94\uCD9C\uB429\uB2C8\uB2E4.");
|
|
302
302
|
}
|
|
303
303
|
}
|
|
304
304
|
const decompressed = { total: 0 };
|
|
@@ -307,7 +307,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
307
307
|
const styleMap = await extractHwpxStyles(zip, decompressed);
|
|
308
308
|
const warnings = [];
|
|
309
309
|
const sectionPaths = await resolveSectionPaths(zip);
|
|
310
|
-
if (sectionPaths.length === 0) throw new (0,
|
|
310
|
+
if (sectionPaths.length === 0) throw new (0, _chunk5HWLDNT5cjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
311
311
|
metadata.pageCount = sectionPaths.length;
|
|
312
312
|
const pageFilter = _optionalChain([options, 'optionalAccess', _5 => _5.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sectionPaths.length) : null;
|
|
313
313
|
const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
|
|
@@ -321,19 +321,19 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
321
321
|
try {
|
|
322
322
|
const xml = await file.async("text");
|
|
323
323
|
decompressed.total += xml.length * 2;
|
|
324
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
324
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunk5HWLDNT5cjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
325
325
|
blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, nestedTableCounter));
|
|
326
326
|
parsedSections++;
|
|
327
327
|
_optionalChain([options, 'optionalAccess', _6 => _6.onProgress, 'optionalCall', _7 => _7(parsedSections, totalTarget)]);
|
|
328
328
|
} catch (secErr) {
|
|
329
|
-
if (secErr instanceof
|
|
329
|
+
if (secErr instanceof _chunk5HWLDNT5cjs.KordocError) throw secErr;
|
|
330
330
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
331
331
|
}
|
|
332
332
|
}
|
|
333
333
|
const images = await extractImagesFromZip(zip, blocks, decompressed, warnings);
|
|
334
334
|
detectHwpxHeadings(blocks, styleMap);
|
|
335
335
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
336
|
-
const markdown =
|
|
336
|
+
const markdown = _chunk5HWLDNT5cjs.blocksToMarkdown.call(void 0, blocks);
|
|
337
337
|
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
338
338
|
}
|
|
339
339
|
function imageExtToMime(ext) {
|
|
@@ -397,13 +397,13 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
397
397
|
let found = false;
|
|
398
398
|
const allCandidates = resolvedPath ? [resolvedPath, ...candidates] : candidates;
|
|
399
399
|
for (const path of allCandidates) {
|
|
400
|
-
if (
|
|
400
|
+
if (_chunk5HWLDNT5cjs.isPathTraversal.call(void 0, path)) continue;
|
|
401
401
|
const file = zip.file(path);
|
|
402
402
|
if (!file) continue;
|
|
403
403
|
try {
|
|
404
404
|
const data = await file.async("uint8array");
|
|
405
405
|
decompressed.total += data.length;
|
|
406
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
406
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunk5HWLDNT5cjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
407
407
|
const actualPath = path;
|
|
408
408
|
const ext = actualPath.includes(".") ? actualPath.split(".").pop() || "png" : "png";
|
|
409
409
|
const mimeType = imageExtToMime(ext);
|
|
@@ -415,7 +415,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
415
415
|
found = true;
|
|
416
416
|
break;
|
|
417
417
|
} catch (err) {
|
|
418
|
-
if (err instanceof
|
|
418
|
+
if (err instanceof _chunk5HWLDNT5cjs.KordocError) throw err;
|
|
419
419
|
}
|
|
420
420
|
}
|
|
421
421
|
if (!found) {
|
|
@@ -435,7 +435,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
435
435
|
const xml = await file.async("text");
|
|
436
436
|
if (decompressed) {
|
|
437
437
|
decompressed.total += xml.length * 2;
|
|
438
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
438
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunk5HWLDNT5cjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
439
439
|
}
|
|
440
440
|
parseDublinCoreMetadata(xml, metadata);
|
|
441
441
|
if (metadata.title || metadata.author) return;
|
|
@@ -445,7 +445,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
445
445
|
}
|
|
446
446
|
function parseDublinCoreMetadata(xml, metadata) {
|
|
447
447
|
const parser = createXmlParser();
|
|
448
|
-
const doc = parser.parseFromString(
|
|
448
|
+
const doc = parser.parseFromString(_chunk5HWLDNT5cjs.stripDtd.call(void 0, xml), "text/xml");
|
|
449
449
|
if (!doc.documentElement) return;
|
|
450
450
|
const getText = (tagNames) => {
|
|
451
451
|
for (const tag of tagNames) {
|
|
@@ -505,7 +505,7 @@ function extractFromBrokenZip(buffer) {
|
|
|
505
505
|
}
|
|
506
506
|
const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
|
|
507
507
|
const name = new TextDecoder().decode(nameBytes);
|
|
508
|
-
if (
|
|
508
|
+
if (_chunk5HWLDNT5cjs.isPathTraversal.call(void 0, name)) {
|
|
509
509
|
pos = fileStart + compSize;
|
|
510
510
|
continue;
|
|
511
511
|
}
|
|
@@ -523,15 +523,15 @@ function extractFromBrokenZip(buffer) {
|
|
|
523
523
|
continue;
|
|
524
524
|
}
|
|
525
525
|
totalDecompressed += content.length * 2;
|
|
526
|
-
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
526
|
+
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0, _chunk5HWLDNT5cjs.KordocError)("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
|
|
527
527
|
sectionNum++;
|
|
528
528
|
blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, nestedTableCounter));
|
|
529
529
|
} catch (e6) {
|
|
530
530
|
continue;
|
|
531
531
|
}
|
|
532
532
|
}
|
|
533
|
-
if (blocks.length === 0) throw new (0,
|
|
534
|
-
const markdown =
|
|
533
|
+
if (blocks.length === 0) throw new (0, _chunk5HWLDNT5cjs.KordocError)("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
534
|
+
const markdown = _chunk5HWLDNT5cjs.blocksToMarkdown.call(void 0, blocks);
|
|
535
535
|
return { markdown, blocks, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
536
536
|
}
|
|
537
537
|
async function resolveSectionPaths(zip) {
|
|
@@ -549,7 +549,7 @@ async function resolveSectionPaths(zip) {
|
|
|
549
549
|
}
|
|
550
550
|
function parseSectionPathsFromManifest(xml) {
|
|
551
551
|
const parser = createXmlParser();
|
|
552
|
-
const doc = parser.parseFromString(
|
|
552
|
+
const doc = parser.parseFromString(_chunk5HWLDNT5cjs.stripDtd.call(void 0, xml), "text/xml");
|
|
553
553
|
const items = doc.getElementsByTagName("opf:item");
|
|
554
554
|
const spine = doc.getElementsByTagName("opf:itemref");
|
|
555
555
|
const isSectionId = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
|
|
@@ -596,9 +596,9 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
596
596
|
let level = 0;
|
|
597
597
|
if (baseFontSize > 0 && _optionalChain([block, 'access', _16 => _16.style, 'optionalAccess', _17 => _17.fontSize])) {
|
|
598
598
|
const ratio = block.style.fontSize / baseFontSize;
|
|
599
|
-
if (ratio >=
|
|
600
|
-
else if (ratio >=
|
|
601
|
-
else if (ratio >=
|
|
599
|
+
if (ratio >= _chunk5HWLDNT5cjs.HEADING_RATIO_H1) level = 1;
|
|
600
|
+
else if (ratio >= _chunk5HWLDNT5cjs.HEADING_RATIO_H2) level = 2;
|
|
601
|
+
else if (ratio >= _chunk5HWLDNT5cjs.HEADING_RATIO_H3) level = 3;
|
|
602
602
|
}
|
|
603
603
|
const compactText = text.replace(/\s+/g, "");
|
|
604
604
|
if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
|
|
@@ -623,13 +623,13 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
623
623
|
let nestedCols = 0;
|
|
624
624
|
for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
|
|
625
625
|
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
626
|
-
blocks.push({ type: "table", table:
|
|
626
|
+
blocks.push({ type: "table", table: _chunk5HWLDNT5cjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
627
627
|
if (parentTable.cell) {
|
|
628
628
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
629
629
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker;
|
|
630
630
|
}
|
|
631
631
|
} else {
|
|
632
|
-
const nestedText =
|
|
632
|
+
const nestedText = _chunk5HWLDNT5cjs.convertTableToText.call(void 0, newTable.rows);
|
|
633
633
|
if (parentTable.cell) {
|
|
634
634
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
635
635
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker + "\n" + nestedText;
|
|
@@ -639,7 +639,7 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
639
639
|
}
|
|
640
640
|
function parseSectionXml(xml, styleMap, warnings, sectionNum, counter) {
|
|
641
641
|
const parser = createXmlParser(warnings);
|
|
642
|
-
const doc = parser.parseFromString(
|
|
642
|
+
const doc = parser.parseFromString(_chunk5HWLDNT5cjs.stripDtd.call(void 0, xml), "text/xml");
|
|
643
643
|
if (!doc.documentElement) return [];
|
|
644
644
|
const blocks = [];
|
|
645
645
|
const ctx = { styleMap, warnings, sectionNum, counter };
|
|
@@ -682,7 +682,7 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
682
682
|
if (tableStack.length > 0) {
|
|
683
683
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
684
684
|
} else {
|
|
685
|
-
blocks.push({ type: "table", table:
|
|
685
|
+
blocks.push({ type: "table", table: _chunk5HWLDNT5cjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
686
686
|
tableCtx = null;
|
|
687
687
|
}
|
|
688
688
|
} else {
|
|
@@ -722,8 +722,8 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
722
722
|
const cs = isNaN(rawCs) ? 1 : rawCs;
|
|
723
723
|
const rawRs = parseInt(el.getAttribute("rowSpan") || "1", 10);
|
|
724
724
|
const rs = isNaN(rawRs) ? 1 : rawRs;
|
|
725
|
-
tableCtx.cell.colSpan = clampSpan(cs,
|
|
726
|
-
tableCtx.cell.rowSpan = clampSpan(rs,
|
|
725
|
+
tableCtx.cell.colSpan = clampSpan(cs, _chunk5HWLDNT5cjs.MAX_COLS);
|
|
726
|
+
tableCtx.cell.rowSpan = clampSpan(rs, _chunk5HWLDNT5cjs.MAX_ROWS);
|
|
727
727
|
}
|
|
728
728
|
break;
|
|
729
729
|
case "p": {
|
|
@@ -781,7 +781,7 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth =
|
|
|
781
781
|
if (tableStack.length > 0) {
|
|
782
782
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
783
783
|
} else {
|
|
784
|
-
blocks.push({ type: "table", table:
|
|
784
|
+
blocks.push({ type: "table", table: _chunk5HWLDNT5cjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
785
785
|
tableCtx = null;
|
|
786
786
|
}
|
|
787
787
|
} else {
|
|
@@ -889,7 +889,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
889
889
|
case "hyperlink": {
|
|
890
890
|
const url = child.getAttribute("url") || child.getAttribute("href") || "";
|
|
891
891
|
if (url) {
|
|
892
|
-
const safe =
|
|
892
|
+
const safe = _chunk5HWLDNT5cjs.sanitizeHref.call(void 0, url);
|
|
893
893
|
if (safe) href = safe;
|
|
894
894
|
}
|
|
895
895
|
walk(child);
|
|
@@ -1029,7 +1029,7 @@ function decompressStream(data) {
|
|
|
1029
1029
|
return _zlib.inflateRawSync.call(void 0, data, opts);
|
|
1030
1030
|
}
|
|
1031
1031
|
function parseFileHeader(data) {
|
|
1032
|
-
if (data.length < 40) throw new (0,
|
|
1032
|
+
if (data.length < 40) throw new (0, _chunk5HWLDNT5cjs.KordocError)("FileHeader\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 40\uBC14\uC774\uD2B8)");
|
|
1033
1033
|
const sig = data.subarray(0, 32).toString("utf8").replace(/\0+$/, "");
|
|
1034
1034
|
return {
|
|
1035
1035
|
signature: sig,
|
|
@@ -2048,7 +2048,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
2048
2048
|
lenientCfb = parseLenientCfb(buffer);
|
|
2049
2049
|
warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
|
|
2050
2050
|
} catch (e11) {
|
|
2051
|
-
throw new (0,
|
|
2051
|
+
throw new (0, _chunk5HWLDNT5cjs.KordocError)("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
|
|
2052
2052
|
}
|
|
2053
2053
|
}
|
|
2054
2054
|
const findStream = (path) => {
|
|
@@ -2059,11 +2059,11 @@ function parseHwp5Document(buffer, options) {
|
|
|
2059
2059
|
return lenientCfb.findStream(path);
|
|
2060
2060
|
};
|
|
2061
2061
|
const headerData = findStream("/FileHeader");
|
|
2062
|
-
if (!headerData) throw new (0,
|
|
2062
|
+
if (!headerData) throw new (0, _chunk5HWLDNT5cjs.KordocError)("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
|
|
2063
2063
|
const header = parseFileHeader(headerData);
|
|
2064
|
-
if (header.signature !== "HWP Document File") throw new (0,
|
|
2065
|
-
if (header.flags & FLAG_ENCRYPTED) throw new (0,
|
|
2066
|
-
if (header.flags & FLAG_DRM) throw new (0,
|
|
2064
|
+
if (header.signature !== "HWP Document File") throw new (0, _chunk5HWLDNT5cjs.KordocError)("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
|
|
2065
|
+
if (header.flags & FLAG_ENCRYPTED) throw new (0, _chunk5HWLDNT5cjs.KordocError)("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2066
|
+
if (header.flags & FLAG_DRM) throw new (0, _chunk5HWLDNT5cjs.KordocError)("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2067
2067
|
const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
|
|
2068
2068
|
const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
|
|
2069
2069
|
const metadata = {
|
|
@@ -2072,7 +2072,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
2072
2072
|
if (cfb) extractHwp5Metadata(cfb, metadata);
|
|
2073
2073
|
const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
|
|
2074
2074
|
const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
|
|
2075
|
-
if (sections.length === 0) throw new (0,
|
|
2075
|
+
if (sections.length === 0) throw new (0, _chunk5HWLDNT5cjs.KordocError)("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2076
2076
|
metadata.pageCount = sections.length;
|
|
2077
2077
|
const pageFilter = _optionalChain([options, 'optionalAccess', _22 => _22.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sections.length) : null;
|
|
2078
2078
|
const totalTarget = pageFilter ? pageFilter.size : sections.length;
|
|
@@ -2086,24 +2086,24 @@ function parseHwp5Document(buffer, options) {
|
|
|
2086
2086
|
const sectionData = sections[si];
|
|
2087
2087
|
const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
2088
2088
|
totalDecompressed += data.length;
|
|
2089
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2089
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunk5HWLDNT5cjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2090
2090
|
const records = readRecords(data);
|
|
2091
2091
|
const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, nestedTableCounter);
|
|
2092
2092
|
blocks.push(...sectionBlocks);
|
|
2093
2093
|
parsedSections++;
|
|
2094
2094
|
_optionalChain([options, 'optionalAccess', _23 => _23.onProgress, 'optionalCall', _24 => _24(parsedSections, totalTarget)]);
|
|
2095
2095
|
} catch (secErr) {
|
|
2096
|
-
if (secErr instanceof
|
|
2096
|
+
if (secErr instanceof _chunk5HWLDNT5cjs.KordocError) throw secErr;
|
|
2097
2097
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
2098
2098
|
}
|
|
2099
2099
|
}
|
|
2100
2100
|
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
2101
|
-
const flatBlocks =
|
|
2101
|
+
const flatBlocks = _chunk5HWLDNT5cjs.flattenLayoutTables.call(void 0, blocks);
|
|
2102
2102
|
if (docInfo) {
|
|
2103
2103
|
detectHwp5Headings(flatBlocks, docInfo);
|
|
2104
2104
|
}
|
|
2105
2105
|
const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
2106
|
-
const markdown =
|
|
2106
|
+
const markdown = _chunk5HWLDNT5cjs.blocksToMarkdown.call(void 0, flatBlocks);
|
|
2107
2107
|
return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
2108
2108
|
}
|
|
2109
2109
|
function parseDocInfoStream(cfb, compressed) {
|
|
@@ -2163,9 +2163,9 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2163
2163
|
let level = 0;
|
|
2164
2164
|
if (_optionalChain([block, 'access', _29 => _29.style, 'optionalAccess', _30 => _30.fontSize]) && baseFontSize > 0) {
|
|
2165
2165
|
const ratio = block.style.fontSize / baseFontSize;
|
|
2166
|
-
if (ratio >=
|
|
2167
|
-
else if (ratio >=
|
|
2168
|
-
else if (ratio >=
|
|
2166
|
+
if (ratio >= _chunk5HWLDNT5cjs.HEADING_RATIO_H1) level = 1;
|
|
2167
|
+
else if (ratio >= _chunk5HWLDNT5cjs.HEADING_RATIO_H2) level = 2;
|
|
2168
|
+
else if (ratio >= _chunk5HWLDNT5cjs.HEADING_RATIO_H3) level = 3;
|
|
2169
2169
|
}
|
|
2170
2170
|
if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
|
|
2171
2171
|
if (level === 0) level = 2;
|
|
@@ -2250,7 +2250,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2250
2250
|
if (!raw) break;
|
|
2251
2251
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2252
2252
|
totalDecompressed += content.length;
|
|
2253
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2253
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunk5HWLDNT5cjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2254
2254
|
sections.push({ idx: i, content });
|
|
2255
2255
|
}
|
|
2256
2256
|
if (sections.length === 0) {
|
|
@@ -2262,7 +2262,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2262
2262
|
if (raw) {
|
|
2263
2263
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2264
2264
|
totalDecompressed += content.length;
|
|
2265
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2265
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunk5HWLDNT5cjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2266
2266
|
sections.push({ idx, content });
|
|
2267
2267
|
}
|
|
2268
2268
|
}
|
|
@@ -2279,7 +2279,7 @@ function findViewTextSectionsLenient(lcfb, compressed) {
|
|
|
2279
2279
|
try {
|
|
2280
2280
|
const content = decryptViewText(raw, compressed);
|
|
2281
2281
|
totalDecompressed += content.length;
|
|
2282
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2282
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunk5HWLDNT5cjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2283
2283
|
sections.push({ idx: i, content });
|
|
2284
2284
|
} catch (e16) {
|
|
2285
2285
|
break;
|
|
@@ -2467,7 +2467,7 @@ function parseSection(records, docInfo, warnings, sectionNum, counter) {
|
|
|
2467
2467
|
if (url && blocks.length > 0) {
|
|
2468
2468
|
const lastBlock = blocks[blocks.length - 1];
|
|
2469
2469
|
if (lastBlock.type === "paragraph" && !lastBlock.href) {
|
|
2470
|
-
lastBlock.href = _nullishCoalesce(
|
|
2470
|
+
lastBlock.href = _nullishCoalesce(_chunk5HWLDNT5cjs.sanitizeHref.call(void 0, url), () => ( void 0));
|
|
2471
2471
|
}
|
|
2472
2472
|
}
|
|
2473
2473
|
}
|
|
@@ -2585,8 +2585,8 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
2585
2585
|
if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
|
|
2586
2586
|
if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
|
|
2587
2587
|
if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
|
|
2588
|
-
rows = Math.min(rec.data.readUInt16LE(4),
|
|
2589
|
-
cols = Math.min(rec.data.readUInt16LE(6),
|
|
2588
|
+
rows = Math.min(rec.data.readUInt16LE(4), _chunk5HWLDNT5cjs.MAX_ROWS);
|
|
2589
|
+
cols = Math.min(rec.data.readUInt16LE(6), _chunk5HWLDNT5cjs.MAX_COLS);
|
|
2590
2590
|
}
|
|
2591
2591
|
if (rec.tagId === TAG_LIST_HEADER) {
|
|
2592
2592
|
const { cell, nextIdx } = parseCellBlock(records, i, tableLevel, counter);
|
|
@@ -2608,7 +2608,7 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
2608
2608
|
return { table: { rows, cols, cells: irCells, hasHeader: rows > 1 }, nextIdx: i };
|
|
2609
2609
|
}
|
|
2610
2610
|
const cellRows = arrangeCells(rows, cols, cells);
|
|
2611
|
-
return { table:
|
|
2611
|
+
return { table: _chunk5HWLDNT5cjs.buildTable.call(void 0, cellRows), nextIdx: i };
|
|
2612
2612
|
}
|
|
2613
2613
|
function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
2614
2614
|
const rec = records[startIdx];
|
|
@@ -2623,8 +2623,8 @@ function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
|
2623
2623
|
rowAddr = rec.data.readUInt16LE(10);
|
|
2624
2624
|
const cs = rec.data.readUInt16LE(12);
|
|
2625
2625
|
const rs = rec.data.readUInt16LE(14);
|
|
2626
|
-
if (cs > 0) colSpan = Math.min(cs,
|
|
2627
|
-
if (rs > 0) rowSpan = Math.min(rs,
|
|
2626
|
+
if (cs > 0) colSpan = Math.min(cs, _chunk5HWLDNT5cjs.MAX_COLS);
|
|
2627
|
+
if (rs > 0) rowSpan = Math.min(rs, _chunk5HWLDNT5cjs.MAX_ROWS);
|
|
2628
2628
|
}
|
|
2629
2629
|
let i = startIdx + 1;
|
|
2630
2630
|
while (i < records.length) {
|
|
@@ -2687,6 +2687,20 @@ function arrangeCells(rows, cols, cells) {
|
|
|
2687
2687
|
return grid.map((row) => row.map((c) => c || { text: "", colSpan: 1, rowSpan: 1 }));
|
|
2688
2688
|
}
|
|
2689
2689
|
|
|
2690
|
+
// src/hwp5/sentinel.ts
|
|
2691
|
+
var SENTINEL_PATTERNS = [
|
|
2692
|
+
/상위\s*버전의\s*배포용\s*문서/,
|
|
2693
|
+
/최신\s*버전의\s*한글.*뷰어/,
|
|
2694
|
+
/문서를\s*읽으려면/
|
|
2695
|
+
];
|
|
2696
|
+
function isDistributionSentinel(markdown) {
|
|
2697
|
+
if (!markdown) return false;
|
|
2698
|
+
const hit = SENTINEL_PATTERNS.some((p) => p.test(markdown));
|
|
2699
|
+
if (!hit) return false;
|
|
2700
|
+
const stripped = markdown.split(/\r?\n/).filter((line) => !SENTINEL_PATTERNS.some((p) => p.test(line))).join("").replace(/\s+/g, "");
|
|
2701
|
+
return stripped.length < 120;
|
|
2702
|
+
}
|
|
2703
|
+
|
|
2690
2704
|
// src/xlsx/parser.ts
|
|
2691
2705
|
|
|
2692
2706
|
|
|
@@ -2726,7 +2740,7 @@ function getTextContent(el) {
|
|
|
2726
2740
|
return _nullishCoalesce(_optionalChain([el, 'access', _38 => _38.textContent, 'optionalAccess', _39 => _39.trim, 'call', _40 => _40()]), () => ( ""));
|
|
2727
2741
|
}
|
|
2728
2742
|
function parseXml(text) {
|
|
2729
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
2743
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunk5HWLDNT5cjs.stripDtd.call(void 0, text), "text/xml");
|
|
2730
2744
|
}
|
|
2731
2745
|
function parseSharedStrings(xml) {
|
|
2732
2746
|
const doc = parseXml(xml);
|
|
@@ -2870,7 +2884,7 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2870
2884
|
cellRows.push(row);
|
|
2871
2885
|
}
|
|
2872
2886
|
if (cellRows.length > 0) {
|
|
2873
|
-
const table =
|
|
2887
|
+
const table = _chunk5HWLDNT5cjs.buildTable.call(void 0, cellRows);
|
|
2874
2888
|
if (table.rows > 0) {
|
|
2875
2889
|
blocks.push({ type: "table", table, pageNumber: sheetIndex + 1 });
|
|
2876
2890
|
}
|
|
@@ -2878,12 +2892,12 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2878
2892
|
return blocks;
|
|
2879
2893
|
}
|
|
2880
2894
|
async function parseXlsxDocument(buffer, options) {
|
|
2881
|
-
|
|
2895
|
+
_chunk5HWLDNT5cjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE3);
|
|
2882
2896
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
2883
2897
|
const warnings = [];
|
|
2884
2898
|
const workbookFile = zip.file("xl/workbook.xml");
|
|
2885
2899
|
if (!workbookFile) {
|
|
2886
|
-
throw new (0,
|
|
2900
|
+
throw new (0, _chunk5HWLDNT5cjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 XLSX \uD30C\uC77C: xl/workbook.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2887
2901
|
}
|
|
2888
2902
|
let sharedStrings = [];
|
|
2889
2903
|
const ssFile = zip.file("xl/sharedStrings.xml");
|
|
@@ -2892,7 +2906,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2892
2906
|
}
|
|
2893
2907
|
const sheets = parseWorkbook(await workbookFile.async("text"));
|
|
2894
2908
|
if (sheets.length === 0) {
|
|
2895
|
-
throw new (0,
|
|
2909
|
+
throw new (0, _chunk5HWLDNT5cjs.KordocError)("XLSX \uD30C\uC77C\uC5D0 \uC2DC\uD2B8\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2896
2910
|
}
|
|
2897
2911
|
let relsMap = /* @__PURE__ */ new Map();
|
|
2898
2912
|
const relsFile = zip.file("xl/_rels/workbook.xml.rels");
|
|
@@ -2964,7 +2978,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2964
2978
|
} catch (e20) {
|
|
2965
2979
|
}
|
|
2966
2980
|
}
|
|
2967
|
-
const markdown =
|
|
2981
|
+
const markdown = _chunk5HWLDNT5cjs.blocksToMarkdown.call(void 0, blocks);
|
|
2968
2982
|
return { markdown, blocks, metadata, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
2969
2983
|
}
|
|
2970
2984
|
|
|
@@ -3013,7 +3027,7 @@ function getAttr(el, localName3) {
|
|
|
3013
3027
|
return null;
|
|
3014
3028
|
}
|
|
3015
3029
|
function parseXml2(text) {
|
|
3016
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
3030
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunk5HWLDNT5cjs.stripDtd.call(void 0, text), "text/xml");
|
|
3017
3031
|
}
|
|
3018
3032
|
function parseStyles(xml) {
|
|
3019
3033
|
const doc = parseXml2(xml);
|
|
@@ -3306,12 +3320,12 @@ async function extractImages(zip, rels, doc) {
|
|
|
3306
3320
|
return { blocks, images };
|
|
3307
3321
|
}
|
|
3308
3322
|
async function parseDocxDocument(buffer, options) {
|
|
3309
|
-
|
|
3323
|
+
_chunk5HWLDNT5cjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE4);
|
|
3310
3324
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
3311
3325
|
const warnings = [];
|
|
3312
3326
|
const docFile = zip.file("word/document.xml");
|
|
3313
3327
|
if (!docFile) {
|
|
3314
|
-
throw new (0,
|
|
3328
|
+
throw new (0, _chunk5HWLDNT5cjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 DOCX \uD30C\uC77C: word/document.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3315
3329
|
}
|
|
3316
3330
|
let rels = /* @__PURE__ */ new Map();
|
|
3317
3331
|
const relsFile = zip.file("word/_rels/document.xml.rels");
|
|
@@ -3346,7 +3360,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3346
3360
|
const doc = parseXml2(docXml);
|
|
3347
3361
|
const body = findElements(doc, "body");
|
|
3348
3362
|
if (body.length === 0) {
|
|
3349
|
-
throw new (0,
|
|
3363
|
+
throw new (0, _chunk5HWLDNT5cjs.KordocError)("DOCX \uBCF8\uBB38(w:body)\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3350
3364
|
}
|
|
3351
3365
|
const blocks = [];
|
|
3352
3366
|
const bodyEl = body[0];
|
|
@@ -3386,7 +3400,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3386
3400
|
}
|
|
3387
3401
|
}
|
|
3388
3402
|
const outline = blocks.filter((b) => b.type === "heading").map((b) => ({ level: _nullishCoalesce(b.level, () => ( 2)), text: _nullishCoalesce(b.text, () => ( "")) }));
|
|
3389
|
-
const markdown =
|
|
3403
|
+
const markdown = _chunk5HWLDNT5cjs.blocksToMarkdown.call(void 0, blocks);
|
|
3390
3404
|
return {
|
|
3391
3405
|
markdown,
|
|
3392
3406
|
blocks,
|
|
@@ -3409,7 +3423,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3409
3423
|
}
|
|
3410
3424
|
const text = new TextDecoder("utf-8").decode(buffer).replace(/^\uFEFF/, "");
|
|
3411
3425
|
const normalized = text.replace(/ /g, " ");
|
|
3412
|
-
const xml =
|
|
3426
|
+
const xml = _chunk5HWLDNT5cjs.stripDtd.call(void 0, normalized);
|
|
3413
3427
|
const warnings = [];
|
|
3414
3428
|
const parser = new (0, _xmldom.DOMParser)({
|
|
3415
3429
|
onError: (_level, msg) => {
|
|
@@ -3449,7 +3463,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3449
3463
|
parseSection2(el, blocks, paraShapeMap, sectionIdx, warnings);
|
|
3450
3464
|
}
|
|
3451
3465
|
const outline = blocks.filter((b) => b.type === "heading" && b.text).map((b) => ({ level: _nullishCoalesce(b.level, () => ( 1)), text: b.text, pageNumber: b.pageNumber }));
|
|
3452
|
-
const markdown =
|
|
3466
|
+
const markdown = _chunk5HWLDNT5cjs.blocksToMarkdown.call(void 0, blocks);
|
|
3453
3467
|
return {
|
|
3454
3468
|
markdown,
|
|
3455
3469
|
blocks,
|
|
@@ -3591,7 +3605,7 @@ function parseTable2(el, blocks, paraShapeMap, sectionNum, warnings) {
|
|
|
3591
3605
|
const cellRows = grid.map(
|
|
3592
3606
|
(row) => row.map((cell) => _nullishCoalesce(cell, () => ( { text: "", colSpan: 1, rowSpan: 1 })))
|
|
3593
3607
|
);
|
|
3594
|
-
const table =
|
|
3608
|
+
const table = _chunk5HWLDNT5cjs.buildTable.call(void 0, cellRows);
|
|
3595
3609
|
blocks.push({ type: "table", table, pageNumber: sectionNum });
|
|
3596
3610
|
}
|
|
3597
3611
|
function extractCellText(cellEl) {
|
|
@@ -3992,7 +4006,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
3992
4006
|
const normalizedValues = normalizeValues(values);
|
|
3993
4007
|
const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
|
|
3994
4008
|
if (sectionFiles.length === 0) {
|
|
3995
|
-
throw new (0,
|
|
4009
|
+
throw new (0, _chunk5HWLDNT5cjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3996
4010
|
}
|
|
3997
4011
|
const xmlParser = new (0, _xmldom.DOMParser)();
|
|
3998
4012
|
const xmlSerializer = new (0, _xmldom.XMLSerializer)();
|
|
@@ -4000,7 +4014,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
4000
4014
|
const zipEntry = zip.file(sectionPath);
|
|
4001
4015
|
if (!zipEntry) continue;
|
|
4002
4016
|
const rawXml = await zipEntry.async("text");
|
|
4003
|
-
const doc = xmlParser.parseFromString(
|
|
4017
|
+
const doc = xmlParser.parseFromString(_chunk5HWLDNT5cjs.stripDtd.call(void 0, rawXml), "text/xml");
|
|
4004
4018
|
if (!doc.documentElement) continue;
|
|
4005
4019
|
let modified = false;
|
|
4006
4020
|
const tables = findAllElements(doc.documentElement, "tbl");
|
|
@@ -4324,8 +4338,21 @@ async function markdownToHwpx(markdown) {
|
|
|
4324
4338
|
zip.file("Contents/content.hpf", generateManifest());
|
|
4325
4339
|
zip.file("Contents/header.xml", generateHeaderXml());
|
|
4326
4340
|
zip.file("Contents/section0.xml", sectionXml);
|
|
4341
|
+
zip.file("Preview/PrvText.txt", buildPrvText(blocks));
|
|
4327
4342
|
return await zip.generateAsync({ type: "arraybuffer" });
|
|
4328
4343
|
}
|
|
4344
|
+
/**
 * Build the plain-text preview stored as `Preview/PrvText.txt` in a generated HWPX.
 *
 * Each block contributes its `text`, or -- when it has `rows` -- every row joined
 * with single spaces and the rows joined with newlines. Blocks that yield no text
 * are skipped. Collection stops once a rough size estimate (3 per UTF-16 code
 * unit) exceeds 1024, and the joined result is capped at 1024 UTF-16 code units.
 *
 * The cap never splits a surrogate pair: if the cut would land between the two
 * halves of an astral character (e.g. an emoji), the dangling high surrogate is
 * dropped so the preview stays well-formed when it is encoded.
 *
 * @param {Array<{text?: string, rows?: string[][]}>} blocks - Parsed document blocks.
 * @returns {string} Preview text, at most 1024 UTF-16 code units long.
 */
function buildPrvText(blocks) {
  const MAX_PREVIEW_UNITS = 1024;
  const lines = [];
  let bytes = 0;
  for (const b of blocks) {
    const text = b.text || (b.rows ? b.rows.map((r) => r.join(" ")).join("\n") : "");
    if (!text) continue;
    lines.push(text);
    // Pessimistic estimate: assume every code unit costs three bytes.
    bytes += text.length * 3;
    if (bytes > MAX_PREVIEW_UNITS) break;
  }
  const joined = lines.join("\n");
  if (joined.length <= MAX_PREVIEW_UNITS) return joined;
  let end = MAX_PREVIEW_UNITS;
  const lastUnit = joined.charCodeAt(end - 1);
  // A high surrogate (0xD800-0xDBFF) at the cut means its low half was sliced off.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  return joined.slice(0, end);
}
|
|
4329
4356
|
function parseMarkdownToBlocks(md) {
|
|
4330
4357
|
const lines = md.split("\n");
|
|
4331
4358
|
const blocks = [];
|
|
@@ -4560,7 +4587,7 @@ function generateHeaderXml() {
|
|
|
4560
4587
|
</hh:font>
|
|
4561
4588
|
</hh:fontface>
|
|
4562
4589
|
</hh:fontfaces>
|
|
4563
|
-
<hh:borderFills itemCnt="
|
|
4590
|
+
<hh:borderFills itemCnt="2">
|
|
4564
4591
|
<hh:borderFill id="0" threeD="0" shadow="0" centerLine="0" breakCellSeparateLine="0">
|
|
4565
4592
|
<hh:slash type="NONE" Crooked="0" isCounter="0"/>
|
|
4566
4593
|
<hh:backSlash type="NONE" Crooked="0" isCounter="0"/>
|
|
@@ -4571,6 +4598,16 @@ function generateHeaderXml() {
|
|
|
4571
4598
|
<hh:diagonal type="NONE" width="0.1mm" color="#000000"/>
|
|
4572
4599
|
<hh:fillInfo/>
|
|
4573
4600
|
</hh:borderFill>
|
|
4601
|
+
<hh:borderFill id="1" threeD="0" shadow="0" centerLine="0" breakCellSeparateLine="0">
|
|
4602
|
+
<hh:slash type="NONE" Crooked="0" isCounter="0"/>
|
|
4603
|
+
<hh:backSlash type="NONE" Crooked="0" isCounter="0"/>
|
|
4604
|
+
<hh:leftBorder type="SOLID" width="0.12mm" color="#000000"/>
|
|
4605
|
+
<hh:rightBorder type="SOLID" width="0.12mm" color="#000000"/>
|
|
4606
|
+
<hh:topBorder type="SOLID" width="0.12mm" color="#000000"/>
|
|
4607
|
+
<hh:bottomBorder type="SOLID" width="0.12mm" color="#000000"/>
|
|
4608
|
+
<hh:diagonal type="NONE" width="0.1mm" color="#000000"/>
|
|
4609
|
+
<hh:fillInfo/>
|
|
4610
|
+
</hh:borderFill>
|
|
4574
4611
|
</hh:borderFills>
|
|
4575
4612
|
<hh:charProperties itemCnt="9">
|
|
4576
4613
|
${charPr(0, 1e3, false, false)}
|
|
@@ -4606,15 +4643,31 @@ ${paraPr(7, { align: "LEFT", lineSpacing: 160, indent: 600 })}
|
|
|
4606
4643
|
/**
 * Return the fixed `<hp:secPr>` section-properties XML fragment.
 *
 * Takes no arguments and performs no interpolation: the result is a single
 * constant string containing, in order, the `hp:grid`, `hp:startNum`,
 * `hp:visibility`, `hp:pagePr` (width 59528, height 84188, with its
 * `hp:margin`), `hp:footNotePr` and `hp:endNotePr` elements.
 * The literal has no whitespace between tags; keep it byte-exact when editing.
 */
function generateSecPr() {
|
|
4607
4644
|
return `<hp:secPr textDirection="HORIZONTAL" spaceColumns="1134" tabStop="8000" outlineShapeIDRef="0" memoShapeIDRef="0" textVerticalWidthHead="0" masterPageCnt="0"><hp:grid lineGrid="0" charGrid="0" wonggojiFormat="0"/><hp:startNum pageStartsOn="BOTH" page="0" pic="0" tbl="0" equation="0"/><hp:visibility hideFirstHeader="0" hideFirstFooter="0" hideFirstMasterPage="0" border="SHOW_ALL" fill="SHOW_ALL" hideFirstPageNum="0" hideFirstEmptyLine="0" showLineNumber="0"/><hp:pagePr landscape="WIDELY" width="59528" height="84188" gutterType="LEFT_ONLY"><hp:margin header="2835" footer="2835" gutter="0" left="5670" right="4252" top="8504" bottom="4252"/></hp:pagePr><hp:footNotePr><hp:autoNumFormat type="DIGIT" userChar="" prefixChar="" suffixChar=")" supscript="0"/><hp:noteLine length="-1" type="SOLID" width="0.12 mm" color="#000000"/><hp:noteSpacing betweenNotes="283" belowLine="567" aboveLine="850"/><hp:numbering type="CONTINUOUS" newNum="1"/><hp:placement place="EACH_COLUMN" beneathText="0"/></hp:footNotePr><hp:endNotePr><hp:autoNumFormat type="DIGIT" userChar="" prefixChar="" suffixChar=")" supscript="0"/><hp:noteLine length="14692344" type="SOLID" width="0.12 mm" color="#000000"/><hp:noteSpacing betweenNotes="0" belowLine="567" aboveLine="850"/><hp:numbering type="CONTINUOUS" newNum="1"/><hp:placement place="END_OF_DOCUMENT" beneathText="0"/></hp:endNotePr></hp:secPr>`;
|
|
4608
4645
|
}
|
|
4646
|
+
/** Value the table-id counter starts from; the first id handed out is base + 1. */
var TABLE_ID_BASE = 1e3;
/** Module-level counter. It is never reset here, so ids keep growing across calls. */
var tableIdCounter = TABLE_ID_BASE;

/**
 * Hand out the next table id.
 *
 * @returns {number} 1001 on the first call, then 1002, 1003, ...
 */
function nextTableId() {
  return ++tableIdCounter;
}

/**
 * Render a grid of cell strings as an `<hp:tbl>` wrapped in a paragraph/run.
 *
 * The column count is the length of the longest row (at least 1); shorter rows
 * are padded with empty cells. Every cell gets the same width
 * (`floor(44000 / colCnt)`) and a fixed height of 1500, and cells in the first
 * row are flagged `header="1"`. Cell content is produced by `generateRuns`.
 *
 * @param {string[][]} rows - Table rows, each an array of cell strings.
 * @returns {string} XML for one `<hp:p>` containing the table.
 */
function generateTable(rows) {
  const rowCnt = rows.length;
  // Fold instead of Math.max(...lengths): spreading one argument per row throws
  // a RangeError once the row count exceeds the engine's argument limit.
  const colCnt = rows.reduce((widest, row) => Math.max(widest, row.length), 1);
  const cellW = Math.floor(44e3 / colCnt);
  const cellH = 1500;
  const tblW = cellW * colCnt;
  const tblH = cellH * rowCnt;
  const tblId = nextTableId();
  const trElements = rows.map((row, rowIdx) => {
    // Pad ragged rows so every <hp:tr> carries exactly colCnt cells.
    const cells = row.length < colCnt ? [...row, ...Array(colCnt - row.length).fill("")] : row;
    const tdElements = cells.map((cell, colIdx) => {
      const runs = generateRuns(cell);
      const p = `<hp:p paraPrIDRef="0" styleIDRef="0">${runs}</hp:p>`;
      return `<hp:tc name="" header="${rowIdx === 0 ? 1 : 0}" hasMargin="0" protect="0" editable="1" dirty="0" borderFillIDRef="1"><hp:subList id="" textDirection="HORIZONTAL" lineWrap="BREAK" vertAlign="TOP" linkListIDRef="0" linkListNextIDRef="0" textWidth="0" textHeight="0" hasTextRef="0" hasNumRef="0">${p}</hp:subList><hp:cellAddr colAddr="${colIdx}" rowAddr="${rowIdx}"/><hp:cellSpan colSpan="1" rowSpan="1"/><hp:cellSz width="${cellW}" height="${cellH}"/><hp:cellMargin left="141" right="141" top="141" bottom="141"/></hp:tc>`;
    }).join("");
    return `<hp:tr>${tdElements}</hp:tr>`;
  }).join("");
  const tblInner = `<hp:sz width="${tblW}" widthRelTo="ABSOLUTE" height="${tblH}" heightRelTo="ABSOLUTE" protect="0"/><hp:pos treatAsChar="1" affectLSpacing="0" flowWithText="0" allowOverlap="0" holdAnchorAndSO="0" vertRelTo="PARA" horzRelTo="PARA" vertAlign="TOP" horzAlign="LEFT" vertOffset="0" horzOffset="0"/><hp:outMargin left="0" right="0" top="0" bottom="0"/><hp:inMargin left="510" right="510" top="141" bottom="141"/>` + trElements;
  const tbl = `<hp:tbl id="${tblId}" zOrder="0" numberingType="TABLE" pageBreak="CELL" repeatHeader="0" rowCnt="${rowCnt}" colCnt="${colCnt}" cellSpacing="0" borderFillIDRef="1" noShading="0">${tblInner}</hp:tbl>`;
  return `<hp:p paraPrIDRef="0" styleIDRef="0"><hp:run charPrIDRef="0">${tbl}</hp:run></hp:p>`;
}
|
|
4619
4672
|
function blocksToSectionXml(blocks) {
|
|
4620
4673
|
const paraXmls = [];
|
|
@@ -4862,13 +4915,13 @@ async function parse(input, options) {
|
|
|
4862
4915
|
if (typeof input === "string") {
|
|
4863
4916
|
try {
|
|
4864
4917
|
const buf = await _promises.readFile.call(void 0, input);
|
|
4865
|
-
buffer =
|
|
4918
|
+
buffer = _chunk5HWLDNT5cjs.toArrayBuffer.call(void 0, buf);
|
|
4866
4919
|
} catch (err) {
|
|
4867
4920
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
4868
4921
|
return { success: false, fileType: "unknown", error: msg, code: "PARSE_ERROR" };
|
|
4869
4922
|
}
|
|
4870
4923
|
} else if (Buffer.isBuffer(input)) {
|
|
4871
|
-
buffer =
|
|
4924
|
+
buffer = _chunk5HWLDNT5cjs.toArrayBuffer.call(void 0, input);
|
|
4872
4925
|
} else {
|
|
4873
4926
|
buffer = input;
|
|
4874
4927
|
}
|
|
@@ -4898,23 +4951,40 @@ async function parseHwpx(buffer, options) {
|
|
|
4898
4951
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseHwpxDocument(buffer, options);
|
|
4899
4952
|
return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _80 => _80.length]) ? images : void 0 };
|
|
4900
4953
|
} catch (err) {
|
|
4901
|
-
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4954
|
+
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code: _chunk5HWLDNT5cjs.classifyError.call(void 0, err) };
|
|
4902
4955
|
}
|
|
4903
4956
|
}
|
|
4904
4957
|
/**
 * Parse an HWP 5 buffer into the library's result object.
 *
 * The buffer is parsed with `parseHwp5Document`. When the resulting markdown is
 * recognised by `isDistributionSentinel`, `isComFallbackAvailable()` is truthy
 * and `options.filePath` is set, text extraction is retried through
 * `extractTextViaCom`; its output is used only if at least one page contains
 * non-whitespace text. Any failure inside that retry is ignored and the
 * original parse result is returned instead.
 *
 * @param {ArrayBuffer} buffer - File contents (wrapped with `Buffer.from`).
 * @param {object} [options] - Parse options; `filePath` enables the COM retry.
 * @returns {Promise<object>} `{ success: true, fileType: "hwp", ... }` on success,
 *   or `{ success: false, fileType: "hwp", error, code }` if parsing throws.
 */
async function parseHwp(buffer, options) {
  try {
    const { markdown, blocks, metadata, outline, warnings, images } = parseHwp5Document(Buffer.from(buffer), options);
    const shouldTryCom = isDistributionSentinel(markdown) && isComFallbackAvailable() && (options == null ? void 0 : options.filePath);
    if (shouldTryCom) {
      try {
        const comOutput = extractTextViaCom(options.filePath);
        const hasVisibleText = comOutput.pages.some((page) => page && page.trim().length > 0);
        if (hasVisibleText) {
          const com = comResultToParseResult(comOutput.pages, comOutput.pageCount, comOutput.warnings);
          return {
            success: true,
            fileType: "hwp",
            markdown: com.markdown,
            blocks: com.blocks,
            metadata: com.metadata,
            warnings: com.warnings
          };
        }
      } catch (comError) {
        // Best-effort retry: on any COM failure fall through to the regular result.
      }
    }
    const nonEmptyImages = images != null && images.length ? images : void 0;
    return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: nonEmptyImages };
  } catch (err) {
    let message;
    if (err instanceof Error) {
      message = err.message;
    } else {
      message = "HWP \uD30C\uC2F1 \uC2E4\uD328";
    }
    return { success: false, fileType: "hwp", error: message, code: _chunk5HWLDNT5cjs.classifyError.call(void 0, err) };
  }
}
|
|
4912
4982
|
async function parsePdf(buffer, options) {
|
|
4913
4983
|
let parsePdfDocument;
|
|
4914
4984
|
try {
|
|
4915
|
-
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-
|
|
4985
|
+
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-BYHUJ5GP.cjs")));
|
|
4916
4986
|
parsePdfDocument = mod.parsePdfDocument;
|
|
4917
|
-
} catch (
|
|
4987
|
+
} catch (e27) {
|
|
4918
4988
|
return {
|
|
4919
4989
|
success: false,
|
|
4920
4990
|
fileType: "pdf",
|
|
@@ -4927,7 +4997,7 @@ async function parsePdf(buffer, options) {
|
|
|
4927
4997
|
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
|
|
4928
4998
|
} catch (err) {
|
|
4929
4999
|
const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
|
|
4930
|
-
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
5000
|
+
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: _chunk5HWLDNT5cjs.classifyError.call(void 0, err), isImageBased };
|
|
4931
5001
|
}
|
|
4932
5002
|
}
|
|
4933
5003
|
async function parseXlsx(buffer, options) {
|
|
@@ -4935,15 +5005,15 @@ async function parseXlsx(buffer, options) {
|
|
|
4935
5005
|
const { markdown, blocks, metadata, warnings } = await parseXlsxDocument(buffer, options);
|
|
4936
5006
|
return { success: true, fileType: "xlsx", markdown, blocks, metadata, warnings };
|
|
4937
5007
|
} catch (err) {
|
|
4938
|
-
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
5008
|
+
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code: _chunk5HWLDNT5cjs.classifyError.call(void 0, err) };
|
|
4939
5009
|
}
|
|
4940
5010
|
}
|
|
4941
5011
|
/**
 * Parse a DOCX buffer into the library's result object.
 *
 * Delegates to `parseDocxDocument` and never rejects: any thrown value is
 * converted into a failure result whose `code` comes from `classifyError`.
 *
 * @param {ArrayBuffer} buffer - File contents, passed through unchanged.
 * @param {object} [options] - Forwarded to `parseDocxDocument`.
 * @returns {Promise<object>} `{ success: true, fileType: "docx", ... }` on success
 *   (with `images` set to `undefined` when absent or empty), otherwise
 *   `{ success: false, fileType: "docx", error, code }`.
 */
async function parseDocx(buffer, options) {
  try {
    const { markdown, blocks, metadata, outline, warnings, images } = await parseDocxDocument(buffer, options);
    const nonEmptyImages = images != null && images.length ? images : void 0;
    return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: nonEmptyImages };
  } catch (err) {
    let message;
    if (err instanceof Error) {
      message = err.message;
    } else {
      message = "DOCX \uD30C\uC2F1 \uC2E4\uD328";
    }
    return { success: false, fileType: "docx", error: message, code: _chunk5HWLDNT5cjs.classifyError.call(void 0, err) };
  }
}
|
|
4949
5019
|
async function parseHwpml(buffer, options) {
|
|
@@ -4951,16 +5021,16 @@ async function parseHwpml(buffer, options) {
|
|
|
4951
5021
|
const { markdown, blocks, metadata, outline, warnings } = parseHwpmlDocument(buffer, options);
|
|
4952
5022
|
return { success: true, fileType: "hwpml", markdown, blocks, metadata, outline, warnings };
|
|
4953
5023
|
} catch (err) {
|
|
4954
|
-
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
5024
|
+
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code: _chunk5HWLDNT5cjs.classifyError.call(void 0, err) };
|
|
4955
5025
|
}
|
|
4956
5026
|
}
|
|
4957
5027
|
async function fillForm(input, values, outputFormat = "markdown") {
|
|
4958
5028
|
let buffer;
|
|
4959
5029
|
if (typeof input === "string") {
|
|
4960
5030
|
const buf = await _promises.readFile.call(void 0, input);
|
|
4961
|
-
buffer =
|
|
5031
|
+
buffer = _chunk5HWLDNT5cjs.toArrayBuffer.call(void 0, buf);
|
|
4962
5032
|
} else if (Buffer.isBuffer(input)) {
|
|
4963
|
-
buffer =
|
|
5033
|
+
buffer = _chunk5HWLDNT5cjs.toArrayBuffer.call(void 0, input);
|
|
4964
5034
|
} else {
|
|
4965
5035
|
buffer = input;
|
|
4966
5036
|
}
|
|
@@ -4986,7 +5056,7 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
4986
5056
|
throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
|
|
4987
5057
|
}
|
|
4988
5058
|
const fill = fillFormFields(parsed.blocks, values);
|
|
4989
|
-
const markdown =
|
|
5059
|
+
const markdown = _chunk5HWLDNT5cjs.blocksToMarkdown.call(void 0, fill.blocks);
|
|
4990
5060
|
if (outputFormat === "hwpx") {
|
|
4991
5061
|
const hwpxBuffer = await markdownToHwpx(markdown);
|
|
4992
5062
|
return { output: hwpxBuffer, format: "hwpx", fill };
|
|
@@ -5017,5 +5087,5 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
5017
5087
|
|
|
5018
5088
|
|
|
5019
5089
|
|
|
5020
|
-
exports.VERSION =
|
|
5090
|
+
exports.VERSION = _chunk5HWLDNT5cjs.VERSION; exports.blocksToMarkdown = _chunk5HWLDNT5cjs.blocksToMarkdown; exports.compare = compare; exports.detectFormat = detectFormat; exports.detectZipFormat = detectZipFormat; exports.diffBlocks = diffBlocks; exports.extractFormFields = extractFormFields; exports.fillForm = fillForm; exports.fillFormFields = fillFormFields; exports.fillHwpx = fillHwpx; exports.isHwpxFile = isHwpxFile; exports.isLabelCell = isLabelCell; exports.isOldHwpFile = isOldHwpFile; exports.isPdfFile = isPdfFile; exports.isZipFile = isZipFile; exports.markdownToHwpx = markdownToHwpx; exports.parse = parse; exports.parseDocx = parseDocx; exports.parseHwp = parseHwp; exports.parseHwpml = parseHwpml; exports.parseHwpx = parseHwpx; exports.parsePdf = parsePdf; exports.parseXlsx = parseXlsx;
|
|
5021
5091
|
//# sourceMappingURL=index.cjs.map
|