kordoc 2.5.1 → 2.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-OCVWJSG7.js → chunk-24NKFRB4.js} +2 -2
- package/dist/{chunk-QEZ4CUF7.js → chunk-2CAJSQK5.js} +48 -18
- package/dist/chunk-2CAJSQK5.js.map +1 -0
- package/dist/{chunk-KO7DKAXW.js → chunk-NKKLA43G.js} +2 -2
- package/dist/{chunk-TTSFPEDM.cjs → chunk-Z65OQP3H.cjs} +2 -2
- package/dist/{chunk-TTSFPEDM.cjs.map → chunk-Z65OQP3H.cjs.map} +1 -1
- package/dist/cli.js +3 -3
- package/dist/index.cjs +129 -99
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +47 -17
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +3 -3
- package/dist/{parser-BOIVVDYI.cjs → parser-AZYPOKAR.cjs} +15 -15
- package/dist/{parser-BOIVVDYI.cjs.map → parser-AZYPOKAR.cjs.map} +1 -1
- package/dist/{parser-NZFDRZLS.js → parser-BQKQOIJU.js} +2 -2
- package/dist/{parser-DA3CGOZF.js → parser-FRROKAB7.js} +2 -2
- package/dist/{watch-HWN6Y6Q2.js → watch-ZJAUWUAE.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-QEZ4CUF7.js.map +0 -1
- /package/dist/{chunk-OCVWJSG7.js.map → chunk-24NKFRB4.js.map} +0 -0
- /package/dist/{chunk-KO7DKAXW.js.map → chunk-NKKLA43G.js.map} +0 -0
- /package/dist/{parser-NZFDRZLS.js.map → parser-BQKQOIJU.js.map} +0 -0
- /package/dist/{parser-DA3CGOZF.js.map → parser-FRROKAB7.js.map} +0 -0
- /package/dist/{watch-HWN6Y6Q2.js.map → watch-ZJAUWUAE.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
var
|
|
19
|
+
var _chunkZ65OQP3Hcjs = require('./chunk-Z65OQP3H.cjs');
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -193,7 +193,7 @@ var MAX_XML_DEPTH = 200;
|
|
|
193
193
|
function createXmlParser(warnings) {
|
|
194
194
|
return new (0, _xmldom.DOMParser)({
|
|
195
195
|
onError(level, msg) {
|
|
196
|
-
if (level === "fatalError") throw new (0,
|
|
196
|
+
if (level === "fatalError") throw new (0, _chunkZ65OQP3Hcjs.KordocError)(`XML \uD30C\uC2F1 \uC2E4\uD328: ${msg}`);
|
|
197
197
|
_optionalChain([warnings, 'optionalAccess', _2 => _2.push, 'call', _3 => _3({ code: "MALFORMED_XML", message: `XML ${level === "warn" ? "\uACBD\uACE0" : "\uC624\uB958"}: ${msg}` })]);
|
|
198
198
|
}
|
|
199
199
|
});
|
|
@@ -212,10 +212,10 @@ async function extractHwpxStyles(zip, decompressed) {
|
|
|
212
212
|
const xml = await file.async("text");
|
|
213
213
|
if (decompressed) {
|
|
214
214
|
decompressed.total += xml.length * 2;
|
|
215
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
215
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
216
216
|
}
|
|
217
217
|
const parser = createXmlParser();
|
|
218
|
-
const doc = parser.parseFromString(
|
|
218
|
+
const doc = parser.parseFromString(_chunkZ65OQP3Hcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
219
219
|
if (!doc.documentElement) continue;
|
|
220
220
|
parseCharProperties(doc, result.charProperties);
|
|
221
221
|
parseStyleElements(doc, result.styles);
|
|
@@ -277,7 +277,7 @@ function parseStyleElements(doc, map) {
|
|
|
277
277
|
}
|
|
278
278
|
}
|
|
279
279
|
async function parseHwpxDocument(buffer, options) {
|
|
280
|
-
|
|
280
|
+
_chunkZ65OQP3Hcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
|
|
281
281
|
let zip;
|
|
282
282
|
try {
|
|
283
283
|
zip = await _jszip2.default.loadAsync(buffer);
|
|
@@ -286,7 +286,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
286
286
|
}
|
|
287
287
|
const actualEntryCount = Object.keys(zip.files).length;
|
|
288
288
|
if (actualEntryCount > MAX_ZIP_ENTRIES) {
|
|
289
|
-
throw new (0,
|
|
289
|
+
throw new (0, _chunkZ65OQP3Hcjs.KordocError)("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
290
290
|
}
|
|
291
291
|
const manifestFile = zip.file("META-INF/manifest.xml");
|
|
292
292
|
if (manifestFile) {
|
|
@@ -298,7 +298,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
298
298
|
return comResultToParseResult(pages, pageCount, warnings2);
|
|
299
299
|
}
|
|
300
300
|
}
|
|
301
|
-
throw new (0,
|
|
301
|
+
throw new (0, _chunkZ65OQP3Hcjs.KordocError)("DRM \uC554\uD638\uD654\uB41C HWPX \uD30C\uC77C\uC785\uB2C8\uB2E4. Windows + \uD55C\uCEF4 \uC624\uD53C\uC2A4 \uC124\uCE58 \uC2DC \uC790\uB3D9 \uCD94\uCD9C\uB429\uB2C8\uB2E4.");
|
|
302
302
|
}
|
|
303
303
|
}
|
|
304
304
|
const decompressed = { total: 0 };
|
|
@@ -307,7 +307,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
307
307
|
const styleMap = await extractHwpxStyles(zip, decompressed);
|
|
308
308
|
const warnings = [];
|
|
309
309
|
const sectionPaths = await resolveSectionPaths(zip);
|
|
310
|
-
if (sectionPaths.length === 0) throw new (0,
|
|
310
|
+
if (sectionPaths.length === 0) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
311
311
|
metadata.pageCount = sectionPaths.length;
|
|
312
312
|
const pageFilter = _optionalChain([options, 'optionalAccess', _5 => _5.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sectionPaths.length) : null;
|
|
313
313
|
const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
|
|
@@ -321,19 +321,19 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
321
321
|
try {
|
|
322
322
|
const xml = await file.async("text");
|
|
323
323
|
decompressed.total += xml.length * 2;
|
|
324
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
324
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
325
325
|
blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, nestedTableCounter));
|
|
326
326
|
parsedSections++;
|
|
327
327
|
_optionalChain([options, 'optionalAccess', _6 => _6.onProgress, 'optionalCall', _7 => _7(parsedSections, totalTarget)]);
|
|
328
328
|
} catch (secErr) {
|
|
329
|
-
if (secErr instanceof
|
|
329
|
+
if (secErr instanceof _chunkZ65OQP3Hcjs.KordocError) throw secErr;
|
|
330
330
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
331
331
|
}
|
|
332
332
|
}
|
|
333
333
|
const images = await extractImagesFromZip(zip, blocks, decompressed, warnings);
|
|
334
334
|
detectHwpxHeadings(blocks, styleMap);
|
|
335
335
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
336
|
-
const markdown =
|
|
336
|
+
const markdown = _chunkZ65OQP3Hcjs.blocksToMarkdown.call(void 0, blocks);
|
|
337
337
|
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
338
338
|
}
|
|
339
339
|
function imageExtToMime(ext) {
|
|
@@ -397,13 +397,13 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
397
397
|
let found = false;
|
|
398
398
|
const allCandidates = resolvedPath ? [resolvedPath, ...candidates] : candidates;
|
|
399
399
|
for (const path of allCandidates) {
|
|
400
|
-
if (
|
|
400
|
+
if (_chunkZ65OQP3Hcjs.isPathTraversal.call(void 0, path)) continue;
|
|
401
401
|
const file = zip.file(path);
|
|
402
402
|
if (!file) continue;
|
|
403
403
|
try {
|
|
404
404
|
const data = await file.async("uint8array");
|
|
405
405
|
decompressed.total += data.length;
|
|
406
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
406
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
407
407
|
const actualPath = path;
|
|
408
408
|
const ext = actualPath.includes(".") ? actualPath.split(".").pop() || "png" : "png";
|
|
409
409
|
const mimeType = imageExtToMime(ext);
|
|
@@ -415,7 +415,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
415
415
|
found = true;
|
|
416
416
|
break;
|
|
417
417
|
} catch (err) {
|
|
418
|
-
if (err instanceof
|
|
418
|
+
if (err instanceof _chunkZ65OQP3Hcjs.KordocError) throw err;
|
|
419
419
|
}
|
|
420
420
|
}
|
|
421
421
|
if (!found) {
|
|
@@ -435,7 +435,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
435
435
|
const xml = await file.async("text");
|
|
436
436
|
if (decompressed) {
|
|
437
437
|
decompressed.total += xml.length * 2;
|
|
438
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
438
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
439
439
|
}
|
|
440
440
|
parseDublinCoreMetadata(xml, metadata);
|
|
441
441
|
if (metadata.title || metadata.author) return;
|
|
@@ -445,7 +445,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
445
445
|
}
|
|
446
446
|
function parseDublinCoreMetadata(xml, metadata) {
|
|
447
447
|
const parser = createXmlParser();
|
|
448
|
-
const doc = parser.parseFromString(
|
|
448
|
+
const doc = parser.parseFromString(_chunkZ65OQP3Hcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
449
449
|
if (!doc.documentElement) return;
|
|
450
450
|
const getText = (tagNames) => {
|
|
451
451
|
for (const tag of tagNames) {
|
|
@@ -505,7 +505,7 @@ function extractFromBrokenZip(buffer) {
|
|
|
505
505
|
}
|
|
506
506
|
const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
|
|
507
507
|
const name = new TextDecoder().decode(nameBytes);
|
|
508
|
-
if (
|
|
508
|
+
if (_chunkZ65OQP3Hcjs.isPathTraversal.call(void 0, name)) {
|
|
509
509
|
pos = fileStart + compSize;
|
|
510
510
|
continue;
|
|
511
511
|
}
|
|
@@ -523,15 +523,15 @@ function extractFromBrokenZip(buffer) {
|
|
|
523
523
|
continue;
|
|
524
524
|
}
|
|
525
525
|
totalDecompressed += content.length * 2;
|
|
526
|
-
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
526
|
+
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
|
|
527
527
|
sectionNum++;
|
|
528
528
|
blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, nestedTableCounter));
|
|
529
529
|
} catch (e6) {
|
|
530
530
|
continue;
|
|
531
531
|
}
|
|
532
532
|
}
|
|
533
|
-
if (blocks.length === 0) throw new (0,
|
|
534
|
-
const markdown =
|
|
533
|
+
if (blocks.length === 0) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
534
|
+
const markdown = _chunkZ65OQP3Hcjs.blocksToMarkdown.call(void 0, blocks);
|
|
535
535
|
return { markdown, blocks, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
536
536
|
}
|
|
537
537
|
async function resolveSectionPaths(zip) {
|
|
@@ -549,7 +549,7 @@ async function resolveSectionPaths(zip) {
|
|
|
549
549
|
}
|
|
550
550
|
function parseSectionPathsFromManifest(xml) {
|
|
551
551
|
const parser = createXmlParser();
|
|
552
|
-
const doc = parser.parseFromString(
|
|
552
|
+
const doc = parser.parseFromString(_chunkZ65OQP3Hcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
553
553
|
const items = doc.getElementsByTagName("opf:item");
|
|
554
554
|
const spine = doc.getElementsByTagName("opf:itemref");
|
|
555
555
|
const isSectionId = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
|
|
@@ -596,9 +596,9 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
596
596
|
let level = 0;
|
|
597
597
|
if (baseFontSize > 0 && _optionalChain([block, 'access', _16 => _16.style, 'optionalAccess', _17 => _17.fontSize])) {
|
|
598
598
|
const ratio = block.style.fontSize / baseFontSize;
|
|
599
|
-
if (ratio >=
|
|
600
|
-
else if (ratio >=
|
|
601
|
-
else if (ratio >=
|
|
599
|
+
if (ratio >= _chunkZ65OQP3Hcjs.HEADING_RATIO_H1) level = 1;
|
|
600
|
+
else if (ratio >= _chunkZ65OQP3Hcjs.HEADING_RATIO_H2) level = 2;
|
|
601
|
+
else if (ratio >= _chunkZ65OQP3Hcjs.HEADING_RATIO_H3) level = 3;
|
|
602
602
|
}
|
|
603
603
|
const compactText = text.replace(/\s+/g, "");
|
|
604
604
|
if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
|
|
@@ -623,13 +623,13 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
623
623
|
let nestedCols = 0;
|
|
624
624
|
for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
|
|
625
625
|
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
626
|
-
blocks.push({ type: "table", table:
|
|
626
|
+
blocks.push({ type: "table", table: _chunkZ65OQP3Hcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
627
627
|
if (parentTable.cell) {
|
|
628
628
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
629
629
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker;
|
|
630
630
|
}
|
|
631
631
|
} else {
|
|
632
|
-
const nestedText =
|
|
632
|
+
const nestedText = _chunkZ65OQP3Hcjs.convertTableToText.call(void 0, newTable.rows);
|
|
633
633
|
if (parentTable.cell) {
|
|
634
634
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
635
635
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker + "\n" + nestedText;
|
|
@@ -639,7 +639,7 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
639
639
|
}
|
|
640
640
|
function parseSectionXml(xml, styleMap, warnings, sectionNum, counter) {
|
|
641
641
|
const parser = createXmlParser(warnings);
|
|
642
|
-
const doc = parser.parseFromString(
|
|
642
|
+
const doc = parser.parseFromString(_chunkZ65OQP3Hcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
643
643
|
if (!doc.documentElement) return [];
|
|
644
644
|
const blocks = [];
|
|
645
645
|
const ctx = { styleMap, warnings, sectionNum, counter };
|
|
@@ -682,7 +682,7 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
682
682
|
if (tableStack.length > 0) {
|
|
683
683
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
684
684
|
} else {
|
|
685
|
-
blocks.push({ type: "table", table:
|
|
685
|
+
blocks.push({ type: "table", table: _chunkZ65OQP3Hcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
686
686
|
tableCtx = null;
|
|
687
687
|
}
|
|
688
688
|
} else {
|
|
@@ -722,8 +722,8 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
722
722
|
const cs = isNaN(rawCs) ? 1 : rawCs;
|
|
723
723
|
const rawRs = parseInt(el.getAttribute("rowSpan") || "1", 10);
|
|
724
724
|
const rs = isNaN(rawRs) ? 1 : rawRs;
|
|
725
|
-
tableCtx.cell.colSpan = clampSpan(cs,
|
|
726
|
-
tableCtx.cell.rowSpan = clampSpan(rs,
|
|
725
|
+
tableCtx.cell.colSpan = clampSpan(cs, _chunkZ65OQP3Hcjs.MAX_COLS);
|
|
726
|
+
tableCtx.cell.rowSpan = clampSpan(rs, _chunkZ65OQP3Hcjs.MAX_ROWS);
|
|
727
727
|
}
|
|
728
728
|
break;
|
|
729
729
|
case "p": {
|
|
@@ -781,7 +781,7 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth =
|
|
|
781
781
|
if (tableStack.length > 0) {
|
|
782
782
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
783
783
|
} else {
|
|
784
|
-
blocks.push({ type: "table", table:
|
|
784
|
+
blocks.push({ type: "table", table: _chunkZ65OQP3Hcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
785
785
|
tableCtx = null;
|
|
786
786
|
}
|
|
787
787
|
} else {
|
|
@@ -889,7 +889,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
889
889
|
case "hyperlink": {
|
|
890
890
|
const url = child.getAttribute("url") || child.getAttribute("href") || "";
|
|
891
891
|
if (url) {
|
|
892
|
-
const safe =
|
|
892
|
+
const safe = _chunkZ65OQP3Hcjs.sanitizeHref.call(void 0, url);
|
|
893
893
|
if (safe) href = safe;
|
|
894
894
|
}
|
|
895
895
|
walk(child);
|
|
@@ -1029,7 +1029,7 @@ function decompressStream(data) {
|
|
|
1029
1029
|
return _zlib.inflateRawSync.call(void 0, data, opts);
|
|
1030
1030
|
}
|
|
1031
1031
|
function parseFileHeader(data) {
|
|
1032
|
-
if (data.length < 40) throw new (0,
|
|
1032
|
+
if (data.length < 40) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("FileHeader\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 40\uBC14\uC774\uD2B8)");
|
|
1033
1033
|
const sig = data.subarray(0, 32).toString("utf8").replace(/\0+$/, "");
|
|
1034
1034
|
return {
|
|
1035
1035
|
signature: sig,
|
|
@@ -2048,7 +2048,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
2048
2048
|
lenientCfb = parseLenientCfb(buffer);
|
|
2049
2049
|
warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
|
|
2050
2050
|
} catch (e11) {
|
|
2051
|
-
throw new (0,
|
|
2051
|
+
throw new (0, _chunkZ65OQP3Hcjs.KordocError)("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
|
|
2052
2052
|
}
|
|
2053
2053
|
}
|
|
2054
2054
|
const findStream = (path) => {
|
|
@@ -2059,11 +2059,11 @@ function parseHwp5Document(buffer, options) {
|
|
|
2059
2059
|
return lenientCfb.findStream(path);
|
|
2060
2060
|
};
|
|
2061
2061
|
const headerData = findStream("/FileHeader");
|
|
2062
|
-
if (!headerData) throw new (0,
|
|
2062
|
+
if (!headerData) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
|
|
2063
2063
|
const header = parseFileHeader(headerData);
|
|
2064
|
-
if (header.signature !== "HWP Document File") throw new (0,
|
|
2065
|
-
if (header.flags & FLAG_ENCRYPTED) throw new (0,
|
|
2066
|
-
if (header.flags & FLAG_DRM) throw new (0,
|
|
2064
|
+
if (header.signature !== "HWP Document File") throw new (0, _chunkZ65OQP3Hcjs.KordocError)("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
|
|
2065
|
+
if (header.flags & FLAG_ENCRYPTED) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2066
|
+
if (header.flags & FLAG_DRM) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2067
2067
|
const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
|
|
2068
2068
|
const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
|
|
2069
2069
|
const metadata = {
|
|
@@ -2072,7 +2072,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
2072
2072
|
if (cfb) extractHwp5Metadata(cfb, metadata);
|
|
2073
2073
|
const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
|
|
2074
2074
|
const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
|
|
2075
|
-
if (sections.length === 0) throw new (0,
|
|
2075
|
+
if (sections.length === 0) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2076
2076
|
metadata.pageCount = sections.length;
|
|
2077
2077
|
const pageFilter = _optionalChain([options, 'optionalAccess', _22 => _22.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sections.length) : null;
|
|
2078
2078
|
const totalTarget = pageFilter ? pageFilter.size : sections.length;
|
|
@@ -2086,24 +2086,24 @@ function parseHwp5Document(buffer, options) {
|
|
|
2086
2086
|
const sectionData = sections[si];
|
|
2087
2087
|
const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
2088
2088
|
totalDecompressed += data.length;
|
|
2089
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2089
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2090
2090
|
const records = readRecords(data);
|
|
2091
2091
|
const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, nestedTableCounter);
|
|
2092
2092
|
blocks.push(...sectionBlocks);
|
|
2093
2093
|
parsedSections++;
|
|
2094
2094
|
_optionalChain([options, 'optionalAccess', _23 => _23.onProgress, 'optionalCall', _24 => _24(parsedSections, totalTarget)]);
|
|
2095
2095
|
} catch (secErr) {
|
|
2096
|
-
if (secErr instanceof
|
|
2096
|
+
if (secErr instanceof _chunkZ65OQP3Hcjs.KordocError) throw secErr;
|
|
2097
2097
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
2098
2098
|
}
|
|
2099
2099
|
}
|
|
2100
2100
|
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
2101
|
-
const flatBlocks =
|
|
2101
|
+
const flatBlocks = _chunkZ65OQP3Hcjs.flattenLayoutTables.call(void 0, blocks);
|
|
2102
2102
|
if (docInfo) {
|
|
2103
2103
|
detectHwp5Headings(flatBlocks, docInfo);
|
|
2104
2104
|
}
|
|
2105
2105
|
const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
2106
|
-
const markdown =
|
|
2106
|
+
const markdown = _chunkZ65OQP3Hcjs.blocksToMarkdown.call(void 0, flatBlocks);
|
|
2107
2107
|
return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
2108
2108
|
}
|
|
2109
2109
|
function parseDocInfoStream(cfb, compressed) {
|
|
@@ -2163,9 +2163,9 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2163
2163
|
let level = 0;
|
|
2164
2164
|
if (_optionalChain([block, 'access', _29 => _29.style, 'optionalAccess', _30 => _30.fontSize]) && baseFontSize > 0) {
|
|
2165
2165
|
const ratio = block.style.fontSize / baseFontSize;
|
|
2166
|
-
if (ratio >=
|
|
2167
|
-
else if (ratio >=
|
|
2168
|
-
else if (ratio >=
|
|
2166
|
+
if (ratio >= _chunkZ65OQP3Hcjs.HEADING_RATIO_H1) level = 1;
|
|
2167
|
+
else if (ratio >= _chunkZ65OQP3Hcjs.HEADING_RATIO_H2) level = 2;
|
|
2168
|
+
else if (ratio >= _chunkZ65OQP3Hcjs.HEADING_RATIO_H3) level = 3;
|
|
2169
2169
|
}
|
|
2170
2170
|
if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
|
|
2171
2171
|
if (level === 0) level = 2;
|
|
@@ -2250,7 +2250,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2250
2250
|
if (!raw) break;
|
|
2251
2251
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2252
2252
|
totalDecompressed += content.length;
|
|
2253
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2253
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2254
2254
|
sections.push({ idx: i, content });
|
|
2255
2255
|
}
|
|
2256
2256
|
if (sections.length === 0) {
|
|
@@ -2262,7 +2262,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2262
2262
|
if (raw) {
|
|
2263
2263
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2264
2264
|
totalDecompressed += content.length;
|
|
2265
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2265
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2266
2266
|
sections.push({ idx, content });
|
|
2267
2267
|
}
|
|
2268
2268
|
}
|
|
@@ -2279,7 +2279,7 @@ function findViewTextSectionsLenient(lcfb, compressed) {
|
|
|
2279
2279
|
try {
|
|
2280
2280
|
const content = decryptViewText(raw, compressed);
|
|
2281
2281
|
totalDecompressed += content.length;
|
|
2282
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2282
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkZ65OQP3Hcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2283
2283
|
sections.push({ idx: i, content });
|
|
2284
2284
|
} catch (e16) {
|
|
2285
2285
|
break;
|
|
@@ -2467,7 +2467,7 @@ function parseSection(records, docInfo, warnings, sectionNum, counter) {
|
|
|
2467
2467
|
if (url && blocks.length > 0) {
|
|
2468
2468
|
const lastBlock = blocks[blocks.length - 1];
|
|
2469
2469
|
if (lastBlock.type === "paragraph" && !lastBlock.href) {
|
|
2470
|
-
lastBlock.href = _nullishCoalesce(
|
|
2470
|
+
lastBlock.href = _nullishCoalesce(_chunkZ65OQP3Hcjs.sanitizeHref.call(void 0, url), () => ( void 0));
|
|
2471
2471
|
}
|
|
2472
2472
|
}
|
|
2473
2473
|
}
|
|
@@ -2585,8 +2585,8 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
2585
2585
|
if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
|
|
2586
2586
|
if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
|
|
2587
2587
|
if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
|
|
2588
|
-
rows = Math.min(rec.data.readUInt16LE(4),
|
|
2589
|
-
cols = Math.min(rec.data.readUInt16LE(6),
|
|
2588
|
+
rows = Math.min(rec.data.readUInt16LE(4), _chunkZ65OQP3Hcjs.MAX_ROWS);
|
|
2589
|
+
cols = Math.min(rec.data.readUInt16LE(6), _chunkZ65OQP3Hcjs.MAX_COLS);
|
|
2590
2590
|
}
|
|
2591
2591
|
if (rec.tagId === TAG_LIST_HEADER) {
|
|
2592
2592
|
const { cell, nextIdx } = parseCellBlock(records, i, tableLevel, counter);
|
|
@@ -2608,7 +2608,7 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
2608
2608
|
return { table: { rows, cols, cells: irCells, hasHeader: rows > 1 }, nextIdx: i };
|
|
2609
2609
|
}
|
|
2610
2610
|
const cellRows = arrangeCells(rows, cols, cells);
|
|
2611
|
-
return { table:
|
|
2611
|
+
return { table: _chunkZ65OQP3Hcjs.buildTable.call(void 0, cellRows), nextIdx: i };
|
|
2612
2612
|
}
|
|
2613
2613
|
function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
2614
2614
|
const rec = records[startIdx];
|
|
@@ -2623,8 +2623,8 @@ function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
|
2623
2623
|
rowAddr = rec.data.readUInt16LE(10);
|
|
2624
2624
|
const cs = rec.data.readUInt16LE(12);
|
|
2625
2625
|
const rs = rec.data.readUInt16LE(14);
|
|
2626
|
-
if (cs > 0) colSpan = Math.min(cs,
|
|
2627
|
-
if (rs > 0) rowSpan = Math.min(rs,
|
|
2626
|
+
if (cs > 0) colSpan = Math.min(cs, _chunkZ65OQP3Hcjs.MAX_COLS);
|
|
2627
|
+
if (rs > 0) rowSpan = Math.min(rs, _chunkZ65OQP3Hcjs.MAX_ROWS);
|
|
2628
2628
|
}
|
|
2629
2629
|
let i = startIdx + 1;
|
|
2630
2630
|
while (i < records.length) {
|
|
@@ -2740,7 +2740,7 @@ function getTextContent(el) {
|
|
|
2740
2740
|
return _nullishCoalesce(_optionalChain([el, 'access', _38 => _38.textContent, 'optionalAccess', _39 => _39.trim, 'call', _40 => _40()]), () => ( ""));
|
|
2741
2741
|
}
|
|
2742
2742
|
function parseXml(text) {
|
|
2743
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
2743
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkZ65OQP3Hcjs.stripDtd.call(void 0, text), "text/xml");
|
|
2744
2744
|
}
|
|
2745
2745
|
function parseSharedStrings(xml) {
|
|
2746
2746
|
const doc = parseXml(xml);
|
|
@@ -2884,7 +2884,7 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2884
2884
|
cellRows.push(row);
|
|
2885
2885
|
}
|
|
2886
2886
|
if (cellRows.length > 0) {
|
|
2887
|
-
const table =
|
|
2887
|
+
const table = _chunkZ65OQP3Hcjs.buildTable.call(void 0, cellRows);
|
|
2888
2888
|
if (table.rows > 0) {
|
|
2889
2889
|
blocks.push({ type: "table", table, pageNumber: sheetIndex + 1 });
|
|
2890
2890
|
}
|
|
@@ -2892,12 +2892,12 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2892
2892
|
return blocks;
|
|
2893
2893
|
}
|
|
2894
2894
|
async function parseXlsxDocument(buffer, options) {
|
|
2895
|
-
|
|
2895
|
+
_chunkZ65OQP3Hcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE3);
|
|
2896
2896
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
2897
2897
|
const warnings = [];
|
|
2898
2898
|
const workbookFile = zip.file("xl/workbook.xml");
|
|
2899
2899
|
if (!workbookFile) {
|
|
2900
|
-
throw new (0,
|
|
2900
|
+
throw new (0, _chunkZ65OQP3Hcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 XLSX \uD30C\uC77C: xl/workbook.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2901
2901
|
}
|
|
2902
2902
|
let sharedStrings = [];
|
|
2903
2903
|
const ssFile = zip.file("xl/sharedStrings.xml");
|
|
@@ -2906,7 +2906,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2906
2906
|
}
|
|
2907
2907
|
const sheets = parseWorkbook(await workbookFile.async("text"));
|
|
2908
2908
|
if (sheets.length === 0) {
|
|
2909
|
-
throw new (0,
|
|
2909
|
+
throw new (0, _chunkZ65OQP3Hcjs.KordocError)("XLSX \uD30C\uC77C\uC5D0 \uC2DC\uD2B8\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2910
2910
|
}
|
|
2911
2911
|
let relsMap = /* @__PURE__ */ new Map();
|
|
2912
2912
|
const relsFile = zip.file("xl/_rels/workbook.xml.rels");
|
|
@@ -2978,7 +2978,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2978
2978
|
} catch (e20) {
|
|
2979
2979
|
}
|
|
2980
2980
|
}
|
|
2981
|
-
const markdown =
|
|
2981
|
+
const markdown = _chunkZ65OQP3Hcjs.blocksToMarkdown.call(void 0, blocks);
|
|
2982
2982
|
return { markdown, blocks, metadata, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
2983
2983
|
}
|
|
2984
2984
|
|
|
@@ -3027,7 +3027,7 @@ function getAttr(el, localName3) {
|
|
|
3027
3027
|
return null;
|
|
3028
3028
|
}
|
|
3029
3029
|
function parseXml2(text) {
|
|
3030
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
3030
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkZ65OQP3Hcjs.stripDtd.call(void 0, text), "text/xml");
|
|
3031
3031
|
}
|
|
3032
3032
|
function parseStyles(xml) {
|
|
3033
3033
|
const doc = parseXml2(xml);
|
|
@@ -3320,12 +3320,12 @@ async function extractImages(zip, rels, doc) {
|
|
|
3320
3320
|
return { blocks, images };
|
|
3321
3321
|
}
|
|
3322
3322
|
async function parseDocxDocument(buffer, options) {
|
|
3323
|
-
|
|
3323
|
+
_chunkZ65OQP3Hcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE4);
|
|
3324
3324
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
3325
3325
|
const warnings = [];
|
|
3326
3326
|
const docFile = zip.file("word/document.xml");
|
|
3327
3327
|
if (!docFile) {
|
|
3328
|
-
throw new (0,
|
|
3328
|
+
throw new (0, _chunkZ65OQP3Hcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 DOCX \uD30C\uC77C: word/document.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3329
3329
|
}
|
|
3330
3330
|
let rels = /* @__PURE__ */ new Map();
|
|
3331
3331
|
const relsFile = zip.file("word/_rels/document.xml.rels");
|
|
@@ -3360,7 +3360,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3360
3360
|
const doc = parseXml2(docXml);
|
|
3361
3361
|
const body = findElements(doc, "body");
|
|
3362
3362
|
if (body.length === 0) {
|
|
3363
|
-
throw new (0,
|
|
3363
|
+
throw new (0, _chunkZ65OQP3Hcjs.KordocError)("DOCX \uBCF8\uBB38(w:body)\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3364
3364
|
}
|
|
3365
3365
|
const blocks = [];
|
|
3366
3366
|
const bodyEl = body[0];
|
|
@@ -3400,7 +3400,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3400
3400
|
}
|
|
3401
3401
|
}
|
|
3402
3402
|
const outline = blocks.filter((b) => b.type === "heading").map((b) => ({ level: _nullishCoalesce(b.level, () => ( 2)), text: _nullishCoalesce(b.text, () => ( "")) }));
|
|
3403
|
-
const markdown =
|
|
3403
|
+
const markdown = _chunkZ65OQP3Hcjs.blocksToMarkdown.call(void 0, blocks);
|
|
3404
3404
|
return {
|
|
3405
3405
|
markdown,
|
|
3406
3406
|
blocks,
|
|
@@ -3423,7 +3423,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3423
3423
|
}
|
|
3424
3424
|
const text = new TextDecoder("utf-8").decode(buffer).replace(/^\uFEFF/, "");
|
|
3425
3425
|
const normalized = text.replace(/ /g, " ");
|
|
3426
|
-
const xml =
|
|
3426
|
+
const xml = _chunkZ65OQP3Hcjs.stripDtd.call(void 0, normalized);
|
|
3427
3427
|
const warnings = [];
|
|
3428
3428
|
const parser = new (0, _xmldom.DOMParser)({
|
|
3429
3429
|
onError: (_level, msg) => {
|
|
@@ -3463,7 +3463,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3463
3463
|
parseSection2(el, blocks, paraShapeMap, sectionIdx, warnings);
|
|
3464
3464
|
}
|
|
3465
3465
|
const outline = blocks.filter((b) => b.type === "heading" && b.text).map((b) => ({ level: _nullishCoalesce(b.level, () => ( 1)), text: b.text, pageNumber: b.pageNumber }));
|
|
3466
|
-
const markdown =
|
|
3466
|
+
const markdown = _chunkZ65OQP3Hcjs.blocksToMarkdown.call(void 0, blocks);
|
|
3467
3467
|
return {
|
|
3468
3468
|
markdown,
|
|
3469
3469
|
blocks,
|
|
@@ -3605,7 +3605,7 @@ function parseTable2(el, blocks, paraShapeMap, sectionNum, warnings) {
|
|
|
3605
3605
|
const cellRows = grid.map(
|
|
3606
3606
|
(row) => row.map((cell) => _nullishCoalesce(cell, () => ( { text: "", colSpan: 1, rowSpan: 1 })))
|
|
3607
3607
|
);
|
|
3608
|
-
const table =
|
|
3608
|
+
const table = _chunkZ65OQP3Hcjs.buildTable.call(void 0, cellRows);
|
|
3609
3609
|
blocks.push({ type: "table", table, pageNumber: sectionNum });
|
|
3610
3610
|
}
|
|
3611
3611
|
function extractCellText(cellEl) {
|
|
@@ -4006,7 +4006,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
4006
4006
|
const normalizedValues = normalizeValues(values);
|
|
4007
4007
|
const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
|
|
4008
4008
|
if (sectionFiles.length === 0) {
|
|
4009
|
-
throw new (0,
|
|
4009
|
+
throw new (0, _chunkZ65OQP3Hcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
4010
4010
|
}
|
|
4011
4011
|
const xmlParser = new (0, _xmldom.DOMParser)();
|
|
4012
4012
|
const xmlSerializer = new (0, _xmldom.XMLSerializer)();
|
|
@@ -4014,7 +4014,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
4014
4014
|
const zipEntry = zip.file(sectionPath);
|
|
4015
4015
|
if (!zipEntry) continue;
|
|
4016
4016
|
const rawXml = await zipEntry.async("text");
|
|
4017
|
-
const doc = xmlParser.parseFromString(
|
|
4017
|
+
const doc = xmlParser.parseFromString(_chunkZ65OQP3Hcjs.stripDtd.call(void 0, rawXml), "text/xml");
|
|
4018
4018
|
if (!doc.documentElement) continue;
|
|
4019
4019
|
let modified = false;
|
|
4020
4020
|
const tables = findAllElements(doc.documentElement, "tbl");
|
|
@@ -4519,8 +4519,9 @@ function generateManifest() {
|
|
|
4519
4519
|
function charPr(id, height, bold, italic, fontId = 0) {
|
|
4520
4520
|
const boldAttr = bold ? ` bold="1"` : "";
|
|
4521
4521
|
const italicAttr = italic ? ` italic="1"` : "";
|
|
4522
|
+
const effFont = bold ? 2 : fontId;
|
|
4522
4523
|
return ` <hh:charPr id="${id}" height="${height}" textColor="#000000" shadeColor="none" useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="0"${boldAttr}${italicAttr}>
|
|
4523
|
-
<hh:fontRef hangul="${
|
|
4524
|
+
<hh:fontRef hangul="${effFont}" latin="${effFont}" hanja="${effFont}" japanese="${effFont}" other="${effFont}" symbol="${effFont}" user="${effFont}"/>
|
|
4524
4525
|
<hh:ratio hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>
|
|
4525
4526
|
<hh:spacing hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
|
|
4526
4527
|
<hh:relSz hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>
|
|
@@ -4545,21 +4546,27 @@ function generateHeaderXml() {
|
|
|
4545
4546
|
<hh:beginNum page="1" footnote="1" endnote="1" pic="1" tbl="1" equation="1"/>
|
|
4546
4547
|
<hh:refList>
|
|
4547
4548
|
<hh:fontfaces itemCnt="7">
|
|
4548
|
-
<hh:fontface lang="HANGUL" fontCnt="
|
|
4549
|
+
<hh:fontface lang="HANGUL" fontCnt="3">
|
|
4549
4550
|
<hh:font id="0" face="\uD568\uCD08\uB86C\uBC14\uD0D5" type="TTF" isEmbedded="0">
|
|
4550
4551
|
<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="4" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
|
|
4551
4552
|
</hh:font>
|
|
4552
4553
|
<hh:font id="1" face="\uD568\uCD08\uB86C\uB3CB\uC6C0" type="TTF" isEmbedded="0">
|
|
4553
4554
|
<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="4" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
|
|
4554
4555
|
</hh:font>
|
|
4556
|
+
<hh:font id="2" face="HY\uACAC\uACE0\uB515" type="TTF" isEmbedded="0">
|
|
4557
|
+
<hh:typeInfo familyType="FCAT_GOTHIC" weight="9" proportion="0" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>
|
|
4558
|
+
</hh:font>
|
|
4555
4559
|
</hh:fontface>
|
|
4556
|
-
<hh:fontface lang="LATIN" fontCnt="
|
|
4560
|
+
<hh:fontface lang="LATIN" fontCnt="3">
|
|
4557
4561
|
<hh:font id="0" face="Times New Roman" type="TTF" isEmbedded="0">
|
|
4558
4562
|
<hh:typeInfo familyType="FCAT_OLDSTYLE" weight="5" proportion="4" contrast="2" strokeVariation="0" armStyle="0" letterform="0" midline="0" xHeight="4"/>
|
|
4559
4563
|
</hh:font>
|
|
4560
4564
|
<hh:font id="1" face="Consolas" type="TTF" isEmbedded="0">
|
|
4561
4565
|
<hh:typeInfo familyType="FCAT_MODERN" weight="5" proportion="0" contrast="0" strokeVariation="0" armStyle="0" letterform="0" midline="0" xHeight="0"/>
|
|
4562
4566
|
</hh:font>
|
|
4567
|
+
<hh:font id="2" face="Arial Black" type="TTF" isEmbedded="0">
|
|
4568
|
+
<hh:typeInfo familyType="FCAT_GOTHIC" weight="9" proportion="0" contrast="0" strokeVariation="0" armStyle="0" letterform="0" midline="0" xHeight="0"/>
|
|
4569
|
+
</hh:font>
|
|
4563
4570
|
</hh:fontface>
|
|
4564
4571
|
<hh:fontface lang="HANJA" fontCnt="1">
|
|
4565
4572
|
<hh:font id="0" face="\uD568\uCD08\uB86C\uBC14\uD0D5" type="TTF" isEmbedded="0">
|
|
@@ -4591,21 +4598,21 @@ function generateHeaderXml() {
|
|
|
4591
4598
|
<hh:borderFill id="0" threeD="0" shadow="0" centerLine="0" breakCellSeparateLine="0">
|
|
4592
4599
|
<hh:slash type="NONE" Crooked="0" isCounter="0"/>
|
|
4593
4600
|
<hh:backSlash type="NONE" Crooked="0" isCounter="0"/>
|
|
4594
|
-
<hh:leftBorder type="NONE" width="0.
|
|
4595
|
-
<hh:rightBorder type="NONE" width="0.
|
|
4596
|
-
<hh:topBorder type="NONE" width="0.
|
|
4597
|
-
<hh:bottomBorder type="NONE" width="0.
|
|
4598
|
-
<hh:diagonal type="NONE" width="0.
|
|
4601
|
+
<hh:leftBorder type="NONE" width="0.1 mm" color="#000000"/>
|
|
4602
|
+
<hh:rightBorder type="NONE" width="0.1 mm" color="#000000"/>
|
|
4603
|
+
<hh:topBorder type="NONE" width="0.1 mm" color="#000000"/>
|
|
4604
|
+
<hh:bottomBorder type="NONE" width="0.1 mm" color="#000000"/>
|
|
4605
|
+
<hh:diagonal type="NONE" width="0.1 mm" color="#000000"/>
|
|
4599
4606
|
<hh:fillInfo/>
|
|
4600
4607
|
</hh:borderFill>
|
|
4601
4608
|
<hh:borderFill id="1" threeD="0" shadow="0" centerLine="0" breakCellSeparateLine="0">
|
|
4602
4609
|
<hh:slash type="NONE" Crooked="0" isCounter="0"/>
|
|
4603
4610
|
<hh:backSlash type="NONE" Crooked="0" isCounter="0"/>
|
|
4604
|
-
<hh:leftBorder type="SOLID" width="0.
|
|
4605
|
-
<hh:rightBorder type="SOLID" width="0.
|
|
4606
|
-
<hh:topBorder type="SOLID" width="0.
|
|
4607
|
-
<hh:bottomBorder type="SOLID" width="0.
|
|
4608
|
-
<hh:diagonal type="NONE" width="0.
|
|
4611
|
+
<hh:leftBorder type="SOLID" width="0.12 mm" color="#000000"/>
|
|
4612
|
+
<hh:rightBorder type="SOLID" width="0.12 mm" color="#000000"/>
|
|
4613
|
+
<hh:topBorder type="SOLID" width="0.12 mm" color="#000000"/>
|
|
4614
|
+
<hh:bottomBorder type="SOLID" width="0.12 mm" color="#000000"/>
|
|
4615
|
+
<hh:diagonal type="NONE" width="0.1 mm" color="#000000"/>
|
|
4609
4616
|
<hh:fillInfo/>
|
|
4610
4617
|
</hh:borderFill>
|
|
4611
4618
|
</hh:borderFills>
|
|
@@ -4672,8 +4679,16 @@ function generateTable(rows) {
|
|
|
4672
4679
|
function blocksToSectionXml(blocks) {
|
|
4673
4680
|
const paraXmls = [];
|
|
4674
4681
|
let isFirst = true;
|
|
4682
|
+
const orderedCounters = {};
|
|
4683
|
+
let prevWasOrdered = false;
|
|
4675
4684
|
for (const block of blocks) {
|
|
4676
4685
|
let xml = "";
|
|
4686
|
+
if (block.type !== "list_item" || !block.ordered) {
|
|
4687
|
+
if (prevWasOrdered) {
|
|
4688
|
+
for (const k of Object.keys(orderedCounters)) delete orderedCounters[+k];
|
|
4689
|
+
}
|
|
4690
|
+
prevWasOrdered = false;
|
|
4691
|
+
}
|
|
4677
4692
|
switch (block.type) {
|
|
4678
4693
|
case "heading": {
|
|
4679
4694
|
const pId = headingParaPrId(block.level || 1);
|
|
@@ -4693,8 +4708,23 @@ function blocksToSectionXml(blocks) {
|
|
|
4693
4708
|
xml = generateParagraph(block.text || "", PARA_QUOTE);
|
|
4694
4709
|
break;
|
|
4695
4710
|
case "list_item": {
|
|
4696
|
-
const
|
|
4697
|
-
|
|
4711
|
+
const indent = block.indent || 0;
|
|
4712
|
+
let marker;
|
|
4713
|
+
if (block.ordered) {
|
|
4714
|
+
orderedCounters[indent] = (orderedCounters[indent] || 0) + 1;
|
|
4715
|
+
for (const k of Object.keys(orderedCounters)) {
|
|
4716
|
+
if (+k > indent) delete orderedCounters[+k];
|
|
4717
|
+
}
|
|
4718
|
+
marker = `${orderedCounters[indent]}. `;
|
|
4719
|
+
prevWasOrdered = true;
|
|
4720
|
+
} else {
|
|
4721
|
+
marker = "\xB7 ";
|
|
4722
|
+
if (prevWasOrdered) {
|
|
4723
|
+
for (const k of Object.keys(orderedCounters)) delete orderedCounters[+k];
|
|
4724
|
+
}
|
|
4725
|
+
prevWasOrdered = false;
|
|
4726
|
+
}
|
|
4727
|
+
const indentPrefix = " ".repeat(indent);
|
|
4698
4728
|
xml = generateParagraph(indentPrefix + marker + (block.text || ""), PARA_LIST);
|
|
4699
4729
|
break;
|
|
4700
4730
|
}
|
|
@@ -4915,13 +4945,13 @@ async function parse(input, options) {
|
|
|
4915
4945
|
if (typeof input === "string") {
|
|
4916
4946
|
try {
|
|
4917
4947
|
const buf = await _promises.readFile.call(void 0, input);
|
|
4918
|
-
buffer =
|
|
4948
|
+
buffer = _chunkZ65OQP3Hcjs.toArrayBuffer.call(void 0, buf);
|
|
4919
4949
|
} catch (err) {
|
|
4920
4950
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
4921
4951
|
return { success: false, fileType: "unknown", error: msg, code: "PARSE_ERROR" };
|
|
4922
4952
|
}
|
|
4923
4953
|
} else if (Buffer.isBuffer(input)) {
|
|
4924
|
-
buffer =
|
|
4954
|
+
buffer = _chunkZ65OQP3Hcjs.toArrayBuffer.call(void 0, input);
|
|
4925
4955
|
} else {
|
|
4926
4956
|
buffer = input;
|
|
4927
4957
|
}
|
|
@@ -4951,7 +4981,7 @@ async function parseHwpx(buffer, options) {
|
|
|
4951
4981
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseHwpxDocument(buffer, options);
|
|
4952
4982
|
return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _80 => _80.length]) ? images : void 0 };
|
|
4953
4983
|
} catch (err) {
|
|
4954
|
-
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4984
|
+
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZ65OQP3Hcjs.classifyError.call(void 0, err) };
|
|
4955
4985
|
}
|
|
4956
4986
|
}
|
|
4957
4987
|
async function parseHwp(buffer, options) {
|
|
@@ -4976,13 +5006,13 @@ async function parseHwp(buffer, options) {
|
|
|
4976
5006
|
}
|
|
4977
5007
|
return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _82 => _82.length]) ? images : void 0 };
|
|
4978
5008
|
} catch (err) {
|
|
4979
|
-
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
5009
|
+
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZ65OQP3Hcjs.classifyError.call(void 0, err) };
|
|
4980
5010
|
}
|
|
4981
5011
|
}
|
|
4982
5012
|
async function parsePdf(buffer, options) {
|
|
4983
5013
|
let parsePdfDocument;
|
|
4984
5014
|
try {
|
|
4985
|
-
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-
|
|
5015
|
+
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-AZYPOKAR.cjs")));
|
|
4986
5016
|
parsePdfDocument = mod.parsePdfDocument;
|
|
4987
5017
|
} catch (e27) {
|
|
4988
5018
|
return {
|
|
@@ -4997,7 +5027,7 @@ async function parsePdf(buffer, options) {
|
|
|
4997
5027
|
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
|
|
4998
5028
|
} catch (err) {
|
|
4999
5029
|
const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
|
|
5000
|
-
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
5030
|
+
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZ65OQP3Hcjs.classifyError.call(void 0, err), isImageBased };
|
|
5001
5031
|
}
|
|
5002
5032
|
}
|
|
5003
5033
|
async function parseXlsx(buffer, options) {
|
|
@@ -5005,7 +5035,7 @@ async function parseXlsx(buffer, options) {
|
|
|
5005
5035
|
const { markdown, blocks, metadata, warnings } = await parseXlsxDocument(buffer, options);
|
|
5006
5036
|
return { success: true, fileType: "xlsx", markdown, blocks, metadata, warnings };
|
|
5007
5037
|
} catch (err) {
|
|
5008
|
-
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
5038
|
+
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZ65OQP3Hcjs.classifyError.call(void 0, err) };
|
|
5009
5039
|
}
|
|
5010
5040
|
}
|
|
5011
5041
|
async function parseDocx(buffer, options) {
|
|
@@ -5013,7 +5043,7 @@ async function parseDocx(buffer, options) {
|
|
|
5013
5043
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseDocxDocument(buffer, options);
|
|
5014
5044
|
return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _83 => _83.length]) ? images : void 0 };
|
|
5015
5045
|
} catch (err) {
|
|
5016
|
-
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
5046
|
+
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZ65OQP3Hcjs.classifyError.call(void 0, err) };
|
|
5017
5047
|
}
|
|
5018
5048
|
}
|
|
5019
5049
|
async function parseHwpml(buffer, options) {
|
|
@@ -5021,16 +5051,16 @@ async function parseHwpml(buffer, options) {
|
|
|
5021
5051
|
const { markdown, blocks, metadata, outline, warnings } = parseHwpmlDocument(buffer, options);
|
|
5022
5052
|
return { success: true, fileType: "hwpml", markdown, blocks, metadata, outline, warnings };
|
|
5023
5053
|
} catch (err) {
|
|
5024
|
-
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
5054
|
+
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code: _chunkZ65OQP3Hcjs.classifyError.call(void 0, err) };
|
|
5025
5055
|
}
|
|
5026
5056
|
}
|
|
5027
5057
|
async function fillForm(input, values, outputFormat = "markdown") {
|
|
5028
5058
|
let buffer;
|
|
5029
5059
|
if (typeof input === "string") {
|
|
5030
5060
|
const buf = await _promises.readFile.call(void 0, input);
|
|
5031
|
-
buffer =
|
|
5061
|
+
buffer = _chunkZ65OQP3Hcjs.toArrayBuffer.call(void 0, buf);
|
|
5032
5062
|
} else if (Buffer.isBuffer(input)) {
|
|
5033
|
-
buffer =
|
|
5063
|
+
buffer = _chunkZ65OQP3Hcjs.toArrayBuffer.call(void 0, input);
|
|
5034
5064
|
} else {
|
|
5035
5065
|
buffer = input;
|
|
5036
5066
|
}
|
|
@@ -5056,7 +5086,7 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
5056
5086
|
throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
|
|
5057
5087
|
}
|
|
5058
5088
|
const fill = fillFormFields(parsed.blocks, values);
|
|
5059
|
-
const markdown =
|
|
5089
|
+
const markdown = _chunkZ65OQP3Hcjs.blocksToMarkdown.call(void 0, fill.blocks);
|
|
5060
5090
|
if (outputFormat === "hwpx") {
|
|
5061
5091
|
const hwpxBuffer = await markdownToHwpx(markdown);
|
|
5062
5092
|
return { output: hwpxBuffer, format: "hwpx", fill };
|
|
@@ -5087,5 +5117,5 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
5087
5117
|
|
|
5088
5118
|
|
|
5089
5119
|
|
|
5090
|
-
exports.VERSION =
|
|
5120
|
+
exports.VERSION = _chunkZ65OQP3Hcjs.VERSION; exports.blocksToMarkdown = _chunkZ65OQP3Hcjs.blocksToMarkdown; exports.compare = compare; exports.detectFormat = detectFormat; exports.detectZipFormat = detectZipFormat; exports.diffBlocks = diffBlocks; exports.extractFormFields = extractFormFields; exports.fillForm = fillForm; exports.fillFormFields = fillFormFields; exports.fillHwpx = fillHwpx; exports.isHwpxFile = isHwpxFile; exports.isLabelCell = isLabelCell; exports.isOldHwpFile = isOldHwpFile; exports.isPdfFile = isPdfFile; exports.isZipFile = isZipFile; exports.markdownToHwpx = markdownToHwpx; exports.parse = parse; exports.parseDocx = parseDocx; exports.parseHwp = parseHwp; exports.parseHwpml = parseHwpml; exports.parseHwpx = parseHwpx; exports.parsePdf = parsePdf; exports.parseXlsx = parseXlsx;
|
|
5091
5121
|
//# sourceMappingURL=index.cjs.map
|