kordoc 2.8.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +469 -450
- package/dist/{chunk-3QA624ON.js → chunk-M24KMDAR.js} +6 -6
- package/dist/chunk-M24KMDAR.js.map +1 -0
- package/dist/{chunk-5CJGKKMZ.js → chunk-MEPHGCPQ.js} +1 -1
- package/dist/chunk-MEPHGCPQ.js.map +1 -0
- package/dist/chunk-MOL7MDBG.js +0 -0
- package/dist/chunk-MUOQXDZ4.cjs.map +1 -1
- package/dist/{chunk-HXWPJPRO.cjs → chunk-QB7CS534.cjs} +2 -2
- package/dist/chunk-QB7CS534.cjs.map +1 -0
- package/dist/{chunk-DLQY6FJH.js → chunk-RXZLTACX.js} +2 -2
- package/dist/chunk-RXZLTACX.js.map +1 -0
- package/dist/{chunk-XSF3N6GU.js → chunk-SJ5TPMBT.js} +2 -2
- package/dist/chunk-SJ5TPMBT.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/cli.js.map +1 -1
- package/dist/{detect-PJZMUL2Z.js → detect-RI2MQ33K.js} +2 -2
- package/dist/formula-JCNF43NE.js +0 -0
- package/dist/formula-XGG6ZP42.cjs.map +1 -1
- package/dist/index.cjs +99 -99
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +28 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.js +4 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +5 -5
- package/dist/mcp.js.map +1 -1
- package/dist/page-range-3C7UGGEK.cjs.map +1 -1
- package/dist/page-range-737B4EZW.js +0 -0
- package/dist/{parser-LKF6PGPD.cjs → parser-EL5YETUA.cjs} +159 -19
- package/dist/parser-EL5YETUA.cjs.map +1 -0
- package/dist/{parser-ZQQM6J7T.js → parser-OMPBVEFU.js} +146 -6
- package/dist/parser-OMPBVEFU.js.map +1 -0
- package/dist/{parser-UCO6WPUW.js → parser-XBYGROQB.js} +146 -6
- package/dist/parser-XBYGROQB.js.map +1 -0
- package/dist/{provider-WPIYEALY.js → provider-2SEHU2FM.js} +1 -1
- package/dist/provider-2SEHU2FM.js.map +1 -0
- package/dist/{provider-7H4CPZYS.js → provider-AKROB7WQ.js} +1 -1
- package/dist/provider-AKROB7WQ.js.map +1 -0
- package/dist/{provider-YN2SSK4X.cjs → provider-SNONEZNW.cjs} +1 -1
- package/dist/provider-SNONEZNW.cjs.map +1 -0
- package/dist/setup-57FB3LSP.js +0 -0
- package/dist/{watch-MRHNFJPC.js → watch-ULLLK7ID.js} +4 -4
- package/dist/watch-ULLLK7ID.js.map +1 -0
- package/package.json +98 -98
- package/dist/chunk-3QA624ON.js.map +0 -1
- package/dist/chunk-5CJGKKMZ.js.map +0 -1
- package/dist/chunk-DLQY6FJH.js.map +0 -1
- package/dist/chunk-HXWPJPRO.cjs.map +0 -1
- package/dist/chunk-XSF3N6GU.js.map +0 -1
- package/dist/parser-LKF6PGPD.cjs.map +0 -1
- package/dist/parser-UCO6WPUW.js.map +0 -1
- package/dist/parser-ZQQM6J7T.js.map +0 -1
- package/dist/provider-7H4CPZYS.js.map +0 -1
- package/dist/provider-WPIYEALY.js.map +0 -1
- package/dist/provider-YN2SSK4X.cjs.map +0 -1
- package/dist/watch-MRHNFJPC.js.map +0 -1
- /package/dist/{detect-PJZMUL2Z.js.map → detect-RI2MQ33K.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
var
|
|
19
|
+
var _chunkQB7CS534cjs = require('./chunk-QB7CS534.cjs');
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -821,7 +821,7 @@ var MAX_XML_DEPTH = 200;
|
|
|
821
821
|
function createXmlParser(warnings) {
|
|
822
822
|
return new (0, _xmldom.DOMParser)({
|
|
823
823
|
onError(level, msg) {
|
|
824
|
-
if (level === "fatalError") throw new (0,
|
|
824
|
+
if (level === "fatalError") throw new (0, _chunkQB7CS534cjs.KordocError)(`XML \uD30C\uC2F1 \uC2E4\uD328: ${msg}`);
|
|
825
825
|
_optionalChain([warnings, 'optionalAccess', _2 => _2.push, 'call', _3 => _3({ code: "MALFORMED_XML", message: `XML ${level === "warn" ? "\uACBD\uACE0" : "\uC624\uB958"}: ${msg}` })]);
|
|
826
826
|
}
|
|
827
827
|
});
|
|
@@ -840,10 +840,10 @@ async function extractHwpxStyles(zip, decompressed) {
|
|
|
840
840
|
const xml = await file.async("text");
|
|
841
841
|
if (decompressed) {
|
|
842
842
|
decompressed.total += xml.length * 2;
|
|
843
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
843
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkQB7CS534cjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
844
844
|
}
|
|
845
845
|
const parser = createXmlParser();
|
|
846
|
-
const doc = parser.parseFromString(
|
|
846
|
+
const doc = parser.parseFromString(_chunkQB7CS534cjs.stripDtd.call(void 0, xml), "text/xml");
|
|
847
847
|
if (!doc.documentElement) continue;
|
|
848
848
|
parseCharProperties(doc, result.charProperties);
|
|
849
849
|
parseStyleElements(doc, result.styles);
|
|
@@ -905,7 +905,7 @@ function parseStyleElements(doc, map) {
|
|
|
905
905
|
}
|
|
906
906
|
}
|
|
907
907
|
async function parseHwpxDocument(buffer, options) {
|
|
908
|
-
|
|
908
|
+
_chunkQB7CS534cjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
|
|
909
909
|
let zip;
|
|
910
910
|
try {
|
|
911
911
|
zip = await _jszip2.default.loadAsync(buffer);
|
|
@@ -914,7 +914,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
914
914
|
}
|
|
915
915
|
const actualEntryCount = Object.keys(zip.files).length;
|
|
916
916
|
if (actualEntryCount > MAX_ZIP_ENTRIES) {
|
|
917
|
-
throw new (0,
|
|
917
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
918
918
|
}
|
|
919
919
|
const manifestFile = zip.file("META-INF/manifest.xml");
|
|
920
920
|
if (manifestFile) {
|
|
@@ -926,7 +926,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
926
926
|
return comResultToParseResult(pages, pageCount, warnings2);
|
|
927
927
|
}
|
|
928
928
|
}
|
|
929
|
-
throw new (0,
|
|
929
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("DRM \uC554\uD638\uD654\uB41C HWPX \uD30C\uC77C\uC785\uB2C8\uB2E4. Windows + \uD55C\uCEF4 \uC624\uD53C\uC2A4 \uC124\uCE58 \uC2DC \uC790\uB3D9 \uCD94\uCD9C\uB429\uB2C8\uB2E4.");
|
|
930
930
|
}
|
|
931
931
|
}
|
|
932
932
|
const decompressed = { total: 0 };
|
|
@@ -935,7 +935,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
935
935
|
const styleMap = await extractHwpxStyles(zip, decompressed);
|
|
936
936
|
const warnings = [];
|
|
937
937
|
const sectionPaths = await resolveSectionPaths(zip);
|
|
938
|
-
if (sectionPaths.length === 0) throw new (0,
|
|
938
|
+
if (sectionPaths.length === 0) throw new (0, _chunkQB7CS534cjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
939
939
|
metadata.pageCount = sectionPaths.length;
|
|
940
940
|
const pageFilter = _optionalChain([options, 'optionalAccess', _5 => _5.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sectionPaths.length) : null;
|
|
941
941
|
const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
|
|
@@ -949,19 +949,19 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
949
949
|
try {
|
|
950
950
|
const xml = await file.async("text");
|
|
951
951
|
decompressed.total += xml.length * 2;
|
|
952
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
952
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkQB7CS534cjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
953
953
|
blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, nestedTableCounter));
|
|
954
954
|
parsedSections++;
|
|
955
955
|
_optionalChain([options, 'optionalAccess', _6 => _6.onProgress, 'optionalCall', _7 => _7(parsedSections, totalTarget)]);
|
|
956
956
|
} catch (secErr) {
|
|
957
|
-
if (secErr instanceof
|
|
957
|
+
if (secErr instanceof _chunkQB7CS534cjs.KordocError) throw secErr;
|
|
958
958
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
959
959
|
}
|
|
960
960
|
}
|
|
961
961
|
const images = await extractImagesFromZip(zip, blocks, decompressed, warnings);
|
|
962
962
|
detectHwpxHeadings(blocks, styleMap);
|
|
963
963
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
964
|
-
const markdown =
|
|
964
|
+
const markdown = _chunkQB7CS534cjs.blocksToMarkdown.call(void 0, blocks);
|
|
965
965
|
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
966
966
|
}
|
|
967
967
|
function imageExtToMime(ext) {
|
|
@@ -1025,13 +1025,13 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
1025
1025
|
let found = false;
|
|
1026
1026
|
const allCandidates = resolvedPath ? [resolvedPath, ...candidates] : candidates;
|
|
1027
1027
|
for (const path of allCandidates) {
|
|
1028
|
-
if (
|
|
1028
|
+
if (_chunkQB7CS534cjs.isPathTraversal.call(void 0, path)) continue;
|
|
1029
1029
|
const file = zip.file(path);
|
|
1030
1030
|
if (!file) continue;
|
|
1031
1031
|
try {
|
|
1032
1032
|
const data = await file.async("uint8array");
|
|
1033
1033
|
decompressed.total += data.length;
|
|
1034
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
1034
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkQB7CS534cjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
1035
1035
|
const actualPath = path;
|
|
1036
1036
|
const ext = actualPath.includes(".") ? actualPath.split(".").pop() || "png" : "png";
|
|
1037
1037
|
const mimeType = imageExtToMime(ext);
|
|
@@ -1043,7 +1043,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
1043
1043
|
found = true;
|
|
1044
1044
|
break;
|
|
1045
1045
|
} catch (err) {
|
|
1046
|
-
if (err instanceof
|
|
1046
|
+
if (err instanceof _chunkQB7CS534cjs.KordocError) throw err;
|
|
1047
1047
|
}
|
|
1048
1048
|
}
|
|
1049
1049
|
if (!found) {
|
|
@@ -1063,7 +1063,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
1063
1063
|
const xml = await file.async("text");
|
|
1064
1064
|
if (decompressed) {
|
|
1065
1065
|
decompressed.total += xml.length * 2;
|
|
1066
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
1066
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkQB7CS534cjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
1067
1067
|
}
|
|
1068
1068
|
parseDublinCoreMetadata(xml, metadata);
|
|
1069
1069
|
if (metadata.title || metadata.author) return;
|
|
@@ -1073,7 +1073,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
1073
1073
|
}
|
|
1074
1074
|
function parseDublinCoreMetadata(xml, metadata) {
|
|
1075
1075
|
const parser = createXmlParser();
|
|
1076
|
-
const doc = parser.parseFromString(
|
|
1076
|
+
const doc = parser.parseFromString(_chunkQB7CS534cjs.stripDtd.call(void 0, xml), "text/xml");
|
|
1077
1077
|
if (!doc.documentElement) return;
|
|
1078
1078
|
const getText = (tagNames) => {
|
|
1079
1079
|
for (const tag of tagNames) {
|
|
@@ -1133,7 +1133,7 @@ function extractFromBrokenZip(buffer) {
|
|
|
1133
1133
|
}
|
|
1134
1134
|
const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
|
|
1135
1135
|
const name = new TextDecoder().decode(nameBytes);
|
|
1136
|
-
if (
|
|
1136
|
+
if (_chunkQB7CS534cjs.isPathTraversal.call(void 0, name)) {
|
|
1137
1137
|
pos = fileStart + compSize;
|
|
1138
1138
|
continue;
|
|
1139
1139
|
}
|
|
@@ -1151,15 +1151,15 @@ function extractFromBrokenZip(buffer) {
|
|
|
1151
1151
|
continue;
|
|
1152
1152
|
}
|
|
1153
1153
|
totalDecompressed += content.length * 2;
|
|
1154
|
-
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
1154
|
+
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0, _chunkQB7CS534cjs.KordocError)("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
|
|
1155
1155
|
sectionNum++;
|
|
1156
1156
|
blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, nestedTableCounter));
|
|
1157
1157
|
} catch (e13) {
|
|
1158
1158
|
continue;
|
|
1159
1159
|
}
|
|
1160
1160
|
}
|
|
1161
|
-
if (blocks.length === 0) throw new (0,
|
|
1162
|
-
const markdown =
|
|
1161
|
+
if (blocks.length === 0) throw new (0, _chunkQB7CS534cjs.KordocError)("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
1162
|
+
const markdown = _chunkQB7CS534cjs.blocksToMarkdown.call(void 0, blocks);
|
|
1163
1163
|
return { markdown, blocks, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
1164
1164
|
}
|
|
1165
1165
|
async function resolveSectionPaths(zip) {
|
|
@@ -1177,7 +1177,7 @@ async function resolveSectionPaths(zip) {
|
|
|
1177
1177
|
}
|
|
1178
1178
|
function parseSectionPathsFromManifest(xml) {
|
|
1179
1179
|
const parser = createXmlParser();
|
|
1180
|
-
const doc = parser.parseFromString(
|
|
1180
|
+
const doc = parser.parseFromString(_chunkQB7CS534cjs.stripDtd.call(void 0, xml), "text/xml");
|
|
1181
1181
|
const items = doc.getElementsByTagName("opf:item");
|
|
1182
1182
|
const spine = doc.getElementsByTagName("opf:itemref");
|
|
1183
1183
|
const isSectionId = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
|
|
@@ -1224,9 +1224,9 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
1224
1224
|
let level = 0;
|
|
1225
1225
|
if (baseFontSize > 0 && _optionalChain([block, 'access', _16 => _16.style, 'optionalAccess', _17 => _17.fontSize])) {
|
|
1226
1226
|
const ratio = block.style.fontSize / baseFontSize;
|
|
1227
|
-
if (ratio >=
|
|
1228
|
-
else if (ratio >=
|
|
1229
|
-
else if (ratio >=
|
|
1227
|
+
if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H1) level = 1;
|
|
1228
|
+
else if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H2) level = 2;
|
|
1229
|
+
else if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H3) level = 3;
|
|
1230
1230
|
}
|
|
1231
1231
|
const compactText = text.replace(/\s+/g, "");
|
|
1232
1232
|
if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
|
|
@@ -1251,13 +1251,13 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
1251
1251
|
let nestedCols = 0;
|
|
1252
1252
|
for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
|
|
1253
1253
|
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
1254
|
-
blocks.push({ type: "table", table:
|
|
1254
|
+
blocks.push({ type: "table", table: _chunkQB7CS534cjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
1255
1255
|
if (parentTable.cell) {
|
|
1256
1256
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
1257
1257
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker;
|
|
1258
1258
|
}
|
|
1259
1259
|
} else {
|
|
1260
|
-
const nestedText =
|
|
1260
|
+
const nestedText = _chunkQB7CS534cjs.convertTableToText.call(void 0, newTable.rows);
|
|
1261
1261
|
if (parentTable.cell) {
|
|
1262
1262
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
1263
1263
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker + "\n" + nestedText;
|
|
@@ -1267,7 +1267,7 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
1267
1267
|
}
|
|
1268
1268
|
function parseSectionXml(xml, styleMap, warnings, sectionNum, counter) {
|
|
1269
1269
|
const parser = createXmlParser(warnings);
|
|
1270
|
-
const doc = parser.parseFromString(
|
|
1270
|
+
const doc = parser.parseFromString(_chunkQB7CS534cjs.stripDtd.call(void 0, xml), "text/xml");
|
|
1271
1271
|
if (!doc.documentElement) return [];
|
|
1272
1272
|
const blocks = [];
|
|
1273
1273
|
const ctx = { styleMap, warnings, sectionNum, counter };
|
|
@@ -1310,7 +1310,7 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
1310
1310
|
if (tableStack.length > 0) {
|
|
1311
1311
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
1312
1312
|
} else {
|
|
1313
|
-
blocks.push({ type: "table", table:
|
|
1313
|
+
blocks.push({ type: "table", table: _chunkQB7CS534cjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
1314
1314
|
tableCtx = null;
|
|
1315
1315
|
}
|
|
1316
1316
|
} else {
|
|
@@ -1350,8 +1350,8 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
1350
1350
|
const cs = isNaN(rawCs) ? 1 : rawCs;
|
|
1351
1351
|
const rawRs = parseInt(el.getAttribute("rowSpan") || "1", 10);
|
|
1352
1352
|
const rs = isNaN(rawRs) ? 1 : rawRs;
|
|
1353
|
-
tableCtx.cell.colSpan = clampSpan(cs,
|
|
1354
|
-
tableCtx.cell.rowSpan = clampSpan(rs,
|
|
1353
|
+
tableCtx.cell.colSpan = clampSpan(cs, _chunkQB7CS534cjs.MAX_COLS);
|
|
1354
|
+
tableCtx.cell.rowSpan = clampSpan(rs, _chunkQB7CS534cjs.MAX_ROWS);
|
|
1355
1355
|
}
|
|
1356
1356
|
break;
|
|
1357
1357
|
case "p": {
|
|
@@ -1409,7 +1409,7 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth =
|
|
|
1409
1409
|
if (tableStack.length > 0) {
|
|
1410
1410
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
1411
1411
|
} else {
|
|
1412
|
-
blocks.push({ type: "table", table:
|
|
1412
|
+
blocks.push({ type: "table", table: _chunkQB7CS534cjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
1413
1413
|
tableCtx = null;
|
|
1414
1414
|
}
|
|
1415
1415
|
} else {
|
|
@@ -1517,7 +1517,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1517
1517
|
case "hyperlink": {
|
|
1518
1518
|
const url = child.getAttribute("url") || child.getAttribute("href") || "";
|
|
1519
1519
|
if (url) {
|
|
1520
|
-
const safe =
|
|
1520
|
+
const safe = _chunkQB7CS534cjs.sanitizeHref.call(void 0, url);
|
|
1521
1521
|
if (safe) href = safe;
|
|
1522
1522
|
}
|
|
1523
1523
|
walk(child);
|
|
@@ -1684,7 +1684,7 @@ function decompressStream(data) {
|
|
|
1684
1684
|
return _zlib.inflateRawSync.call(void 0, data, opts);
|
|
1685
1685
|
}
|
|
1686
1686
|
function parseFileHeader(data) {
|
|
1687
|
-
if (data.length < 40) throw new (0,
|
|
1687
|
+
if (data.length < 40) throw new (0, _chunkQB7CS534cjs.KordocError)("FileHeader\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 40\uBC14\uC774\uD2B8)");
|
|
1688
1688
|
const sig = data.subarray(0, 32).toString("utf8").replace(/\0+$/, "");
|
|
1689
1689
|
return {
|
|
1690
1690
|
signature: sig,
|
|
@@ -2762,7 +2762,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
2762
2762
|
lenientCfb = parseLenientCfb(buffer);
|
|
2763
2763
|
warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
|
|
2764
2764
|
} catch (e19) {
|
|
2765
|
-
throw new (0,
|
|
2765
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
|
|
2766
2766
|
}
|
|
2767
2767
|
}
|
|
2768
2768
|
const findStream = (path) => {
|
|
@@ -2773,11 +2773,11 @@ function parseHwp5Document(buffer, options) {
|
|
|
2773
2773
|
return lenientCfb.findStream(path);
|
|
2774
2774
|
};
|
|
2775
2775
|
const headerData = findStream("/FileHeader");
|
|
2776
|
-
if (!headerData) throw new (0,
|
|
2776
|
+
if (!headerData) throw new (0, _chunkQB7CS534cjs.KordocError)("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
|
|
2777
2777
|
const header = parseFileHeader(headerData);
|
|
2778
|
-
if (header.signature !== "HWP Document File") throw new (0,
|
|
2779
|
-
if (header.flags & FLAG_ENCRYPTED) throw new (0,
|
|
2780
|
-
if (header.flags & FLAG_DRM) throw new (0,
|
|
2778
|
+
if (header.signature !== "HWP Document File") throw new (0, _chunkQB7CS534cjs.KordocError)("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
|
|
2779
|
+
if (header.flags & FLAG_ENCRYPTED) throw new (0, _chunkQB7CS534cjs.KordocError)("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2780
|
+
if (header.flags & FLAG_DRM) throw new (0, _chunkQB7CS534cjs.KordocError)("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2781
2781
|
const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
|
|
2782
2782
|
const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
|
|
2783
2783
|
const metadata = {
|
|
@@ -2786,7 +2786,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
2786
2786
|
if (cfb) extractHwp5Metadata(cfb, metadata);
|
|
2787
2787
|
const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
|
|
2788
2788
|
const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
|
|
2789
|
-
if (sections.length === 0) throw new (0,
|
|
2789
|
+
if (sections.length === 0) throw new (0, _chunkQB7CS534cjs.KordocError)("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2790
2790
|
metadata.pageCount = sections.length;
|
|
2791
2791
|
const pageFilter = _optionalChain([options, 'optionalAccess', _24 => _24.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sections.length) : null;
|
|
2792
2792
|
const totalTarget = pageFilter ? pageFilter.size : sections.length;
|
|
@@ -2800,24 +2800,24 @@ function parseHwp5Document(buffer, options) {
|
|
|
2800
2800
|
const sectionData = sections[si];
|
|
2801
2801
|
const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
2802
2802
|
totalDecompressed += data.length;
|
|
2803
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2803
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkQB7CS534cjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2804
2804
|
const records = readRecords(data);
|
|
2805
2805
|
const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, nestedTableCounter);
|
|
2806
2806
|
blocks.push(...sectionBlocks);
|
|
2807
2807
|
parsedSections++;
|
|
2808
2808
|
_optionalChain([options, 'optionalAccess', _25 => _25.onProgress, 'optionalCall', _26 => _26(parsedSections, totalTarget)]);
|
|
2809
2809
|
} catch (secErr) {
|
|
2810
|
-
if (secErr instanceof
|
|
2810
|
+
if (secErr instanceof _chunkQB7CS534cjs.KordocError) throw secErr;
|
|
2811
2811
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
2812
2812
|
}
|
|
2813
2813
|
}
|
|
2814
2814
|
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
2815
|
-
const flatBlocks =
|
|
2815
|
+
const flatBlocks = _chunkQB7CS534cjs.flattenLayoutTables.call(void 0, blocks);
|
|
2816
2816
|
if (docInfo) {
|
|
2817
2817
|
detectHwp5Headings(flatBlocks, docInfo);
|
|
2818
2818
|
}
|
|
2819
2819
|
const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
2820
|
-
const markdown =
|
|
2820
|
+
const markdown = _chunkQB7CS534cjs.blocksToMarkdown.call(void 0, flatBlocks);
|
|
2821
2821
|
return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
2822
2822
|
}
|
|
2823
2823
|
function parseDocInfoStream(cfb, compressed) {
|
|
@@ -2877,9 +2877,9 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2877
2877
|
let level = 0;
|
|
2878
2878
|
if (_optionalChain([block, 'access', _31 => _31.style, 'optionalAccess', _32 => _32.fontSize]) && baseFontSize > 0) {
|
|
2879
2879
|
const ratio = block.style.fontSize / baseFontSize;
|
|
2880
|
-
if (ratio >=
|
|
2881
|
-
else if (ratio >=
|
|
2882
|
-
else if (ratio >=
|
|
2880
|
+
if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H1) level = 1;
|
|
2881
|
+
else if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H2) level = 2;
|
|
2882
|
+
else if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H3) level = 3;
|
|
2883
2883
|
}
|
|
2884
2884
|
if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
|
|
2885
2885
|
if (level === 0) level = 2;
|
|
@@ -2964,7 +2964,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2964
2964
|
if (!raw) break;
|
|
2965
2965
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2966
2966
|
totalDecompressed += content.length;
|
|
2967
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2967
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkQB7CS534cjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2968
2968
|
sections.push({ idx: i, content });
|
|
2969
2969
|
}
|
|
2970
2970
|
if (sections.length === 0) {
|
|
@@ -2976,7 +2976,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2976
2976
|
if (raw) {
|
|
2977
2977
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2978
2978
|
totalDecompressed += content.length;
|
|
2979
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2979
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkQB7CS534cjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2980
2980
|
sections.push({ idx, content });
|
|
2981
2981
|
}
|
|
2982
2982
|
}
|
|
@@ -2993,7 +2993,7 @@ function findViewTextSectionsLenient(lcfb, compressed) {
|
|
|
2993
2993
|
try {
|
|
2994
2994
|
const content = decryptViewText(raw, compressed);
|
|
2995
2995
|
totalDecompressed += content.length;
|
|
2996
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2996
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkQB7CS534cjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2997
2997
|
sections.push({ idx: i, content });
|
|
2998
2998
|
} catch (e24) {
|
|
2999
2999
|
break;
|
|
@@ -3208,7 +3208,7 @@ function parseSection(records, docInfo, warnings, sectionNum, counter) {
|
|
|
3208
3208
|
if (url && blocks.length > 0) {
|
|
3209
3209
|
const lastBlock = blocks[blocks.length - 1];
|
|
3210
3210
|
if (lastBlock.type === "paragraph" && !lastBlock.href) {
|
|
3211
|
-
lastBlock.href = _nullishCoalesce(
|
|
3211
|
+
lastBlock.href = _nullishCoalesce(_chunkQB7CS534cjs.sanitizeHref.call(void 0, url), () => ( void 0));
|
|
3212
3212
|
}
|
|
3213
3213
|
}
|
|
3214
3214
|
}
|
|
@@ -3367,8 +3367,8 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
3367
3367
|
if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
|
|
3368
3368
|
if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
|
|
3369
3369
|
if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
|
|
3370
|
-
rows = Math.min(rec.data.readUInt16LE(4),
|
|
3371
|
-
cols = Math.min(rec.data.readUInt16LE(6),
|
|
3370
|
+
rows = Math.min(rec.data.readUInt16LE(4), _chunkQB7CS534cjs.MAX_ROWS);
|
|
3371
|
+
cols = Math.min(rec.data.readUInt16LE(6), _chunkQB7CS534cjs.MAX_COLS);
|
|
3372
3372
|
}
|
|
3373
3373
|
if (rec.tagId === TAG_LIST_HEADER) {
|
|
3374
3374
|
const { cell, nextIdx } = parseCellBlock(records, i, tableLevel, counter);
|
|
@@ -3390,7 +3390,7 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
3390
3390
|
return { table: { rows, cols, cells: irCells, hasHeader: rows > 1 }, nextIdx: i };
|
|
3391
3391
|
}
|
|
3392
3392
|
const cellRows = arrangeCells(rows, cols, cells);
|
|
3393
|
-
return { table:
|
|
3393
|
+
return { table: _chunkQB7CS534cjs.buildTable.call(void 0, cellRows), nextIdx: i };
|
|
3394
3394
|
}
|
|
3395
3395
|
function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
3396
3396
|
const rec = records[startIdx];
|
|
@@ -3413,8 +3413,8 @@ function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
|
3413
3413
|
rowAddr = rec.data.readUInt16LE(10);
|
|
3414
3414
|
const cs = rec.data.readUInt16LE(12);
|
|
3415
3415
|
const rs = rec.data.readUInt16LE(14);
|
|
3416
|
-
if (cs > 0) colSpan = Math.min(cs,
|
|
3417
|
-
if (rs > 0) rowSpan = Math.min(rs,
|
|
3416
|
+
if (cs > 0) colSpan = Math.min(cs, _chunkQB7CS534cjs.MAX_COLS);
|
|
3417
|
+
if (rs > 0) rowSpan = Math.min(rs, _chunkQB7CS534cjs.MAX_ROWS);
|
|
3418
3418
|
}
|
|
3419
3419
|
let i = startIdx + 1;
|
|
3420
3420
|
while (i < records.length) {
|
|
@@ -15792,7 +15792,7 @@ function getTextContent(el) {
|
|
|
15792
15792
|
return _nullishCoalesce(_optionalChain([el, 'access', _40 => _40.textContent, 'optionalAccess', _41 => _41.trim, 'call', _42 => _42()]), () => ( ""));
|
|
15793
15793
|
}
|
|
15794
15794
|
function parseXml(text) {
|
|
15795
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
15795
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkQB7CS534cjs.stripDtd.call(void 0, text), "text/xml");
|
|
15796
15796
|
}
|
|
15797
15797
|
function parseSharedStrings(xml) {
|
|
15798
15798
|
const doc = parseXml(xml);
|
|
@@ -15936,7 +15936,7 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
15936
15936
|
cellRows.push(row);
|
|
15937
15937
|
}
|
|
15938
15938
|
if (cellRows.length > 0) {
|
|
15939
|
-
const table =
|
|
15939
|
+
const table = _chunkQB7CS534cjs.buildTable.call(void 0, cellRows);
|
|
15940
15940
|
if (table.rows > 0) {
|
|
15941
15941
|
blocks.push({ type: "table", table, pageNumber: sheetIndex + 1 });
|
|
15942
15942
|
}
|
|
@@ -15944,12 +15944,12 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
15944
15944
|
return blocks;
|
|
15945
15945
|
}
|
|
15946
15946
|
async function parseXlsxDocument(buffer, options) {
|
|
15947
|
-
|
|
15947
|
+
_chunkQB7CS534cjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE3);
|
|
15948
15948
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
15949
15949
|
const warnings = [];
|
|
15950
15950
|
const workbookFile = zip.file("xl/workbook.xml");
|
|
15951
15951
|
if (!workbookFile) {
|
|
15952
|
-
throw new (0,
|
|
15952
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 XLSX \uD30C\uC77C: xl/workbook.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
15953
15953
|
}
|
|
15954
15954
|
let sharedStrings = [];
|
|
15955
15955
|
const ssFile = zip.file("xl/sharedStrings.xml");
|
|
@@ -15958,7 +15958,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
15958
15958
|
}
|
|
15959
15959
|
const sheets = parseWorkbook(await workbookFile.async("text"));
|
|
15960
15960
|
if (sheets.length === 0) {
|
|
15961
|
-
throw new (0,
|
|
15961
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("XLSX \uD30C\uC77C\uC5D0 \uC2DC\uD2B8\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
15962
15962
|
}
|
|
15963
15963
|
let relsMap = /* @__PURE__ */ new Map();
|
|
15964
15964
|
const relsFile = zip.file("xl/_rels/workbook.xml.rels");
|
|
@@ -16030,7 +16030,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
16030
16030
|
} catch (e28) {
|
|
16031
16031
|
}
|
|
16032
16032
|
}
|
|
16033
|
-
const markdown =
|
|
16033
|
+
const markdown = _chunkQB7CS534cjs.blocksToMarkdown.call(void 0, blocks);
|
|
16034
16034
|
return { markdown, blocks, metadata, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
16035
16035
|
}
|
|
16036
16036
|
|
|
@@ -16437,11 +16437,11 @@ function processGlobals(records) {
|
|
|
16437
16437
|
let encrypted = false;
|
|
16438
16438
|
const firstBof = records[0];
|
|
16439
16439
|
if (!firstBof || firstBof.opcode !== OP_BOF) {
|
|
16440
|
-
throw new (0,
|
|
16440
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("XLS: \uCCAB \uB808\uCF54\uB4DC\uAC00 BOF\uAC00 \uC544\uB2D8");
|
|
16441
16441
|
}
|
|
16442
16442
|
const bof = decodeBof(firstBof.data);
|
|
16443
16443
|
if (!bof || bof.dt !== DT_GLOBALS) {
|
|
16444
|
-
throw new (0,
|
|
16444
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("XLS: Globals \uC11C\uBE0C\uC2A4\uD2B8\uB9BC BOF \uB204\uB77D");
|
|
16445
16445
|
}
|
|
16446
16446
|
let i = 1;
|
|
16447
16447
|
while (i < records.length) {
|
|
@@ -16556,7 +16556,7 @@ function sheetToBlocks2(sheetName, sheet, sheetIndex) {
|
|
|
16556
16556
|
cellRows.push(row);
|
|
16557
16557
|
}
|
|
16558
16558
|
if (cellRows.length > 0) {
|
|
16559
|
-
const table =
|
|
16559
|
+
const table = _chunkQB7CS534cjs.buildTable.call(void 0, cellRows);
|
|
16560
16560
|
if (table.rows > 0) {
|
|
16561
16561
|
blocks.push({ type: "table", table, pageNumber: sheetIndex + 1 });
|
|
16562
16562
|
}
|
|
@@ -16569,21 +16569,21 @@ async function parseXlsDocument(buffer, options) {
|
|
|
16569
16569
|
try {
|
|
16570
16570
|
cfb = parseLenientCfb(buf);
|
|
16571
16571
|
} catch (e) {
|
|
16572
|
-
throw new (0,
|
|
16572
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)(
|
|
16573
16573
|
`XLS: OLE2 \uC2DC\uADF8\uB2C8\uCC98 \uAC80\uC99D \uC2E4\uD328 \u2014 ${e instanceof Error ? e.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`
|
|
16574
16574
|
);
|
|
16575
16575
|
}
|
|
16576
16576
|
const wb = _nullishCoalesce(cfb.findStream("/Workbook"), () => ( cfb.findStream("/Book")));
|
|
16577
16577
|
if (!wb) {
|
|
16578
|
-
throw new (0,
|
|
16578
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("XLS: Workbook \uC2A4\uD2B8\uB9BC\uC774 \uC5C6\uC74C (BIFF5 \uB610\uB294 \uBE44\uD45C\uC900 \uD30C\uC77C)");
|
|
16579
16579
|
}
|
|
16580
16580
|
const records = readRecords2(wb);
|
|
16581
16581
|
if (records.length === 0) {
|
|
16582
|
-
throw new (0,
|
|
16582
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("XLS: \uC2DC\uADF8\uB2C8\uCC98 \uB808\uCF54\uB4DC\uAC00 \uC5C6\uC74C (Workbook \uC2A4\uD2B8\uB9BC \uC190\uC0C1)");
|
|
16583
16583
|
}
|
|
16584
16584
|
const firstBof = decodeBof(records[0].data);
|
|
16585
16585
|
if (firstBof && firstBof.vers !== 1536) {
|
|
16586
|
-
throw new (0,
|
|
16586
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)(
|
|
16587
16587
|
`XLS: BIFF8(0x0600)\uB9CC \uC9C0\uC6D0 \u2014 \uBCF8 \uD30C\uC77C\uC740 0x${firstBof.vers.toString(16)}`
|
|
16588
16588
|
);
|
|
16589
16589
|
}
|
|
@@ -16643,7 +16643,7 @@ async function parseXlsDocument(buffer, options) {
|
|
|
16643
16643
|
pageCount: totalSheets
|
|
16644
16644
|
};
|
|
16645
16645
|
return {
|
|
16646
|
-
markdown:
|
|
16646
|
+
markdown: _chunkQB7CS534cjs.blocksToMarkdown.call(void 0, allBlocks),
|
|
16647
16647
|
blocks: allBlocks,
|
|
16648
16648
|
metadata,
|
|
16649
16649
|
warnings: warnings.length > 0 ? warnings : void 0
|
|
@@ -17069,7 +17069,7 @@ function getAttr(el, localName3) {
|
|
|
17069
17069
|
return null;
|
|
17070
17070
|
}
|
|
17071
17071
|
function parseXml2(text) {
|
|
17072
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
17072
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkQB7CS534cjs.stripDtd.call(void 0, text), "text/xml");
|
|
17073
17073
|
}
|
|
17074
17074
|
function parseStyles(xml) {
|
|
17075
17075
|
const doc = parseXml2(xml);
|
|
@@ -17388,12 +17388,12 @@ async function extractImages(zip, rels, doc) {
|
|
|
17388
17388
|
return { blocks, images };
|
|
17389
17389
|
}
|
|
17390
17390
|
async function parseDocxDocument(buffer, options) {
|
|
17391
|
-
|
|
17391
|
+
_chunkQB7CS534cjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE4);
|
|
17392
17392
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
17393
17393
|
const warnings = [];
|
|
17394
17394
|
const docFile = zip.file("word/document.xml");
|
|
17395
17395
|
if (!docFile) {
|
|
17396
|
-
throw new (0,
|
|
17396
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 DOCX \uD30C\uC77C: word/document.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
17397
17397
|
}
|
|
17398
17398
|
let rels = /* @__PURE__ */ new Map();
|
|
17399
17399
|
const relsFile = zip.file("word/_rels/document.xml.rels");
|
|
@@ -17428,7 +17428,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
17428
17428
|
const doc = parseXml2(docXml);
|
|
17429
17429
|
const body = findElements(doc, "body");
|
|
17430
17430
|
if (body.length === 0) {
|
|
17431
|
-
throw new (0,
|
|
17431
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("DOCX \uBCF8\uBB38(w:body)\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
17432
17432
|
}
|
|
17433
17433
|
const blocks = [];
|
|
17434
17434
|
const bodyEl = body[0];
|
|
@@ -17468,7 +17468,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
17468
17468
|
}
|
|
17469
17469
|
}
|
|
17470
17470
|
const outline = blocks.filter((b) => b.type === "heading").map((b) => ({ level: _nullishCoalesce(b.level, () => ( 2)), text: _nullishCoalesce(b.text, () => ( "")) }));
|
|
17471
|
-
const markdown =
|
|
17471
|
+
const markdown = _chunkQB7CS534cjs.blocksToMarkdown.call(void 0, blocks);
|
|
17472
17472
|
return {
|
|
17473
17473
|
markdown,
|
|
17474
17474
|
blocks,
|
|
@@ -17491,7 +17491,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
17491
17491
|
}
|
|
17492
17492
|
const text = new TextDecoder("utf-8").decode(buffer).replace(/^\uFEFF/, "");
|
|
17493
17493
|
const normalized = text.replace(/ /g, " ");
|
|
17494
|
-
const xml =
|
|
17494
|
+
const xml = _chunkQB7CS534cjs.stripDtd.call(void 0, normalized);
|
|
17495
17495
|
const warnings = [];
|
|
17496
17496
|
const parser = new (0, _xmldom.DOMParser)({
|
|
17497
17497
|
onError: (_level, msg) => {
|
|
@@ -17531,7 +17531,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
17531
17531
|
parseSection2(el, blocks, paraShapeMap, sectionIdx, warnings);
|
|
17532
17532
|
}
|
|
17533
17533
|
const outline = blocks.filter((b) => b.type === "heading" && b.text).map((b) => ({ level: _nullishCoalesce(b.level, () => ( 1)), text: b.text, pageNumber: b.pageNumber }));
|
|
17534
|
-
const markdown =
|
|
17534
|
+
const markdown = _chunkQB7CS534cjs.blocksToMarkdown.call(void 0, blocks);
|
|
17535
17535
|
return {
|
|
17536
17536
|
markdown,
|
|
17537
17537
|
blocks,
|
|
@@ -17673,7 +17673,7 @@ function parseTable2(el, blocks, paraShapeMap, sectionNum, warnings) {
|
|
|
17673
17673
|
const cellRows = grid.map(
|
|
17674
17674
|
(row) => row.map((cell) => _nullishCoalesce(cell, () => ( { text: "", colSpan: 1, rowSpan: 1 })))
|
|
17675
17675
|
);
|
|
17676
|
-
const table =
|
|
17676
|
+
const table = _chunkQB7CS534cjs.buildTable.call(void 0, cellRows);
|
|
17677
17677
|
blocks.push({ type: "table", table, pageNumber: sectionNum });
|
|
17678
17678
|
}
|
|
17679
17679
|
function extractCellText(cellEl) {
|
|
@@ -18074,7 +18074,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
18074
18074
|
const normalizedValues = normalizeValues(values);
|
|
18075
18075
|
const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
|
|
18076
18076
|
if (sectionFiles.length === 0) {
|
|
18077
|
-
throw new (0,
|
|
18077
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
18078
18078
|
}
|
|
18079
18079
|
const xmlParser = new (0, _xmldom.DOMParser)();
|
|
18080
18080
|
const xmlSerializer = new (0, _xmldom.XMLSerializer)();
|
|
@@ -18082,7 +18082,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
18082
18082
|
const zipEntry = zip.file(sectionPath);
|
|
18083
18083
|
if (!zipEntry) continue;
|
|
18084
18084
|
const rawXml = await zipEntry.async("text");
|
|
18085
|
-
const doc = xmlParser.parseFromString(
|
|
18085
|
+
const doc = xmlParser.parseFromString(_chunkQB7CS534cjs.stripDtd.call(void 0, rawXml), "text/xml");
|
|
18086
18086
|
if (!doc.documentElement) continue;
|
|
18087
18087
|
let modified = false;
|
|
18088
18088
|
const tables = findAllElements(doc.documentElement, "tbl");
|
|
@@ -19125,13 +19125,13 @@ async function htmlToPdf(html, options) {
|
|
|
19125
19125
|
try {
|
|
19126
19126
|
puppeteer = await Promise.resolve().then(() => _interopRequireWildcard(require("puppeteer-core")));
|
|
19127
19127
|
} catch (e34) {
|
|
19128
|
-
throw new (0,
|
|
19128
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)(
|
|
19129
19129
|
"PDF \uC0DD\uC131\uC5D0 puppeteer-core\uAC00 \uD544\uC694\uD569\uB2C8\uB2E4. \uC124\uCE58: npm install puppeteer-core"
|
|
19130
19130
|
);
|
|
19131
19131
|
}
|
|
19132
19132
|
const executablePath = _nullishCoalesce(process.env.PUPPETEER_EXECUTABLE_PATH, () => ( findChromiumPath()));
|
|
19133
19133
|
if (!executablePath) {
|
|
19134
|
-
throw new (0,
|
|
19134
|
+
throw new (0, _chunkQB7CS534cjs.KordocError)(
|
|
19135
19135
|
"Chromium \uC2E4\uD589 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4. PUPPETEER_EXECUTABLE_PATH \uD658\uACBD\uBCC0\uC218\uB97C \uC124\uC815\uD558\uC138\uC694."
|
|
19136
19136
|
);
|
|
19137
19137
|
}
|
|
@@ -19190,7 +19190,7 @@ async function markdownToPdf(markdown, options) {
|
|
|
19190
19190
|
return htmlToPdf(html, options);
|
|
19191
19191
|
}
|
|
19192
19192
|
async function blocksToPdf(blocks, options) {
|
|
19193
|
-
const markdown =
|
|
19193
|
+
const markdown = _chunkQB7CS534cjs.blocksToMarkdown.call(void 0, blocks);
|
|
19194
19194
|
return markdownToPdf(markdown, options);
|
|
19195
19195
|
}
|
|
19196
19196
|
|
|
@@ -19201,13 +19201,13 @@ async function parse(input, options) {
|
|
|
19201
19201
|
if (typeof input === "string") {
|
|
19202
19202
|
try {
|
|
19203
19203
|
const buf = await _promises.readFile.call(void 0, input);
|
|
19204
|
-
buffer =
|
|
19204
|
+
buffer = _chunkQB7CS534cjs.toArrayBuffer.call(void 0, buf);
|
|
19205
19205
|
} catch (err) {
|
|
19206
19206
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
19207
19207
|
return { success: false, fileType: "unknown", error: msg, code: "PARSE_ERROR" };
|
|
19208
19208
|
}
|
|
19209
19209
|
} else if (Buffer.isBuffer(input)) {
|
|
19210
|
-
buffer =
|
|
19210
|
+
buffer = _chunkQB7CS534cjs.toArrayBuffer.call(void 0, input);
|
|
19211
19211
|
} else {
|
|
19212
19212
|
buffer = input;
|
|
19213
19213
|
}
|
|
@@ -19242,7 +19242,7 @@ async function parseHwp3(buffer, options) {
|
|
|
19242
19242
|
const { markdown, blocks, metadata, outline, warnings } = parseHwp3Document(buffer, options);
|
|
19243
19243
|
return { success: true, fileType: "hwp3", markdown, blocks, metadata, outline, warnings };
|
|
19244
19244
|
} catch (err) {
|
|
19245
|
-
return { success: false, fileType: "hwp3", error: err instanceof Error ? err.message : "HWP3 \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19245
|
+
return { success: false, fileType: "hwp3", error: err instanceof Error ? err.message : "HWP3 \uD30C\uC2F1 \uC2E4\uD328", code: _chunkQB7CS534cjs.classifyError.call(void 0, err) };
|
|
19246
19246
|
}
|
|
19247
19247
|
}
|
|
19248
19248
|
async function parseHwpx(buffer, options) {
|
|
@@ -19250,7 +19250,7 @@ async function parseHwpx(buffer, options) {
|
|
|
19250
19250
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseHwpxDocument(buffer, options);
|
|
19251
19251
|
return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _121 => _121.length]) ? images : void 0 };
|
|
19252
19252
|
} catch (err) {
|
|
19253
|
-
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19253
|
+
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkQB7CS534cjs.classifyError.call(void 0, err) };
|
|
19254
19254
|
}
|
|
19255
19255
|
}
|
|
19256
19256
|
async function parseHwp(buffer, options) {
|
|
@@ -19275,13 +19275,13 @@ async function parseHwp(buffer, options) {
|
|
|
19275
19275
|
}
|
|
19276
19276
|
return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _123 => _123.length]) ? images : void 0 };
|
|
19277
19277
|
} catch (err) {
|
|
19278
|
-
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19278
|
+
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code: _chunkQB7CS534cjs.classifyError.call(void 0, err) };
|
|
19279
19279
|
}
|
|
19280
19280
|
}
|
|
19281
19281
|
async function parsePdf(buffer, options) {
|
|
19282
19282
|
let parsePdfDocument;
|
|
19283
19283
|
try {
|
|
19284
|
-
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-
|
|
19284
|
+
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-EL5YETUA.cjs")));
|
|
19285
19285
|
parsePdfDocument = mod.parsePdfDocument;
|
|
19286
19286
|
} catch (e36) {
|
|
19287
19287
|
return {
|
|
@@ -19292,11 +19292,11 @@ async function parsePdf(buffer, options) {
|
|
|
19292
19292
|
};
|
|
19293
19293
|
}
|
|
19294
19294
|
try {
|
|
19295
|
-
const { markdown, blocks, metadata, outline, warnings, isImageBased } = await parsePdfDocument(buffer, options);
|
|
19296
|
-
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
|
|
19295
|
+
const { markdown, blocks, metadata, outline, warnings, isImageBased, pageQuality, qualitySummary } = await parsePdfDocument(buffer, options);
|
|
19296
|
+
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased, pageQuality, qualitySummary };
|
|
19297
19297
|
} catch (err) {
|
|
19298
19298
|
const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
|
|
19299
|
-
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19299
|
+
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: _chunkQB7CS534cjs.classifyError.call(void 0, err), isImageBased };
|
|
19300
19300
|
}
|
|
19301
19301
|
}
|
|
19302
19302
|
async function parseXlsx(buffer, options) {
|
|
@@ -19304,7 +19304,7 @@ async function parseXlsx(buffer, options) {
|
|
|
19304
19304
|
const { markdown, blocks, metadata, warnings } = await parseXlsxDocument(buffer, options);
|
|
19305
19305
|
return { success: true, fileType: "xlsx", markdown, blocks, metadata, warnings };
|
|
19306
19306
|
} catch (err) {
|
|
19307
|
-
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19307
|
+
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkQB7CS534cjs.classifyError.call(void 0, err) };
|
|
19308
19308
|
}
|
|
19309
19309
|
}
|
|
19310
19310
|
async function parseXls(buffer, options) {
|
|
@@ -19312,7 +19312,7 @@ async function parseXls(buffer, options) {
|
|
|
19312
19312
|
const { markdown, blocks, metadata, warnings } = await parseXlsDocument(buffer, options);
|
|
19313
19313
|
return { success: true, fileType: "xls", markdown, blocks, metadata, warnings };
|
|
19314
19314
|
} catch (err) {
|
|
19315
|
-
return { success: false, fileType: "xls", error: err instanceof Error ? err.message : "XLS \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19315
|
+
return { success: false, fileType: "xls", error: err instanceof Error ? err.message : "XLS \uD30C\uC2F1 \uC2E4\uD328", code: _chunkQB7CS534cjs.classifyError.call(void 0, err) };
|
|
19316
19316
|
}
|
|
19317
19317
|
}
|
|
19318
19318
|
async function parseDocx(buffer, options) {
|
|
@@ -19320,7 +19320,7 @@ async function parseDocx(buffer, options) {
|
|
|
19320
19320
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseDocxDocument(buffer, options);
|
|
19321
19321
|
return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _124 => _124.length]) ? images : void 0 };
|
|
19322
19322
|
} catch (err) {
|
|
19323
|
-
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19323
|
+
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkQB7CS534cjs.classifyError.call(void 0, err) };
|
|
19324
19324
|
}
|
|
19325
19325
|
}
|
|
19326
19326
|
async function parseHwpml(buffer, options) {
|
|
@@ -19328,16 +19328,16 @@ async function parseHwpml(buffer, options) {
|
|
|
19328
19328
|
const { markdown, blocks, metadata, outline, warnings } = parseHwpmlDocument(buffer, options);
|
|
19329
19329
|
return { success: true, fileType: "hwpml", markdown, blocks, metadata, outline, warnings };
|
|
19330
19330
|
} catch (err) {
|
|
19331
|
-
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19331
|
+
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code: _chunkQB7CS534cjs.classifyError.call(void 0, err) };
|
|
19332
19332
|
}
|
|
19333
19333
|
}
|
|
19334
19334
|
async function fillForm(input, values, outputFormat = "markdown") {
|
|
19335
19335
|
let buffer;
|
|
19336
19336
|
if (typeof input === "string") {
|
|
19337
19337
|
const buf = await _promises.readFile.call(void 0, input);
|
|
19338
|
-
buffer =
|
|
19338
|
+
buffer = _chunkQB7CS534cjs.toArrayBuffer.call(void 0, buf);
|
|
19339
19339
|
} else if (Buffer.isBuffer(input)) {
|
|
19340
|
-
buffer =
|
|
19340
|
+
buffer = _chunkQB7CS534cjs.toArrayBuffer.call(void 0, input);
|
|
19341
19341
|
} else {
|
|
19342
19342
|
buffer = input;
|
|
19343
19343
|
}
|
|
@@ -19363,7 +19363,7 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
19363
19363
|
throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
|
|
19364
19364
|
}
|
|
19365
19365
|
const fill = fillFormFields(parsed.blocks, values);
|
|
19366
|
-
const markdown =
|
|
19366
|
+
const markdown = _chunkQB7CS534cjs.blocksToMarkdown.call(void 0, fill.blocks);
|
|
19367
19367
|
if (outputFormat === "hwpx") {
|
|
19368
19368
|
const hwpxBuffer = await markdownToHwpx(markdown);
|
|
19369
19369
|
return { output: hwpxBuffer, format: "hwpx", fill };
|
|
@@ -19400,5 +19400,5 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
19400
19400
|
|
|
19401
19401
|
|
|
19402
19402
|
|
|
19403
|
-
exports.VERSION =
|
|
19403
|
+
exports.VERSION = _chunkQB7CS534cjs.VERSION; exports.blocksToMarkdown = _chunkQB7CS534cjs.blocksToMarkdown; exports.blocksToPdf = blocksToPdf; exports.compare = compare; exports.detectFormat = detectFormat; exports.detectOle2Format = detectOle2Format; exports.detectZipFormat = detectZipFormat; exports.diffBlocks = diffBlocks; exports.extractFormFields = extractFormFields; exports.fillForm = fillForm; exports.fillFormFields = fillFormFields; exports.fillHwpx = fillHwpx; exports.isHwpxFile = isHwpxFile; exports.isLabelCell = isLabelCell; exports.isOldHwpFile = isOldHwpFile; exports.isPdfFile = isPdfFile; exports.isZipFile = isZipFile; exports.markdownToHwpx = markdownToHwpx; exports.markdownToPdf = markdownToPdf; exports.parse = parse; exports.parseDocx = parseDocx; exports.parseHwp = parseHwp; exports.parseHwp3 = parseHwp3; exports.parseHwpml = parseHwpml; exports.parseHwpx = parseHwpx; exports.parsePdf = parsePdf; exports.parseXls = parseXls; exports.parseXlsx = parseXlsx; exports.renderHtml = renderHtml;
|
|
19404
19404
|
//# sourceMappingURL=index.cjs.map
|