kordoc 2.7.1 → 2.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +450 -450
- package/dist/{chunk-OBSPVJ6A.js → chunk-4NWDJGAU.js} +12 -4
- package/dist/chunk-4NWDJGAU.js.map +1 -0
- package/dist/{chunk-LA66FVBN.js → chunk-4SK2PDMQ.js} +2 -2
- package/dist/chunk-4SK2PDMQ.js.map +1 -0
- package/dist/{chunk-GNN6MHH4.js → chunk-LB7E2KDF.js} +2 -2
- package/dist/chunk-LB7E2KDF.js.map +1 -0
- package/dist/{chunk-5CJGKKMZ.js → chunk-MEPHGCPQ.js} +1 -1
- package/dist/chunk-MEPHGCPQ.js.map +1 -0
- package/dist/chunk-MOL7MDBG.js +0 -0
- package/dist/chunk-MUOQXDZ4.cjs.map +1 -1
- package/dist/{chunk-RFGEEHI4.cjs → chunk-Y476BOHI.cjs} +2 -2
- package/dist/chunk-Y476BOHI.cjs.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/cli.js.map +1 -1
- package/dist/{detect-PJZMUL2Z.js → detect-RI2MQ33K.js} +2 -2
- package/dist/formula-JCNF43NE.js +0 -0
- package/dist/formula-XGG6ZP42.cjs.map +1 -1
- package/dist/index.cjs +105 -97
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +10 -2
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +5 -5
- package/dist/mcp.js.map +1 -1
- package/dist/page-range-3C7UGGEK.cjs.map +1 -1
- package/dist/page-range-737B4EZW.js +0 -0
- package/dist/{parser-SRI2TIZX.cjs → parser-7OFQ67QL.cjs} +16 -16
- package/dist/parser-7OFQ67QL.cjs.map +1 -0
- package/dist/{parser-6L6DZCOB.js → parser-DJCMY3OO.js} +3 -3
- package/dist/parser-DJCMY3OO.js.map +1 -0
- package/dist/{parser-5CJGXQCJ.js → parser-QMMQ7Y7R.js} +3 -3
- package/dist/parser-QMMQ7Y7R.js.map +1 -0
- package/dist/{provider-WPIYEALY.js → provider-2SEHU2FM.js} +1 -1
- package/dist/provider-2SEHU2FM.js.map +1 -0
- package/dist/{provider-7H4CPZYS.js → provider-AKROB7WQ.js} +1 -1
- package/dist/provider-AKROB7WQ.js.map +1 -0
- package/dist/{provider-YN2SSK4X.cjs → provider-SNONEZNW.cjs} +1 -1
- package/dist/provider-SNONEZNW.cjs.map +1 -0
- package/dist/setup-57FB3LSP.js +0 -0
- package/dist/{watch-7CTGUDQB.js → watch-FVMVIZ5Q.js} +4 -4
- package/dist/watch-FVMVIZ5Q.js.map +1 -0
- package/package.json +98 -98
- package/dist/chunk-5CJGKKMZ.js.map +0 -1
- package/dist/chunk-GNN6MHH4.js.map +0 -1
- package/dist/chunk-LA66FVBN.js.map +0 -1
- package/dist/chunk-OBSPVJ6A.js.map +0 -1
- package/dist/chunk-RFGEEHI4.cjs.map +0 -1
- package/dist/parser-5CJGXQCJ.js.map +0 -1
- package/dist/parser-6L6DZCOB.js.map +0 -1
- package/dist/parser-SRI2TIZX.cjs.map +0 -1
- package/dist/provider-7H4CPZYS.js.map +0 -1
- package/dist/provider-WPIYEALY.js.map +0 -1
- package/dist/provider-YN2SSK4X.cjs.map +0 -1
- package/dist/watch-7CTGUDQB.js.map +0 -1
- /package/dist/{detect-PJZMUL2Z.js.map → detect-RI2MQ33K.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
var
|
|
19
|
+
var _chunkY476BOHIcjs = require('./chunk-Y476BOHI.cjs');
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -821,7 +821,7 @@ var MAX_XML_DEPTH = 200;
|
|
|
821
821
|
function createXmlParser(warnings) {
|
|
822
822
|
return new (0, _xmldom.DOMParser)({
|
|
823
823
|
onError(level, msg) {
|
|
824
|
-
if (level === "fatalError") throw new (0,
|
|
824
|
+
if (level === "fatalError") throw new (0, _chunkY476BOHIcjs.KordocError)(`XML \uD30C\uC2F1 \uC2E4\uD328: ${msg}`);
|
|
825
825
|
_optionalChain([warnings, 'optionalAccess', _2 => _2.push, 'call', _3 => _3({ code: "MALFORMED_XML", message: `XML ${level === "warn" ? "\uACBD\uACE0" : "\uC624\uB958"}: ${msg}` })]);
|
|
826
826
|
}
|
|
827
827
|
});
|
|
@@ -840,10 +840,10 @@ async function extractHwpxStyles(zip, decompressed) {
|
|
|
840
840
|
const xml = await file.async("text");
|
|
841
841
|
if (decompressed) {
|
|
842
842
|
decompressed.total += xml.length * 2;
|
|
843
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
843
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkY476BOHIcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
844
844
|
}
|
|
845
845
|
const parser = createXmlParser();
|
|
846
|
-
const doc = parser.parseFromString(
|
|
846
|
+
const doc = parser.parseFromString(_chunkY476BOHIcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
847
847
|
if (!doc.documentElement) continue;
|
|
848
848
|
parseCharProperties(doc, result.charProperties);
|
|
849
849
|
parseStyleElements(doc, result.styles);
|
|
@@ -905,7 +905,7 @@ function parseStyleElements(doc, map) {
|
|
|
905
905
|
}
|
|
906
906
|
}
|
|
907
907
|
async function parseHwpxDocument(buffer, options) {
|
|
908
|
-
|
|
908
|
+
_chunkY476BOHIcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
|
|
909
909
|
let zip;
|
|
910
910
|
try {
|
|
911
911
|
zip = await _jszip2.default.loadAsync(buffer);
|
|
@@ -914,7 +914,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
914
914
|
}
|
|
915
915
|
const actualEntryCount = Object.keys(zip.files).length;
|
|
916
916
|
if (actualEntryCount > MAX_ZIP_ENTRIES) {
|
|
917
|
-
throw new (0,
|
|
917
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
918
918
|
}
|
|
919
919
|
const manifestFile = zip.file("META-INF/manifest.xml");
|
|
920
920
|
if (manifestFile) {
|
|
@@ -926,7 +926,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
926
926
|
return comResultToParseResult(pages, pageCount, warnings2);
|
|
927
927
|
}
|
|
928
928
|
}
|
|
929
|
-
throw new (0,
|
|
929
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("DRM \uC554\uD638\uD654\uB41C HWPX \uD30C\uC77C\uC785\uB2C8\uB2E4. Windows + \uD55C\uCEF4 \uC624\uD53C\uC2A4 \uC124\uCE58 \uC2DC \uC790\uB3D9 \uCD94\uCD9C\uB429\uB2C8\uB2E4.");
|
|
930
930
|
}
|
|
931
931
|
}
|
|
932
932
|
const decompressed = { total: 0 };
|
|
@@ -935,7 +935,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
935
935
|
const styleMap = await extractHwpxStyles(zip, decompressed);
|
|
936
936
|
const warnings = [];
|
|
937
937
|
const sectionPaths = await resolveSectionPaths(zip);
|
|
938
|
-
if (sectionPaths.length === 0) throw new (0,
|
|
938
|
+
if (sectionPaths.length === 0) throw new (0, _chunkY476BOHIcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
939
939
|
metadata.pageCount = sectionPaths.length;
|
|
940
940
|
const pageFilter = _optionalChain([options, 'optionalAccess', _5 => _5.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sectionPaths.length) : null;
|
|
941
941
|
const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
|
|
@@ -949,19 +949,19 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
949
949
|
try {
|
|
950
950
|
const xml = await file.async("text");
|
|
951
951
|
decompressed.total += xml.length * 2;
|
|
952
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
952
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkY476BOHIcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
953
953
|
blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, nestedTableCounter));
|
|
954
954
|
parsedSections++;
|
|
955
955
|
_optionalChain([options, 'optionalAccess', _6 => _6.onProgress, 'optionalCall', _7 => _7(parsedSections, totalTarget)]);
|
|
956
956
|
} catch (secErr) {
|
|
957
|
-
if (secErr instanceof
|
|
957
|
+
if (secErr instanceof _chunkY476BOHIcjs.KordocError) throw secErr;
|
|
958
958
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
959
959
|
}
|
|
960
960
|
}
|
|
961
961
|
const images = await extractImagesFromZip(zip, blocks, decompressed, warnings);
|
|
962
962
|
detectHwpxHeadings(blocks, styleMap);
|
|
963
963
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
964
|
-
const markdown =
|
|
964
|
+
const markdown = _chunkY476BOHIcjs.blocksToMarkdown.call(void 0, blocks);
|
|
965
965
|
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
966
966
|
}
|
|
967
967
|
function imageExtToMime(ext) {
|
|
@@ -1025,13 +1025,13 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
1025
1025
|
let found = false;
|
|
1026
1026
|
const allCandidates = resolvedPath ? [resolvedPath, ...candidates] : candidates;
|
|
1027
1027
|
for (const path of allCandidates) {
|
|
1028
|
-
if (
|
|
1028
|
+
if (_chunkY476BOHIcjs.isPathTraversal.call(void 0, path)) continue;
|
|
1029
1029
|
const file = zip.file(path);
|
|
1030
1030
|
if (!file) continue;
|
|
1031
1031
|
try {
|
|
1032
1032
|
const data = await file.async("uint8array");
|
|
1033
1033
|
decompressed.total += data.length;
|
|
1034
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
1034
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkY476BOHIcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
1035
1035
|
const actualPath = path;
|
|
1036
1036
|
const ext = actualPath.includes(".") ? actualPath.split(".").pop() || "png" : "png";
|
|
1037
1037
|
const mimeType = imageExtToMime(ext);
|
|
@@ -1043,7 +1043,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
1043
1043
|
found = true;
|
|
1044
1044
|
break;
|
|
1045
1045
|
} catch (err) {
|
|
1046
|
-
if (err instanceof
|
|
1046
|
+
if (err instanceof _chunkY476BOHIcjs.KordocError) throw err;
|
|
1047
1047
|
}
|
|
1048
1048
|
}
|
|
1049
1049
|
if (!found) {
|
|
@@ -1063,7 +1063,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
1063
1063
|
const xml = await file.async("text");
|
|
1064
1064
|
if (decompressed) {
|
|
1065
1065
|
decompressed.total += xml.length * 2;
|
|
1066
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
1066
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkY476BOHIcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
1067
1067
|
}
|
|
1068
1068
|
parseDublinCoreMetadata(xml, metadata);
|
|
1069
1069
|
if (metadata.title || metadata.author) return;
|
|
@@ -1073,7 +1073,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
1073
1073
|
}
|
|
1074
1074
|
function parseDublinCoreMetadata(xml, metadata) {
|
|
1075
1075
|
const parser = createXmlParser();
|
|
1076
|
-
const doc = parser.parseFromString(
|
|
1076
|
+
const doc = parser.parseFromString(_chunkY476BOHIcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
1077
1077
|
if (!doc.documentElement) return;
|
|
1078
1078
|
const getText = (tagNames) => {
|
|
1079
1079
|
for (const tag of tagNames) {
|
|
@@ -1133,7 +1133,7 @@ function extractFromBrokenZip(buffer) {
|
|
|
1133
1133
|
}
|
|
1134
1134
|
const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
|
|
1135
1135
|
const name = new TextDecoder().decode(nameBytes);
|
|
1136
|
-
if (
|
|
1136
|
+
if (_chunkY476BOHIcjs.isPathTraversal.call(void 0, name)) {
|
|
1137
1137
|
pos = fileStart + compSize;
|
|
1138
1138
|
continue;
|
|
1139
1139
|
}
|
|
@@ -1151,15 +1151,15 @@ function extractFromBrokenZip(buffer) {
|
|
|
1151
1151
|
continue;
|
|
1152
1152
|
}
|
|
1153
1153
|
totalDecompressed += content.length * 2;
|
|
1154
|
-
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
1154
|
+
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0, _chunkY476BOHIcjs.KordocError)("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
|
|
1155
1155
|
sectionNum++;
|
|
1156
1156
|
blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, nestedTableCounter));
|
|
1157
1157
|
} catch (e13) {
|
|
1158
1158
|
continue;
|
|
1159
1159
|
}
|
|
1160
1160
|
}
|
|
1161
|
-
if (blocks.length === 0) throw new (0,
|
|
1162
|
-
const markdown =
|
|
1161
|
+
if (blocks.length === 0) throw new (0, _chunkY476BOHIcjs.KordocError)("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
1162
|
+
const markdown = _chunkY476BOHIcjs.blocksToMarkdown.call(void 0, blocks);
|
|
1163
1163
|
return { markdown, blocks, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
1164
1164
|
}
|
|
1165
1165
|
async function resolveSectionPaths(zip) {
|
|
@@ -1177,7 +1177,7 @@ async function resolveSectionPaths(zip) {
|
|
|
1177
1177
|
}
|
|
1178
1178
|
function parseSectionPathsFromManifest(xml) {
|
|
1179
1179
|
const parser = createXmlParser();
|
|
1180
|
-
const doc = parser.parseFromString(
|
|
1180
|
+
const doc = parser.parseFromString(_chunkY476BOHIcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
1181
1181
|
const items = doc.getElementsByTagName("opf:item");
|
|
1182
1182
|
const spine = doc.getElementsByTagName("opf:itemref");
|
|
1183
1183
|
const isSectionId = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
|
|
@@ -1224,9 +1224,9 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
1224
1224
|
let level = 0;
|
|
1225
1225
|
if (baseFontSize > 0 && _optionalChain([block, 'access', _16 => _16.style, 'optionalAccess', _17 => _17.fontSize])) {
|
|
1226
1226
|
const ratio = block.style.fontSize / baseFontSize;
|
|
1227
|
-
if (ratio >=
|
|
1228
|
-
else if (ratio >=
|
|
1229
|
-
else if (ratio >=
|
|
1227
|
+
if (ratio >= _chunkY476BOHIcjs.HEADING_RATIO_H1) level = 1;
|
|
1228
|
+
else if (ratio >= _chunkY476BOHIcjs.HEADING_RATIO_H2) level = 2;
|
|
1229
|
+
else if (ratio >= _chunkY476BOHIcjs.HEADING_RATIO_H3) level = 3;
|
|
1230
1230
|
}
|
|
1231
1231
|
const compactText = text.replace(/\s+/g, "");
|
|
1232
1232
|
if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
|
|
@@ -1251,13 +1251,13 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
1251
1251
|
let nestedCols = 0;
|
|
1252
1252
|
for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
|
|
1253
1253
|
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
1254
|
-
blocks.push({ type: "table", table:
|
|
1254
|
+
blocks.push({ type: "table", table: _chunkY476BOHIcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
1255
1255
|
if (parentTable.cell) {
|
|
1256
1256
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
1257
1257
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker;
|
|
1258
1258
|
}
|
|
1259
1259
|
} else {
|
|
1260
|
-
const nestedText =
|
|
1260
|
+
const nestedText = _chunkY476BOHIcjs.convertTableToText.call(void 0, newTable.rows);
|
|
1261
1261
|
if (parentTable.cell) {
|
|
1262
1262
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
1263
1263
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker + "\n" + nestedText;
|
|
@@ -1267,7 +1267,7 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
1267
1267
|
}
|
|
1268
1268
|
function parseSectionXml(xml, styleMap, warnings, sectionNum, counter) {
|
|
1269
1269
|
const parser = createXmlParser(warnings);
|
|
1270
|
-
const doc = parser.parseFromString(
|
|
1270
|
+
const doc = parser.parseFromString(_chunkY476BOHIcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
1271
1271
|
if (!doc.documentElement) return [];
|
|
1272
1272
|
const blocks = [];
|
|
1273
1273
|
const ctx = { styleMap, warnings, sectionNum, counter };
|
|
@@ -1310,7 +1310,7 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
1310
1310
|
if (tableStack.length > 0) {
|
|
1311
1311
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
1312
1312
|
} else {
|
|
1313
|
-
blocks.push({ type: "table", table:
|
|
1313
|
+
blocks.push({ type: "table", table: _chunkY476BOHIcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
1314
1314
|
tableCtx = null;
|
|
1315
1315
|
}
|
|
1316
1316
|
} else {
|
|
@@ -1350,8 +1350,8 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
1350
1350
|
const cs = isNaN(rawCs) ? 1 : rawCs;
|
|
1351
1351
|
const rawRs = parseInt(el.getAttribute("rowSpan") || "1", 10);
|
|
1352
1352
|
const rs = isNaN(rawRs) ? 1 : rawRs;
|
|
1353
|
-
tableCtx.cell.colSpan = clampSpan(cs,
|
|
1354
|
-
tableCtx.cell.rowSpan = clampSpan(rs,
|
|
1353
|
+
tableCtx.cell.colSpan = clampSpan(cs, _chunkY476BOHIcjs.MAX_COLS);
|
|
1354
|
+
tableCtx.cell.rowSpan = clampSpan(rs, _chunkY476BOHIcjs.MAX_ROWS);
|
|
1355
1355
|
}
|
|
1356
1356
|
break;
|
|
1357
1357
|
case "p": {
|
|
@@ -1409,7 +1409,7 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth =
|
|
|
1409
1409
|
if (tableStack.length > 0) {
|
|
1410
1410
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
1411
1411
|
} else {
|
|
1412
|
-
blocks.push({ type: "table", table:
|
|
1412
|
+
blocks.push({ type: "table", table: _chunkY476BOHIcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
1413
1413
|
tableCtx = null;
|
|
1414
1414
|
}
|
|
1415
1415
|
} else {
|
|
@@ -1517,7 +1517,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
1517
1517
|
case "hyperlink": {
|
|
1518
1518
|
const url = child.getAttribute("url") || child.getAttribute("href") || "";
|
|
1519
1519
|
if (url) {
|
|
1520
|
-
const safe =
|
|
1520
|
+
const safe = _chunkY476BOHIcjs.sanitizeHref.call(void 0, url);
|
|
1521
1521
|
if (safe) href = safe;
|
|
1522
1522
|
}
|
|
1523
1523
|
walk(child);
|
|
@@ -1684,7 +1684,7 @@ function decompressStream(data) {
|
|
|
1684
1684
|
return _zlib.inflateRawSync.call(void 0, data, opts);
|
|
1685
1685
|
}
|
|
1686
1686
|
function parseFileHeader(data) {
|
|
1687
|
-
if (data.length < 40) throw new (0,
|
|
1687
|
+
if (data.length < 40) throw new (0, _chunkY476BOHIcjs.KordocError)("FileHeader\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 40\uBC14\uC774\uD2B8)");
|
|
1688
1688
|
const sig = data.subarray(0, 32).toString("utf8").replace(/\0+$/, "");
|
|
1689
1689
|
return {
|
|
1690
1690
|
signature: sig,
|
|
@@ -2762,7 +2762,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
2762
2762
|
lenientCfb = parseLenientCfb(buffer);
|
|
2763
2763
|
warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
|
|
2764
2764
|
} catch (e19) {
|
|
2765
|
-
throw new (0,
|
|
2765
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
|
|
2766
2766
|
}
|
|
2767
2767
|
}
|
|
2768
2768
|
const findStream = (path) => {
|
|
@@ -2773,11 +2773,11 @@ function parseHwp5Document(buffer, options) {
|
|
|
2773
2773
|
return lenientCfb.findStream(path);
|
|
2774
2774
|
};
|
|
2775
2775
|
const headerData = findStream("/FileHeader");
|
|
2776
|
-
if (!headerData) throw new (0,
|
|
2776
|
+
if (!headerData) throw new (0, _chunkY476BOHIcjs.KordocError)("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
|
|
2777
2777
|
const header = parseFileHeader(headerData);
|
|
2778
|
-
if (header.signature !== "HWP Document File") throw new (0,
|
|
2779
|
-
if (header.flags & FLAG_ENCRYPTED) throw new (0,
|
|
2780
|
-
if (header.flags & FLAG_DRM) throw new (0,
|
|
2778
|
+
if (header.signature !== "HWP Document File") throw new (0, _chunkY476BOHIcjs.KordocError)("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
|
|
2779
|
+
if (header.flags & FLAG_ENCRYPTED) throw new (0, _chunkY476BOHIcjs.KordocError)("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2780
|
+
if (header.flags & FLAG_DRM) throw new (0, _chunkY476BOHIcjs.KordocError)("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2781
2781
|
const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
|
|
2782
2782
|
const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
|
|
2783
2783
|
const metadata = {
|
|
@@ -2786,7 +2786,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
2786
2786
|
if (cfb) extractHwp5Metadata(cfb, metadata);
|
|
2787
2787
|
const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
|
|
2788
2788
|
const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
|
|
2789
|
-
if (sections.length === 0) throw new (0,
|
|
2789
|
+
if (sections.length === 0) throw new (0, _chunkY476BOHIcjs.KordocError)("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2790
2790
|
metadata.pageCount = sections.length;
|
|
2791
2791
|
const pageFilter = _optionalChain([options, 'optionalAccess', _24 => _24.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sections.length) : null;
|
|
2792
2792
|
const totalTarget = pageFilter ? pageFilter.size : sections.length;
|
|
@@ -2800,24 +2800,24 @@ function parseHwp5Document(buffer, options) {
|
|
|
2800
2800
|
const sectionData = sections[si];
|
|
2801
2801
|
const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
2802
2802
|
totalDecompressed += data.length;
|
|
2803
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2803
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkY476BOHIcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2804
2804
|
const records = readRecords(data);
|
|
2805
2805
|
const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, nestedTableCounter);
|
|
2806
2806
|
blocks.push(...sectionBlocks);
|
|
2807
2807
|
parsedSections++;
|
|
2808
2808
|
_optionalChain([options, 'optionalAccess', _25 => _25.onProgress, 'optionalCall', _26 => _26(parsedSections, totalTarget)]);
|
|
2809
2809
|
} catch (secErr) {
|
|
2810
|
-
if (secErr instanceof
|
|
2810
|
+
if (secErr instanceof _chunkY476BOHIcjs.KordocError) throw secErr;
|
|
2811
2811
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
2812
2812
|
}
|
|
2813
2813
|
}
|
|
2814
2814
|
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
2815
|
-
const flatBlocks =
|
|
2815
|
+
const flatBlocks = _chunkY476BOHIcjs.flattenLayoutTables.call(void 0, blocks);
|
|
2816
2816
|
if (docInfo) {
|
|
2817
2817
|
detectHwp5Headings(flatBlocks, docInfo);
|
|
2818
2818
|
}
|
|
2819
2819
|
const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
2820
|
-
const markdown =
|
|
2820
|
+
const markdown = _chunkY476BOHIcjs.blocksToMarkdown.call(void 0, flatBlocks);
|
|
2821
2821
|
return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
2822
2822
|
}
|
|
2823
2823
|
function parseDocInfoStream(cfb, compressed) {
|
|
@@ -2877,9 +2877,9 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2877
2877
|
let level = 0;
|
|
2878
2878
|
if (_optionalChain([block, 'access', _31 => _31.style, 'optionalAccess', _32 => _32.fontSize]) && baseFontSize > 0) {
|
|
2879
2879
|
const ratio = block.style.fontSize / baseFontSize;
|
|
2880
|
-
if (ratio >=
|
|
2881
|
-
else if (ratio >=
|
|
2882
|
-
else if (ratio >=
|
|
2880
|
+
if (ratio >= _chunkY476BOHIcjs.HEADING_RATIO_H1) level = 1;
|
|
2881
|
+
else if (ratio >= _chunkY476BOHIcjs.HEADING_RATIO_H2) level = 2;
|
|
2882
|
+
else if (ratio >= _chunkY476BOHIcjs.HEADING_RATIO_H3) level = 3;
|
|
2883
2883
|
}
|
|
2884
2884
|
if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
|
|
2885
2885
|
if (level === 0) level = 2;
|
|
@@ -2964,7 +2964,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2964
2964
|
if (!raw) break;
|
|
2965
2965
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2966
2966
|
totalDecompressed += content.length;
|
|
2967
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2967
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkY476BOHIcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2968
2968
|
sections.push({ idx: i, content });
|
|
2969
2969
|
}
|
|
2970
2970
|
if (sections.length === 0) {
|
|
@@ -2976,7 +2976,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2976
2976
|
if (raw) {
|
|
2977
2977
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2978
2978
|
totalDecompressed += content.length;
|
|
2979
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2979
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkY476BOHIcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2980
2980
|
sections.push({ idx, content });
|
|
2981
2981
|
}
|
|
2982
2982
|
}
|
|
@@ -2993,7 +2993,7 @@ function findViewTextSectionsLenient(lcfb, compressed) {
|
|
|
2993
2993
|
try {
|
|
2994
2994
|
const content = decryptViewText(raw, compressed);
|
|
2995
2995
|
totalDecompressed += content.length;
|
|
2996
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2996
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkY476BOHIcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2997
2997
|
sections.push({ idx: i, content });
|
|
2998
2998
|
} catch (e24) {
|
|
2999
2999
|
break;
|
|
@@ -3208,7 +3208,7 @@ function parseSection(records, docInfo, warnings, sectionNum, counter) {
|
|
|
3208
3208
|
if (url && blocks.length > 0) {
|
|
3209
3209
|
const lastBlock = blocks[blocks.length - 1];
|
|
3210
3210
|
if (lastBlock.type === "paragraph" && !lastBlock.href) {
|
|
3211
|
-
lastBlock.href = _nullishCoalesce(
|
|
3211
|
+
lastBlock.href = _nullishCoalesce(_chunkY476BOHIcjs.sanitizeHref.call(void 0, url), () => ( void 0));
|
|
3212
3212
|
}
|
|
3213
3213
|
}
|
|
3214
3214
|
}
|
|
@@ -3367,8 +3367,8 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
3367
3367
|
if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
|
|
3368
3368
|
if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
|
|
3369
3369
|
if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
|
|
3370
|
-
rows = Math.min(rec.data.readUInt16LE(4),
|
|
3371
|
-
cols = Math.min(rec.data.readUInt16LE(6),
|
|
3370
|
+
rows = Math.min(rec.data.readUInt16LE(4), _chunkY476BOHIcjs.MAX_ROWS);
|
|
3371
|
+
cols = Math.min(rec.data.readUInt16LE(6), _chunkY476BOHIcjs.MAX_COLS);
|
|
3372
3372
|
}
|
|
3373
3373
|
if (rec.tagId === TAG_LIST_HEADER) {
|
|
3374
3374
|
const { cell, nextIdx } = parseCellBlock(records, i, tableLevel, counter);
|
|
@@ -3390,7 +3390,7 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
3390
3390
|
return { table: { rows, cols, cells: irCells, hasHeader: rows > 1 }, nextIdx: i };
|
|
3391
3391
|
}
|
|
3392
3392
|
const cellRows = arrangeCells(rows, cols, cells);
|
|
3393
|
-
return { table:
|
|
3393
|
+
return { table: _chunkY476BOHIcjs.buildTable.call(void 0, cellRows), nextIdx: i };
|
|
3394
3394
|
}
|
|
3395
3395
|
function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
3396
3396
|
const rec = records[startIdx];
|
|
@@ -3413,8 +3413,8 @@ function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
|
3413
3413
|
rowAddr = rec.data.readUInt16LE(10);
|
|
3414
3414
|
const cs = rec.data.readUInt16LE(12);
|
|
3415
3415
|
const rs = rec.data.readUInt16LE(14);
|
|
3416
|
-
if (cs > 0) colSpan = Math.min(cs,
|
|
3417
|
-
if (rs > 0) rowSpan = Math.min(rs,
|
|
3416
|
+
if (cs > 0) colSpan = Math.min(cs, _chunkY476BOHIcjs.MAX_COLS);
|
|
3417
|
+
if (rs > 0) rowSpan = Math.min(rs, _chunkY476BOHIcjs.MAX_ROWS);
|
|
3418
3418
|
}
|
|
3419
3419
|
let i = startIdx + 1;
|
|
3420
3420
|
while (i < records.length) {
|
|
@@ -15792,7 +15792,7 @@ function getTextContent(el) {
|
|
|
15792
15792
|
return _nullishCoalesce(_optionalChain([el, 'access', _40 => _40.textContent, 'optionalAccess', _41 => _41.trim, 'call', _42 => _42()]), () => ( ""));
|
|
15793
15793
|
}
|
|
15794
15794
|
function parseXml(text) {
|
|
15795
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
15795
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkY476BOHIcjs.stripDtd.call(void 0, text), "text/xml");
|
|
15796
15796
|
}
|
|
15797
15797
|
function parseSharedStrings(xml) {
|
|
15798
15798
|
const doc = parseXml(xml);
|
|
@@ -15936,7 +15936,7 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
15936
15936
|
cellRows.push(row);
|
|
15937
15937
|
}
|
|
15938
15938
|
if (cellRows.length > 0) {
|
|
15939
|
-
const table =
|
|
15939
|
+
const table = _chunkY476BOHIcjs.buildTable.call(void 0, cellRows);
|
|
15940
15940
|
if (table.rows > 0) {
|
|
15941
15941
|
blocks.push({ type: "table", table, pageNumber: sheetIndex + 1 });
|
|
15942
15942
|
}
|
|
@@ -15944,12 +15944,12 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
15944
15944
|
return blocks;
|
|
15945
15945
|
}
|
|
15946
15946
|
async function parseXlsxDocument(buffer, options) {
|
|
15947
|
-
|
|
15947
|
+
_chunkY476BOHIcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE3);
|
|
15948
15948
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
15949
15949
|
const warnings = [];
|
|
15950
15950
|
const workbookFile = zip.file("xl/workbook.xml");
|
|
15951
15951
|
if (!workbookFile) {
|
|
15952
|
-
throw new (0,
|
|
15952
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 XLSX \uD30C\uC77C: xl/workbook.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
15953
15953
|
}
|
|
15954
15954
|
let sharedStrings = [];
|
|
15955
15955
|
const ssFile = zip.file("xl/sharedStrings.xml");
|
|
@@ -15958,7 +15958,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
15958
15958
|
}
|
|
15959
15959
|
const sheets = parseWorkbook(await workbookFile.async("text"));
|
|
15960
15960
|
if (sheets.length === 0) {
|
|
15961
|
-
throw new (0,
|
|
15961
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("XLSX \uD30C\uC77C\uC5D0 \uC2DC\uD2B8\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
15962
15962
|
}
|
|
15963
15963
|
let relsMap = /* @__PURE__ */ new Map();
|
|
15964
15964
|
const relsFile = zip.file("xl/_rels/workbook.xml.rels");
|
|
@@ -16030,7 +16030,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
16030
16030
|
} catch (e28) {
|
|
16031
16031
|
}
|
|
16032
16032
|
}
|
|
16033
|
-
const markdown =
|
|
16033
|
+
const markdown = _chunkY476BOHIcjs.blocksToMarkdown.call(void 0, blocks);
|
|
16034
16034
|
return { markdown, blocks, metadata, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
16035
16035
|
}
|
|
16036
16036
|
|
|
@@ -16437,11 +16437,11 @@ function processGlobals(records) {
|
|
|
16437
16437
|
let encrypted = false;
|
|
16438
16438
|
const firstBof = records[0];
|
|
16439
16439
|
if (!firstBof || firstBof.opcode !== OP_BOF) {
|
|
16440
|
-
throw new (0,
|
|
16440
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("XLS: \uCCAB \uB808\uCF54\uB4DC\uAC00 BOF\uAC00 \uC544\uB2D8");
|
|
16441
16441
|
}
|
|
16442
16442
|
const bof = decodeBof(firstBof.data);
|
|
16443
16443
|
if (!bof || bof.dt !== DT_GLOBALS) {
|
|
16444
|
-
throw new (0,
|
|
16444
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("XLS: Globals \uC11C\uBE0C\uC2A4\uD2B8\uB9BC BOF \uB204\uB77D");
|
|
16445
16445
|
}
|
|
16446
16446
|
let i = 1;
|
|
16447
16447
|
while (i < records.length) {
|
|
@@ -16556,7 +16556,7 @@ function sheetToBlocks2(sheetName, sheet, sheetIndex) {
|
|
|
16556
16556
|
cellRows.push(row);
|
|
16557
16557
|
}
|
|
16558
16558
|
if (cellRows.length > 0) {
|
|
16559
|
-
const table =
|
|
16559
|
+
const table = _chunkY476BOHIcjs.buildTable.call(void 0, cellRows);
|
|
16560
16560
|
if (table.rows > 0) {
|
|
16561
16561
|
blocks.push({ type: "table", table, pageNumber: sheetIndex + 1 });
|
|
16562
16562
|
}
|
|
@@ -16569,21 +16569,21 @@ async function parseXlsDocument(buffer, options) {
|
|
|
16569
16569
|
try {
|
|
16570
16570
|
cfb = parseLenientCfb(buf);
|
|
16571
16571
|
} catch (e) {
|
|
16572
|
-
throw new (0,
|
|
16572
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)(
|
|
16573
16573
|
`XLS: OLE2 \uC2DC\uADF8\uB2C8\uCC98 \uAC80\uC99D \uC2E4\uD328 \u2014 ${e instanceof Error ? e.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`
|
|
16574
16574
|
);
|
|
16575
16575
|
}
|
|
16576
16576
|
const wb = _nullishCoalesce(cfb.findStream("/Workbook"), () => ( cfb.findStream("/Book")));
|
|
16577
16577
|
if (!wb) {
|
|
16578
|
-
throw new (0,
|
|
16578
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("XLS: Workbook \uC2A4\uD2B8\uB9BC\uC774 \uC5C6\uC74C (BIFF5 \uB610\uB294 \uBE44\uD45C\uC900 \uD30C\uC77C)");
|
|
16579
16579
|
}
|
|
16580
16580
|
const records = readRecords2(wb);
|
|
16581
16581
|
if (records.length === 0) {
|
|
16582
|
-
throw new (0,
|
|
16582
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("XLS: \uC2DC\uADF8\uB2C8\uCC98 \uB808\uCF54\uB4DC\uAC00 \uC5C6\uC74C (Workbook \uC2A4\uD2B8\uB9BC \uC190\uC0C1)");
|
|
16583
16583
|
}
|
|
16584
16584
|
const firstBof = decodeBof(records[0].data);
|
|
16585
16585
|
if (firstBof && firstBof.vers !== 1536) {
|
|
16586
|
-
throw new (0,
|
|
16586
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)(
|
|
16587
16587
|
`XLS: BIFF8(0x0600)\uB9CC \uC9C0\uC6D0 \u2014 \uBCF8 \uD30C\uC77C\uC740 0x${firstBof.vers.toString(16)}`
|
|
16588
16588
|
);
|
|
16589
16589
|
}
|
|
@@ -16643,7 +16643,7 @@ async function parseXlsDocument(buffer, options) {
|
|
|
16643
16643
|
pageCount: totalSheets
|
|
16644
16644
|
};
|
|
16645
16645
|
return {
|
|
16646
|
-
markdown:
|
|
16646
|
+
markdown: _chunkY476BOHIcjs.blocksToMarkdown.call(void 0, allBlocks),
|
|
16647
16647
|
blocks: allBlocks,
|
|
16648
16648
|
metadata,
|
|
16649
16649
|
warnings: warnings.length > 0 ? warnings : void 0
|
|
@@ -17069,7 +17069,7 @@ function getAttr(el, localName3) {
|
|
|
17069
17069
|
return null;
|
|
17070
17070
|
}
|
|
17071
17071
|
function parseXml2(text) {
|
|
17072
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
17072
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkY476BOHIcjs.stripDtd.call(void 0, text), "text/xml");
|
|
17073
17073
|
}
|
|
17074
17074
|
function parseStyles(xml) {
|
|
17075
17075
|
const doc = parseXml2(xml);
|
|
@@ -17388,12 +17388,12 @@ async function extractImages(zip, rels, doc) {
|
|
|
17388
17388
|
return { blocks, images };
|
|
17389
17389
|
}
|
|
17390
17390
|
async function parseDocxDocument(buffer, options) {
|
|
17391
|
-
|
|
17391
|
+
_chunkY476BOHIcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE4);
|
|
17392
17392
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
17393
17393
|
const warnings = [];
|
|
17394
17394
|
const docFile = zip.file("word/document.xml");
|
|
17395
17395
|
if (!docFile) {
|
|
17396
|
-
throw new (0,
|
|
17396
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 DOCX \uD30C\uC77C: word/document.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
17397
17397
|
}
|
|
17398
17398
|
let rels = /* @__PURE__ */ new Map();
|
|
17399
17399
|
const relsFile = zip.file("word/_rels/document.xml.rels");
|
|
@@ -17428,7 +17428,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
17428
17428
|
const doc = parseXml2(docXml);
|
|
17429
17429
|
const body = findElements(doc, "body");
|
|
17430
17430
|
if (body.length === 0) {
|
|
17431
|
-
throw new (0,
|
|
17431
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("DOCX \uBCF8\uBB38(w:body)\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
17432
17432
|
}
|
|
17433
17433
|
const blocks = [];
|
|
17434
17434
|
const bodyEl = body[0];
|
|
@@ -17468,7 +17468,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
17468
17468
|
}
|
|
17469
17469
|
}
|
|
17470
17470
|
const outline = blocks.filter((b) => b.type === "heading").map((b) => ({ level: _nullishCoalesce(b.level, () => ( 2)), text: _nullishCoalesce(b.text, () => ( "")) }));
|
|
17471
|
-
const markdown =
|
|
17471
|
+
const markdown = _chunkY476BOHIcjs.blocksToMarkdown.call(void 0, blocks);
|
|
17472
17472
|
return {
|
|
17473
17473
|
markdown,
|
|
17474
17474
|
blocks,
|
|
@@ -17491,7 +17491,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
17491
17491
|
}
|
|
17492
17492
|
const text = new TextDecoder("utf-8").decode(buffer).replace(/^\uFEFF/, "");
|
|
17493
17493
|
const normalized = text.replace(/ /g, " ");
|
|
17494
|
-
const xml =
|
|
17494
|
+
const xml = _chunkY476BOHIcjs.stripDtd.call(void 0, normalized);
|
|
17495
17495
|
const warnings = [];
|
|
17496
17496
|
const parser = new (0, _xmldom.DOMParser)({
|
|
17497
17497
|
onError: (_level, msg) => {
|
|
@@ -17531,7 +17531,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
17531
17531
|
parseSection2(el, blocks, paraShapeMap, sectionIdx, warnings);
|
|
17532
17532
|
}
|
|
17533
17533
|
const outline = blocks.filter((b) => b.type === "heading" && b.text).map((b) => ({ level: _nullishCoalesce(b.level, () => ( 1)), text: b.text, pageNumber: b.pageNumber }));
|
|
17534
|
-
const markdown =
|
|
17534
|
+
const markdown = _chunkY476BOHIcjs.blocksToMarkdown.call(void 0, blocks);
|
|
17535
17535
|
return {
|
|
17536
17536
|
markdown,
|
|
17537
17537
|
blocks,
|
|
@@ -17673,7 +17673,7 @@ function parseTable2(el, blocks, paraShapeMap, sectionNum, warnings) {
|
|
|
17673
17673
|
const cellRows = grid.map(
|
|
17674
17674
|
(row) => row.map((cell) => _nullishCoalesce(cell, () => ( { text: "", colSpan: 1, rowSpan: 1 })))
|
|
17675
17675
|
);
|
|
17676
|
-
const table =
|
|
17676
|
+
const table = _chunkY476BOHIcjs.buildTable.call(void 0, cellRows);
|
|
17677
17677
|
blocks.push({ type: "table", table, pageNumber: sectionNum });
|
|
17678
17678
|
}
|
|
17679
17679
|
function extractCellText(cellEl) {
|
|
@@ -18074,7 +18074,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
18074
18074
|
const normalizedValues = normalizeValues(values);
|
|
18075
18075
|
const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
|
|
18076
18076
|
if (sectionFiles.length === 0) {
|
|
18077
|
-
throw new (0,
|
|
18077
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
18078
18078
|
}
|
|
18079
18079
|
const xmlParser = new (0, _xmldom.DOMParser)();
|
|
18080
18080
|
const xmlSerializer = new (0, _xmldom.XMLSerializer)();
|
|
@@ -18082,7 +18082,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
18082
18082
|
const zipEntry = zip.file(sectionPath);
|
|
18083
18083
|
if (!zipEntry) continue;
|
|
18084
18084
|
const rawXml = await zipEntry.async("text");
|
|
18085
|
-
const doc = xmlParser.parseFromString(
|
|
18085
|
+
const doc = xmlParser.parseFromString(_chunkY476BOHIcjs.stripDtd.call(void 0, rawXml), "text/xml");
|
|
18086
18086
|
if (!doc.documentElement) continue;
|
|
18087
18087
|
let modified = false;
|
|
18088
18088
|
const tables = findAllElements(doc.documentElement, "tbl");
|
|
@@ -18298,7 +18298,15 @@ function setRunText(runEl, text) {
|
|
|
18298
18298
|
for (let i = 1; i < tElements.length; i++) {
|
|
18299
18299
|
clearChildren(tElements[i]);
|
|
18300
18300
|
}
|
|
18301
|
+
return;
|
|
18301
18302
|
}
|
|
18303
|
+
if (!text) return;
|
|
18304
|
+
const doc = runEl.ownerDocument;
|
|
18305
|
+
const ns = runEl.namespaceURI;
|
|
18306
|
+
const qualifiedName = runEl.prefix ? `${runEl.prefix}:t` : "t";
|
|
18307
|
+
const tEl = ns ? doc.createElementNS(ns, qualifiedName) : doc.createElement(qualifiedName);
|
|
18308
|
+
tEl.appendChild(doc.createTextNode(text));
|
|
18309
|
+
runEl.appendChild(tEl);
|
|
18302
18310
|
}
|
|
18303
18311
|
function clearChildren(el) {
|
|
18304
18312
|
while (el.firstChild) el.removeChild(el.firstChild);
|
|
@@ -19090,13 +19098,13 @@ async function htmlToPdf(html, options) {
|
|
|
19090
19098
|
try {
|
|
19091
19099
|
puppeteer = await Promise.resolve().then(() => _interopRequireWildcard(require("puppeteer-core")));
|
|
19092
19100
|
} catch (e34) {
|
|
19093
|
-
throw new (0,
|
|
19101
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)(
|
|
19094
19102
|
"PDF \uC0DD\uC131\uC5D0 puppeteer-core\uAC00 \uD544\uC694\uD569\uB2C8\uB2E4. \uC124\uCE58: npm install puppeteer-core"
|
|
19095
19103
|
);
|
|
19096
19104
|
}
|
|
19097
19105
|
const executablePath = _nullishCoalesce(process.env.PUPPETEER_EXECUTABLE_PATH, () => ( findChromiumPath()));
|
|
19098
19106
|
if (!executablePath) {
|
|
19099
|
-
throw new (0,
|
|
19107
|
+
throw new (0, _chunkY476BOHIcjs.KordocError)(
|
|
19100
19108
|
"Chromium \uC2E4\uD589 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4. PUPPETEER_EXECUTABLE_PATH \uD658\uACBD\uBCC0\uC218\uB97C \uC124\uC815\uD558\uC138\uC694."
|
|
19101
19109
|
);
|
|
19102
19110
|
}
|
|
@@ -19155,7 +19163,7 @@ async function markdownToPdf(markdown, options) {
|
|
|
19155
19163
|
return htmlToPdf(html, options);
|
|
19156
19164
|
}
|
|
19157
19165
|
async function blocksToPdf(blocks, options) {
|
|
19158
|
-
const markdown =
|
|
19166
|
+
const markdown = _chunkY476BOHIcjs.blocksToMarkdown.call(void 0, blocks);
|
|
19159
19167
|
return markdownToPdf(markdown, options);
|
|
19160
19168
|
}
|
|
19161
19169
|
|
|
@@ -19166,13 +19174,13 @@ async function parse(input, options) {
|
|
|
19166
19174
|
if (typeof input === "string") {
|
|
19167
19175
|
try {
|
|
19168
19176
|
const buf = await _promises.readFile.call(void 0, input);
|
|
19169
|
-
buffer =
|
|
19177
|
+
buffer = _chunkY476BOHIcjs.toArrayBuffer.call(void 0, buf);
|
|
19170
19178
|
} catch (err) {
|
|
19171
19179
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
19172
19180
|
return { success: false, fileType: "unknown", error: msg, code: "PARSE_ERROR" };
|
|
19173
19181
|
}
|
|
19174
19182
|
} else if (Buffer.isBuffer(input)) {
|
|
19175
|
-
buffer =
|
|
19183
|
+
buffer = _chunkY476BOHIcjs.toArrayBuffer.call(void 0, input);
|
|
19176
19184
|
} else {
|
|
19177
19185
|
buffer = input;
|
|
19178
19186
|
}
|
|
@@ -19207,7 +19215,7 @@ async function parseHwp3(buffer, options) {
|
|
|
19207
19215
|
const { markdown, blocks, metadata, outline, warnings } = parseHwp3Document(buffer, options);
|
|
19208
19216
|
return { success: true, fileType: "hwp3", markdown, blocks, metadata, outline, warnings };
|
|
19209
19217
|
} catch (err) {
|
|
19210
|
-
return { success: false, fileType: "hwp3", error: err instanceof Error ? err.message : "HWP3 \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19218
|
+
return { success: false, fileType: "hwp3", error: err instanceof Error ? err.message : "HWP3 \uD30C\uC2F1 \uC2E4\uD328", code: _chunkY476BOHIcjs.classifyError.call(void 0, err) };
|
|
19211
19219
|
}
|
|
19212
19220
|
}
|
|
19213
19221
|
async function parseHwpx(buffer, options) {
|
|
@@ -19215,7 +19223,7 @@ async function parseHwpx(buffer, options) {
|
|
|
19215
19223
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseHwpxDocument(buffer, options);
|
|
19216
19224
|
return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _104 => _104.length]) ? images : void 0 };
|
|
19217
19225
|
} catch (err) {
|
|
19218
|
-
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19226
|
+
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkY476BOHIcjs.classifyError.call(void 0, err) };
|
|
19219
19227
|
}
|
|
19220
19228
|
}
|
|
19221
19229
|
async function parseHwp(buffer, options) {
|
|
@@ -19240,13 +19248,13 @@ async function parseHwp(buffer, options) {
|
|
|
19240
19248
|
}
|
|
19241
19249
|
return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _106 => _106.length]) ? images : void 0 };
|
|
19242
19250
|
} catch (err) {
|
|
19243
|
-
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19251
|
+
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code: _chunkY476BOHIcjs.classifyError.call(void 0, err) };
|
|
19244
19252
|
}
|
|
19245
19253
|
}
|
|
19246
19254
|
async function parsePdf(buffer, options) {
|
|
19247
19255
|
let parsePdfDocument;
|
|
19248
19256
|
try {
|
|
19249
|
-
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-
|
|
19257
|
+
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-7OFQ67QL.cjs")));
|
|
19250
19258
|
parsePdfDocument = mod.parsePdfDocument;
|
|
19251
19259
|
} catch (e36) {
|
|
19252
19260
|
return {
|
|
@@ -19261,7 +19269,7 @@ async function parsePdf(buffer, options) {
|
|
|
19261
19269
|
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
|
|
19262
19270
|
} catch (err) {
|
|
19263
19271
|
const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
|
|
19264
|
-
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19272
|
+
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: _chunkY476BOHIcjs.classifyError.call(void 0, err), isImageBased };
|
|
19265
19273
|
}
|
|
19266
19274
|
}
|
|
19267
19275
|
async function parseXlsx(buffer, options) {
|
|
@@ -19269,7 +19277,7 @@ async function parseXlsx(buffer, options) {
|
|
|
19269
19277
|
const { markdown, blocks, metadata, warnings } = await parseXlsxDocument(buffer, options);
|
|
19270
19278
|
return { success: true, fileType: "xlsx", markdown, blocks, metadata, warnings };
|
|
19271
19279
|
} catch (err) {
|
|
19272
|
-
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19280
|
+
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkY476BOHIcjs.classifyError.call(void 0, err) };
|
|
19273
19281
|
}
|
|
19274
19282
|
}
|
|
19275
19283
|
async function parseXls(buffer, options) {
|
|
@@ -19277,7 +19285,7 @@ async function parseXls(buffer, options) {
|
|
|
19277
19285
|
const { markdown, blocks, metadata, warnings } = await parseXlsDocument(buffer, options);
|
|
19278
19286
|
return { success: true, fileType: "xls", markdown, blocks, metadata, warnings };
|
|
19279
19287
|
} catch (err) {
|
|
19280
|
-
return { success: false, fileType: "xls", error: err instanceof Error ? err.message : "XLS \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19288
|
+
return { success: false, fileType: "xls", error: err instanceof Error ? err.message : "XLS \uD30C\uC2F1 \uC2E4\uD328", code: _chunkY476BOHIcjs.classifyError.call(void 0, err) };
|
|
19281
19289
|
}
|
|
19282
19290
|
}
|
|
19283
19291
|
async function parseDocx(buffer, options) {
|
|
@@ -19285,7 +19293,7 @@ async function parseDocx(buffer, options) {
|
|
|
19285
19293
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseDocxDocument(buffer, options);
|
|
19286
19294
|
return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _107 => _107.length]) ? images : void 0 };
|
|
19287
19295
|
} catch (err) {
|
|
19288
|
-
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19296
|
+
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkY476BOHIcjs.classifyError.call(void 0, err) };
|
|
19289
19297
|
}
|
|
19290
19298
|
}
|
|
19291
19299
|
async function parseHwpml(buffer, options) {
|
|
@@ -19293,16 +19301,16 @@ async function parseHwpml(buffer, options) {
|
|
|
19293
19301
|
const { markdown, blocks, metadata, outline, warnings } = parseHwpmlDocument(buffer, options);
|
|
19294
19302
|
return { success: true, fileType: "hwpml", markdown, blocks, metadata, outline, warnings };
|
|
19295
19303
|
} catch (err) {
|
|
19296
|
-
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
19304
|
+
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code: _chunkY476BOHIcjs.classifyError.call(void 0, err) };
|
|
19297
19305
|
}
|
|
19298
19306
|
}
|
|
19299
19307
|
async function fillForm(input, values, outputFormat = "markdown") {
|
|
19300
19308
|
let buffer;
|
|
19301
19309
|
if (typeof input === "string") {
|
|
19302
19310
|
const buf = await _promises.readFile.call(void 0, input);
|
|
19303
|
-
buffer =
|
|
19311
|
+
buffer = _chunkY476BOHIcjs.toArrayBuffer.call(void 0, buf);
|
|
19304
19312
|
} else if (Buffer.isBuffer(input)) {
|
|
19305
|
-
buffer =
|
|
19313
|
+
buffer = _chunkY476BOHIcjs.toArrayBuffer.call(void 0, input);
|
|
19306
19314
|
} else {
|
|
19307
19315
|
buffer = input;
|
|
19308
19316
|
}
|
|
@@ -19328,7 +19336,7 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
19328
19336
|
throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
|
|
19329
19337
|
}
|
|
19330
19338
|
const fill = fillFormFields(parsed.blocks, values);
|
|
19331
|
-
const markdown =
|
|
19339
|
+
const markdown = _chunkY476BOHIcjs.blocksToMarkdown.call(void 0, fill.blocks);
|
|
19332
19340
|
if (outputFormat === "hwpx") {
|
|
19333
19341
|
const hwpxBuffer = await markdownToHwpx(markdown);
|
|
19334
19342
|
return { output: hwpxBuffer, format: "hwpx", fill };
|
|
@@ -19365,5 +19373,5 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
19365
19373
|
|
|
19366
19374
|
|
|
19367
19375
|
|
|
19368
|
-
exports.VERSION =
|
|
19376
|
+
exports.VERSION = _chunkY476BOHIcjs.VERSION; exports.blocksToMarkdown = _chunkY476BOHIcjs.blocksToMarkdown; exports.blocksToPdf = blocksToPdf; exports.compare = compare; exports.detectFormat = detectFormat; exports.detectOle2Format = detectOle2Format; exports.detectZipFormat = detectZipFormat; exports.diffBlocks = diffBlocks; exports.extractFormFields = extractFormFields; exports.fillForm = fillForm; exports.fillFormFields = fillFormFields; exports.fillHwpx = fillHwpx; exports.isHwpxFile = isHwpxFile; exports.isLabelCell = isLabelCell; exports.isOldHwpFile = isOldHwpFile; exports.isPdfFile = isPdfFile; exports.isZipFile = isZipFile; exports.markdownToHwpx = markdownToHwpx; exports.markdownToPdf = markdownToPdf; exports.parse = parse; exports.parseDocx = parseDocx; exports.parseHwp = parseHwp; exports.parseHwp3 = parseHwp3; exports.parseHwpml = parseHwpml; exports.parseHwpx = parseHwpx; exports.parsePdf = parsePdf; exports.parseXls = parseXls; exports.parseXlsx = parseXlsx; exports.renderHtml = renderHtml;
|
|
19369
19377
|
//# sourceMappingURL=index.cjs.map
|