kordoc 2.2.5 → 2.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-JH5XLWJQ.js → chunk-FCQEF2ZM.js} +2 -2
- package/dist/{chunk-OJ4QR33V.cjs → chunk-HXUCZ2IL.cjs} +2 -2
- package/dist/{chunk-OJ4QR33V.cjs.map → chunk-HXUCZ2IL.cjs.map} +1 -1
- package/dist/{chunk-UU2O6D3R.js → chunk-NL5XLN5R.js} +2 -2
- package/dist/{chunk-RQWICKON.js → chunk-RF6UJXR3.js} +3 -3
- package/dist/cli.js +6 -3
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +81 -81
- package/dist/index.js +2 -2
- package/dist/mcp.js +3 -3
- package/dist/{parser-OIRWPKIQ.js → parser-43IAQ5KE.js} +2 -2
- package/dist/{parser-PXD73E4H.js → parser-AMP7MAOH.js} +2 -2
- package/dist/{parser-CYBX5MP4.cjs → parser-KOWPTDJU.cjs} +15 -15
- package/dist/{parser-CYBX5MP4.cjs.map → parser-KOWPTDJU.cjs.map} +1 -1
- package/dist/{watch-NSBABJ4A.js → watch-IUQXOXW3.js} +3 -3
- package/package.json +1 -1
- /package/dist/{chunk-JH5XLWJQ.js.map → chunk-FCQEF2ZM.js.map} +0 -0
- /package/dist/{chunk-UU2O6D3R.js.map → chunk-NL5XLN5R.js.map} +0 -0
- /package/dist/{chunk-RQWICKON.js.map → chunk-RF6UJXR3.js.map} +0 -0
- /package/dist/{parser-OIRWPKIQ.js.map → parser-43IAQ5KE.js.map} +0 -0
- /package/dist/{parser-PXD73E4H.js.map → parser-AMP7MAOH.js.map} +0 -0
- /package/dist/{watch-NSBABJ4A.js.map → watch-IUQXOXW3.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
var
|
|
19
|
+
var _chunkHXUCZ2ILcjs = require('./chunk-HXUCZ2IL.cjs');
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -78,7 +78,7 @@ var MAX_XML_DEPTH = 200;
|
|
|
78
78
|
function createXmlParser(warnings) {
|
|
79
79
|
return new (0, _xmldom.DOMParser)({
|
|
80
80
|
onError(level, msg) {
|
|
81
|
-
if (level === "fatalError") throw new (0,
|
|
81
|
+
if (level === "fatalError") throw new (0, _chunkHXUCZ2ILcjs.KordocError)(`XML \uD30C\uC2F1 \uC2E4\uD328: ${msg}`);
|
|
82
82
|
_optionalChain([warnings, 'optionalAccess', _2 => _2.push, 'call', _3 => _3({ code: "MALFORMED_XML", message: `XML ${level === "warn" ? "\uACBD\uACE0" : "\uC624\uB958"}: ${msg}` })]);
|
|
83
83
|
}
|
|
84
84
|
});
|
|
@@ -97,10 +97,10 @@ async function extractHwpxStyles(zip, decompressed) {
|
|
|
97
97
|
const xml = await file.async("text");
|
|
98
98
|
if (decompressed) {
|
|
99
99
|
decompressed.total += xml.length * 2;
|
|
100
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
100
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
101
101
|
}
|
|
102
102
|
const parser = createXmlParser();
|
|
103
|
-
const doc = parser.parseFromString(
|
|
103
|
+
const doc = parser.parseFromString(_chunkHXUCZ2ILcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
104
104
|
if (!doc.documentElement) continue;
|
|
105
105
|
parseCharProperties(doc, result.charProperties);
|
|
106
106
|
parseStyleElements(doc, result.styles);
|
|
@@ -162,7 +162,7 @@ function parseStyleElements(doc, map) {
|
|
|
162
162
|
}
|
|
163
163
|
}
|
|
164
164
|
async function parseHwpxDocument(buffer, options) {
|
|
165
|
-
|
|
165
|
+
_chunkHXUCZ2ILcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
|
|
166
166
|
let zip;
|
|
167
167
|
try {
|
|
168
168
|
zip = await _jszip2.default.loadAsync(buffer);
|
|
@@ -171,7 +171,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
171
171
|
}
|
|
172
172
|
const actualEntryCount = Object.keys(zip.files).length;
|
|
173
173
|
if (actualEntryCount > MAX_ZIP_ENTRIES) {
|
|
174
|
-
throw new (0,
|
|
174
|
+
throw new (0, _chunkHXUCZ2ILcjs.KordocError)("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
175
175
|
}
|
|
176
176
|
const decompressed = { total: 0 };
|
|
177
177
|
const metadata = {};
|
|
@@ -179,7 +179,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
179
179
|
const styleMap = await extractHwpxStyles(zip, decompressed);
|
|
180
180
|
const warnings = [];
|
|
181
181
|
const sectionPaths = await resolveSectionPaths(zip);
|
|
182
|
-
if (sectionPaths.length === 0) throw new (0,
|
|
182
|
+
if (sectionPaths.length === 0) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
183
183
|
metadata.pageCount = sectionPaths.length;
|
|
184
184
|
const pageFilter = _optionalChain([options, 'optionalAccess', _4 => _4.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sectionPaths.length) : null;
|
|
185
185
|
const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
|
|
@@ -192,19 +192,19 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
192
192
|
try {
|
|
193
193
|
const xml = await file.async("text");
|
|
194
194
|
decompressed.total += xml.length * 2;
|
|
195
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
195
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
196
196
|
blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1));
|
|
197
197
|
parsedSections++;
|
|
198
198
|
_optionalChain([options, 'optionalAccess', _5 => _5.onProgress, 'optionalCall', _6 => _6(parsedSections, totalTarget)]);
|
|
199
199
|
} catch (secErr) {
|
|
200
|
-
if (secErr instanceof
|
|
200
|
+
if (secErr instanceof _chunkHXUCZ2ILcjs.KordocError) throw secErr;
|
|
201
201
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
202
202
|
}
|
|
203
203
|
}
|
|
204
204
|
const images = await extractImagesFromZip(zip, blocks, decompressed, warnings);
|
|
205
205
|
detectHwpxHeadings(blocks, styleMap);
|
|
206
206
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
207
|
-
const markdown =
|
|
207
|
+
const markdown = _chunkHXUCZ2ILcjs.blocksToMarkdown.call(void 0, blocks);
|
|
208
208
|
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
209
209
|
}
|
|
210
210
|
function imageExtToMime(ext) {
|
|
@@ -256,13 +256,13 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
256
256
|
];
|
|
257
257
|
let found = false;
|
|
258
258
|
for (const path of candidates) {
|
|
259
|
-
if (
|
|
259
|
+
if (_chunkHXUCZ2ILcjs.isPathTraversal.call(void 0, path)) continue;
|
|
260
260
|
const file = zip.file(path);
|
|
261
261
|
if (!file) continue;
|
|
262
262
|
try {
|
|
263
263
|
const data = await file.async("uint8array");
|
|
264
264
|
decompressed.total += data.length;
|
|
265
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
265
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
266
266
|
const ext = ref.includes(".") ? ref.split(".").pop() || "png" : "png";
|
|
267
267
|
const mimeType = imageExtToMime(ext);
|
|
268
268
|
imageIndex++;
|
|
@@ -273,7 +273,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
273
273
|
found = true;
|
|
274
274
|
break;
|
|
275
275
|
} catch (err) {
|
|
276
|
-
if (err instanceof
|
|
276
|
+
if (err instanceof _chunkHXUCZ2ILcjs.KordocError) throw err;
|
|
277
277
|
}
|
|
278
278
|
}
|
|
279
279
|
if (!found) {
|
|
@@ -293,7 +293,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
293
293
|
const xml = await file.async("text");
|
|
294
294
|
if (decompressed) {
|
|
295
295
|
decompressed.total += xml.length * 2;
|
|
296
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
296
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
297
297
|
}
|
|
298
298
|
parseDublinCoreMetadata(xml, metadata);
|
|
299
299
|
if (metadata.title || metadata.author) return;
|
|
@@ -303,7 +303,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
303
303
|
}
|
|
304
304
|
function parseDublinCoreMetadata(xml, metadata) {
|
|
305
305
|
const parser = createXmlParser();
|
|
306
|
-
const doc = parser.parseFromString(
|
|
306
|
+
const doc = parser.parseFromString(_chunkHXUCZ2ILcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
307
307
|
if (!doc.documentElement) return;
|
|
308
308
|
const getText = (tagNames) => {
|
|
309
309
|
for (const tag of tagNames) {
|
|
@@ -362,7 +362,7 @@ function extractFromBrokenZip(buffer) {
|
|
|
362
362
|
}
|
|
363
363
|
const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
|
|
364
364
|
const name = new TextDecoder().decode(nameBytes);
|
|
365
|
-
if (
|
|
365
|
+
if (_chunkHXUCZ2ILcjs.isPathTraversal.call(void 0, name)) {
|
|
366
366
|
pos = fileStart + compSize;
|
|
367
367
|
continue;
|
|
368
368
|
}
|
|
@@ -380,15 +380,15 @@ function extractFromBrokenZip(buffer) {
|
|
|
380
380
|
continue;
|
|
381
381
|
}
|
|
382
382
|
totalDecompressed += content.length * 2;
|
|
383
|
-
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
383
|
+
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
|
|
384
384
|
sectionNum++;
|
|
385
385
|
blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum));
|
|
386
386
|
} catch (e6) {
|
|
387
387
|
continue;
|
|
388
388
|
}
|
|
389
389
|
}
|
|
390
|
-
if (blocks.length === 0) throw new (0,
|
|
391
|
-
const markdown =
|
|
390
|
+
if (blocks.length === 0) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
391
|
+
const markdown = _chunkHXUCZ2ILcjs.blocksToMarkdown.call(void 0, blocks);
|
|
392
392
|
return { markdown, blocks, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
393
393
|
}
|
|
394
394
|
async function resolveSectionPaths(zip) {
|
|
@@ -406,7 +406,7 @@ async function resolveSectionPaths(zip) {
|
|
|
406
406
|
}
|
|
407
407
|
function parseSectionPathsFromManifest(xml) {
|
|
408
408
|
const parser = createXmlParser();
|
|
409
|
-
const doc = parser.parseFromString(
|
|
409
|
+
const doc = parser.parseFromString(_chunkHXUCZ2ILcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
410
410
|
const items = doc.getElementsByTagName("opf:item");
|
|
411
411
|
const spine = doc.getElementsByTagName("opf:itemref");
|
|
412
412
|
const isSectionId = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
|
|
@@ -453,9 +453,9 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
453
453
|
let level = 0;
|
|
454
454
|
if (baseFontSize > 0 && _optionalChain([block, 'access', _15 => _15.style, 'optionalAccess', _16 => _16.fontSize])) {
|
|
455
455
|
const ratio = block.style.fontSize / baseFontSize;
|
|
456
|
-
if (ratio >=
|
|
457
|
-
else if (ratio >=
|
|
458
|
-
else if (ratio >=
|
|
456
|
+
if (ratio >= _chunkHXUCZ2ILcjs.HEADING_RATIO_H1) level = 1;
|
|
457
|
+
else if (ratio >= _chunkHXUCZ2ILcjs.HEADING_RATIO_H2) level = 2;
|
|
458
|
+
else if (ratio >= _chunkHXUCZ2ILcjs.HEADING_RATIO_H3) level = 3;
|
|
459
459
|
}
|
|
460
460
|
const compactText = text.replace(/\s+/g, "");
|
|
461
461
|
if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
|
|
@@ -469,7 +469,7 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
469
469
|
}
|
|
470
470
|
function parseSectionXml(xml, styleMap, warnings, sectionNum) {
|
|
471
471
|
const parser = createXmlParser(warnings);
|
|
472
|
-
const doc = parser.parseFromString(
|
|
472
|
+
const doc = parser.parseFromString(_chunkHXUCZ2ILcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
473
473
|
if (!doc.documentElement) return [];
|
|
474
474
|
const blocks = [];
|
|
475
475
|
walkSection(doc.documentElement, blocks, null, [], styleMap, warnings, sectionNum);
|
|
@@ -513,16 +513,16 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
|
|
|
513
513
|
let nestedCols = 0;
|
|
514
514
|
for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
|
|
515
515
|
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
516
|
-
blocks.push({ type: "table", table:
|
|
516
|
+
blocks.push({ type: "table", table: _chunkHXUCZ2ILcjs.buildTable.call(void 0, newTable.rows), pageNumber: sectionNum });
|
|
517
517
|
} else {
|
|
518
|
-
const nestedText =
|
|
518
|
+
const nestedText = _chunkHXUCZ2ILcjs.convertTableToText.call(void 0, newTable.rows);
|
|
519
519
|
if (parentTable.cell) {
|
|
520
520
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
|
|
521
521
|
}
|
|
522
522
|
}
|
|
523
523
|
tableCtx = parentTable;
|
|
524
524
|
} else {
|
|
525
|
-
blocks.push({ type: "table", table:
|
|
525
|
+
blocks.push({ type: "table", table: _chunkHXUCZ2ILcjs.buildTable.call(void 0, newTable.rows), pageNumber: sectionNum });
|
|
526
526
|
tableCtx = null;
|
|
527
527
|
}
|
|
528
528
|
} else {
|
|
@@ -562,8 +562,8 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
|
|
|
562
562
|
const cs = isNaN(rawCs) ? 1 : rawCs;
|
|
563
563
|
const rawRs = parseInt(el.getAttribute("rowSpan") || "1", 10);
|
|
564
564
|
const rs = isNaN(rawRs) ? 1 : rawRs;
|
|
565
|
-
tableCtx.cell.colSpan = clampSpan(cs,
|
|
566
|
-
tableCtx.cell.rowSpan = clampSpan(rs,
|
|
565
|
+
tableCtx.cell.colSpan = clampSpan(cs, _chunkHXUCZ2ILcjs.MAX_COLS);
|
|
566
|
+
tableCtx.cell.rowSpan = clampSpan(rs, _chunkHXUCZ2ILcjs.MAX_ROWS);
|
|
567
567
|
}
|
|
568
568
|
break;
|
|
569
569
|
case "p": {
|
|
@@ -623,16 +623,16 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
|
|
|
623
623
|
let nestedCols = 0;
|
|
624
624
|
for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
|
|
625
625
|
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
626
|
-
blocks.push({ type: "table", table:
|
|
626
|
+
blocks.push({ type: "table", table: _chunkHXUCZ2ILcjs.buildTable.call(void 0, newTable.rows), pageNumber: sectionNum });
|
|
627
627
|
} else {
|
|
628
|
-
const nestedText =
|
|
628
|
+
const nestedText = _chunkHXUCZ2ILcjs.convertTableToText.call(void 0, newTable.rows);
|
|
629
629
|
if (parentTable.cell) {
|
|
630
630
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
|
|
631
631
|
}
|
|
632
632
|
}
|
|
633
633
|
tableCtx = parentTable;
|
|
634
634
|
} else {
|
|
635
|
-
blocks.push({ type: "table", table:
|
|
635
|
+
blocks.push({ type: "table", table: _chunkHXUCZ2ILcjs.buildTable.call(void 0, newTable.rows), pageNumber: sectionNum });
|
|
636
636
|
tableCtx = null;
|
|
637
637
|
}
|
|
638
638
|
} else {
|
|
@@ -740,7 +740,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
740
740
|
case "hyperlink": {
|
|
741
741
|
const url = child.getAttribute("url") || child.getAttribute("href") || "";
|
|
742
742
|
if (url) {
|
|
743
|
-
const safe =
|
|
743
|
+
const safe = _chunkHXUCZ2ILcjs.sanitizeHref.call(void 0, url);
|
|
744
744
|
if (safe) href = safe;
|
|
745
745
|
}
|
|
746
746
|
walk(child);
|
|
@@ -880,7 +880,7 @@ function decompressStream(data) {
|
|
|
880
880
|
return _zlib.inflateRawSync.call(void 0, data, opts);
|
|
881
881
|
}
|
|
882
882
|
function parseFileHeader(data) {
|
|
883
|
-
if (data.length < 40) throw new (0,
|
|
883
|
+
if (data.length < 40) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("FileHeader\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 40\uBC14\uC774\uD2B8)");
|
|
884
884
|
const sig = data.subarray(0, 32).toString("utf8").replace(/\0+$/, "");
|
|
885
885
|
return {
|
|
886
886
|
signature: sig,
|
|
@@ -1899,7 +1899,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
1899
1899
|
lenientCfb = parseLenientCfb(buffer);
|
|
1900
1900
|
warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
|
|
1901
1901
|
} catch (e11) {
|
|
1902
|
-
throw new (0,
|
|
1902
|
+
throw new (0, _chunkHXUCZ2ILcjs.KordocError)("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
|
|
1903
1903
|
}
|
|
1904
1904
|
}
|
|
1905
1905
|
const findStream = (path) => {
|
|
@@ -1910,11 +1910,11 @@ function parseHwp5Document(buffer, options) {
|
|
|
1910
1910
|
return lenientCfb.findStream(path);
|
|
1911
1911
|
};
|
|
1912
1912
|
const headerData = findStream("/FileHeader");
|
|
1913
|
-
if (!headerData) throw new (0,
|
|
1913
|
+
if (!headerData) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
|
|
1914
1914
|
const header = parseFileHeader(headerData);
|
|
1915
|
-
if (header.signature !== "HWP Document File") throw new (0,
|
|
1916
|
-
if (header.flags & FLAG_ENCRYPTED) throw new (0,
|
|
1917
|
-
if (header.flags & FLAG_DRM) throw new (0,
|
|
1915
|
+
if (header.signature !== "HWP Document File") throw new (0, _chunkHXUCZ2ILcjs.KordocError)("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
|
|
1916
|
+
if (header.flags & FLAG_ENCRYPTED) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
1917
|
+
if (header.flags & FLAG_DRM) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
1918
1918
|
const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
|
|
1919
1919
|
const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
|
|
1920
1920
|
const metadata = {
|
|
@@ -1923,7 +1923,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
1923
1923
|
if (cfb) extractHwp5Metadata(cfb, metadata);
|
|
1924
1924
|
const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
|
|
1925
1925
|
const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
|
|
1926
|
-
if (sections.length === 0) throw new (0,
|
|
1926
|
+
if (sections.length === 0) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
1927
1927
|
metadata.pageCount = sections.length;
|
|
1928
1928
|
const pageFilter = _optionalChain([options, 'optionalAccess', _21 => _21.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sections.length) : null;
|
|
1929
1929
|
const totalTarget = pageFilter ? pageFilter.size : sections.length;
|
|
@@ -1936,24 +1936,24 @@ function parseHwp5Document(buffer, options) {
|
|
|
1936
1936
|
const sectionData = sections[si];
|
|
1937
1937
|
const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
1938
1938
|
totalDecompressed += data.length;
|
|
1939
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
1939
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
1940
1940
|
const records = readRecords(data);
|
|
1941
1941
|
const sectionBlocks = parseSection(records, docInfo, warnings, si + 1);
|
|
1942
1942
|
blocks.push(...sectionBlocks);
|
|
1943
1943
|
parsedSections++;
|
|
1944
1944
|
_optionalChain([options, 'optionalAccess', _22 => _22.onProgress, 'optionalCall', _23 => _23(parsedSections, totalTarget)]);
|
|
1945
1945
|
} catch (secErr) {
|
|
1946
|
-
if (secErr instanceof
|
|
1946
|
+
if (secErr instanceof _chunkHXUCZ2ILcjs.KordocError) throw secErr;
|
|
1947
1947
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
1948
1948
|
}
|
|
1949
1949
|
}
|
|
1950
1950
|
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
1951
|
-
const flatBlocks =
|
|
1951
|
+
const flatBlocks = _chunkHXUCZ2ILcjs.flattenLayoutTables.call(void 0, blocks);
|
|
1952
1952
|
if (docInfo) {
|
|
1953
1953
|
detectHwp5Headings(flatBlocks, docInfo);
|
|
1954
1954
|
}
|
|
1955
1955
|
const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
1956
|
-
const markdown =
|
|
1956
|
+
const markdown = _chunkHXUCZ2ILcjs.blocksToMarkdown.call(void 0, flatBlocks);
|
|
1957
1957
|
return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
1958
1958
|
}
|
|
1959
1959
|
function parseDocInfoStream(cfb, compressed) {
|
|
@@ -2013,9 +2013,9 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2013
2013
|
let level = 0;
|
|
2014
2014
|
if (_optionalChain([block, 'access', _28 => _28.style, 'optionalAccess', _29 => _29.fontSize]) && baseFontSize > 0) {
|
|
2015
2015
|
const ratio = block.style.fontSize / baseFontSize;
|
|
2016
|
-
if (ratio >=
|
|
2017
|
-
else if (ratio >=
|
|
2018
|
-
else if (ratio >=
|
|
2016
|
+
if (ratio >= _chunkHXUCZ2ILcjs.HEADING_RATIO_H1) level = 1;
|
|
2017
|
+
else if (ratio >= _chunkHXUCZ2ILcjs.HEADING_RATIO_H2) level = 2;
|
|
2018
|
+
else if (ratio >= _chunkHXUCZ2ILcjs.HEADING_RATIO_H3) level = 3;
|
|
2019
2019
|
}
|
|
2020
2020
|
if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
|
|
2021
2021
|
if (level === 0) level = 2;
|
|
@@ -2100,7 +2100,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2100
2100
|
if (!raw) break;
|
|
2101
2101
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2102
2102
|
totalDecompressed += content.length;
|
|
2103
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2103
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2104
2104
|
sections.push({ idx: i, content });
|
|
2105
2105
|
}
|
|
2106
2106
|
if (sections.length === 0) {
|
|
@@ -2112,7 +2112,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2112
2112
|
if (raw) {
|
|
2113
2113
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2114
2114
|
totalDecompressed += content.length;
|
|
2115
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2115
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2116
2116
|
sections.push({ idx, content });
|
|
2117
2117
|
}
|
|
2118
2118
|
}
|
|
@@ -2129,7 +2129,7 @@ function findViewTextSectionsLenient(lcfb, compressed) {
|
|
|
2129
2129
|
try {
|
|
2130
2130
|
const content = decryptViewText(raw, compressed);
|
|
2131
2131
|
totalDecompressed += content.length;
|
|
2132
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2132
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkHXUCZ2ILcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2133
2133
|
sections.push({ idx: i, content });
|
|
2134
2134
|
} catch (e16) {
|
|
2135
2135
|
break;
|
|
@@ -2317,7 +2317,7 @@ function parseSection(records, docInfo, warnings, sectionNum) {
|
|
|
2317
2317
|
if (url && blocks.length > 0) {
|
|
2318
2318
|
const lastBlock = blocks[blocks.length - 1];
|
|
2319
2319
|
if (lastBlock.type === "paragraph" && !lastBlock.href) {
|
|
2320
|
-
lastBlock.href = _nullishCoalesce(
|
|
2320
|
+
lastBlock.href = _nullishCoalesce(_chunkHXUCZ2ILcjs.sanitizeHref.call(void 0, url), () => ( void 0));
|
|
2321
2321
|
}
|
|
2322
2322
|
}
|
|
2323
2323
|
}
|
|
@@ -2435,8 +2435,8 @@ function parseTableBlock(records, startIdx) {
|
|
|
2435
2435
|
if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
|
|
2436
2436
|
if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
|
|
2437
2437
|
if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
|
|
2438
|
-
rows = Math.min(rec.data.readUInt16LE(4),
|
|
2439
|
-
cols = Math.min(rec.data.readUInt16LE(6),
|
|
2438
|
+
rows = Math.min(rec.data.readUInt16LE(4), _chunkHXUCZ2ILcjs.MAX_ROWS);
|
|
2439
|
+
cols = Math.min(rec.data.readUInt16LE(6), _chunkHXUCZ2ILcjs.MAX_COLS);
|
|
2440
2440
|
}
|
|
2441
2441
|
if (rec.tagId === TAG_LIST_HEADER) {
|
|
2442
2442
|
const { cell, nextIdx } = parseCellBlock(records, i, tableLevel);
|
|
@@ -2458,7 +2458,7 @@ function parseTableBlock(records, startIdx) {
|
|
|
2458
2458
|
return { table: { rows, cols, cells: irCells, hasHeader: rows > 1 }, nextIdx: i };
|
|
2459
2459
|
}
|
|
2460
2460
|
const cellRows = arrangeCells(rows, cols, cells);
|
|
2461
|
-
return { table:
|
|
2461
|
+
return { table: _chunkHXUCZ2ILcjs.buildTable.call(void 0, cellRows), nextIdx: i };
|
|
2462
2462
|
}
|
|
2463
2463
|
function parseCellBlock(records, startIdx, tableLevel) {
|
|
2464
2464
|
const rec = records[startIdx];
|
|
@@ -2473,8 +2473,8 @@ function parseCellBlock(records, startIdx, tableLevel) {
|
|
|
2473
2473
|
rowAddr = rec.data.readUInt16LE(10);
|
|
2474
2474
|
const cs = rec.data.readUInt16LE(12);
|
|
2475
2475
|
const rs = rec.data.readUInt16LE(14);
|
|
2476
|
-
if (cs > 0) colSpan = Math.min(cs,
|
|
2477
|
-
if (rs > 0) rowSpan = Math.min(rs,
|
|
2476
|
+
if (cs > 0) colSpan = Math.min(cs, _chunkHXUCZ2ILcjs.MAX_COLS);
|
|
2477
|
+
if (rs > 0) rowSpan = Math.min(rs, _chunkHXUCZ2ILcjs.MAX_ROWS);
|
|
2478
2478
|
}
|
|
2479
2479
|
let i = startIdx + 1;
|
|
2480
2480
|
while (i < records.length) {
|
|
@@ -2565,7 +2565,7 @@ function getTextContent(el) {
|
|
|
2565
2565
|
return _nullishCoalesce(_optionalChain([el, 'access', _37 => _37.textContent, 'optionalAccess', _38 => _38.trim, 'call', _39 => _39()]), () => ( ""));
|
|
2566
2566
|
}
|
|
2567
2567
|
function parseXml(text) {
|
|
2568
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
2568
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkHXUCZ2ILcjs.stripDtd.call(void 0, text), "text/xml");
|
|
2569
2569
|
}
|
|
2570
2570
|
function parseSharedStrings(xml) {
|
|
2571
2571
|
const doc = parseXml(xml);
|
|
@@ -2709,7 +2709,7 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2709
2709
|
cellRows.push(row);
|
|
2710
2710
|
}
|
|
2711
2711
|
if (cellRows.length > 0) {
|
|
2712
|
-
const table =
|
|
2712
|
+
const table = _chunkHXUCZ2ILcjs.buildTable.call(void 0, cellRows);
|
|
2713
2713
|
if (table.rows > 0) {
|
|
2714
2714
|
blocks.push({ type: "table", table, pageNumber: sheetIndex + 1 });
|
|
2715
2715
|
}
|
|
@@ -2717,12 +2717,12 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2717
2717
|
return blocks;
|
|
2718
2718
|
}
|
|
2719
2719
|
async function parseXlsxDocument(buffer, options) {
|
|
2720
|
-
|
|
2720
|
+
_chunkHXUCZ2ILcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE3);
|
|
2721
2721
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
2722
2722
|
const warnings = [];
|
|
2723
2723
|
const workbookFile = zip.file("xl/workbook.xml");
|
|
2724
2724
|
if (!workbookFile) {
|
|
2725
|
-
throw new (0,
|
|
2725
|
+
throw new (0, _chunkHXUCZ2ILcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 XLSX \uD30C\uC77C: xl/workbook.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2726
2726
|
}
|
|
2727
2727
|
let sharedStrings = [];
|
|
2728
2728
|
const ssFile = zip.file("xl/sharedStrings.xml");
|
|
@@ -2731,7 +2731,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2731
2731
|
}
|
|
2732
2732
|
const sheets = parseWorkbook(await workbookFile.async("text"));
|
|
2733
2733
|
if (sheets.length === 0) {
|
|
2734
|
-
throw new (0,
|
|
2734
|
+
throw new (0, _chunkHXUCZ2ILcjs.KordocError)("XLSX \uD30C\uC77C\uC5D0 \uC2DC\uD2B8\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2735
2735
|
}
|
|
2736
2736
|
let relsMap = /* @__PURE__ */ new Map();
|
|
2737
2737
|
const relsFile = zip.file("xl/_rels/workbook.xml.rels");
|
|
@@ -2803,7 +2803,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2803
2803
|
} catch (e20) {
|
|
2804
2804
|
}
|
|
2805
2805
|
}
|
|
2806
|
-
const markdown =
|
|
2806
|
+
const markdown = _chunkHXUCZ2ILcjs.blocksToMarkdown.call(void 0, blocks);
|
|
2807
2807
|
return { markdown, blocks, metadata, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
2808
2808
|
}
|
|
2809
2809
|
|
|
@@ -2852,7 +2852,7 @@ function getAttr(el, localName2) {
|
|
|
2852
2852
|
return null;
|
|
2853
2853
|
}
|
|
2854
2854
|
function parseXml2(text) {
|
|
2855
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
2855
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkHXUCZ2ILcjs.stripDtd.call(void 0, text), "text/xml");
|
|
2856
2856
|
}
|
|
2857
2857
|
function parseStyles(xml) {
|
|
2858
2858
|
const doc = parseXml2(xml);
|
|
@@ -3145,12 +3145,12 @@ async function extractImages(zip, rels, doc) {
|
|
|
3145
3145
|
return { blocks, images };
|
|
3146
3146
|
}
|
|
3147
3147
|
async function parseDocxDocument(buffer, options) {
|
|
3148
|
-
|
|
3148
|
+
_chunkHXUCZ2ILcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE4);
|
|
3149
3149
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
3150
3150
|
const warnings = [];
|
|
3151
3151
|
const docFile = zip.file("word/document.xml");
|
|
3152
3152
|
if (!docFile) {
|
|
3153
|
-
throw new (0,
|
|
3153
|
+
throw new (0, _chunkHXUCZ2ILcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 DOCX \uD30C\uC77C: word/document.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3154
3154
|
}
|
|
3155
3155
|
let rels = /* @__PURE__ */ new Map();
|
|
3156
3156
|
const relsFile = zip.file("word/_rels/document.xml.rels");
|
|
@@ -3185,7 +3185,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3185
3185
|
const doc = parseXml2(docXml);
|
|
3186
3186
|
const body = findElements(doc, "body");
|
|
3187
3187
|
if (body.length === 0) {
|
|
3188
|
-
throw new (0,
|
|
3188
|
+
throw new (0, _chunkHXUCZ2ILcjs.KordocError)("DOCX \uBCF8\uBB38(w:body)\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3189
3189
|
}
|
|
3190
3190
|
const blocks = [];
|
|
3191
3191
|
const bodyEl = body[0];
|
|
@@ -3225,7 +3225,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3225
3225
|
}
|
|
3226
3226
|
}
|
|
3227
3227
|
const outline = blocks.filter((b) => b.type === "heading").map((b) => ({ level: _nullishCoalesce(b.level, () => ( 2)), text: _nullishCoalesce(b.text, () => ( "")) }));
|
|
3228
|
-
const markdown =
|
|
3228
|
+
const markdown = _chunkHXUCZ2ILcjs.blocksToMarkdown.call(void 0, blocks);
|
|
3229
3229
|
return {
|
|
3230
3230
|
markdown,
|
|
3231
3231
|
blocks,
|
|
@@ -3578,7 +3578,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
3578
3578
|
const normalizedValues = normalizeValues(values);
|
|
3579
3579
|
const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
|
|
3580
3580
|
if (sectionFiles.length === 0) {
|
|
3581
|
-
throw new (0,
|
|
3581
|
+
throw new (0, _chunkHXUCZ2ILcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3582
3582
|
}
|
|
3583
3583
|
const xmlParser = new (0, _xmldom.DOMParser)();
|
|
3584
3584
|
const xmlSerializer = new (0, _xmldom.XMLSerializer)();
|
|
@@ -3586,7 +3586,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
3586
3586
|
const zipEntry = zip.file(sectionPath);
|
|
3587
3587
|
if (!zipEntry) continue;
|
|
3588
3588
|
const rawXml = await zipEntry.async("text");
|
|
3589
|
-
const doc = xmlParser.parseFromString(
|
|
3589
|
+
const doc = xmlParser.parseFromString(_chunkHXUCZ2ILcjs.stripDtd.call(void 0, rawXml), "text/xml");
|
|
3590
3590
|
if (!doc.documentElement) continue;
|
|
3591
3591
|
let modified = false;
|
|
3592
3592
|
const tables = findAllElements(doc.documentElement, "tbl");
|
|
@@ -4447,13 +4447,13 @@ async function parse(input, options) {
|
|
|
4447
4447
|
if (typeof input === "string") {
|
|
4448
4448
|
try {
|
|
4449
4449
|
const buf = await _promises.readFile.call(void 0, input);
|
|
4450
|
-
buffer =
|
|
4450
|
+
buffer = _chunkHXUCZ2ILcjs.toArrayBuffer.call(void 0, buf);
|
|
4451
4451
|
} catch (err) {
|
|
4452
4452
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
4453
4453
|
return { success: false, fileType: "unknown", error: msg, code: "PARSE_ERROR" };
|
|
4454
4454
|
}
|
|
4455
4455
|
} else if (Buffer.isBuffer(input)) {
|
|
4456
|
-
buffer =
|
|
4456
|
+
buffer = _chunkHXUCZ2ILcjs.toArrayBuffer.call(void 0, input);
|
|
4457
4457
|
} else {
|
|
4458
4458
|
buffer = input;
|
|
4459
4459
|
}
|
|
@@ -4481,7 +4481,7 @@ async function parseHwpx(buffer, options) {
|
|
|
4481
4481
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseHwpxDocument(buffer, options);
|
|
4482
4482
|
return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _76 => _76.length]) ? images : void 0 };
|
|
4483
4483
|
} catch (err) {
|
|
4484
|
-
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4484
|
+
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkHXUCZ2ILcjs.classifyError.call(void 0, err) };
|
|
4485
4485
|
}
|
|
4486
4486
|
}
|
|
4487
4487
|
async function parseHwp(buffer, options) {
|
|
@@ -4489,13 +4489,13 @@ async function parseHwp(buffer, options) {
|
|
|
4489
4489
|
const { markdown, blocks, metadata, outline, warnings, images } = parseHwp5Document(Buffer.from(buffer), options);
|
|
4490
4490
|
return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _77 => _77.length]) ? images : void 0 };
|
|
4491
4491
|
} catch (err) {
|
|
4492
|
-
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4492
|
+
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code: _chunkHXUCZ2ILcjs.classifyError.call(void 0, err) };
|
|
4493
4493
|
}
|
|
4494
4494
|
}
|
|
4495
4495
|
async function parsePdf(buffer, options) {
|
|
4496
4496
|
let parsePdfDocument;
|
|
4497
4497
|
try {
|
|
4498
|
-
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-
|
|
4498
|
+
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-KOWPTDJU.cjs")));
|
|
4499
4499
|
parsePdfDocument = mod.parsePdfDocument;
|
|
4500
4500
|
} catch (e26) {
|
|
4501
4501
|
return {
|
|
@@ -4510,7 +4510,7 @@ async function parsePdf(buffer, options) {
|
|
|
4510
4510
|
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
|
|
4511
4511
|
} catch (err) {
|
|
4512
4512
|
const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
|
|
4513
|
-
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4513
|
+
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: _chunkHXUCZ2ILcjs.classifyError.call(void 0, err), isImageBased };
|
|
4514
4514
|
}
|
|
4515
4515
|
}
|
|
4516
4516
|
async function parseXlsx(buffer, options) {
|
|
@@ -4518,7 +4518,7 @@ async function parseXlsx(buffer, options) {
|
|
|
4518
4518
|
const { markdown, blocks, metadata, warnings } = await parseXlsxDocument(buffer, options);
|
|
4519
4519
|
return { success: true, fileType: "xlsx", markdown, blocks, metadata, warnings };
|
|
4520
4520
|
} catch (err) {
|
|
4521
|
-
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4521
|
+
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkHXUCZ2ILcjs.classifyError.call(void 0, err) };
|
|
4522
4522
|
}
|
|
4523
4523
|
}
|
|
4524
4524
|
async function parseDocx(buffer, options) {
|
|
@@ -4526,16 +4526,16 @@ async function parseDocx(buffer, options) {
|
|
|
4526
4526
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseDocxDocument(buffer, options);
|
|
4527
4527
|
return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _78 => _78.length]) ? images : void 0 };
|
|
4528
4528
|
} catch (err) {
|
|
4529
|
-
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4529
|
+
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkHXUCZ2ILcjs.classifyError.call(void 0, err) };
|
|
4530
4530
|
}
|
|
4531
4531
|
}
|
|
4532
4532
|
async function fillForm(input, values, outputFormat = "markdown") {
|
|
4533
4533
|
let buffer;
|
|
4534
4534
|
if (typeof input === "string") {
|
|
4535
4535
|
const buf = await _promises.readFile.call(void 0, input);
|
|
4536
|
-
buffer =
|
|
4536
|
+
buffer = _chunkHXUCZ2ILcjs.toArrayBuffer.call(void 0, buf);
|
|
4537
4537
|
} else if (Buffer.isBuffer(input)) {
|
|
4538
|
-
buffer =
|
|
4538
|
+
buffer = _chunkHXUCZ2ILcjs.toArrayBuffer.call(void 0, input);
|
|
4539
4539
|
} else {
|
|
4540
4540
|
buffer = input;
|
|
4541
4541
|
}
|
|
@@ -4561,7 +4561,7 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
4561
4561
|
throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
|
|
4562
4562
|
}
|
|
4563
4563
|
const fill = fillFormFields(parsed.blocks, values);
|
|
4564
|
-
const markdown =
|
|
4564
|
+
const markdown = _chunkHXUCZ2ILcjs.blocksToMarkdown.call(void 0, fill.blocks);
|
|
4565
4565
|
if (outputFormat === "hwpx") {
|
|
4566
4566
|
const hwpxBuffer = await markdownToHwpx(markdown);
|
|
4567
4567
|
return { output: hwpxBuffer, format: "hwpx", fill };
|
|
@@ -4591,5 +4591,5 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
4591
4591
|
|
|
4592
4592
|
|
|
4593
4593
|
|
|
4594
|
-
exports.VERSION =
|
|
4594
|
+
exports.VERSION = _chunkHXUCZ2ILcjs.VERSION; exports.blocksToMarkdown = _chunkHXUCZ2ILcjs.blocksToMarkdown; exports.compare = compare; exports.detectFormat = detectFormat; exports.detectZipFormat = detectZipFormat; exports.diffBlocks = diffBlocks; exports.extractFormFields = extractFormFields; exports.fillForm = fillForm; exports.fillFormFields = fillFormFields; exports.fillHwpx = fillHwpx; exports.isHwpxFile = isHwpxFile; exports.isLabelCell = isLabelCell; exports.isOldHwpFile = isOldHwpFile; exports.isPdfFile = isPdfFile; exports.isZipFile = isZipFile; exports.markdownToHwpx = markdownToHwpx; exports.parse = parse; exports.parseDocx = parseDocx; exports.parseHwp = parseHwp; exports.parseHwpx = parseHwpx; exports.parsePdf = parsePdf; exports.parseXlsx = parseXlsx;
|
|
4595
4595
|
//# sourceMappingURL=index.cjs.map
|
package/dist/index.js
CHANGED
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
sanitizeHref,
|
|
17
17
|
stripDtd,
|
|
18
18
|
toArrayBuffer
|
|
19
|
-
} from "./chunk-
|
|
19
|
+
} from "./chunk-NL5XLN5R.js";
|
|
20
20
|
import {
|
|
21
21
|
parsePageRange
|
|
22
22
|
} from "./chunk-SBVRCJFH.js";
|
|
@@ -4495,7 +4495,7 @@ async function parseHwp(buffer, options) {
|
|
|
4495
4495
|
async function parsePdf(buffer, options) {
|
|
4496
4496
|
let parsePdfDocument;
|
|
4497
4497
|
try {
|
|
4498
|
-
const mod = await import("./parser-
|
|
4498
|
+
const mod = await import("./parser-43IAQ5KE.js");
|
|
4499
4499
|
parsePdfDocument = mod.parsePdfDocument;
|
|
4500
4500
|
} catch {
|
|
4501
4501
|
return {
|
package/dist/mcp.js
CHANGED
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
fillHwpx,
|
|
9
9
|
markdownToHwpx,
|
|
10
10
|
parse
|
|
11
|
-
} from "./chunk-
|
|
11
|
+
} from "./chunk-RF6UJXR3.js";
|
|
12
12
|
import {
|
|
13
13
|
detectFormat,
|
|
14
14
|
detectZipFormat
|
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
blocksToMarkdown,
|
|
20
20
|
sanitizeError,
|
|
21
21
|
toArrayBuffer
|
|
22
|
-
} from "./chunk-
|
|
22
|
+
} from "./chunk-FCQEF2ZM.js";
|
|
23
23
|
import "./chunk-MOL7MDBG.js";
|
|
24
24
|
|
|
25
25
|
// src/mcp.ts
|
|
@@ -191,7 +191,7 @@ server.tool(
|
|
|
191
191
|
break;
|
|
192
192
|
case "pdf":
|
|
193
193
|
try {
|
|
194
|
-
const { extractPdfMetadataOnly } = await import("./parser-
|
|
194
|
+
const { extractPdfMetadataOnly } = await import("./parser-AMP7MAOH.js");
|
|
195
195
|
metadata = await extractPdfMetadataOnly(buffer);
|
|
196
196
|
} catch {
|
|
197
197
|
metadata = void 0;
|