kordoc 2.4.0 → 2.4.1
This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/dist/{chunk-VLSATRNQ.cjs → chunk-IVC5CB2Q.cjs} +2 -2
- package/dist/{chunk-VLSATRNQ.cjs.map → chunk-IVC5CB2Q.cjs.map} +1 -1
- package/dist/{chunk-KSBPABBQ.js → chunk-JFPF7B5L.js} +20 -5
- package/dist/chunk-JFPF7B5L.js.map +1 -0
- package/dist/{chunk-XG5CQUSC.js → chunk-T65PPCNU.js} +2 -2
- package/dist/{chunk-VJPDY4YT.js → chunk-VYFIAYCW.js} +2 -2
- package/dist/cli.js +7 -3
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +101 -86
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +19 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +3 -3
- package/dist/{parser-4275GJRB.js → parser-UHUCMAA7.js} +2 -2
- package/dist/{parser-XRUZEFZT.js → parser-VXUBNDG4.js} +2 -2
- package/dist/{parser-STAOZMUC.cjs → parser-ZORW4RSC.cjs} +15 -15
- package/dist/{parser-STAOZMUC.cjs.map → parser-ZORW4RSC.cjs.map} +1 -1
- package/dist/setup-57FB3LSP.js +201 -0
- package/dist/setup-57FB3LSP.js.map +1 -0
- package/dist/{watch-BFLNFJBE.js → watch-SSENKOE2.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-KSBPABBQ.js.map +0 -1
- package/dist/{chunk-XG5CQUSC.js.map → chunk-T65PPCNU.js.map} +0 -0
- package/dist/{chunk-VJPDY4YT.js.map → chunk-VYFIAYCW.js.map} +0 -0
- package/dist/{parser-4275GJRB.js.map → parser-UHUCMAA7.js.map} +0 -0
- package/dist/{parser-XRUZEFZT.js.map → parser-VXUBNDG4.js.map} +0 -0
- package/dist/{watch-BFLNFJBE.js.map → watch-SSENKOE2.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
var
|
|
19
|
+
var _chunkIVC5CB2Qcjs = require('./chunk-IVC5CB2Q.cjs');
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -93,21 +93,36 @@ function extractTextViaCom(filePath) {
|
|
|
93
93
|
const ps1 = `
|
|
94
94
|
[Console]::OutputEncoding = [System.Text.Encoding]::UTF8
|
|
95
95
|
$ErrorActionPreference = 'Stop'
|
|
96
|
+
|
|
97
|
+
$src = '${escaped}'
|
|
98
|
+
$tmpDir = Join-Path $env:TEMP ('hwp-com-' + [guid]::NewGuid().ToString('N'))
|
|
99
|
+
[void](New-Item -ItemType Directory -Path $tmpDir -Force)
|
|
100
|
+
$tmpFile = Join-Path $tmpDir (Split-Path $src -Leaf)
|
|
101
|
+
Copy-Item -LiteralPath $src -Destination $tmpFile -Force
|
|
102
|
+
|
|
96
103
|
try {
|
|
97
104
|
$hwp = New-Object -ComObject HWPFrame.HwpObject
|
|
98
105
|
$hwp.RegisterModule('FilePathCheckerModule', 'FilePathCheckerModuleExample') | Out-Null
|
|
99
|
-
$hwp.Open(
|
|
106
|
+
$hwp.Open($tmpFile, '', '') | Out-Null
|
|
100
107
|
$pc = $hwp.PageCount
|
|
101
108
|
$result = @{ pageCount = $pc; pages = @() }
|
|
102
109
|
for ($p = 1; $p -le $pc; $p++) {
|
|
103
110
|
$t = $hwp.GetPageText($p, 0)
|
|
104
111
|
$result.pages += @($t)
|
|
105
112
|
}
|
|
106
|
-
$hwp.Clear(1)
|
|
113
|
+
$hwp.Clear(1) | Out-Null
|
|
114
|
+
try { $hwp.Quit() | Out-Null } catch { }
|
|
107
115
|
[System.Runtime.InteropServices.Marshal]::ReleaseComObject($hwp) | Out-Null
|
|
116
|
+
[GC]::Collect()
|
|
117
|
+
[GC]::WaitForPendingFinalizers()
|
|
108
118
|
$result | ConvertTo-Json -Depth 3 -Compress
|
|
109
119
|
} catch {
|
|
110
120
|
@{ error = $_.Exception.Message } | ConvertTo-Json -Compress
|
|
121
|
+
} finally {
|
|
122
|
+
# \uC784\uC2DC \uD30C\uC77C \uC815\uB9AC + \uC880\uBE44 Hwp.exe \uBC29\uC9C0\uC6A9 garbage collect
|
|
123
|
+
try { Remove-Item -LiteralPath $tmpDir -Recurse -Force -ErrorAction SilentlyContinue } catch { }
|
|
124
|
+
[GC]::Collect()
|
|
125
|
+
[GC]::WaitForPendingFinalizers()
|
|
111
126
|
}
|
|
112
127
|
`;
|
|
113
128
|
const stdout = _child_process.execFileSync.call(void 0, "powershell", [
|
|
@@ -178,7 +193,7 @@ var MAX_XML_DEPTH = 200;
|
|
|
178
193
|
function createXmlParser(warnings) {
|
|
179
194
|
return new (0, _xmldom.DOMParser)({
|
|
180
195
|
onError(level, msg) {
|
|
181
|
-
if (level === "fatalError") throw new (0,
|
|
196
|
+
if (level === "fatalError") throw new (0, _chunkIVC5CB2Qcjs.KordocError)(`XML \uD30C\uC2F1 \uC2E4\uD328: ${msg}`);
|
|
182
197
|
_optionalChain([warnings, 'optionalAccess', _2 => _2.push, 'call', _3 => _3({ code: "MALFORMED_XML", message: `XML ${level === "warn" ? "\uACBD\uACE0" : "\uC624\uB958"}: ${msg}` })]);
|
|
183
198
|
}
|
|
184
199
|
});
|
|
@@ -197,10 +212,10 @@ async function extractHwpxStyles(zip, decompressed) {
|
|
|
197
212
|
const xml = await file.async("text");
|
|
198
213
|
if (decompressed) {
|
|
199
214
|
decompressed.total += xml.length * 2;
|
|
200
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
215
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
201
216
|
}
|
|
202
217
|
const parser = createXmlParser();
|
|
203
|
-
const doc = parser.parseFromString(
|
|
218
|
+
const doc = parser.parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
204
219
|
if (!doc.documentElement) continue;
|
|
205
220
|
parseCharProperties(doc, result.charProperties);
|
|
206
221
|
parseStyleElements(doc, result.styles);
|
|
@@ -262,7 +277,7 @@ function parseStyleElements(doc, map) {
|
|
|
262
277
|
}
|
|
263
278
|
}
|
|
264
279
|
async function parseHwpxDocument(buffer, options) {
|
|
265
|
-
|
|
280
|
+
_chunkIVC5CB2Qcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
|
|
266
281
|
let zip;
|
|
267
282
|
try {
|
|
268
283
|
zip = await _jszip2.default.loadAsync(buffer);
|
|
@@ -271,7 +286,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
271
286
|
}
|
|
272
287
|
const actualEntryCount = Object.keys(zip.files).length;
|
|
273
288
|
if (actualEntryCount > MAX_ZIP_ENTRIES) {
|
|
274
|
-
throw new (0,
|
|
289
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
275
290
|
}
|
|
276
291
|
const manifestFile = zip.file("META-INF/manifest.xml");
|
|
277
292
|
if (manifestFile) {
|
|
@@ -283,7 +298,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
283
298
|
return comResultToParseResult(pages, pageCount, warnings2);
|
|
284
299
|
}
|
|
285
300
|
}
|
|
286
|
-
throw new (0,
|
|
301
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("DRM \uC554\uD638\uD654\uB41C HWPX \uD30C\uC77C\uC785\uB2C8\uB2E4. Windows + \uD55C\uCEF4 \uC624\uD53C\uC2A4 \uC124\uCE58 \uC2DC \uC790\uB3D9 \uCD94\uCD9C\uB429\uB2C8\uB2E4.");
|
|
287
302
|
}
|
|
288
303
|
}
|
|
289
304
|
const decompressed = { total: 0 };
|
|
@@ -292,7 +307,7 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
292
307
|
const styleMap = await extractHwpxStyles(zip, decompressed);
|
|
293
308
|
const warnings = [];
|
|
294
309
|
const sectionPaths = await resolveSectionPaths(zip);
|
|
295
|
-
if (sectionPaths.length === 0) throw new (0,
|
|
310
|
+
if (sectionPaths.length === 0) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
296
311
|
metadata.pageCount = sectionPaths.length;
|
|
297
312
|
const pageFilter = _optionalChain([options, 'optionalAccess', _5 => _5.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sectionPaths.length) : null;
|
|
298
313
|
const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
|
|
@@ -306,19 +321,19 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
306
321
|
try {
|
|
307
322
|
const xml = await file.async("text");
|
|
308
323
|
decompressed.total += xml.length * 2;
|
|
309
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
324
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
310
325
|
blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, nestedTableCounter));
|
|
311
326
|
parsedSections++;
|
|
312
327
|
_optionalChain([options, 'optionalAccess', _6 => _6.onProgress, 'optionalCall', _7 => _7(parsedSections, totalTarget)]);
|
|
313
328
|
} catch (secErr) {
|
|
314
|
-
if (secErr instanceof
|
|
329
|
+
if (secErr instanceof _chunkIVC5CB2Qcjs.KordocError) throw secErr;
|
|
315
330
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
316
331
|
}
|
|
317
332
|
}
|
|
318
333
|
const images = await extractImagesFromZip(zip, blocks, decompressed, warnings);
|
|
319
334
|
detectHwpxHeadings(blocks, styleMap);
|
|
320
335
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
321
|
-
const markdown =
|
|
336
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, blocks);
|
|
322
337
|
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
323
338
|
}
|
|
324
339
|
function imageExtToMime(ext) {
|
|
@@ -382,13 +397,13 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
382
397
|
let found = false;
|
|
383
398
|
const allCandidates = resolvedPath ? [resolvedPath, ...candidates] : candidates;
|
|
384
399
|
for (const path of allCandidates) {
|
|
385
|
-
if (
|
|
400
|
+
if (_chunkIVC5CB2Qcjs.isPathTraversal.call(void 0, path)) continue;
|
|
386
401
|
const file = zip.file(path);
|
|
387
402
|
if (!file) continue;
|
|
388
403
|
try {
|
|
389
404
|
const data = await file.async("uint8array");
|
|
390
405
|
decompressed.total += data.length;
|
|
391
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
406
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
392
407
|
const actualPath = path;
|
|
393
408
|
const ext = actualPath.includes(".") ? actualPath.split(".").pop() || "png" : "png";
|
|
394
409
|
const mimeType = imageExtToMime(ext);
|
|
@@ -400,7 +415,7 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
400
415
|
found = true;
|
|
401
416
|
break;
|
|
402
417
|
} catch (err) {
|
|
403
|
-
if (err instanceof
|
|
418
|
+
if (err instanceof _chunkIVC5CB2Qcjs.KordocError) throw err;
|
|
404
419
|
}
|
|
405
420
|
}
|
|
406
421
|
if (!found) {
|
|
@@ -420,7 +435,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
420
435
|
const xml = await file.async("text");
|
|
421
436
|
if (decompressed) {
|
|
422
437
|
decompressed.total += xml.length * 2;
|
|
423
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
438
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
424
439
|
}
|
|
425
440
|
parseDublinCoreMetadata(xml, metadata);
|
|
426
441
|
if (metadata.title || metadata.author) return;
|
|
@@ -430,7 +445,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
430
445
|
}
|
|
431
446
|
function parseDublinCoreMetadata(xml, metadata) {
|
|
432
447
|
const parser = createXmlParser();
|
|
433
|
-
const doc = parser.parseFromString(
|
|
448
|
+
const doc = parser.parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
434
449
|
if (!doc.documentElement) return;
|
|
435
450
|
const getText = (tagNames) => {
|
|
436
451
|
for (const tag of tagNames) {
|
|
@@ -490,7 +505,7 @@ function extractFromBrokenZip(buffer) {
|
|
|
490
505
|
}
|
|
491
506
|
const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
|
|
492
507
|
const name = new TextDecoder().decode(nameBytes);
|
|
493
|
-
if (
|
|
508
|
+
if (_chunkIVC5CB2Qcjs.isPathTraversal.call(void 0, name)) {
|
|
494
509
|
pos = fileStart + compSize;
|
|
495
510
|
continue;
|
|
496
511
|
}
|
|
@@ -508,15 +523,15 @@ function extractFromBrokenZip(buffer) {
|
|
|
508
523
|
continue;
|
|
509
524
|
}
|
|
510
525
|
totalDecompressed += content.length * 2;
|
|
511
|
-
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
526
|
+
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
|
|
512
527
|
sectionNum++;
|
|
513
528
|
blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, nestedTableCounter));
|
|
514
529
|
} catch (e6) {
|
|
515
530
|
continue;
|
|
516
531
|
}
|
|
517
532
|
}
|
|
518
|
-
if (blocks.length === 0) throw new (0,
|
|
519
|
-
const markdown =
|
|
533
|
+
if (blocks.length === 0) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
534
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, blocks);
|
|
520
535
|
return { markdown, blocks, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
521
536
|
}
|
|
522
537
|
async function resolveSectionPaths(zip) {
|
|
@@ -534,7 +549,7 @@ async function resolveSectionPaths(zip) {
|
|
|
534
549
|
}
|
|
535
550
|
function parseSectionPathsFromManifest(xml) {
|
|
536
551
|
const parser = createXmlParser();
|
|
537
|
-
const doc = parser.parseFromString(
|
|
552
|
+
const doc = parser.parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
538
553
|
const items = doc.getElementsByTagName("opf:item");
|
|
539
554
|
const spine = doc.getElementsByTagName("opf:itemref");
|
|
540
555
|
const isSectionId = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
|
|
@@ -581,9 +596,9 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
581
596
|
let level = 0;
|
|
582
597
|
if (baseFontSize > 0 && _optionalChain([block, 'access', _16 => _16.style, 'optionalAccess', _17 => _17.fontSize])) {
|
|
583
598
|
const ratio = block.style.fontSize / baseFontSize;
|
|
584
|
-
if (ratio >=
|
|
585
|
-
else if (ratio >=
|
|
586
|
-
else if (ratio >=
|
|
599
|
+
if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H1) level = 1;
|
|
600
|
+
else if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H2) level = 2;
|
|
601
|
+
else if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H3) level = 3;
|
|
587
602
|
}
|
|
588
603
|
const compactText = text.replace(/\s+/g, "");
|
|
589
604
|
if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
|
|
@@ -608,13 +623,13 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
608
623
|
let nestedCols = 0;
|
|
609
624
|
for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
|
|
610
625
|
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
611
|
-
blocks.push({ type: "table", table:
|
|
626
|
+
blocks.push({ type: "table", table: _chunkIVC5CB2Qcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
612
627
|
if (parentTable.cell) {
|
|
613
628
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
614
629
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker;
|
|
615
630
|
}
|
|
616
631
|
} else {
|
|
617
|
-
const nestedText =
|
|
632
|
+
const nestedText = _chunkIVC5CB2Qcjs.convertTableToText.call(void 0, newTable.rows);
|
|
618
633
|
if (parentTable.cell) {
|
|
619
634
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
620
635
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker + "\n" + nestedText;
|
|
@@ -624,7 +639,7 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
624
639
|
}
|
|
625
640
|
function parseSectionXml(xml, styleMap, warnings, sectionNum, counter) {
|
|
626
641
|
const parser = createXmlParser(warnings);
|
|
627
|
-
const doc = parser.parseFromString(
|
|
642
|
+
const doc = parser.parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
628
643
|
if (!doc.documentElement) return [];
|
|
629
644
|
const blocks = [];
|
|
630
645
|
const ctx = { styleMap, warnings, sectionNum, counter };
|
|
@@ -667,7 +682,7 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
667
682
|
if (tableStack.length > 0) {
|
|
668
683
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
669
684
|
} else {
|
|
670
|
-
blocks.push({ type: "table", table:
|
|
685
|
+
blocks.push({ type: "table", table: _chunkIVC5CB2Qcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
671
686
|
tableCtx = null;
|
|
672
687
|
}
|
|
673
688
|
} else {
|
|
@@ -707,8 +722,8 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
707
722
|
const cs = isNaN(rawCs) ? 1 : rawCs;
|
|
708
723
|
const rawRs = parseInt(el.getAttribute("rowSpan") || "1", 10);
|
|
709
724
|
const rs = isNaN(rawRs) ? 1 : rawRs;
|
|
710
|
-
tableCtx.cell.colSpan = clampSpan(cs,
|
|
711
|
-
tableCtx.cell.rowSpan = clampSpan(rs,
|
|
725
|
+
tableCtx.cell.colSpan = clampSpan(cs, _chunkIVC5CB2Qcjs.MAX_COLS);
|
|
726
|
+
tableCtx.cell.rowSpan = clampSpan(rs, _chunkIVC5CB2Qcjs.MAX_ROWS);
|
|
712
727
|
}
|
|
713
728
|
break;
|
|
714
729
|
case "p": {
|
|
@@ -766,7 +781,7 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth =
|
|
|
766
781
|
if (tableStack.length > 0) {
|
|
767
782
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
768
783
|
} else {
|
|
769
|
-
blocks.push({ type: "table", table:
|
|
784
|
+
blocks.push({ type: "table", table: _chunkIVC5CB2Qcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
770
785
|
tableCtx = null;
|
|
771
786
|
}
|
|
772
787
|
} else {
|
|
@@ -874,7 +889,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
874
889
|
case "hyperlink": {
|
|
875
890
|
const url = child.getAttribute("url") || child.getAttribute("href") || "";
|
|
876
891
|
if (url) {
|
|
877
|
-
const safe =
|
|
892
|
+
const safe = _chunkIVC5CB2Qcjs.sanitizeHref.call(void 0, url);
|
|
878
893
|
if (safe) href = safe;
|
|
879
894
|
}
|
|
880
895
|
walk(child);
|
|
@@ -1014,7 +1029,7 @@ function decompressStream(data) {
|
|
|
1014
1029
|
return _zlib.inflateRawSync.call(void 0, data, opts);
|
|
1015
1030
|
}
|
|
1016
1031
|
function parseFileHeader(data) {
|
|
1017
|
-
if (data.length < 40) throw new (0,
|
|
1032
|
+
if (data.length < 40) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("FileHeader\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 40\uBC14\uC774\uD2B8)");
|
|
1018
1033
|
const sig = data.subarray(0, 32).toString("utf8").replace(/\0+$/, "");
|
|
1019
1034
|
return {
|
|
1020
1035
|
signature: sig,
|
|
@@ -2033,7 +2048,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
2033
2048
|
lenientCfb = parseLenientCfb(buffer);
|
|
2034
2049
|
warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
|
|
2035
2050
|
} catch (e11) {
|
|
2036
|
-
throw new (0,
|
|
2051
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
|
|
2037
2052
|
}
|
|
2038
2053
|
}
|
|
2039
2054
|
const findStream = (path) => {
|
|
@@ -2044,11 +2059,11 @@ function parseHwp5Document(buffer, options) {
|
|
|
2044
2059
|
return lenientCfb.findStream(path);
|
|
2045
2060
|
};
|
|
2046
2061
|
const headerData = findStream("/FileHeader");
|
|
2047
|
-
if (!headerData) throw new (0,
|
|
2062
|
+
if (!headerData) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
|
|
2048
2063
|
const header = parseFileHeader(headerData);
|
|
2049
|
-
if (header.signature !== "HWP Document File") throw new (0,
|
|
2050
|
-
if (header.flags & FLAG_ENCRYPTED) throw new (0,
|
|
2051
|
-
if (header.flags & FLAG_DRM) throw new (0,
|
|
2064
|
+
if (header.signature !== "HWP Document File") throw new (0, _chunkIVC5CB2Qcjs.KordocError)("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
|
|
2065
|
+
if (header.flags & FLAG_ENCRYPTED) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2066
|
+
if (header.flags & FLAG_DRM) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2052
2067
|
const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
|
|
2053
2068
|
const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
|
|
2054
2069
|
const metadata = {
|
|
@@ -2057,7 +2072,7 @@ function parseHwp5Document(buffer, options) {
|
|
|
2057
2072
|
if (cfb) extractHwp5Metadata(cfb, metadata);
|
|
2058
2073
|
const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
|
|
2059
2074
|
const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
|
|
2060
|
-
if (sections.length === 0) throw new (0,
|
|
2075
|
+
if (sections.length === 0) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2061
2076
|
metadata.pageCount = sections.length;
|
|
2062
2077
|
const pageFilter = _optionalChain([options, 'optionalAccess', _22 => _22.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sections.length) : null;
|
|
2063
2078
|
const totalTarget = pageFilter ? pageFilter.size : sections.length;
|
|
@@ -2071,24 +2086,24 @@ function parseHwp5Document(buffer, options) {
|
|
|
2071
2086
|
const sectionData = sections[si];
|
|
2072
2087
|
const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
2073
2088
|
totalDecompressed += data.length;
|
|
2074
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2089
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2075
2090
|
const records = readRecords(data);
|
|
2076
2091
|
const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, nestedTableCounter);
|
|
2077
2092
|
blocks.push(...sectionBlocks);
|
|
2078
2093
|
parsedSections++;
|
|
2079
2094
|
_optionalChain([options, 'optionalAccess', _23 => _23.onProgress, 'optionalCall', _24 => _24(parsedSections, totalTarget)]);
|
|
2080
2095
|
} catch (secErr) {
|
|
2081
|
-
if (secErr instanceof
|
|
2096
|
+
if (secErr instanceof _chunkIVC5CB2Qcjs.KordocError) throw secErr;
|
|
2082
2097
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
2083
2098
|
}
|
|
2084
2099
|
}
|
|
2085
2100
|
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
2086
|
-
const flatBlocks =
|
|
2101
|
+
const flatBlocks = _chunkIVC5CB2Qcjs.flattenLayoutTables.call(void 0, blocks);
|
|
2087
2102
|
if (docInfo) {
|
|
2088
2103
|
detectHwp5Headings(flatBlocks, docInfo);
|
|
2089
2104
|
}
|
|
2090
2105
|
const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
2091
|
-
const markdown =
|
|
2106
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, flatBlocks);
|
|
2092
2107
|
return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
2093
2108
|
}
|
|
2094
2109
|
function parseDocInfoStream(cfb, compressed) {
|
|
@@ -2148,9 +2163,9 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2148
2163
|
let level = 0;
|
|
2149
2164
|
if (_optionalChain([block, 'access', _29 => _29.style, 'optionalAccess', _30 => _30.fontSize]) && baseFontSize > 0) {
|
|
2150
2165
|
const ratio = block.style.fontSize / baseFontSize;
|
|
2151
|
-
if (ratio >=
|
|
2152
|
-
else if (ratio >=
|
|
2153
|
-
else if (ratio >=
|
|
2166
|
+
if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H1) level = 1;
|
|
2167
|
+
else if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H2) level = 2;
|
|
2168
|
+
else if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H3) level = 3;
|
|
2154
2169
|
}
|
|
2155
2170
|
if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
|
|
2156
2171
|
if (level === 0) level = 2;
|
|
@@ -2235,7 +2250,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2235
2250
|
if (!raw) break;
|
|
2236
2251
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2237
2252
|
totalDecompressed += content.length;
|
|
2238
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2253
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2239
2254
|
sections.push({ idx: i, content });
|
|
2240
2255
|
}
|
|
2241
2256
|
if (sections.length === 0) {
|
|
@@ -2247,7 +2262,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2247
2262
|
if (raw) {
|
|
2248
2263
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2249
2264
|
totalDecompressed += content.length;
|
|
2250
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2265
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2251
2266
|
sections.push({ idx, content });
|
|
2252
2267
|
}
|
|
2253
2268
|
}
|
|
@@ -2264,7 +2279,7 @@ function findViewTextSectionsLenient(lcfb, compressed) {
|
|
|
2264
2279
|
try {
|
|
2265
2280
|
const content = decryptViewText(raw, compressed);
|
|
2266
2281
|
totalDecompressed += content.length;
|
|
2267
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2282
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2268
2283
|
sections.push({ idx: i, content });
|
|
2269
2284
|
} catch (e16) {
|
|
2270
2285
|
break;
|
|
@@ -2452,7 +2467,7 @@ function parseSection(records, docInfo, warnings, sectionNum, counter) {
|
|
|
2452
2467
|
if (url && blocks.length > 0) {
|
|
2453
2468
|
const lastBlock = blocks[blocks.length - 1];
|
|
2454
2469
|
if (lastBlock.type === "paragraph" && !lastBlock.href) {
|
|
2455
|
-
lastBlock.href = _nullishCoalesce(
|
|
2470
|
+
lastBlock.href = _nullishCoalesce(_chunkIVC5CB2Qcjs.sanitizeHref.call(void 0, url), () => ( void 0));
|
|
2456
2471
|
}
|
|
2457
2472
|
}
|
|
2458
2473
|
}
|
|
@@ -2570,8 +2585,8 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
2570
2585
|
if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
|
|
2571
2586
|
if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
|
|
2572
2587
|
if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
|
|
2573
|
-
rows = Math.min(rec.data.readUInt16LE(4),
|
|
2574
|
-
cols = Math.min(rec.data.readUInt16LE(6),
|
|
2588
|
+
rows = Math.min(rec.data.readUInt16LE(4), _chunkIVC5CB2Qcjs.MAX_ROWS);
|
|
2589
|
+
cols = Math.min(rec.data.readUInt16LE(6), _chunkIVC5CB2Qcjs.MAX_COLS);
|
|
2575
2590
|
}
|
|
2576
2591
|
if (rec.tagId === TAG_LIST_HEADER) {
|
|
2577
2592
|
const { cell, nextIdx } = parseCellBlock(records, i, tableLevel, counter);
|
|
@@ -2593,7 +2608,7 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
2593
2608
|
return { table: { rows, cols, cells: irCells, hasHeader: rows > 1 }, nextIdx: i };
|
|
2594
2609
|
}
|
|
2595
2610
|
const cellRows = arrangeCells(rows, cols, cells);
|
|
2596
|
-
return { table:
|
|
2611
|
+
return { table: _chunkIVC5CB2Qcjs.buildTable.call(void 0, cellRows), nextIdx: i };
|
|
2597
2612
|
}
|
|
2598
2613
|
function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
2599
2614
|
const rec = records[startIdx];
|
|
@@ -2608,8 +2623,8 @@ function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
|
2608
2623
|
rowAddr = rec.data.readUInt16LE(10);
|
|
2609
2624
|
const cs = rec.data.readUInt16LE(12);
|
|
2610
2625
|
const rs = rec.data.readUInt16LE(14);
|
|
2611
|
-
if (cs > 0) colSpan = Math.min(cs,
|
|
2612
|
-
if (rs > 0) rowSpan = Math.min(rs,
|
|
2626
|
+
if (cs > 0) colSpan = Math.min(cs, _chunkIVC5CB2Qcjs.MAX_COLS);
|
|
2627
|
+
if (rs > 0) rowSpan = Math.min(rs, _chunkIVC5CB2Qcjs.MAX_ROWS);
|
|
2613
2628
|
}
|
|
2614
2629
|
let i = startIdx + 1;
|
|
2615
2630
|
while (i < records.length) {
|
|
@@ -2711,7 +2726,7 @@ function getTextContent(el) {
|
|
|
2711
2726
|
return _nullishCoalesce(_optionalChain([el, 'access', _38 => _38.textContent, 'optionalAccess', _39 => _39.trim, 'call', _40 => _40()]), () => ( ""));
|
|
2712
2727
|
}
|
|
2713
2728
|
function parseXml(text) {
|
|
2714
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
2729
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, text), "text/xml");
|
|
2715
2730
|
}
|
|
2716
2731
|
function parseSharedStrings(xml) {
|
|
2717
2732
|
const doc = parseXml(xml);
|
|
@@ -2855,7 +2870,7 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2855
2870
|
cellRows.push(row);
|
|
2856
2871
|
}
|
|
2857
2872
|
if (cellRows.length > 0) {
|
|
2858
|
-
const table =
|
|
2873
|
+
const table = _chunkIVC5CB2Qcjs.buildTable.call(void 0, cellRows);
|
|
2859
2874
|
if (table.rows > 0) {
|
|
2860
2875
|
blocks.push({ type: "table", table, pageNumber: sheetIndex + 1 });
|
|
2861
2876
|
}
|
|
@@ -2863,12 +2878,12 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2863
2878
|
return blocks;
|
|
2864
2879
|
}
|
|
2865
2880
|
async function parseXlsxDocument(buffer, options) {
|
|
2866
|
-
|
|
2881
|
+
_chunkIVC5CB2Qcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE3);
|
|
2867
2882
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
2868
2883
|
const warnings = [];
|
|
2869
2884
|
const workbookFile = zip.file("xl/workbook.xml");
|
|
2870
2885
|
if (!workbookFile) {
|
|
2871
|
-
throw new (0,
|
|
2886
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 XLSX \uD30C\uC77C: xl/workbook.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2872
2887
|
}
|
|
2873
2888
|
let sharedStrings = [];
|
|
2874
2889
|
const ssFile = zip.file("xl/sharedStrings.xml");
|
|
@@ -2877,7 +2892,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2877
2892
|
}
|
|
2878
2893
|
const sheets = parseWorkbook(await workbookFile.async("text"));
|
|
2879
2894
|
if (sheets.length === 0) {
|
|
2880
|
-
throw new (0,
|
|
2895
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("XLSX \uD30C\uC77C\uC5D0 \uC2DC\uD2B8\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2881
2896
|
}
|
|
2882
2897
|
let relsMap = /* @__PURE__ */ new Map();
|
|
2883
2898
|
const relsFile = zip.file("xl/_rels/workbook.xml.rels");
|
|
@@ -2949,7 +2964,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2949
2964
|
} catch (e20) {
|
|
2950
2965
|
}
|
|
2951
2966
|
}
|
|
2952
|
-
const markdown =
|
|
2967
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, blocks);
|
|
2953
2968
|
return { markdown, blocks, metadata, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
2954
2969
|
}
|
|
2955
2970
|
|
|
@@ -2998,7 +3013,7 @@ function getAttr(el, localName3) {
|
|
|
2998
3013
|
return null;
|
|
2999
3014
|
}
|
|
3000
3015
|
function parseXml2(text) {
|
|
3001
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
3016
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, text), "text/xml");
|
|
3002
3017
|
}
|
|
3003
3018
|
function parseStyles(xml) {
|
|
3004
3019
|
const doc = parseXml2(xml);
|
|
@@ -3291,12 +3306,12 @@ async function extractImages(zip, rels, doc) {
|
|
|
3291
3306
|
return { blocks, images };
|
|
3292
3307
|
}
|
|
3293
3308
|
async function parseDocxDocument(buffer, options) {
|
|
3294
|
-
|
|
3309
|
+
_chunkIVC5CB2Qcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE4);
|
|
3295
3310
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
3296
3311
|
const warnings = [];
|
|
3297
3312
|
const docFile = zip.file("word/document.xml");
|
|
3298
3313
|
if (!docFile) {
|
|
3299
|
-
throw new (0,
|
|
3314
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 DOCX \uD30C\uC77C: word/document.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3300
3315
|
}
|
|
3301
3316
|
let rels = /* @__PURE__ */ new Map();
|
|
3302
3317
|
const relsFile = zip.file("word/_rels/document.xml.rels");
|
|
@@ -3331,7 +3346,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3331
3346
|
const doc = parseXml2(docXml);
|
|
3332
3347
|
const body = findElements(doc, "body");
|
|
3333
3348
|
if (body.length === 0) {
|
|
3334
|
-
throw new (0,
|
|
3349
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("DOCX \uBCF8\uBB38(w:body)\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3335
3350
|
}
|
|
3336
3351
|
const blocks = [];
|
|
3337
3352
|
const bodyEl = body[0];
|
|
@@ -3371,7 +3386,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3371
3386
|
}
|
|
3372
3387
|
}
|
|
3373
3388
|
const outline = blocks.filter((b) => b.type === "heading").map((b) => ({ level: _nullishCoalesce(b.level, () => ( 2)), text: _nullishCoalesce(b.text, () => ( "")) }));
|
|
3374
|
-
const markdown =
|
|
3389
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, blocks);
|
|
3375
3390
|
return {
|
|
3376
3391
|
markdown,
|
|
3377
3392
|
blocks,
|
|
@@ -3394,7 +3409,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3394
3409
|
}
|
|
3395
3410
|
const text = new TextDecoder("utf-8").decode(buffer).replace(/^\uFEFF/, "");
|
|
3396
3411
|
const normalized = text.replace(/ /g, " ");
|
|
3397
|
-
const xml =
|
|
3412
|
+
const xml = _chunkIVC5CB2Qcjs.stripDtd.call(void 0, normalized);
|
|
3398
3413
|
const warnings = [];
|
|
3399
3414
|
const parser = new (0, _xmldom.DOMParser)({
|
|
3400
3415
|
onError: (_level, msg) => {
|
|
@@ -3434,7 +3449,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3434
3449
|
parseSection2(el, blocks, paraShapeMap, sectionIdx, warnings);
|
|
3435
3450
|
}
|
|
3436
3451
|
const outline = blocks.filter((b) => b.type === "heading" && b.text).map((b) => ({ level: _nullishCoalesce(b.level, () => ( 1)), text: b.text, pageNumber: b.pageNumber }));
|
|
3437
|
-
const markdown =
|
|
3452
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, blocks);
|
|
3438
3453
|
return {
|
|
3439
3454
|
markdown,
|
|
3440
3455
|
blocks,
|
|
@@ -3576,7 +3591,7 @@ function parseTable2(el, blocks, paraShapeMap, sectionNum, warnings) {
|
|
|
3576
3591
|
const cellRows = grid.map(
|
|
3577
3592
|
(row) => row.map((cell) => _nullishCoalesce(cell, () => ( { text: "", colSpan: 1, rowSpan: 1 })))
|
|
3578
3593
|
);
|
|
3579
|
-
const table =
|
|
3594
|
+
const table = _chunkIVC5CB2Qcjs.buildTable.call(void 0, cellRows);
|
|
3580
3595
|
blocks.push({ type: "table", table, pageNumber: sectionNum });
|
|
3581
3596
|
}
|
|
3582
3597
|
function extractCellText(cellEl) {
|
|
@@ -3977,7 +3992,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
3977
3992
|
const normalizedValues = normalizeValues(values);
|
|
3978
3993
|
const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
|
|
3979
3994
|
if (sectionFiles.length === 0) {
|
|
3980
|
-
throw new (0,
|
|
3995
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3981
3996
|
}
|
|
3982
3997
|
const xmlParser = new (0, _xmldom.DOMParser)();
|
|
3983
3998
|
const xmlSerializer = new (0, _xmldom.XMLSerializer)();
|
|
@@ -3985,7 +4000,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
3985
4000
|
const zipEntry = zip.file(sectionPath);
|
|
3986
4001
|
if (!zipEntry) continue;
|
|
3987
4002
|
const rawXml = await zipEntry.async("text");
|
|
3988
|
-
const doc = xmlParser.parseFromString(
|
|
4003
|
+
const doc = xmlParser.parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, rawXml), "text/xml");
|
|
3989
4004
|
if (!doc.documentElement) continue;
|
|
3990
4005
|
let modified = false;
|
|
3991
4006
|
const tables = findAllElements(doc.documentElement, "tbl");
|
|
@@ -4847,13 +4862,13 @@ async function parse(input, options) {
|
|
|
4847
4862
|
if (typeof input === "string") {
|
|
4848
4863
|
try {
|
|
4849
4864
|
const buf = await _promises.readFile.call(void 0, input);
|
|
4850
|
-
buffer =
|
|
4865
|
+
buffer = _chunkIVC5CB2Qcjs.toArrayBuffer.call(void 0, buf);
|
|
4851
4866
|
} catch (err) {
|
|
4852
4867
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
4853
4868
|
return { success: false, fileType: "unknown", error: msg, code: "PARSE_ERROR" };
|
|
4854
4869
|
}
|
|
4855
4870
|
} else if (Buffer.isBuffer(input)) {
|
|
4856
|
-
buffer =
|
|
4871
|
+
buffer = _chunkIVC5CB2Qcjs.toArrayBuffer.call(void 0, input);
|
|
4857
4872
|
} else {
|
|
4858
4873
|
buffer = input;
|
|
4859
4874
|
}
|
|
@@ -4883,7 +4898,7 @@ async function parseHwpx(buffer, options) {
|
|
|
4883
4898
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseHwpxDocument(buffer, options);
|
|
4884
4899
|
return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _80 => _80.length]) ? images : void 0 };
|
|
4885
4900
|
} catch (err) {
|
|
4886
|
-
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4901
|
+
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err) };
|
|
4887
4902
|
}
|
|
4888
4903
|
}
|
|
4889
4904
|
async function parseHwp(buffer, options) {
|
|
@@ -4891,13 +4906,13 @@ async function parseHwp(buffer, options) {
|
|
|
4891
4906
|
const { markdown, blocks, metadata, outline, warnings, images } = parseHwp5Document(Buffer.from(buffer), options);
|
|
4892
4907
|
return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _81 => _81.length]) ? images : void 0 };
|
|
4893
4908
|
} catch (err) {
|
|
4894
|
-
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4909
|
+
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err) };
|
|
4895
4910
|
}
|
|
4896
4911
|
}
|
|
4897
4912
|
async function parsePdf(buffer, options) {
|
|
4898
4913
|
let parsePdfDocument;
|
|
4899
4914
|
try {
|
|
4900
|
-
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-
|
|
4915
|
+
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-ZORW4RSC.cjs")));
|
|
4901
4916
|
parsePdfDocument = mod.parsePdfDocument;
|
|
4902
4917
|
} catch (e26) {
|
|
4903
4918
|
return {
|
|
@@ -4912,7 +4927,7 @@ async function parsePdf(buffer, options) {
|
|
|
4912
4927
|
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
|
|
4913
4928
|
} catch (err) {
|
|
4914
4929
|
const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
|
|
4915
|
-
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4930
|
+
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err), isImageBased };
|
|
4916
4931
|
}
|
|
4917
4932
|
}
|
|
4918
4933
|
async function parseXlsx(buffer, options) {
|
|
@@ -4920,7 +4935,7 @@ async function parseXlsx(buffer, options) {
|
|
|
4920
4935
|
const { markdown, blocks, metadata, warnings } = await parseXlsxDocument(buffer, options);
|
|
4921
4936
|
return { success: true, fileType: "xlsx", markdown, blocks, metadata, warnings };
|
|
4922
4937
|
} catch (err) {
|
|
4923
|
-
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4938
|
+
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err) };
|
|
4924
4939
|
}
|
|
4925
4940
|
}
|
|
4926
4941
|
async function parseDocx(buffer, options) {
|
|
@@ -4928,7 +4943,7 @@ async function parseDocx(buffer, options) {
|
|
|
4928
4943
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseDocxDocument(buffer, options);
|
|
4929
4944
|
return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _82 => _82.length]) ? images : void 0 };
|
|
4930
4945
|
} catch (err) {
|
|
4931
|
-
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4946
|
+
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err) };
|
|
4932
4947
|
}
|
|
4933
4948
|
}
|
|
4934
4949
|
async function parseHwpml(buffer, options) {
|
|
@@ -4936,16 +4951,16 @@ async function parseHwpml(buffer, options) {
|
|
|
4936
4951
|
const { markdown, blocks, metadata, outline, warnings } = parseHwpmlDocument(buffer, options);
|
|
4937
4952
|
return { success: true, fileType: "hwpml", markdown, blocks, metadata, outline, warnings };
|
|
4938
4953
|
} catch (err) {
|
|
4939
|
-
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4954
|
+
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err) };
|
|
4940
4955
|
}
|
|
4941
4956
|
}
|
|
4942
4957
|
async function fillForm(input, values, outputFormat = "markdown") {
|
|
4943
4958
|
let buffer;
|
|
4944
4959
|
if (typeof input === "string") {
|
|
4945
4960
|
const buf = await _promises.readFile.call(void 0, input);
|
|
4946
|
-
buffer =
|
|
4961
|
+
buffer = _chunkIVC5CB2Qcjs.toArrayBuffer.call(void 0, buf);
|
|
4947
4962
|
} else if (Buffer.isBuffer(input)) {
|
|
4948
|
-
buffer =
|
|
4963
|
+
buffer = _chunkIVC5CB2Qcjs.toArrayBuffer.call(void 0, input);
|
|
4949
4964
|
} else {
|
|
4950
4965
|
buffer = input;
|
|
4951
4966
|
}
|
|
@@ -4971,7 +4986,7 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
4971
4986
|
throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
|
|
4972
4987
|
}
|
|
4973
4988
|
const fill = fillFormFields(parsed.blocks, values);
|
|
4974
|
-
const markdown =
|
|
4989
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, fill.blocks);
|
|
4975
4990
|
if (outputFormat === "hwpx") {
|
|
4976
4991
|
const hwpxBuffer = await markdownToHwpx(markdown);
|
|
4977
4992
|
return { output: hwpxBuffer, format: "hwpx", fill };
|
|
@@ -5002,5 +5017,5 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
5002
5017
|
|
|
5003
5018
|
|
|
5004
5019
|
|
|
5005
|
-
exports.VERSION =
|
|
5020
|
+
exports.VERSION = _chunkIVC5CB2Qcjs.VERSION; exports.blocksToMarkdown = _chunkIVC5CB2Qcjs.blocksToMarkdown; exports.compare = compare; exports.detectFormat = detectFormat; exports.detectZipFormat = detectZipFormat; exports.diffBlocks = diffBlocks; exports.extractFormFields = extractFormFields; exports.fillForm = fillForm; exports.fillFormFields = fillFormFields; exports.fillHwpx = fillHwpx; exports.isHwpxFile = isHwpxFile; exports.isLabelCell = isLabelCell; exports.isOldHwpFile = isOldHwpFile; exports.isPdfFile = isPdfFile; exports.isZipFile = isZipFile; exports.markdownToHwpx = markdownToHwpx; exports.parse = parse; exports.parseDocx = parseDocx; exports.parseHwp = parseHwp; exports.parseHwpml = parseHwpml; exports.parseHwpx = parseHwpx; exports.parsePdf = parsePdf; exports.parseXlsx = parseXlsx;
|
|
5006
5021
|
//# sourceMappingURL=index.cjs.map
|