kordoc 2.3.0 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -1
- package/dist/{chunk-ZNJPRRIA.cjs → chunk-IVC5CB2Q.cjs} +2 -2
- package/dist/{chunk-ZNJPRRIA.cjs.map → chunk-IVC5CB2Q.cjs.map} +1 -1
- package/dist/{chunk-OEJJPCMM.js → chunk-JFPF7B5L.js} +132 -9
- package/dist/chunk-JFPF7B5L.js.map +1 -0
- package/dist/{chunk-JFTFC2BB.js → chunk-T65PPCNU.js} +2 -2
- package/dist/{chunk-JFTFC2BB.js.map → chunk-T65PPCNU.js.map} +1 -1
- package/dist/{chunk-Z7UPTVMX.js → chunk-VYFIAYCW.js} +2 -2
- package/dist/{chunk-Z7UPTVMX.js.map → chunk-VYFIAYCW.js.map} +1 -1
- package/dist/cli.js +8 -4
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +254 -131
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +131 -8
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +3 -3
- package/dist/{parser-4LKJXBPP.js → parser-UHUCMAA7.js} +2 -2
- package/dist/{parser-25LF2S2J.js → parser-VXUBNDG4.js} +2 -2
- package/dist/{parser-KBQZB3QY.cjs → parser-ZORW4RSC.cjs} +15 -15
- package/dist/{parser-KBQZB3QY.cjs.map → parser-ZORW4RSC.cjs.map} +1 -1
- package/dist/setup-57FB3LSP.js +201 -0
- package/dist/setup-57FB3LSP.js.map +1 -0
- package/dist/{watch-GXRBLW3Y.js → watch-SSENKOE2.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-OEJJPCMM.js.map +0 -1
- /package/dist/{parser-4LKJXBPP.js.map → parser-UHUCMAA7.js.map} +0 -0
- /package/dist/{parser-25LF2S2J.js.map → parser-VXUBNDG4.js.map} +0 -0
- /package/dist/{watch-GXRBLW3Y.js.map → watch-SSENKOE2.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
var
|
|
19
|
+
var _chunkIVC5CB2Qcjs = require('./chunk-IVC5CB2Q.cjs');
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -75,6 +75,115 @@ async function detectZipFormat(buffer) {
|
|
|
75
75
|
|
|
76
76
|
var _zlib = require('zlib');
|
|
77
77
|
var _xmldom = require('@xmldom/xmldom');
|
|
78
|
+
|
|
79
|
+
// src/hwpx/com-fallback.ts
|
|
80
|
+
var _child_process = require('child_process');
|
|
81
|
+
var _os = require('os');
|
|
82
|
+
/**
 * Reports whether the COM-based text extraction fallback can be attempted.
 *
 * The only condition checked here is the operating system: the fallback is
 * considered available when Node reports the platform as "win32".
 *
 * @returns {boolean} `true` on Windows, `false` on every other platform.
 */
function isComFallbackAvailable() {
  const currentPlatform = _os.platform();
  return currentPlatform === "win32";
}
|
|
85
|
+
/**
 * Checks whether manifest text contains the "encryption-data" marker.
 *
 * This is a plain substring search on the raw text; the manifest is not
 * parsed as XML.
 *
 * @param {string} manifestXml - Raw text of the manifest file.
 * @returns {boolean} `true` when the marker occurs anywhere in the text.
 */
function isEncryptedHwpx(manifestXml) {
  const ENCRYPTION_MARKER = "encryption-data";
  return manifestXml.indexOf(ENCRYPTION_MARKER) !== -1;
}
|
|
88
|
+
/**
 * Extracts per-page plain text from a document by driving the
 * `HWPFrame.HwpObject` COM automation object from a synchronous PowerShell
 * child process.
 *
 * The script copies the source file into a fresh temp directory, opens the
 * copy, reads every page with `GetPageText`, and prints one JSON object on
 * stdout: either `{ pageCount, pages }` or `{ error }`.
 *
 * Security note: the path is handed to PowerShell through an environment
 * variable instead of being spliced into the script text. Doubling ASCII `'`
 * is not sufficient for a PowerShell single-quoted literal, because PowerShell
 * also accepts the typographic quotes U+2018/U+2019/U+201A/U+201B as string
 * delimiters, so a crafted file name could otherwise terminate the literal
 * and inject commands.
 *
 * @param {string} filePath - Path of the document on disk.
 * @returns {{ pages: Array, pageCount: number, warnings: Array<{ message: string, code: string }> }}
 *   Page texts as reported by the script, the page count (falls back to
 *   `pages.length` when the script did not report one), and a `COM_EMPTY`
 *   warning when no pages came back.
 * @throws {TypeError} When `filePath` is not a non-empty string.
 * @throws {Error} When not running on Windows, when stdout contains no JSON
 *   object, or when the script reports an error. Failures raised by
 *   `execFileSync` (timeout, non-zero exit) and by `JSON.parse` propagate
 *   unchanged.
 */
function extractTextViaCom(filePath) {
  if (!isComFallbackAvailable()) {
    throw new Error("COM fallback\uC740 Windows\uC5D0\uC11C\uB9CC \uC0AC\uC6A9 \uAC00\uB2A5\uD569\uB2C8\uB2E4");
  }
  if (typeof filePath !== "string" || filePath.length === 0) {
    throw new TypeError("filePath\uB294 \uBB38\uC790\uC5F4\uC774\uC5B4\uC57C \uD569\uB2C8\uB2E4");
  }
  // The script reads the path from $env:KORDOC_COM_SRC, so no caller-supplied
  // text is ever parsed as PowerShell source.
  const ps1 = `
[Console]::OutputEncoding = [System.Text.Encoding]::UTF8
$ErrorActionPreference = 'Stop'

$src = $env:KORDOC_COM_SRC
$tmpDir = Join-Path $env:TEMP ('hwp-com-' + [guid]::NewGuid().ToString('N'))
$hwp = $null

try {
  [void](New-Item -ItemType Directory -Path $tmpDir -Force)
  $tmpFile = Join-Path $tmpDir (Split-Path $src -Leaf)
  Copy-Item -LiteralPath $src -Destination $tmpFile -Force

  $hwp = New-Object -ComObject HWPFrame.HwpObject
  $hwp.RegisterModule('FilePathCheckerModule', 'FilePathCheckerModuleExample') | Out-Null
  $hwp.Open($tmpFile, '', '') | Out-Null
  $pc = $hwp.PageCount
  $result = @{ pageCount = $pc; pages = @() }
  for ($p = 1; $p -le $pc; $p++) {
    $t = $hwp.GetPageText($p, 0)
    $result.pages += @($t)
  }
  $result | ConvertTo-Json -Depth 3 -Compress
} catch {
  @{ error = $_.Exception.Message } | ConvertTo-Json -Compress
} finally {
  # Always shut the COM server down, even after a failure, so no zombie Hwp.exe is left behind
  if ($null -ne $hwp) {
    try { $hwp.Clear(1) | Out-Null } catch { }
    try { $hwp.Quit() | Out-Null } catch { }
    try { [System.Runtime.InteropServices.Marshal]::ReleaseComObject($hwp) | Out-Null } catch { }
  }
  # Remove the temp copy, then force collection of any remaining COM wrappers
  try { Remove-Item -LiteralPath $tmpDir -Recurse -Force -ErrorAction SilentlyContinue } catch { }
  [GC]::Collect()
  [GC]::WaitForPendingFinalizers()
}
`;
  const stdout = _child_process.execFileSync("powershell", [
    "-NoProfile",
    "-NonInteractive",
    "-ExecutionPolicy",
    "Bypass",
    "-Command",
    ps1
  ], {
    encoding: "utf-8",
    // 2-minute timeout
    timeout: 12e4,
    windowsHide: true,
    // 50 MB of stdout
    maxBuffer: 50 * 1024 * 1024,
    // Inherit the parent environment and add the source path for the script.
    env: Object.assign({}, process.env, { KORDOC_COM_SRC: filePath })
  });
  const trimmed = stdout.trim();
  // Skip anything printed before the JSON payload.
  const jsonStart = trimmed.indexOf("{");
  if (jsonStart < 0) throw new Error(`COM \uCD9C\uB825\uC5D0 JSON\uC774 \uC5C6\uC2B5\uB2C8\uB2E4: ${trimmed.slice(0, 200)}`);
  const json = JSON.parse(trimmed.slice(jsonStart));
  if (json.error) {
    throw new Error(`COM \uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uC2E4\uD328: ${json.error}`);
  }
  const warnings = [];
  const pages = Array.isArray(json.pages) ? json.pages : [];
  const pageCount = json.pageCount !== null && json.pageCount !== void 0 ? json.pageCount : pages.length;
  if (pages.length === 0) {
    warnings.push({ message: "COM\uC73C\uB85C \uD14D\uC2A4\uD2B8\uB97C \uCD94\uCD9C\uD558\uC9C0 \uBABB\uD588\uC2B5\uB2C8\uB2E4", code: "COM_EMPTY" });
  }
  return { pages, pageCount, warnings };
}
|
|
158
|
+
/**
 * Converts per-page text into a parse result made of paragraph blocks.
 *
 * Each page is split on newlines; every non-blank line, trimmed, becomes one
 * `paragraph` block tagged with its 1-based page number. The same lines,
 * joined by blank lines, form the markdown output. A `DRM_COM_FALLBACK`
 * warning is always appended to the returned warnings.
 *
 * The `warnings` argument is not modified: the result carries a new array
 * holding the caller's entries followed by the fallback warning.
 *
 * @param {Array} pages - Page texts in page order. `null`/`undefined` entries
 *   are treated as empty pages; other non-string entries are stringified.
 * @param {number} pageCount - Page count stored in `metadata.pageCount`.
 * @param {Array<{ message: string, code: string }>} [warnings] - Warnings
 *   collected so far; a non-array value is treated as an empty list.
 * @returns {{ markdown: string, blocks: Array<{ type: string, text: string, pageNumber: number }>, metadata: { pageCount: number }, warnings: Array<{ message: string, code: string }> }}
 */
function comResultToParseResult(pages, pageCount, warnings) {
  const blocks = [];
  const lines = [];
  const pageList = Array.isArray(pages) ? pages : [];
  for (let pageIndex = 0; pageIndex < pageList.length; pageIndex++) {
    const rawPage = pageList[pageIndex];
    // A missing page contributes nothing; anything else is handled as text.
    const pageText = rawPage === null || rawPage === void 0 ? "" : String(rawPage);
    for (const rawLine of pageText.split(/\n/)) {
      // trim() also strips the "\r" left over from CRLF line endings.
      const paragraph = rawLine.trim();
      if (!paragraph) continue;
      blocks.push({ type: "paragraph", text: paragraph, pageNumber: pageIndex + 1 });
      lines.push(paragraph);
    }
  }
  // Build a fresh list so the caller's array is left untouched.
  const mergedWarnings = (Array.isArray(warnings) ? warnings : []).concat([{
    message: "DRM \uBB38\uC11C: \uD55C\uCEF4 COM API\uB85C \uD14D\uC2A4\uD2B8 \uCD94\uCD9C (\uC11C\uC2DD/\uD45C \uC815\uBCF4 \uC81C\uD55C\uC801)",
    code: "DRM_COM_FALLBACK"
  }]);
  return {
    markdown: lines.join("\n\n"),
    blocks,
    metadata: { pageCount },
    warnings: mergedWarnings
  };
}
|
|
185
|
+
|
|
186
|
+
// src/hwpx/parser.ts
|
|
78
187
|
var MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024;
|
|
79
188
|
var MAX_ZIP_ENTRIES = 500;
|
|
80
189
|
function clampSpan(val, max) {
|
|
@@ -84,7 +193,7 @@ var MAX_XML_DEPTH = 200;
|
|
|
84
193
|
function createXmlParser(warnings) {
|
|
85
194
|
return new (0, _xmldom.DOMParser)({
|
|
86
195
|
onError(level, msg) {
|
|
87
|
-
if (level === "fatalError") throw new (0,
|
|
196
|
+
if (level === "fatalError") throw new (0, _chunkIVC5CB2Qcjs.KordocError)(`XML \uD30C\uC2F1 \uC2E4\uD328: ${msg}`);
|
|
88
197
|
_optionalChain([warnings, 'optionalAccess', _2 => _2.push, 'call', _3 => _3({ code: "MALFORMED_XML", message: `XML ${level === "warn" ? "\uACBD\uACE0" : "\uC624\uB958"}: ${msg}` })]);
|
|
89
198
|
}
|
|
90
199
|
});
|
|
@@ -103,10 +212,10 @@ async function extractHwpxStyles(zip, decompressed) {
|
|
|
103
212
|
const xml = await file.async("text");
|
|
104
213
|
if (decompressed) {
|
|
105
214
|
decompressed.total += xml.length * 2;
|
|
106
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
215
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
107
216
|
}
|
|
108
217
|
const parser = createXmlParser();
|
|
109
|
-
const doc = parser.parseFromString(
|
|
218
|
+
const doc = parser.parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
110
219
|
if (!doc.documentElement) continue;
|
|
111
220
|
parseCharProperties(doc, result.charProperties);
|
|
112
221
|
parseStyleElements(doc, result.styles);
|
|
@@ -168,7 +277,7 @@ function parseStyleElements(doc, map) {
|
|
|
168
277
|
}
|
|
169
278
|
}
|
|
170
279
|
async function parseHwpxDocument(buffer, options) {
|
|
171
|
-
|
|
280
|
+
_chunkIVC5CB2Qcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
|
|
172
281
|
let zip;
|
|
173
282
|
try {
|
|
174
283
|
zip = await _jszip2.default.loadAsync(buffer);
|
|
@@ -177,7 +286,20 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
177
286
|
}
|
|
178
287
|
const actualEntryCount = Object.keys(zip.files).length;
|
|
179
288
|
if (actualEntryCount > MAX_ZIP_ENTRIES) {
|
|
180
|
-
throw new (0,
|
|
289
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
290
|
+
}
|
|
291
|
+
const manifestFile = zip.file("META-INF/manifest.xml");
|
|
292
|
+
if (manifestFile) {
|
|
293
|
+
const manifestXml = await manifestFile.async("text");
|
|
294
|
+
if (isEncryptedHwpx(manifestXml)) {
|
|
295
|
+
if (isComFallbackAvailable() && _optionalChain([options, 'optionalAccess', _4 => _4.filePath])) {
|
|
296
|
+
const { pages, pageCount, warnings: warnings2 } = extractTextViaCom(options.filePath);
|
|
297
|
+
if (pages.some((p) => p && p.trim().length > 0)) {
|
|
298
|
+
return comResultToParseResult(pages, pageCount, warnings2);
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("DRM \uC554\uD638\uD654\uB41C HWPX \uD30C\uC77C\uC785\uB2C8\uB2E4. Windows + \uD55C\uCEF4 \uC624\uD53C\uC2A4 \uC124\uCE58 \uC2DC \uC790\uB3D9 \uCD94\uCD9C\uB429\uB2C8\uB2E4.");
|
|
302
|
+
}
|
|
181
303
|
}
|
|
182
304
|
const decompressed = { total: 0 };
|
|
183
305
|
const metadata = {};
|
|
@@ -185,9 +307,9 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
185
307
|
const styleMap = await extractHwpxStyles(zip, decompressed);
|
|
186
308
|
const warnings = [];
|
|
187
309
|
const sectionPaths = await resolveSectionPaths(zip);
|
|
188
|
-
if (sectionPaths.length === 0) throw new (0,
|
|
310
|
+
if (sectionPaths.length === 0) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
189
311
|
metadata.pageCount = sectionPaths.length;
|
|
190
|
-
const pageFilter = _optionalChain([options, 'optionalAccess',
|
|
312
|
+
const pageFilter = _optionalChain([options, 'optionalAccess', _5 => _5.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sectionPaths.length) : null;
|
|
191
313
|
const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
|
|
192
314
|
const blocks = [];
|
|
193
315
|
const nestedTableCounter = { count: 0 };
|
|
@@ -199,19 +321,19 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
199
321
|
try {
|
|
200
322
|
const xml = await file.async("text");
|
|
201
323
|
decompressed.total += xml.length * 2;
|
|
202
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
324
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
203
325
|
blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, nestedTableCounter));
|
|
204
326
|
parsedSections++;
|
|
205
|
-
_optionalChain([options, 'optionalAccess',
|
|
327
|
+
_optionalChain([options, 'optionalAccess', _6 => _6.onProgress, 'optionalCall', _7 => _7(parsedSections, totalTarget)]);
|
|
206
328
|
} catch (secErr) {
|
|
207
|
-
if (secErr instanceof
|
|
329
|
+
if (secErr instanceof _chunkIVC5CB2Qcjs.KordocError) throw secErr;
|
|
208
330
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
209
331
|
}
|
|
210
332
|
}
|
|
211
333
|
const images = await extractImagesFromZip(zip, blocks, decompressed, warnings);
|
|
212
334
|
detectHwpxHeadings(blocks, styleMap);
|
|
213
335
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
214
|
-
const markdown =
|
|
336
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, blocks);
|
|
215
337
|
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
216
338
|
}
|
|
217
339
|
function imageExtToMime(ext) {
|
|
@@ -275,13 +397,13 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
275
397
|
let found = false;
|
|
276
398
|
const allCandidates = resolvedPath ? [resolvedPath, ...candidates] : candidates;
|
|
277
399
|
for (const path of allCandidates) {
|
|
278
|
-
if (
|
|
400
|
+
if (_chunkIVC5CB2Qcjs.isPathTraversal.call(void 0, path)) continue;
|
|
279
401
|
const file = zip.file(path);
|
|
280
402
|
if (!file) continue;
|
|
281
403
|
try {
|
|
282
404
|
const data = await file.async("uint8array");
|
|
283
405
|
decompressed.total += data.length;
|
|
284
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
406
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
285
407
|
const actualPath = path;
|
|
286
408
|
const ext = actualPath.includes(".") ? actualPath.split(".").pop() || "png" : "png";
|
|
287
409
|
const mimeType = imageExtToMime(ext);
|
|
@@ -293,11 +415,11 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
293
415
|
found = true;
|
|
294
416
|
break;
|
|
295
417
|
} catch (err) {
|
|
296
|
-
if (err instanceof
|
|
418
|
+
if (err instanceof _chunkIVC5CB2Qcjs.KordocError) throw err;
|
|
297
419
|
}
|
|
298
420
|
}
|
|
299
421
|
if (!found) {
|
|
300
|
-
_optionalChain([warnings, 'optionalAccess',
|
|
422
|
+
_optionalChain([warnings, 'optionalAccess', _8 => _8.push, 'call', _9 => _9({ page: block.pageNumber, message: `\uC774\uBBF8\uC9C0 \uD30C\uC77C \uC5C6\uC74C: ${ref}`, code: "SKIPPED_IMAGE" })]);
|
|
301
423
|
block.type = "paragraph";
|
|
302
424
|
block.text = `[\uC774\uBBF8\uC9C0: ${ref}]`;
|
|
303
425
|
}
|
|
@@ -313,7 +435,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
313
435
|
const xml = await file.async("text");
|
|
314
436
|
if (decompressed) {
|
|
315
437
|
decompressed.total += xml.length * 2;
|
|
316
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
438
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
317
439
|
}
|
|
318
440
|
parseDublinCoreMetadata(xml, metadata);
|
|
319
441
|
if (metadata.title || metadata.author) return;
|
|
@@ -323,13 +445,13 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
323
445
|
}
|
|
324
446
|
function parseDublinCoreMetadata(xml, metadata) {
|
|
325
447
|
const parser = createXmlParser();
|
|
326
|
-
const doc = parser.parseFromString(
|
|
448
|
+
const doc = parser.parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
327
449
|
if (!doc.documentElement) return;
|
|
328
450
|
const getText = (tagNames) => {
|
|
329
451
|
for (const tag of tagNames) {
|
|
330
452
|
const els = doc.getElementsByTagName(tag);
|
|
331
453
|
if (els.length > 0) {
|
|
332
|
-
const text = _optionalChain([els, 'access',
|
|
454
|
+
const text = _optionalChain([els, 'access', _10 => _10[0], 'access', _11 => _11.textContent, 'optionalAccess', _12 => _12.trim, 'call', _13 => _13()]);
|
|
333
455
|
if (text) return text;
|
|
334
456
|
}
|
|
335
457
|
}
|
|
@@ -383,7 +505,7 @@ function extractFromBrokenZip(buffer) {
|
|
|
383
505
|
}
|
|
384
506
|
const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
|
|
385
507
|
const name = new TextDecoder().decode(nameBytes);
|
|
386
|
-
if (
|
|
508
|
+
if (_chunkIVC5CB2Qcjs.isPathTraversal.call(void 0, name)) {
|
|
387
509
|
pos = fileStart + compSize;
|
|
388
510
|
continue;
|
|
389
511
|
}
|
|
@@ -401,15 +523,15 @@ function extractFromBrokenZip(buffer) {
|
|
|
401
523
|
continue;
|
|
402
524
|
}
|
|
403
525
|
totalDecompressed += content.length * 2;
|
|
404
|
-
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
526
|
+
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
|
|
405
527
|
sectionNum++;
|
|
406
528
|
blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, nestedTableCounter));
|
|
407
529
|
} catch (e6) {
|
|
408
530
|
continue;
|
|
409
531
|
}
|
|
410
532
|
}
|
|
411
|
-
if (blocks.length === 0) throw new (0,
|
|
412
|
-
const markdown =
|
|
533
|
+
if (blocks.length === 0) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
534
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, blocks);
|
|
413
535
|
return { markdown, blocks, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
414
536
|
}
|
|
415
537
|
async function resolveSectionPaths(zip) {
|
|
@@ -427,7 +549,7 @@ async function resolveSectionPaths(zip) {
|
|
|
427
549
|
}
|
|
428
550
|
function parseSectionPathsFromManifest(xml) {
|
|
429
551
|
const parser = createXmlParser();
|
|
430
|
-
const doc = parser.parseFromString(
|
|
552
|
+
const doc = parser.parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
431
553
|
const items = doc.getElementsByTagName("opf:item");
|
|
432
554
|
const spine = doc.getElementsByTagName("opf:itemref");
|
|
433
555
|
const isSectionId = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
|
|
@@ -456,7 +578,7 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
456
578
|
let baseFontSize = 0;
|
|
457
579
|
const sizeFreq = /* @__PURE__ */ new Map();
|
|
458
580
|
for (const b of blocks) {
|
|
459
|
-
if (_optionalChain([b, 'access',
|
|
581
|
+
if (_optionalChain([b, 'access', _14 => _14.style, 'optionalAccess', _15 => _15.fontSize])) {
|
|
460
582
|
sizeFreq.set(b.style.fontSize, (sizeFreq.get(b.style.fontSize) || 0) + 1);
|
|
461
583
|
}
|
|
462
584
|
}
|
|
@@ -472,11 +594,11 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
472
594
|
const text = block.text.trim();
|
|
473
595
|
if (text.length === 0 || text.length > 200 || /^\d+$/.test(text)) continue;
|
|
474
596
|
let level = 0;
|
|
475
|
-
if (baseFontSize > 0 && _optionalChain([block, 'access',
|
|
597
|
+
if (baseFontSize > 0 && _optionalChain([block, 'access', _16 => _16.style, 'optionalAccess', _17 => _17.fontSize])) {
|
|
476
598
|
const ratio = block.style.fontSize / baseFontSize;
|
|
477
|
-
if (ratio >=
|
|
478
|
-
else if (ratio >=
|
|
479
|
-
else if (ratio >=
|
|
599
|
+
if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H1) level = 1;
|
|
600
|
+
else if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H2) level = 2;
|
|
601
|
+
else if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H3) level = 3;
|
|
480
602
|
}
|
|
481
603
|
const compactText = text.replace(/\s+/g, "");
|
|
482
604
|
if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
|
|
@@ -501,13 +623,13 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
501
623
|
let nestedCols = 0;
|
|
502
624
|
for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
|
|
503
625
|
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
504
|
-
blocks.push({ type: "table", table:
|
|
626
|
+
blocks.push({ type: "table", table: _chunkIVC5CB2Qcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
505
627
|
if (parentTable.cell) {
|
|
506
628
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
507
629
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker;
|
|
508
630
|
}
|
|
509
631
|
} else {
|
|
510
|
-
const nestedText =
|
|
632
|
+
const nestedText = _chunkIVC5CB2Qcjs.convertTableToText.call(void 0, newTable.rows);
|
|
511
633
|
if (parentTable.cell) {
|
|
512
634
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
513
635
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker + "\n" + nestedText;
|
|
@@ -517,7 +639,7 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
517
639
|
}
|
|
518
640
|
function parseSectionXml(xml, styleMap, warnings, sectionNum, counter) {
|
|
519
641
|
const parser = createXmlParser(warnings);
|
|
520
|
-
const doc = parser.parseFromString(
|
|
642
|
+
const doc = parser.parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
521
643
|
if (!doc.documentElement) return [];
|
|
522
644
|
const blocks = [];
|
|
523
645
|
const ctx = { styleMap, warnings, sectionNum, counter };
|
|
@@ -560,7 +682,7 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
560
682
|
if (tableStack.length > 0) {
|
|
561
683
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
562
684
|
} else {
|
|
563
|
-
blocks.push({ type: "table", table:
|
|
685
|
+
blocks.push({ type: "table", table: _chunkIVC5CB2Qcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
564
686
|
tableCtx = null;
|
|
565
687
|
}
|
|
566
688
|
} else {
|
|
@@ -587,7 +709,7 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
587
709
|
}
|
|
588
710
|
break;
|
|
589
711
|
case "cellAddr":
|
|
590
|
-
if (_optionalChain([tableCtx, 'optionalAccess',
|
|
712
|
+
if (_optionalChain([tableCtx, 'optionalAccess', _18 => _18.cell])) {
|
|
591
713
|
const ca = parseInt(el.getAttribute("colAddr") || "", 10);
|
|
592
714
|
const ra = parseInt(el.getAttribute("rowAddr") || "", 10);
|
|
593
715
|
if (!isNaN(ca)) tableCtx.cell.colAddr = ca;
|
|
@@ -595,19 +717,19 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
595
717
|
}
|
|
596
718
|
break;
|
|
597
719
|
case "cellSpan":
|
|
598
|
-
if (_optionalChain([tableCtx, 'optionalAccess',
|
|
720
|
+
if (_optionalChain([tableCtx, 'optionalAccess', _19 => _19.cell])) {
|
|
599
721
|
const rawCs = parseInt(el.getAttribute("colSpan") || "1", 10);
|
|
600
722
|
const cs = isNaN(rawCs) ? 1 : rawCs;
|
|
601
723
|
const rawRs = parseInt(el.getAttribute("rowSpan") || "1", 10);
|
|
602
724
|
const rs = isNaN(rawRs) ? 1 : rawRs;
|
|
603
|
-
tableCtx.cell.colSpan = clampSpan(cs,
|
|
604
|
-
tableCtx.cell.rowSpan = clampSpan(rs,
|
|
725
|
+
tableCtx.cell.colSpan = clampSpan(cs, _chunkIVC5CB2Qcjs.MAX_COLS);
|
|
726
|
+
tableCtx.cell.rowSpan = clampSpan(rs, _chunkIVC5CB2Qcjs.MAX_ROWS);
|
|
605
727
|
}
|
|
606
728
|
break;
|
|
607
729
|
case "p": {
|
|
608
730
|
const { text, href, footnote, style } = extractParagraphInfo(el, ctx.styleMap);
|
|
609
731
|
if (text) {
|
|
610
|
-
if (_optionalChain([tableCtx, 'optionalAccess',
|
|
732
|
+
if (_optionalChain([tableCtx, 'optionalAccess', _20 => _20.cell])) {
|
|
611
733
|
tableCtx.cell.text += (tableCtx.cell.text ? "\n" : "") + text;
|
|
612
734
|
} else if (!tableCtx) {
|
|
613
735
|
const block = { type: "paragraph", text, pageNumber: ctx.sectionNum };
|
|
@@ -659,7 +781,7 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth =
|
|
|
659
781
|
if (tableStack.length > 0) {
|
|
660
782
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
661
783
|
} else {
|
|
662
|
-
blocks.push({ type: "table", table:
|
|
784
|
+
blocks.push({ type: "table", table: _chunkIVC5CB2Qcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
663
785
|
tableCtx = null;
|
|
664
786
|
}
|
|
665
787
|
} else {
|
|
@@ -767,7 +889,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
767
889
|
case "hyperlink": {
|
|
768
890
|
const url = child.getAttribute("url") || child.getAttribute("href") || "";
|
|
769
891
|
if (url) {
|
|
770
|
-
const safe =
|
|
892
|
+
const safe = _chunkIVC5CB2Qcjs.sanitizeHref.call(void 0, url);
|
|
771
893
|
if (safe) href = safe;
|
|
772
894
|
}
|
|
773
895
|
walk(child);
|
|
@@ -907,7 +1029,7 @@ function decompressStream(data) {
|
|
|
907
1029
|
return _zlib.inflateRawSync.call(void 0, data, opts);
|
|
908
1030
|
}
|
|
909
1031
|
function parseFileHeader(data) {
|
|
910
|
-
if (data.length < 40) throw new (0,
|
|
1032
|
+
if (data.length < 40) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("FileHeader\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 40\uBC14\uC774\uD2B8)");
|
|
911
1033
|
const sig = data.subarray(0, 32).toString("utf8").replace(/\0+$/, "");
|
|
912
1034
|
return {
|
|
913
1035
|
signature: sig,
|
|
@@ -1926,22 +2048,22 @@ function parseHwp5Document(buffer, options) {
|
|
|
1926
2048
|
lenientCfb = parseLenientCfb(buffer);
|
|
1927
2049
|
warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
|
|
1928
2050
|
} catch (e11) {
|
|
1929
|
-
throw new (0,
|
|
2051
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
|
|
1930
2052
|
}
|
|
1931
2053
|
}
|
|
1932
2054
|
const findStream = (path) => {
|
|
1933
2055
|
if (cfb) {
|
|
1934
2056
|
const entry = CFB.find(cfb, path);
|
|
1935
|
-
return _optionalChain([entry, 'optionalAccess',
|
|
2057
|
+
return _optionalChain([entry, 'optionalAccess', _21 => _21.content]) ? Buffer.from(entry.content) : null;
|
|
1936
2058
|
}
|
|
1937
2059
|
return lenientCfb.findStream(path);
|
|
1938
2060
|
};
|
|
1939
2061
|
const headerData = findStream("/FileHeader");
|
|
1940
|
-
if (!headerData) throw new (0,
|
|
2062
|
+
if (!headerData) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
|
|
1941
2063
|
const header = parseFileHeader(headerData);
|
|
1942
|
-
if (header.signature !== "HWP Document File") throw new (0,
|
|
1943
|
-
if (header.flags & FLAG_ENCRYPTED) throw new (0,
|
|
1944
|
-
if (header.flags & FLAG_DRM) throw new (0,
|
|
2064
|
+
if (header.signature !== "HWP Document File") throw new (0, _chunkIVC5CB2Qcjs.KordocError)("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
|
|
2065
|
+
if (header.flags & FLAG_ENCRYPTED) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2066
|
+
if (header.flags & FLAG_DRM) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
1945
2067
|
const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
|
|
1946
2068
|
const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
|
|
1947
2069
|
const metadata = {
|
|
@@ -1950,9 +2072,9 @@ function parseHwp5Document(buffer, options) {
|
|
|
1950
2072
|
if (cfb) extractHwp5Metadata(cfb, metadata);
|
|
1951
2073
|
const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
|
|
1952
2074
|
const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
|
|
1953
|
-
if (sections.length === 0) throw new (0,
|
|
2075
|
+
if (sections.length === 0) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
1954
2076
|
metadata.pageCount = sections.length;
|
|
1955
|
-
const pageFilter = _optionalChain([options, 'optionalAccess',
|
|
2077
|
+
const pageFilter = _optionalChain([options, 'optionalAccess', _22 => _22.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sections.length) : null;
|
|
1956
2078
|
const totalTarget = pageFilter ? pageFilter.size : sections.length;
|
|
1957
2079
|
const blocks = [];
|
|
1958
2080
|
const nestedTableCounter = { count: 0 };
|
|
@@ -1964,30 +2086,30 @@ function parseHwp5Document(buffer, options) {
|
|
|
1964
2086
|
const sectionData = sections[si];
|
|
1965
2087
|
const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
1966
2088
|
totalDecompressed += data.length;
|
|
1967
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2089
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
1968
2090
|
const records = readRecords(data);
|
|
1969
2091
|
const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, nestedTableCounter);
|
|
1970
2092
|
blocks.push(...sectionBlocks);
|
|
1971
2093
|
parsedSections++;
|
|
1972
|
-
_optionalChain([options, 'optionalAccess',
|
|
2094
|
+
_optionalChain([options, 'optionalAccess', _23 => _23.onProgress, 'optionalCall', _24 => _24(parsedSections, totalTarget)]);
|
|
1973
2095
|
} catch (secErr) {
|
|
1974
|
-
if (secErr instanceof
|
|
2096
|
+
if (secErr instanceof _chunkIVC5CB2Qcjs.KordocError) throw secErr;
|
|
1975
2097
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
1976
2098
|
}
|
|
1977
2099
|
}
|
|
1978
2100
|
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
1979
|
-
const flatBlocks =
|
|
2101
|
+
const flatBlocks = _chunkIVC5CB2Qcjs.flattenLayoutTables.call(void 0, blocks);
|
|
1980
2102
|
if (docInfo) {
|
|
1981
2103
|
detectHwp5Headings(flatBlocks, docInfo);
|
|
1982
2104
|
}
|
|
1983
2105
|
const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
1984
|
-
const markdown =
|
|
2106
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, flatBlocks);
|
|
1985
2107
|
return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
1986
2108
|
}
|
|
1987
2109
|
function parseDocInfoStream(cfb, compressed) {
|
|
1988
2110
|
try {
|
|
1989
2111
|
const entry = CFB.find(cfb, "/DocInfo");
|
|
1990
|
-
if (!_optionalChain([entry, 'optionalAccess',
|
|
2112
|
+
if (!_optionalChain([entry, 'optionalAccess', _25 => _25.content])) return null;
|
|
1991
2113
|
const data = compressed ? decompressStream(Buffer.from(entry.content)) : Buffer.from(entry.content);
|
|
1992
2114
|
const records = readRecords(data);
|
|
1993
2115
|
return parseDocInfo(records);
|
|
@@ -2010,7 +2132,7 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2010
2132
|
const name = (style.nameKo || style.name).toLowerCase();
|
|
2011
2133
|
if (name.includes("\uBC14\uD0D5") || name.includes("\uBCF8\uBB38") || name === "normal" || name === "body") {
|
|
2012
2134
|
const cs = docInfo.charShapes[style.charShapeId];
|
|
2013
|
-
if (_optionalChain([cs, 'optionalAccess',
|
|
2135
|
+
if (_optionalChain([cs, 'optionalAccess', _26 => _26.fontSize]) > 0) {
|
|
2014
2136
|
baseFontSize = cs.fontSize / 10;
|
|
2015
2137
|
break;
|
|
2016
2138
|
}
|
|
@@ -2019,7 +2141,7 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2019
2141
|
if (baseFontSize === 0) {
|
|
2020
2142
|
const sizeFreq = /* @__PURE__ */ new Map();
|
|
2021
2143
|
for (const b of blocks) {
|
|
2022
|
-
if (_optionalChain([b, 'access',
|
|
2144
|
+
if (_optionalChain([b, 'access', _27 => _27.style, 'optionalAccess', _28 => _28.fontSize])) {
|
|
2023
2145
|
sizeFreq.set(b.style.fontSize, (sizeFreq.get(b.style.fontSize) || 0) + 1);
|
|
2024
2146
|
}
|
|
2025
2147
|
}
|
|
@@ -2039,11 +2161,11 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2039
2161
|
if (text.length === 0 || text.length > 200) continue;
|
|
2040
2162
|
if (/^\d+$/.test(text)) continue;
|
|
2041
2163
|
let level = 0;
|
|
2042
|
-
if (_optionalChain([block, 'access',
|
|
2164
|
+
if (_optionalChain([block, 'access', _29 => _29.style, 'optionalAccess', _30 => _30.fontSize]) && baseFontSize > 0) {
|
|
2043
2165
|
const ratio = block.style.fontSize / baseFontSize;
|
|
2044
|
-
if (ratio >=
|
|
2045
|
-
else if (ratio >=
|
|
2046
|
-
else if (ratio >=
|
|
2166
|
+
if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H1) level = 1;
|
|
2167
|
+
else if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H2) level = 2;
|
|
2168
|
+
else if (ratio >= _chunkIVC5CB2Qcjs.HEADING_RATIO_H3) level = 3;
|
|
2047
2169
|
}
|
|
2048
2170
|
if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
|
|
2049
2171
|
if (level === 0) level = 2;
|
|
@@ -2059,7 +2181,7 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2059
2181
|
function extractHwp5Metadata(cfb, metadata) {
|
|
2060
2182
|
try {
|
|
2061
2183
|
const summaryEntry = CFB.find(cfb, "/HwpSummaryInformation") || CFB.find(cfb, "/SummaryInformation");
|
|
2062
|
-
if (!_optionalChain([summaryEntry, 'optionalAccess',
|
|
2184
|
+
if (!_optionalChain([summaryEntry, 'optionalAccess', _31 => _31.content])) return;
|
|
2063
2185
|
const data = Buffer.from(summaryEntry.content);
|
|
2064
2186
|
if (data.length < 48) return;
|
|
2065
2187
|
const numSets = data.readUInt32LE(24);
|
|
@@ -2092,7 +2214,7 @@ function findViewTextSections(cfb, compressed) {
|
|
|
2092
2214
|
const sections = [];
|
|
2093
2215
|
for (let i = 0; i < MAX_SECTIONS; i++) {
|
|
2094
2216
|
const entry = CFB.find(cfb, `/ViewText/Section${i}`);
|
|
2095
|
-
if (!_optionalChain([entry, 'optionalAccess',
|
|
2217
|
+
if (!_optionalChain([entry, 'optionalAccess', _32 => _32.content])) break;
|
|
2096
2218
|
try {
|
|
2097
2219
|
const decrypted = decryptViewText(Buffer.from(entry.content), compressed);
|
|
2098
2220
|
sections.push({ idx: i, content: decrypted });
|
|
@@ -2106,13 +2228,13 @@ function findSections(cfb) {
|
|
|
2106
2228
|
const sections = [];
|
|
2107
2229
|
for (let i = 0; i < MAX_SECTIONS; i++) {
|
|
2108
2230
|
const entry = CFB.find(cfb, `/BodyText/Section${i}`);
|
|
2109
|
-
if (!_optionalChain([entry, 'optionalAccess',
|
|
2231
|
+
if (!_optionalChain([entry, 'optionalAccess', _33 => _33.content])) break;
|
|
2110
2232
|
sections.push({ idx: i, content: Buffer.from(entry.content) });
|
|
2111
2233
|
}
|
|
2112
2234
|
if (sections.length === 0 && cfb.FileIndex) {
|
|
2113
2235
|
for (const entry of cfb.FileIndex) {
|
|
2114
2236
|
if (sections.length >= MAX_SECTIONS) break;
|
|
2115
|
-
if (_optionalChain([entry, 'access',
|
|
2237
|
+
if (_optionalChain([entry, 'access', _34 => _34.name, 'optionalAccess', _35 => _35.startsWith, 'call', _36 => _36("Section")]) && entry.content) {
|
|
2116
2238
|
const idx = parseInt(entry.name.replace("Section", ""), 10) || 0;
|
|
2117
2239
|
sections.push({ idx, content: Buffer.from(entry.content) });
|
|
2118
2240
|
}
|
|
@@ -2128,7 +2250,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2128
2250
|
if (!raw) break;
|
|
2129
2251
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2130
2252
|
totalDecompressed += content.length;
|
|
2131
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2253
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2132
2254
|
sections.push({ idx: i, content });
|
|
2133
2255
|
}
|
|
2134
2256
|
if (sections.length === 0) {
|
|
@@ -2140,7 +2262,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2140
2262
|
if (raw) {
|
|
2141
2263
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2142
2264
|
totalDecompressed += content.length;
|
|
2143
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2265
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2144
2266
|
sections.push({ idx, content });
|
|
2145
2267
|
}
|
|
2146
2268
|
}
|
|
@@ -2157,7 +2279,7 @@ function findViewTextSectionsLenient(lcfb, compressed) {
|
|
|
2157
2279
|
try {
|
|
2158
2280
|
const content = decryptViewText(raw, compressed);
|
|
2159
2281
|
totalDecompressed += content.length;
|
|
2160
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2282
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2161
2283
|
sections.push({ idx: i, content });
|
|
2162
2284
|
} catch (e16) {
|
|
2163
2285
|
break;
|
|
@@ -2195,7 +2317,7 @@ function extractHwp5Images(cfb, blocks, compressed, warnings) {
|
|
|
2195
2317
|
const binDataRe = /\/BinData\/[Bb][Ii][Nn](\d{4})$/;
|
|
2196
2318
|
if (cfb.FileIndex) {
|
|
2197
2319
|
for (const entry of cfb.FileIndex) {
|
|
2198
|
-
if (!_optionalChain([entry, 'optionalAccess',
|
|
2320
|
+
if (!_optionalChain([entry, 'optionalAccess', _37 => _37.name]) || !entry.content) continue;
|
|
2199
2321
|
const match = entry.name.match(binDataRe);
|
|
2200
2322
|
if (!match) continue;
|
|
2201
2323
|
const idx = parseInt(match[1], 10);
|
|
@@ -2345,7 +2467,7 @@ function parseSection(records, docInfo, warnings, sectionNum, counter) {
|
|
|
2345
2467
|
if (url && blocks.length > 0) {
|
|
2346
2468
|
const lastBlock = blocks[blocks.length - 1];
|
|
2347
2469
|
if (lastBlock.type === "paragraph" && !lastBlock.href) {
|
|
2348
|
-
lastBlock.href = _nullishCoalesce(
|
|
2470
|
+
lastBlock.href = _nullishCoalesce(_chunkIVC5CB2Qcjs.sanitizeHref.call(void 0, url), () => ( void 0));
|
|
2349
2471
|
}
|
|
2350
2472
|
}
|
|
2351
2473
|
}
|
|
@@ -2463,8 +2585,8 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
2463
2585
|
if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
|
|
2464
2586
|
if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
|
|
2465
2587
|
if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
|
|
2466
|
-
rows = Math.min(rec.data.readUInt16LE(4),
|
|
2467
|
-
cols = Math.min(rec.data.readUInt16LE(6),
|
|
2588
|
+
rows = Math.min(rec.data.readUInt16LE(4), _chunkIVC5CB2Qcjs.MAX_ROWS);
|
|
2589
|
+
cols = Math.min(rec.data.readUInt16LE(6), _chunkIVC5CB2Qcjs.MAX_COLS);
|
|
2468
2590
|
}
|
|
2469
2591
|
if (rec.tagId === TAG_LIST_HEADER) {
|
|
2470
2592
|
const { cell, nextIdx } = parseCellBlock(records, i, tableLevel, counter);
|
|
@@ -2486,7 +2608,7 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
2486
2608
|
return { table: { rows, cols, cells: irCells, hasHeader: rows > 1 }, nextIdx: i };
|
|
2487
2609
|
}
|
|
2488
2610
|
const cellRows = arrangeCells(rows, cols, cells);
|
|
2489
|
-
return { table:
|
|
2611
|
+
return { table: _chunkIVC5CB2Qcjs.buildTable.call(void 0, cellRows), nextIdx: i };
|
|
2490
2612
|
}
|
|
2491
2613
|
function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
2492
2614
|
const rec = records[startIdx];
|
|
@@ -2501,8 +2623,8 @@ function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
|
2501
2623
|
rowAddr = rec.data.readUInt16LE(10);
|
|
2502
2624
|
const cs = rec.data.readUInt16LE(12);
|
|
2503
2625
|
const rs = rec.data.readUInt16LE(14);
|
|
2504
|
-
if (cs > 0) colSpan = Math.min(cs,
|
|
2505
|
-
if (rs > 0) rowSpan = Math.min(rs,
|
|
2626
|
+
if (cs > 0) colSpan = Math.min(cs, _chunkIVC5CB2Qcjs.MAX_COLS);
|
|
2627
|
+
if (rs > 0) rowSpan = Math.min(rs, _chunkIVC5CB2Qcjs.MAX_ROWS);
|
|
2506
2628
|
}
|
|
2507
2629
|
let i = startIdx + 1;
|
|
2508
2630
|
while (i < records.length) {
|
|
@@ -2601,10 +2723,10 @@ function getElements(parent, tagName) {
|
|
|
2601
2723
|
return result;
|
|
2602
2724
|
}
|
|
2603
2725
|
function getTextContent(el) {
|
|
2604
|
-
return _nullishCoalesce(_optionalChain([el, 'access',
|
|
2726
|
+
return _nullishCoalesce(_optionalChain([el, 'access', _38 => _38.textContent, 'optionalAccess', _39 => _39.trim, 'call', _40 => _40()]), () => ( ""));
|
|
2605
2727
|
}
|
|
2606
2728
|
function parseXml(text) {
|
|
2607
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
2729
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, text), "text/xml");
|
|
2608
2730
|
}
|
|
2609
2731
|
function parseSharedStrings(xml) {
|
|
2610
2732
|
const doc = parseXml(xml);
|
|
@@ -2741,14 +2863,14 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2741
2863
|
const merge = mergeMap.get(key);
|
|
2742
2864
|
row.push({
|
|
2743
2865
|
text,
|
|
2744
|
-
colSpan: _nullishCoalesce(_optionalChain([merge, 'optionalAccess',
|
|
2745
|
-
rowSpan: _nullishCoalesce(_optionalChain([merge, 'optionalAccess',
|
|
2866
|
+
colSpan: _nullishCoalesce(_optionalChain([merge, 'optionalAccess', _41 => _41.colSpan]), () => ( 1)),
|
|
2867
|
+
rowSpan: _nullishCoalesce(_optionalChain([merge, 'optionalAccess', _42 => _42.rowSpan]), () => ( 1))
|
|
2746
2868
|
});
|
|
2747
2869
|
}
|
|
2748
2870
|
cellRows.push(row);
|
|
2749
2871
|
}
|
|
2750
2872
|
if (cellRows.length > 0) {
|
|
2751
|
-
const table =
|
|
2873
|
+
const table = _chunkIVC5CB2Qcjs.buildTable.call(void 0, cellRows);
|
|
2752
2874
|
if (table.rows > 0) {
|
|
2753
2875
|
blocks.push({ type: "table", table, pageNumber: sheetIndex + 1 });
|
|
2754
2876
|
}
|
|
@@ -2756,12 +2878,12 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2756
2878
|
return blocks;
|
|
2757
2879
|
}
|
|
2758
2880
|
async function parseXlsxDocument(buffer, options) {
|
|
2759
|
-
|
|
2881
|
+
_chunkIVC5CB2Qcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE3);
|
|
2760
2882
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
2761
2883
|
const warnings = [];
|
|
2762
2884
|
const workbookFile = zip.file("xl/workbook.xml");
|
|
2763
2885
|
if (!workbookFile) {
|
|
2764
|
-
throw new (0,
|
|
2886
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 XLSX \uD30C\uC77C: xl/workbook.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2765
2887
|
}
|
|
2766
2888
|
let sharedStrings = [];
|
|
2767
2889
|
const ssFile = zip.file("xl/sharedStrings.xml");
|
|
@@ -2770,7 +2892,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2770
2892
|
}
|
|
2771
2893
|
const sheets = parseWorkbook(await workbookFile.async("text"));
|
|
2772
2894
|
if (sheets.length === 0) {
|
|
2773
|
-
throw new (0,
|
|
2895
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("XLSX \uD30C\uC77C\uC5D0 \uC2DC\uD2B8\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2774
2896
|
}
|
|
2775
2897
|
let relsMap = /* @__PURE__ */ new Map();
|
|
2776
2898
|
const relsFile = zip.file("xl/_rels/workbook.xml.rels");
|
|
@@ -2778,7 +2900,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2778
2900
|
relsMap = parseRels(await relsFile.async("text"));
|
|
2779
2901
|
}
|
|
2780
2902
|
let pageFilter = null;
|
|
2781
|
-
if (_optionalChain([options, 'optionalAccess',
|
|
2903
|
+
if (_optionalChain([options, 'optionalAccess', _43 => _43.pages])) {
|
|
2782
2904
|
const { parsePageRange: parsePageRange2 } = await Promise.resolve().then(() => _interopRequireWildcard(require("./page-range-3C7UGGEK.cjs")));
|
|
2783
2905
|
pageFilter = parsePageRange2(options.pages, sheets.length);
|
|
2784
2906
|
}
|
|
@@ -2787,7 +2909,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2787
2909
|
for (let i = 0; i < processedSheets; i++) {
|
|
2788
2910
|
if (pageFilter && !pageFilter.has(i + 1)) continue;
|
|
2789
2911
|
const sheet = sheets[i];
|
|
2790
|
-
_optionalChain([options, 'optionalAccess',
|
|
2912
|
+
_optionalChain([options, 'optionalAccess', _44 => _44.onProgress, 'optionalCall', _45 => _45(i + 1, processedSheets)]);
|
|
2791
2913
|
let sheetPath = relsMap.get(sheet.rId);
|
|
2792
2914
|
if (sheetPath) {
|
|
2793
2915
|
if (!sheetPath.startsWith("xl/") && !sheetPath.startsWith("/")) {
|
|
@@ -2842,7 +2964,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2842
2964
|
} catch (e20) {
|
|
2843
2965
|
}
|
|
2844
2966
|
}
|
|
2845
|
-
const markdown =
|
|
2967
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, blocks);
|
|
2846
2968
|
return { markdown, blocks, metadata, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
2847
2969
|
}
|
|
2848
2970
|
|
|
@@ -2857,7 +2979,7 @@ function getChildElements(parent, localName3) {
|
|
|
2857
2979
|
const node = children[i];
|
|
2858
2980
|
if (node.nodeType === 1) {
|
|
2859
2981
|
const el = node;
|
|
2860
|
-
if (el.localName === localName3 || _optionalChain([el, 'access',
|
|
2982
|
+
if (el.localName === localName3 || _optionalChain([el, 'access', _46 => _46.tagName, 'optionalAccess', _47 => _47.endsWith, 'call', _48 => _48(`:${localName3}`)])) {
|
|
2861
2983
|
result.push(el);
|
|
2862
2984
|
}
|
|
2863
2985
|
}
|
|
@@ -2872,7 +2994,7 @@ function findElements(parent, localName3) {
|
|
|
2872
2994
|
const child = children[i];
|
|
2873
2995
|
if (child.nodeType === 1) {
|
|
2874
2996
|
const el = child;
|
|
2875
|
-
if (el.localName === localName3 || _optionalChain([el, 'access',
|
|
2997
|
+
if (el.localName === localName3 || _optionalChain([el, 'access', _49 => _49.tagName, 'optionalAccess', _50 => _50.endsWith, 'call', _51 => _51(`:${localName3}`)])) {
|
|
2876
2998
|
result.push(el);
|
|
2877
2999
|
}
|
|
2878
3000
|
walk(el);
|
|
@@ -2891,7 +3013,7 @@ function getAttr(el, localName3) {
|
|
|
2891
3013
|
return null;
|
|
2892
3014
|
}
|
|
2893
3015
|
function parseXml2(text) {
|
|
2894
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
3016
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, text), "text/xml");
|
|
2895
3017
|
}
|
|
2896
3018
|
function parseStyles(xml) {
|
|
2897
3019
|
const doc = parseXml2(xml);
|
|
@@ -3056,7 +3178,7 @@ function parseParagraph(p, styles, numbering, footnotes, rels) {
|
|
|
3056
3178
|
const text = parts.join("").trim();
|
|
3057
3179
|
if (!text) return null;
|
|
3058
3180
|
const style = styles.get(styleId);
|
|
3059
|
-
if (_optionalChain([style, 'optionalAccess',
|
|
3181
|
+
if (_optionalChain([style, 'optionalAccess', _52 => _52.outlineLevel]) !== void 0 && style.outlineLevel >= 0 && style.outlineLevel <= 5) {
|
|
3060
3182
|
return {
|
|
3061
3183
|
type: "heading",
|
|
3062
3184
|
text,
|
|
@@ -3065,8 +3187,8 @@ function parseParagraph(p, styles, numbering, footnotes, rels) {
|
|
|
3065
3187
|
}
|
|
3066
3188
|
if (numId && numId !== "0") {
|
|
3067
3189
|
const numDef = numbering.get(numId);
|
|
3068
|
-
const levelInfo = _optionalChain([numDef, 'optionalAccess',
|
|
3069
|
-
const listType = _optionalChain([levelInfo, 'optionalAccess',
|
|
3190
|
+
const levelInfo = _optionalChain([numDef, 'optionalAccess', _53 => _53.get, 'call', _54 => _54(ilvl)]);
|
|
3191
|
+
const listType = _optionalChain([levelInfo, 'optionalAccess', _55 => _55.numFmt]) === "bullet" ? "unordered" : "ordered";
|
|
3070
3192
|
return { type: "list", text, listType };
|
|
3071
3193
|
}
|
|
3072
3194
|
const block = { type: "paragraph", text };
|
|
@@ -3107,7 +3229,7 @@ function parseTable(tbl, styles, numbering, footnotes, rels) {
|
|
|
3107
3229
|
const pElements = getChildElements(tc, "p");
|
|
3108
3230
|
for (const p of pElements) {
|
|
3109
3231
|
const block = parseParagraph(p, styles, numbering, footnotes, rels);
|
|
3110
|
-
if (_optionalChain([block, 'optionalAccess',
|
|
3232
|
+
if (_optionalChain([block, 'optionalAccess', _56 => _56.text])) cellTexts.push(block.text);
|
|
3111
3233
|
}
|
|
3112
3234
|
row.push({ text: cellTexts.join("\n"), colSpan, rowSpan });
|
|
3113
3235
|
}
|
|
@@ -3120,7 +3242,7 @@ function parseTable(tbl, styles, numbering, footnotes, rels) {
|
|
|
3120
3242
|
if (!cell || cell.rowSpan === 0) continue;
|
|
3121
3243
|
let span = 1;
|
|
3122
3244
|
for (let nr = r + 1; nr < rows.length; nr++) {
|
|
3123
|
-
if (_optionalChain([rows, 'access',
|
|
3245
|
+
if (_optionalChain([rows, 'access', _57 => _57[nr], 'access', _58 => _58[c], 'optionalAccess', _59 => _59.rowSpan]) === 0) span++;
|
|
3124
3246
|
else break;
|
|
3125
3247
|
}
|
|
3126
3248
|
cell.rowSpan = span;
|
|
@@ -3164,7 +3286,7 @@ async function extractImages(zip, rels, doc) {
|
|
|
3164
3286
|
try {
|
|
3165
3287
|
const data = await imgFile.async("uint8array");
|
|
3166
3288
|
imgIdx++;
|
|
3167
|
-
const ext = _nullishCoalesce(_optionalChain([imgPath, 'access',
|
|
3289
|
+
const ext = _nullishCoalesce(_optionalChain([imgPath, 'access', _60 => _60.split, 'call', _61 => _61("."), 'access', _62 => _62.pop, 'call', _63 => _63(), 'optionalAccess', _64 => _64.toLowerCase, 'call', _65 => _65()]), () => ( "png"));
|
|
3168
3290
|
const mimeMap = {
|
|
3169
3291
|
png: "image/png",
|
|
3170
3292
|
jpg: "image/jpeg",
|
|
@@ -3184,12 +3306,12 @@ async function extractImages(zip, rels, doc) {
|
|
|
3184
3306
|
return { blocks, images };
|
|
3185
3307
|
}
|
|
3186
3308
|
async function parseDocxDocument(buffer, options) {
|
|
3187
|
-
|
|
3309
|
+
_chunkIVC5CB2Qcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE4);
|
|
3188
3310
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
3189
3311
|
const warnings = [];
|
|
3190
3312
|
const docFile = zip.file("word/document.xml");
|
|
3191
3313
|
if (!docFile) {
|
|
3192
|
-
throw new (0,
|
|
3314
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 DOCX \uD30C\uC77C: word/document.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3193
3315
|
}
|
|
3194
3316
|
let rels = /* @__PURE__ */ new Map();
|
|
3195
3317
|
const relsFile = zip.file("word/_rels/document.xml.rels");
|
|
@@ -3224,7 +3346,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3224
3346
|
const doc = parseXml2(docXml);
|
|
3225
3347
|
const body = findElements(doc, "body");
|
|
3226
3348
|
if (body.length === 0) {
|
|
3227
|
-
throw new (0,
|
|
3349
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("DOCX \uBCF8\uBB38(w:body)\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3228
3350
|
}
|
|
3229
3351
|
const blocks = [];
|
|
3230
3352
|
const bodyEl = body[0];
|
|
@@ -3233,7 +3355,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3233
3355
|
const node = children[i];
|
|
3234
3356
|
if (node.nodeType !== 1) continue;
|
|
3235
3357
|
const el = node;
|
|
3236
|
-
const localName3 = _nullishCoalesce(el.localName, () => ( _optionalChain([el, 'access',
|
|
3358
|
+
const localName3 = _nullishCoalesce(el.localName, () => ( _optionalChain([el, 'access', _66 => _66.tagName, 'optionalAccess', _67 => _67.split, 'call', _68 => _68(":"), 'access', _69 => _69.pop, 'call', _70 => _70()])));
|
|
3237
3359
|
if (localName3 === "p") {
|
|
3238
3360
|
const block = parseParagraph(el, styles, numbering, footnotes, rels);
|
|
3239
3361
|
if (block) blocks.push(block);
|
|
@@ -3264,7 +3386,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3264
3386
|
}
|
|
3265
3387
|
}
|
|
3266
3388
|
const outline = blocks.filter((b) => b.type === "heading").map((b) => ({ level: _nullishCoalesce(b.level, () => ( 2)), text: _nullishCoalesce(b.text, () => ( "")) }));
|
|
3267
|
-
const markdown =
|
|
3389
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, blocks);
|
|
3268
3390
|
return {
|
|
3269
3391
|
markdown,
|
|
3270
3392
|
blocks,
|
|
@@ -3287,7 +3409,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3287
3409
|
}
|
|
3288
3410
|
const text = new TextDecoder("utf-8").decode(buffer).replace(/^\uFEFF/, "");
|
|
3289
3411
|
const normalized = text.replace(/ /g, " ");
|
|
3290
|
-
const xml =
|
|
3412
|
+
const xml = _chunkIVC5CB2Qcjs.stripDtd.call(void 0, normalized);
|
|
3291
3413
|
const warnings = [];
|
|
3292
3414
|
const parser = new (0, _xmldom.DOMParser)({
|
|
3293
3415
|
onError: (_level, msg) => {
|
|
@@ -3315,7 +3437,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3315
3437
|
return { markdown: "", blocks: [], metadata, warnings };
|
|
3316
3438
|
}
|
|
3317
3439
|
const blocks = [];
|
|
3318
|
-
const pageFilter = _optionalChain([options, 'optionalAccess',
|
|
3440
|
+
const pageFilter = _optionalChain([options, 'optionalAccess', _71 => _71.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, countSections(body)) : null;
|
|
3319
3441
|
let sectionIdx = 0;
|
|
3320
3442
|
const children = body.childNodes;
|
|
3321
3443
|
for (let i = 0; i < children.length; i++) {
|
|
@@ -3327,7 +3449,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3327
3449
|
parseSection2(el, blocks, paraShapeMap, sectionIdx, warnings);
|
|
3328
3450
|
}
|
|
3329
3451
|
const outline = blocks.filter((b) => b.type === "heading" && b.text).map((b) => ({ level: _nullishCoalesce(b.level, () => ( 1)), text: b.text, pageNumber: b.pageNumber }));
|
|
3330
|
-
const markdown =
|
|
3452
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, blocks);
|
|
3331
3453
|
return {
|
|
3332
3454
|
markdown,
|
|
3333
3455
|
blocks,
|
|
@@ -3397,7 +3519,7 @@ function parseParagraph2(el, blocks, paraShapeMap, sectionNum) {
|
|
|
3397
3519
|
const shapeInfo = paraShapeMap.get(paraShapeId);
|
|
3398
3520
|
const text = extractParagraphText(el);
|
|
3399
3521
|
if (!text) return;
|
|
3400
|
-
if (_optionalChain([shapeInfo, 'optionalAccess',
|
|
3522
|
+
if (_optionalChain([shapeInfo, 'optionalAccess', _72 => _72.headingLevel]) != null) {
|
|
3401
3523
|
blocks.push({ type: "heading", text, level: shapeInfo.headingLevel, pageNumber: sectionNum });
|
|
3402
3524
|
} else {
|
|
3403
3525
|
blocks.push({ type: "paragraph", text, pageNumber: sectionNum });
|
|
@@ -3469,7 +3591,7 @@ function parseTable2(el, blocks, paraShapeMap, sectionNum, warnings) {
|
|
|
3469
3591
|
const cellRows = grid.map(
|
|
3470
3592
|
(row) => row.map((cell) => _nullishCoalesce(cell, () => ( { text: "", colSpan: 1, rowSpan: 1 })))
|
|
3471
3593
|
);
|
|
3472
|
-
const table =
|
|
3594
|
+
const table = _chunkIVC5CB2Qcjs.buildTable.call(void 0, cellRows);
|
|
3473
3595
|
blocks.push({ type: "table", table, pageNumber: sectionNum });
|
|
3474
3596
|
}
|
|
3475
3597
|
function extractCellText(cellEl) {
|
|
@@ -3761,7 +3883,7 @@ function fillFormFields(blocks, values) {
|
|
|
3761
3883
|
if (block.type !== "table" || !block.table) continue;
|
|
3762
3884
|
for (let r = 0; r < block.table.rows; r++) {
|
|
3763
3885
|
for (let c = 0; c < block.table.cols; c++) {
|
|
3764
|
-
const cell = _optionalChain([block, 'access',
|
|
3886
|
+
const cell = _optionalChain([block, 'access', _73 => _73.table, 'access', _74 => _74.cells, 'access', _75 => _75[r], 'optionalAccess', _76 => _76[c]]);
|
|
3765
3887
|
if (!cell) continue;
|
|
3766
3888
|
const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
|
|
3767
3889
|
if (result) {
|
|
@@ -3800,7 +3922,7 @@ function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
|
|
|
3800
3922
|
const matchKey = findMatchingKey(normalizedCellLabel, values);
|
|
3801
3923
|
if (matchKey === void 0) continue;
|
|
3802
3924
|
const newValue = values.get(matchKey);
|
|
3803
|
-
if (_optionalChain([patternFilledCells, 'optionalAccess',
|
|
3925
|
+
if (_optionalChain([patternFilledCells, 'optionalAccess', _77 => _77.has, 'call', _78 => _78(`${r},${c + 1}`)])) {
|
|
3804
3926
|
valueCell.text = newValue + " " + valueCell.text;
|
|
3805
3927
|
} else {
|
|
3806
3928
|
valueCell.text = newValue;
|
|
@@ -3870,7 +3992,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
3870
3992
|
const normalizedValues = normalizeValues(values);
|
|
3871
3993
|
const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
|
|
3872
3994
|
if (sectionFiles.length === 0) {
|
|
3873
|
-
throw new (0,
|
|
3995
|
+
throw new (0, _chunkIVC5CB2Qcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3874
3996
|
}
|
|
3875
3997
|
const xmlParser = new (0, _xmldom.DOMParser)();
|
|
3876
3998
|
const xmlSerializer = new (0, _xmldom.XMLSerializer)();
|
|
@@ -3878,7 +4000,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
3878
4000
|
const zipEntry = zip.file(sectionPath);
|
|
3879
4001
|
if (!zipEntry) continue;
|
|
3880
4002
|
const rawXml = await zipEntry.async("text");
|
|
3881
|
-
const doc = xmlParser.parseFromString(
|
|
4003
|
+
const doc = xmlParser.parseFromString(_chunkIVC5CB2Qcjs.stripDtd.call(void 0, rawXml), "text/xml");
|
|
3882
4004
|
if (!doc.documentElement) continue;
|
|
3883
4005
|
let modified = false;
|
|
3884
4006
|
const tables = findAllElements(doc.documentElement, "tbl");
|
|
@@ -4736,16 +4858,17 @@ function diffTableCells(a, b) {
|
|
|
4736
4858
|
// src/index.ts
|
|
4737
4859
|
async function parse(input, options) {
|
|
4738
4860
|
let buffer;
|
|
4861
|
+
const opts = typeof input === "string" && !_optionalChain([options, 'optionalAccess', _79 => _79.filePath]) ? { ...options, filePath: input } : options;
|
|
4739
4862
|
if (typeof input === "string") {
|
|
4740
4863
|
try {
|
|
4741
4864
|
const buf = await _promises.readFile.call(void 0, input);
|
|
4742
|
-
buffer =
|
|
4865
|
+
buffer = _chunkIVC5CB2Qcjs.toArrayBuffer.call(void 0, buf);
|
|
4743
4866
|
} catch (err) {
|
|
4744
4867
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
4745
4868
|
return { success: false, fileType: "unknown", error: msg, code: "PARSE_ERROR" };
|
|
4746
4869
|
}
|
|
4747
4870
|
} else if (Buffer.isBuffer(input)) {
|
|
4748
|
-
buffer =
|
|
4871
|
+
buffer = _chunkIVC5CB2Qcjs.toArrayBuffer.call(void 0, input);
|
|
4749
4872
|
} else {
|
|
4750
4873
|
buffer = input;
|
|
4751
4874
|
}
|
|
@@ -4756,16 +4879,16 @@ async function parse(input, options) {
|
|
|
4756
4879
|
switch (format) {
|
|
4757
4880
|
case "hwpx": {
|
|
4758
4881
|
const zipFormat = await detectZipFormat(buffer);
|
|
4759
|
-
if (zipFormat === "xlsx") return parseXlsx(buffer,
|
|
4760
|
-
if (zipFormat === "docx") return parseDocx(buffer,
|
|
4761
|
-
return parseHwpx(buffer,
|
|
4882
|
+
if (zipFormat === "xlsx") return parseXlsx(buffer, opts);
|
|
4883
|
+
if (zipFormat === "docx") return parseDocx(buffer, opts);
|
|
4884
|
+
return parseHwpx(buffer, opts);
|
|
4762
4885
|
}
|
|
4763
4886
|
case "hwp":
|
|
4764
|
-
return parseHwp(buffer,
|
|
4887
|
+
return parseHwp(buffer, opts);
|
|
4765
4888
|
case "hwpml":
|
|
4766
|
-
return parseHwpml(buffer,
|
|
4889
|
+
return parseHwpml(buffer, opts);
|
|
4767
4890
|
case "pdf":
|
|
4768
|
-
return parsePdf(buffer,
|
|
4891
|
+
return parsePdf(buffer, opts);
|
|
4769
4892
|
default:
|
|
4770
4893
|
return { success: false, fileType: "unknown", error: "\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD30C\uC77C \uD615\uC2DD\uC785\uB2C8\uB2E4.", code: "UNSUPPORTED_FORMAT" };
|
|
4771
4894
|
}
|
|
@@ -4773,23 +4896,23 @@ async function parse(input, options) {
|
|
|
4773
4896
|
async function parseHwpx(buffer, options) {
|
|
4774
4897
|
try {
|
|
4775
4898
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseHwpxDocument(buffer, options);
|
|
4776
|
-
return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess',
|
|
4899
|
+
return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _80 => _80.length]) ? images : void 0 };
|
|
4777
4900
|
} catch (err) {
|
|
4778
|
-
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4901
|
+
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err) };
|
|
4779
4902
|
}
|
|
4780
4903
|
}
|
|
4781
4904
|
async function parseHwp(buffer, options) {
|
|
4782
4905
|
try {
|
|
4783
4906
|
const { markdown, blocks, metadata, outline, warnings, images } = parseHwp5Document(Buffer.from(buffer), options);
|
|
4784
|
-
return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess',
|
|
4907
|
+
return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _81 => _81.length]) ? images : void 0 };
|
|
4785
4908
|
} catch (err) {
|
|
4786
|
-
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4909
|
+
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err) };
|
|
4787
4910
|
}
|
|
4788
4911
|
}
|
|
4789
4912
|
async function parsePdf(buffer, options) {
|
|
4790
4913
|
let parsePdfDocument;
|
|
4791
4914
|
try {
|
|
4792
|
-
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-
|
|
4915
|
+
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-ZORW4RSC.cjs")));
|
|
4793
4916
|
parsePdfDocument = mod.parsePdfDocument;
|
|
4794
4917
|
} catch (e26) {
|
|
4795
4918
|
return {
|
|
@@ -4804,7 +4927,7 @@ async function parsePdf(buffer, options) {
|
|
|
4804
4927
|
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
|
|
4805
4928
|
} catch (err) {
|
|
4806
4929
|
const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
|
|
4807
|
-
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4930
|
+
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err), isImageBased };
|
|
4808
4931
|
}
|
|
4809
4932
|
}
|
|
4810
4933
|
async function parseXlsx(buffer, options) {
|
|
@@ -4812,15 +4935,15 @@ async function parseXlsx(buffer, options) {
|
|
|
4812
4935
|
const { markdown, blocks, metadata, warnings } = await parseXlsxDocument(buffer, options);
|
|
4813
4936
|
return { success: true, fileType: "xlsx", markdown, blocks, metadata, warnings };
|
|
4814
4937
|
} catch (err) {
|
|
4815
|
-
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4938
|
+
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err) };
|
|
4816
4939
|
}
|
|
4817
4940
|
}
|
|
4818
4941
|
async function parseDocx(buffer, options) {
|
|
4819
4942
|
try {
|
|
4820
4943
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseDocxDocument(buffer, options);
|
|
4821
|
-
return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess',
|
|
4944
|
+
return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _82 => _82.length]) ? images : void 0 };
|
|
4822
4945
|
} catch (err) {
|
|
4823
|
-
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4946
|
+
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err) };
|
|
4824
4947
|
}
|
|
4825
4948
|
}
|
|
4826
4949
|
async function parseHwpml(buffer, options) {
|
|
@@ -4828,16 +4951,16 @@ async function parseHwpml(buffer, options) {
|
|
|
4828
4951
|
const { markdown, blocks, metadata, outline, warnings } = parseHwpmlDocument(buffer, options);
|
|
4829
4952
|
return { success: true, fileType: "hwpml", markdown, blocks, metadata, outline, warnings };
|
|
4830
4953
|
} catch (err) {
|
|
4831
|
-
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4954
|
+
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code: _chunkIVC5CB2Qcjs.classifyError.call(void 0, err) };
|
|
4832
4955
|
}
|
|
4833
4956
|
}
|
|
4834
4957
|
async function fillForm(input, values, outputFormat = "markdown") {
|
|
4835
4958
|
let buffer;
|
|
4836
4959
|
if (typeof input === "string") {
|
|
4837
4960
|
const buf = await _promises.readFile.call(void 0, input);
|
|
4838
|
-
buffer =
|
|
4961
|
+
buffer = _chunkIVC5CB2Qcjs.toArrayBuffer.call(void 0, buf);
|
|
4839
4962
|
} else if (Buffer.isBuffer(input)) {
|
|
4840
|
-
buffer =
|
|
4963
|
+
buffer = _chunkIVC5CB2Qcjs.toArrayBuffer.call(void 0, input);
|
|
4841
4964
|
} else {
|
|
4842
4965
|
buffer = input;
|
|
4843
4966
|
}
|
|
@@ -4863,7 +4986,7 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
4863
4986
|
throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
|
|
4864
4987
|
}
|
|
4865
4988
|
const fill = fillFormFields(parsed.blocks, values);
|
|
4866
|
-
const markdown =
|
|
4989
|
+
const markdown = _chunkIVC5CB2Qcjs.blocksToMarkdown.call(void 0, fill.blocks);
|
|
4867
4990
|
if (outputFormat === "hwpx") {
|
|
4868
4991
|
const hwpxBuffer = await markdownToHwpx(markdown);
|
|
4869
4992
|
return { output: hwpxBuffer, format: "hwpx", fill };
|
|
@@ -4894,5 +5017,5 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
4894
5017
|
|
|
4895
5018
|
|
|
4896
5019
|
|
|
4897
|
-
exports.VERSION =
|
|
5020
|
+
exports.VERSION = _chunkIVC5CB2Qcjs.VERSION; exports.blocksToMarkdown = _chunkIVC5CB2Qcjs.blocksToMarkdown; exports.compare = compare; exports.detectFormat = detectFormat; exports.detectZipFormat = detectZipFormat; exports.diffBlocks = diffBlocks; exports.extractFormFields = extractFormFields; exports.fillForm = fillForm; exports.fillFormFields = fillFormFields; exports.fillHwpx = fillHwpx; exports.isHwpxFile = isHwpxFile; exports.isLabelCell = isLabelCell; exports.isOldHwpFile = isOldHwpFile; exports.isPdfFile = isPdfFile; exports.isZipFile = isZipFile; exports.markdownToHwpx = markdownToHwpx; exports.parse = parse; exports.parseDocx = parseDocx; exports.parseHwp = parseHwp; exports.parseHwpml = parseHwpml; exports.parseHwpx = parseHwpx; exports.parsePdf = parsePdf; exports.parseXlsx = parseXlsx;
|
|
4898
5021
|
//# sourceMappingURL=index.cjs.map
|