kordoc 2.3.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/dist/{chunk-OEJJPCMM.js → chunk-KSBPABBQ.js} +117 -9
- package/dist/chunk-KSBPABBQ.js.map +1 -0
- package/dist/{chunk-Z7UPTVMX.js → chunk-VJPDY4YT.js} +2 -2
- package/dist/{chunk-Z7UPTVMX.js.map → chunk-VJPDY4YT.js.map} +1 -1
- package/dist/{chunk-ZNJPRRIA.cjs → chunk-VLSATRNQ.cjs} +2 -2
- package/dist/{chunk-ZNJPRRIA.cjs.map → chunk-VLSATRNQ.cjs.map} +1 -1
- package/dist/{chunk-JFTFC2BB.js → chunk-XG5CQUSC.js} +2 -2
- package/dist/{chunk-JFTFC2BB.js.map → chunk-XG5CQUSC.js.map} +1 -1
- package/dist/cli.js +4 -4
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +239 -131
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +116 -8
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +3 -3
- package/dist/{parser-4LKJXBPP.js → parser-4275GJRB.js} +2 -2
- package/dist/{parser-KBQZB3QY.cjs → parser-STAOZMUC.cjs} +15 -15
- package/dist/{parser-KBQZB3QY.cjs.map → parser-STAOZMUC.cjs.map} +1 -1
- package/dist/{parser-25LF2S2J.js → parser-XRUZEFZT.js} +2 -2
- package/dist/{watch-GXRBLW3Y.js → watch-BFLNFJBE.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-OEJJPCMM.js.map +0 -1
- /package/dist/{parser-4LKJXBPP.js.map → parser-4275GJRB.js.map} +0 -0
- /package/dist/{parser-25LF2S2J.js.map → parser-XRUZEFZT.js.map} +0 -0
- /package/dist/{watch-GXRBLW3Y.js.map → watch-BFLNFJBE.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
var
|
|
19
|
+
var _chunkVLSATRNQcjs = require('./chunk-VLSATRNQ.cjs');
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -75,6 +75,100 @@ async function detectZipFormat(buffer) {
|
|
|
75
75
|
|
|
76
76
|
var _zlib = require('zlib');
|
|
77
77
|
var _xmldom = require('@xmldom/xmldom');
|
|
78
|
+
|
|
79
|
+
// src/hwpx/com-fallback.ts
|
|
80
|
+
var _child_process = require('child_process');
|
|
81
|
+
var _os = require('os');
|
|
82
|
+
/**
 * Reports whether the COM-based text extraction fallback can be attempted
 * on this machine.
 *
 * The only thing checked is the operating system: the fallback is considered
 * available when Node reports the "win32" platform. It does not verify that
 * the COM server itself is installed.
 *
 * @returns {boolean} true when running on Windows, false otherwise.
 */
function isComFallbackAvailable() {
  const currentPlatform = (0, _os.platform)();
  return currentPlatform === "win32";
}
|
|
85
|
+
/**
 * Tells whether a HWPX manifest marks the package as encrypted.
 *
 * The check is a plain substring search for "encryption-data" in the
 * manifest text; the XML is not parsed.
 *
 * @param {string} manifestXml - Raw text of META-INF/manifest.xml.
 * @returns {boolean} true when the marker is present. Non-string input
 *   (null, undefined, ...) yields false instead of throwing.
 */
function isEncryptedHwpx(manifestXml) {
  // A missing or unreadable manifest cannot declare encryption.
  if (typeof manifestXml !== "string") return false;
  return manifestXml.includes("encryption-data");
}
|
|
88
|
+
/**
 * Extracts per-page plain text from a document by driving the
 * `HWPFrame.HwpObject` COM automation object from a PowerShell child process.
 *
 * The PowerShell script prints a single JSON object on stdout: either
 * `{ pageCount, pages }` on success or `{ error }` on failure.
 *
 * @param {string} filePath - Path of the document to open through COM.
 * @returns {{ pages: Array, pageCount: number, warnings: Array<{message: string, code: string}> }}
 *   The page texts, the page count reported by the COM object (falling back
 *   to `pages.length` when absent) and a `COM_EMPTY` warning when no pages
 *   were returned.
 * @throws {Error} When not running on Windows, when `filePath` is not a
 *   non-empty string, when PowerShell fails / times out (errors from
 *   `execFileSync` propagate unchanged), when stdout contains no parseable
 *   JSON, or when the script reports an error.
 */
function extractTextViaCom(filePath) {
  if (!isComFallbackAvailable()) {
    throw new Error("COM fallback\uC740 Windows\uC5D0\uC11C\uB9CC \uC0AC\uC6A9 \uAC00\uB2A5\uD569\uB2C8\uB2E4");
  }
  if (typeof filePath !== "string" || filePath.length === 0) {
    throw new TypeError("COM fallback\uC5D0\uB294 \uD30C\uC77C \uACBD\uB85C(filePath)\uAC00 \uD544\uC694\uD569\uB2C8\uB2E4");
  }

  // Security: the path is handed to PowerShell through an environment
  // variable instead of being spliced into the script text. Escaping only the
  // ASCII apostrophe is not sufficient, because PowerShell also accepts the
  // Unicode quotation marks U+2018..U+201B as single-quote delimiters, so a
  // crafted file name could otherwise terminate the literal and inject code.
  const PATH_ENV_NAME = "KORDOC_COM_FILE_PATH";

  // The script is a constant: nothing caller-controlled is interpolated.
  // Cleanup lives in `finally` so the COM object is released even when
  // Open/GetPageText throws; its output is discarded so nothing trails the
  // JSON document on stdout.
  // NOTE(review): the page loop runs 1..PageCount; confirm against the HWP
  // automation reference whether GetPageText expects 0-based page numbers.
  const ps1 = `
[Console]::OutputEncoding = [System.Text.Encoding]::UTF8
$ErrorActionPreference = 'Stop'
$hwp = $null
try {
  $hwp = New-Object -ComObject HWPFrame.HwpObject
  $hwp.RegisterModule('FilePathCheckerModule', 'FilePathCheckerModuleExample') | Out-Null
  $hwp.Open($env:KORDOC_COM_FILE_PATH, '', '') | Out-Null
  $pc = $hwp.PageCount
  $result = @{ pageCount = $pc; pages = @() }
  for ($p = 1; $p -le $pc; $p++) {
    $t = $hwp.GetPageText($p, 0)
    $result.pages += @($t)
  }
  $result | ConvertTo-Json -Depth 3 -Compress
} catch {
  @{ error = $_.Exception.Message } | ConvertTo-Json -Compress
} finally {
  if ($null -ne $hwp) {
    try { $hwp.Clear(1) | Out-Null } catch { }
    try { [System.Runtime.InteropServices.Marshal]::ReleaseComObject($hwp) | Out-Null } catch { }
  }
}
`;

  const childEnv = Object.assign({}, process.env);
  childEnv[PATH_ENV_NAME] = filePath;

  const stdout = (0, _child_process.execFileSync)("powershell", [
    "-NoProfile",
    "-NonInteractive",
    "-ExecutionPolicy",
    "Bypass",
    "-Command",
    ps1
  ], {
    encoding: "utf-8",
    // 2 minute timeout
    timeout: 12e4,
    windowsHide: true,
    // 50MB
    maxBuffer: 50 * 1024 * 1024,
    env: childEnv
  });

  // PowerShell may print noise before the JSON document; skip to the first "{".
  const trimmed = stdout.trim();
  const jsonStart = trimmed.indexOf("{");
  if (jsonStart < 0) throw new Error(`COM \uCD9C\uB825\uC5D0 JSON\uC774 \uC5C6\uC2B5\uB2C8\uB2E4: ${trimmed.slice(0, 200)}`);

  let json;
  try {
    json = JSON.parse(trimmed.slice(jsonStart));
  } catch (parseErr) {
    // Surface a descriptive error instead of a bare SyntaxError.
    const reason = parseErr instanceof Error ? parseErr.message : String(parseErr);
    throw new Error(`COM \uCD9C\uB825 JSON \uD30C\uC2F1 \uC2E4\uD328: ${reason}`);
  }
  if (json.error) {
    throw new Error(`COM \uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uC2E4\uD328: ${json.error}`);
  }

  const warnings = [];
  let pages;
  if (Array.isArray(json.pages)) {
    pages = json.pages;
  } else if (typeof json.pages === "string") {
    // Defensive: accept a lone page serialized as a scalar.
    pages = [json.pages];
  } else {
    pages = [];
  }
  const pageCount = json.pageCount != null ? json.pageCount : pages.length;
  if (pages.length === 0) {
    warnings.push({ message: "COM\uC73C\uB85C \uD14D\uC2A4\uD2B8\uB97C \uCD94\uCD9C\uD558\uC9C0 \uBABB\uD588\uC2B5\uB2C8\uB2E4", code: "COM_EMPTY" });
  }
  return { pages, pageCount, warnings };
}
|
|
143
|
+
/**
 * Converts the raw output of the COM text extraction into the parser's
 * result shape.
 *
 * Each page's text is split into lines; every non-blank line becomes one
 * "paragraph" block tagged with its 1-based page number, and the same lines
 * joined by blank lines form the markdown. A `DRM_COM_FALLBACK` warning is
 * always appended to tell the consumer how the text was obtained.
 *
 * @param {Array} pages - Text of each page, in order. Null/undefined entries
 *   are treated as empty pages; other non-string entries are stringified.
 *   A non-array value is treated as "no pages".
 * @param {number} pageCount - Page count to report in `metadata.pageCount`.
 * @param {Array<{message: string, code: string}>} [warnings] - Warnings
 *   gathered so far. The array is copied, never modified.
 * @returns {{ markdown: string, blocks: Array, metadata: {pageCount: number}, warnings: Array }}
 */
function comResultToParseResult(pages, pageCount, warnings) {
  const pageTexts = Array.isArray(pages) ? pages : [];
  const blocks = [];
  const lines = [];

  for (let i = 0; i < pageTexts.length; i++) {
    const raw = pageTexts[i];
    const text = (typeof raw === "string" ? raw : raw == null ? "" : String(raw)).trim();
    if (!text) continue;
    for (const para of text.split(/\r?\n/)) {
      const trimmed = para.trim();
      if (!trimmed) continue;
      blocks.push({ type: "paragraph", text: trimmed, pageNumber: i + 1 });
      lines.push(trimmed);
    }
  }

  // Work on a copy so the caller's warnings array is not mutated.
  const allWarnings = Array.isArray(warnings) ? warnings.slice() : [];
  allWarnings.push({
    message: "DRM \uBB38\uC11C: \uD55C\uCEF4 COM API\uB85C \uD14D\uC2A4\uD2B8 \uCD94\uCD9C (\uC11C\uC2DD/\uD45C \uC815\uBCF4 \uC81C\uD55C\uC801)",
    code: "DRM_COM_FALLBACK"
  });

  return {
    markdown: lines.join("\n\n"),
    blocks,
    metadata: { pageCount },
    // Never empty: the DRM notice above is always present.
    warnings: allWarnings
  };
}
|
|
170
|
+
|
|
171
|
+
// src/hwpx/parser.ts
|
|
78
172
|
var MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024;
|
|
79
173
|
var MAX_ZIP_ENTRIES = 500;
|
|
80
174
|
function clampSpan(val, max) {
|
|
@@ -84,7 +178,7 @@ var MAX_XML_DEPTH = 200;
|
|
|
84
178
|
function createXmlParser(warnings) {
|
|
85
179
|
return new (0, _xmldom.DOMParser)({
|
|
86
180
|
onError(level, msg) {
|
|
87
|
-
if (level === "fatalError") throw new (0,
|
|
181
|
+
if (level === "fatalError") throw new (0, _chunkVLSATRNQcjs.KordocError)(`XML \uD30C\uC2F1 \uC2E4\uD328: ${msg}`);
|
|
88
182
|
_optionalChain([warnings, 'optionalAccess', _2 => _2.push, 'call', _3 => _3({ code: "MALFORMED_XML", message: `XML ${level === "warn" ? "\uACBD\uACE0" : "\uC624\uB958"}: ${msg}` })]);
|
|
89
183
|
}
|
|
90
184
|
});
|
|
@@ -103,10 +197,10 @@ async function extractHwpxStyles(zip, decompressed) {
|
|
|
103
197
|
const xml = await file.async("text");
|
|
104
198
|
if (decompressed) {
|
|
105
199
|
decompressed.total += xml.length * 2;
|
|
106
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
200
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkVLSATRNQcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
107
201
|
}
|
|
108
202
|
const parser = createXmlParser();
|
|
109
|
-
const doc = parser.parseFromString(
|
|
203
|
+
const doc = parser.parseFromString(_chunkVLSATRNQcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
110
204
|
if (!doc.documentElement) continue;
|
|
111
205
|
parseCharProperties(doc, result.charProperties);
|
|
112
206
|
parseStyleElements(doc, result.styles);
|
|
@@ -168,7 +262,7 @@ function parseStyleElements(doc, map) {
|
|
|
168
262
|
}
|
|
169
263
|
}
|
|
170
264
|
async function parseHwpxDocument(buffer, options) {
|
|
171
|
-
|
|
265
|
+
_chunkVLSATRNQcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE, MAX_ZIP_ENTRIES);
|
|
172
266
|
let zip;
|
|
173
267
|
try {
|
|
174
268
|
zip = await _jszip2.default.loadAsync(buffer);
|
|
@@ -177,7 +271,20 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
177
271
|
}
|
|
178
272
|
const actualEntryCount = Object.keys(zip.files).length;
|
|
179
273
|
if (actualEntryCount > MAX_ZIP_ENTRIES) {
|
|
180
|
-
throw new (0,
|
|
274
|
+
throw new (0, _chunkVLSATRNQcjs.KordocError)("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
275
|
+
}
|
|
276
|
+
const manifestFile = zip.file("META-INF/manifest.xml");
|
|
277
|
+
if (manifestFile) {
|
|
278
|
+
const manifestXml = await manifestFile.async("text");
|
|
279
|
+
if (isEncryptedHwpx(manifestXml)) {
|
|
280
|
+
if (isComFallbackAvailable() && _optionalChain([options, 'optionalAccess', _4 => _4.filePath])) {
|
|
281
|
+
const { pages, pageCount, warnings: warnings2 } = extractTextViaCom(options.filePath);
|
|
282
|
+
if (pages.some((p) => p && p.trim().length > 0)) {
|
|
283
|
+
return comResultToParseResult(pages, pageCount, warnings2);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
throw new (0, _chunkVLSATRNQcjs.KordocError)("DRM \uC554\uD638\uD654\uB41C HWPX \uD30C\uC77C\uC785\uB2C8\uB2E4. Windows + \uD55C\uCEF4 \uC624\uD53C\uC2A4 \uC124\uCE58 \uC2DC \uC790\uB3D9 \uCD94\uCD9C\uB429\uB2C8\uB2E4.");
|
|
287
|
+
}
|
|
181
288
|
}
|
|
182
289
|
const decompressed = { total: 0 };
|
|
183
290
|
const metadata = {};
|
|
@@ -185,9 +292,9 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
185
292
|
const styleMap = await extractHwpxStyles(zip, decompressed);
|
|
186
293
|
const warnings = [];
|
|
187
294
|
const sectionPaths = await resolveSectionPaths(zip);
|
|
188
|
-
if (sectionPaths.length === 0) throw new (0,
|
|
295
|
+
if (sectionPaths.length === 0) throw new (0, _chunkVLSATRNQcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
189
296
|
metadata.pageCount = sectionPaths.length;
|
|
190
|
-
const pageFilter = _optionalChain([options, 'optionalAccess',
|
|
297
|
+
const pageFilter = _optionalChain([options, 'optionalAccess', _5 => _5.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sectionPaths.length) : null;
|
|
191
298
|
const totalTarget = pageFilter ? pageFilter.size : sectionPaths.length;
|
|
192
299
|
const blocks = [];
|
|
193
300
|
const nestedTableCounter = { count: 0 };
|
|
@@ -199,19 +306,19 @@ async function parseHwpxDocument(buffer, options) {
|
|
|
199
306
|
try {
|
|
200
307
|
const xml = await file.async("text");
|
|
201
308
|
decompressed.total += xml.length * 2;
|
|
202
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
309
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkVLSATRNQcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
203
310
|
blocks.push(...parseSectionXml(xml, styleMap, warnings, si + 1, nestedTableCounter));
|
|
204
311
|
parsedSections++;
|
|
205
|
-
_optionalChain([options, 'optionalAccess',
|
|
312
|
+
_optionalChain([options, 'optionalAccess', _6 => _6.onProgress, 'optionalCall', _7 => _7(parsedSections, totalTarget)]);
|
|
206
313
|
} catch (secErr) {
|
|
207
|
-
if (secErr instanceof
|
|
314
|
+
if (secErr instanceof _chunkVLSATRNQcjs.KordocError) throw secErr;
|
|
208
315
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
209
316
|
}
|
|
210
317
|
}
|
|
211
318
|
const images = await extractImagesFromZip(zip, blocks, decompressed, warnings);
|
|
212
319
|
detectHwpxHeadings(blocks, styleMap);
|
|
213
320
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
214
|
-
const markdown =
|
|
321
|
+
const markdown = _chunkVLSATRNQcjs.blocksToMarkdown.call(void 0, blocks);
|
|
215
322
|
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
216
323
|
}
|
|
217
324
|
function imageExtToMime(ext) {
|
|
@@ -275,13 +382,13 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
275
382
|
let found = false;
|
|
276
383
|
const allCandidates = resolvedPath ? [resolvedPath, ...candidates] : candidates;
|
|
277
384
|
for (const path of allCandidates) {
|
|
278
|
-
if (
|
|
385
|
+
if (_chunkVLSATRNQcjs.isPathTraversal.call(void 0, path)) continue;
|
|
279
386
|
const file = zip.file(path);
|
|
280
387
|
if (!file) continue;
|
|
281
388
|
try {
|
|
282
389
|
const data = await file.async("uint8array");
|
|
283
390
|
decompressed.total += data.length;
|
|
284
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
391
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkVLSATRNQcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
285
392
|
const actualPath = path;
|
|
286
393
|
const ext = actualPath.includes(".") ? actualPath.split(".").pop() || "png" : "png";
|
|
287
394
|
const mimeType = imageExtToMime(ext);
|
|
@@ -293,11 +400,11 @@ async function extractImagesFromZip(zip, blocks, decompressed, warnings) {
|
|
|
293
400
|
found = true;
|
|
294
401
|
break;
|
|
295
402
|
} catch (err) {
|
|
296
|
-
if (err instanceof
|
|
403
|
+
if (err instanceof _chunkVLSATRNQcjs.KordocError) throw err;
|
|
297
404
|
}
|
|
298
405
|
}
|
|
299
406
|
if (!found) {
|
|
300
|
-
_optionalChain([warnings, 'optionalAccess',
|
|
407
|
+
_optionalChain([warnings, 'optionalAccess', _8 => _8.push, 'call', _9 => _9({ page: block.pageNumber, message: `\uC774\uBBF8\uC9C0 \uD30C\uC77C \uC5C6\uC74C: ${ref}`, code: "SKIPPED_IMAGE" })]);
|
|
301
408
|
block.type = "paragraph";
|
|
302
409
|
block.text = `[\uC774\uBBF8\uC9C0: ${ref}]`;
|
|
303
410
|
}
|
|
@@ -313,7 +420,7 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
313
420
|
const xml = await file.async("text");
|
|
314
421
|
if (decompressed) {
|
|
315
422
|
decompressed.total += xml.length * 2;
|
|
316
|
-
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
423
|
+
if (decompressed.total > MAX_DECOMPRESS_SIZE) throw new (0, _chunkVLSATRNQcjs.KordocError)("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
317
424
|
}
|
|
318
425
|
parseDublinCoreMetadata(xml, metadata);
|
|
319
426
|
if (metadata.title || metadata.author) return;
|
|
@@ -323,13 +430,13 @@ async function extractHwpxMetadata(zip, metadata, decompressed) {
|
|
|
323
430
|
}
|
|
324
431
|
function parseDublinCoreMetadata(xml, metadata) {
|
|
325
432
|
const parser = createXmlParser();
|
|
326
|
-
const doc = parser.parseFromString(
|
|
433
|
+
const doc = parser.parseFromString(_chunkVLSATRNQcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
327
434
|
if (!doc.documentElement) return;
|
|
328
435
|
const getText = (tagNames) => {
|
|
329
436
|
for (const tag of tagNames) {
|
|
330
437
|
const els = doc.getElementsByTagName(tag);
|
|
331
438
|
if (els.length > 0) {
|
|
332
|
-
const text = _optionalChain([els, 'access',
|
|
439
|
+
const text = _optionalChain([els, 'access', _10 => _10[0], 'access', _11 => _11.textContent, 'optionalAccess', _12 => _12.trim, 'call', _13 => _13()]);
|
|
333
440
|
if (text) return text;
|
|
334
441
|
}
|
|
335
442
|
}
|
|
@@ -383,7 +490,7 @@ function extractFromBrokenZip(buffer) {
|
|
|
383
490
|
}
|
|
384
491
|
const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
|
|
385
492
|
const name = new TextDecoder().decode(nameBytes);
|
|
386
|
-
if (
|
|
493
|
+
if (_chunkVLSATRNQcjs.isPathTraversal.call(void 0, name)) {
|
|
387
494
|
pos = fileStart + compSize;
|
|
388
495
|
continue;
|
|
389
496
|
}
|
|
@@ -401,15 +508,15 @@ function extractFromBrokenZip(buffer) {
|
|
|
401
508
|
continue;
|
|
402
509
|
}
|
|
403
510
|
totalDecompressed += content.length * 2;
|
|
404
|
-
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0,
|
|
511
|
+
if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new (0, _chunkVLSATRNQcjs.KordocError)("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
|
|
405
512
|
sectionNum++;
|
|
406
513
|
blocks.push(...parseSectionXml(content, void 0, warnings, sectionNum, nestedTableCounter));
|
|
407
514
|
} catch (e6) {
|
|
408
515
|
continue;
|
|
409
516
|
}
|
|
410
517
|
}
|
|
411
|
-
if (blocks.length === 0) throw new (0,
|
|
412
|
-
const markdown =
|
|
518
|
+
if (blocks.length === 0) throw new (0, _chunkVLSATRNQcjs.KordocError)("\uC190\uC0C1\uB41C HWPX\uC5D0\uC11C \uC139\uC158 \uB370\uC774\uD130\uB97C \uBCF5\uAD6C\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
519
|
+
const markdown = _chunkVLSATRNQcjs.blocksToMarkdown.call(void 0, blocks);
|
|
413
520
|
return { markdown, blocks, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
414
521
|
}
|
|
415
522
|
async function resolveSectionPaths(zip) {
|
|
@@ -427,7 +534,7 @@ async function resolveSectionPaths(zip) {
|
|
|
427
534
|
}
|
|
428
535
|
function parseSectionPathsFromManifest(xml) {
|
|
429
536
|
const parser = createXmlParser();
|
|
430
|
-
const doc = parser.parseFromString(
|
|
537
|
+
const doc = parser.parseFromString(_chunkVLSATRNQcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
431
538
|
const items = doc.getElementsByTagName("opf:item");
|
|
432
539
|
const spine = doc.getElementsByTagName("opf:itemref");
|
|
433
540
|
const isSectionId = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
|
|
@@ -456,7 +563,7 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
456
563
|
let baseFontSize = 0;
|
|
457
564
|
const sizeFreq = /* @__PURE__ */ new Map();
|
|
458
565
|
for (const b of blocks) {
|
|
459
|
-
if (_optionalChain([b, 'access',
|
|
566
|
+
if (_optionalChain([b, 'access', _14 => _14.style, 'optionalAccess', _15 => _15.fontSize])) {
|
|
460
567
|
sizeFreq.set(b.style.fontSize, (sizeFreq.get(b.style.fontSize) || 0) + 1);
|
|
461
568
|
}
|
|
462
569
|
}
|
|
@@ -472,11 +579,11 @@ function detectHwpxHeadings(blocks, styleMap) {
|
|
|
472
579
|
const text = block.text.trim();
|
|
473
580
|
if (text.length === 0 || text.length > 200 || /^\d+$/.test(text)) continue;
|
|
474
581
|
let level = 0;
|
|
475
|
-
if (baseFontSize > 0 && _optionalChain([block, 'access',
|
|
582
|
+
if (baseFontSize > 0 && _optionalChain([block, 'access', _16 => _16.style, 'optionalAccess', _17 => _17.fontSize])) {
|
|
476
583
|
const ratio = block.style.fontSize / baseFontSize;
|
|
477
|
-
if (ratio >=
|
|
478
|
-
else if (ratio >=
|
|
479
|
-
else if (ratio >=
|
|
584
|
+
if (ratio >= _chunkVLSATRNQcjs.HEADING_RATIO_H1) level = 1;
|
|
585
|
+
else if (ratio >= _chunkVLSATRNQcjs.HEADING_RATIO_H2) level = 2;
|
|
586
|
+
else if (ratio >= _chunkVLSATRNQcjs.HEADING_RATIO_H3) level = 3;
|
|
480
587
|
}
|
|
481
588
|
const compactText = text.replace(/\s+/g, "");
|
|
482
589
|
if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
|
|
@@ -501,13 +608,13 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
501
608
|
let nestedCols = 0;
|
|
502
609
|
for (const r of newTable.rows) if (r.length > nestedCols) nestedCols = r.length;
|
|
503
610
|
if (newTable.rows.length >= 3 && nestedCols >= 2) {
|
|
504
|
-
blocks.push({ type: "table", table:
|
|
611
|
+
blocks.push({ type: "table", table: _chunkVLSATRNQcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
505
612
|
if (parentTable.cell) {
|
|
506
613
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
507
614
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker;
|
|
508
615
|
}
|
|
509
616
|
} else {
|
|
510
|
-
const nestedText =
|
|
617
|
+
const nestedText = _chunkVLSATRNQcjs.convertTableToText.call(void 0, newTable.rows);
|
|
511
618
|
if (parentTable.cell) {
|
|
512
619
|
const marker = ctx.counter ? makeNestedTableMarker(ctx.counter, newTable.rows) : "[\uC911\uCCA9 \uD14C\uC774\uBE14]";
|
|
513
620
|
parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + marker + "\n" + nestedText;
|
|
@@ -517,7 +624,7 @@ function handleNestedTable(newTable, tableStack, blocks, ctx) {
|
|
|
517
624
|
}
|
|
518
625
|
function parseSectionXml(xml, styleMap, warnings, sectionNum, counter) {
|
|
519
626
|
const parser = createXmlParser(warnings);
|
|
520
|
-
const doc = parser.parseFromString(
|
|
627
|
+
const doc = parser.parseFromString(_chunkVLSATRNQcjs.stripDtd.call(void 0, xml), "text/xml");
|
|
521
628
|
if (!doc.documentElement) return [];
|
|
522
629
|
const blocks = [];
|
|
523
630
|
const ctx = { styleMap, warnings, sectionNum, counter };
|
|
@@ -560,7 +667,7 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
560
667
|
if (tableStack.length > 0) {
|
|
561
668
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
562
669
|
} else {
|
|
563
|
-
blocks.push({ type: "table", table:
|
|
670
|
+
blocks.push({ type: "table", table: _chunkVLSATRNQcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
564
671
|
tableCtx = null;
|
|
565
672
|
}
|
|
566
673
|
} else {
|
|
@@ -587,7 +694,7 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
587
694
|
}
|
|
588
695
|
break;
|
|
589
696
|
case "cellAddr":
|
|
590
|
-
if (_optionalChain([tableCtx, 'optionalAccess',
|
|
697
|
+
if (_optionalChain([tableCtx, 'optionalAccess', _18 => _18.cell])) {
|
|
591
698
|
const ca = parseInt(el.getAttribute("colAddr") || "", 10);
|
|
592
699
|
const ra = parseInt(el.getAttribute("rowAddr") || "", 10);
|
|
593
700
|
if (!isNaN(ca)) tableCtx.cell.colAddr = ca;
|
|
@@ -595,19 +702,19 @@ function walkSection(node, blocks, tableCtx, tableStack, ctx, depth = 0) {
|
|
|
595
702
|
}
|
|
596
703
|
break;
|
|
597
704
|
case "cellSpan":
|
|
598
|
-
if (_optionalChain([tableCtx, 'optionalAccess',
|
|
705
|
+
if (_optionalChain([tableCtx, 'optionalAccess', _19 => _19.cell])) {
|
|
599
706
|
const rawCs = parseInt(el.getAttribute("colSpan") || "1", 10);
|
|
600
707
|
const cs = isNaN(rawCs) ? 1 : rawCs;
|
|
601
708
|
const rawRs = parseInt(el.getAttribute("rowSpan") || "1", 10);
|
|
602
709
|
const rs = isNaN(rawRs) ? 1 : rawRs;
|
|
603
|
-
tableCtx.cell.colSpan = clampSpan(cs,
|
|
604
|
-
tableCtx.cell.rowSpan = clampSpan(rs,
|
|
710
|
+
tableCtx.cell.colSpan = clampSpan(cs, _chunkVLSATRNQcjs.MAX_COLS);
|
|
711
|
+
tableCtx.cell.rowSpan = clampSpan(rs, _chunkVLSATRNQcjs.MAX_ROWS);
|
|
605
712
|
}
|
|
606
713
|
break;
|
|
607
714
|
case "p": {
|
|
608
715
|
const { text, href, footnote, style } = extractParagraphInfo(el, ctx.styleMap);
|
|
609
716
|
if (text) {
|
|
610
|
-
if (_optionalChain([tableCtx, 'optionalAccess',
|
|
717
|
+
if (_optionalChain([tableCtx, 'optionalAccess', _20 => _20.cell])) {
|
|
611
718
|
tableCtx.cell.text += (tableCtx.cell.text ? "\n" : "") + text;
|
|
612
719
|
} else if (!tableCtx) {
|
|
613
720
|
const block = { type: "paragraph", text, pageNumber: ctx.sectionNum };
|
|
@@ -659,7 +766,7 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, ctx, depth =
|
|
|
659
766
|
if (tableStack.length > 0) {
|
|
660
767
|
tableCtx = handleNestedTable(newTable, tableStack, blocks, ctx);
|
|
661
768
|
} else {
|
|
662
|
-
blocks.push({ type: "table", table:
|
|
769
|
+
blocks.push({ type: "table", table: _chunkVLSATRNQcjs.buildTable.call(void 0, newTable.rows), pageNumber: ctx.sectionNum });
|
|
663
770
|
tableCtx = null;
|
|
664
771
|
}
|
|
665
772
|
} else {
|
|
@@ -767,7 +874,7 @@ function extractParagraphInfo(para, styleMap) {
|
|
|
767
874
|
case "hyperlink": {
|
|
768
875
|
const url = child.getAttribute("url") || child.getAttribute("href") || "";
|
|
769
876
|
if (url) {
|
|
770
|
-
const safe =
|
|
877
|
+
const safe = _chunkVLSATRNQcjs.sanitizeHref.call(void 0, url);
|
|
771
878
|
if (safe) href = safe;
|
|
772
879
|
}
|
|
773
880
|
walk(child);
|
|
@@ -907,7 +1014,7 @@ function decompressStream(data) {
|
|
|
907
1014
|
return _zlib.inflateRawSync.call(void 0, data, opts);
|
|
908
1015
|
}
|
|
909
1016
|
function parseFileHeader(data) {
|
|
910
|
-
if (data.length < 40) throw new (0,
|
|
1017
|
+
if (data.length < 40) throw new (0, _chunkVLSATRNQcjs.KordocError)("FileHeader\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 40\uBC14\uC774\uD2B8)");
|
|
911
1018
|
const sig = data.subarray(0, 32).toString("utf8").replace(/\0+$/, "");
|
|
912
1019
|
return {
|
|
913
1020
|
signature: sig,
|
|
@@ -1926,22 +2033,22 @@ function parseHwp5Document(buffer, options) {
|
|
|
1926
2033
|
lenientCfb = parseLenientCfb(buffer);
|
|
1927
2034
|
warnings.push({ message: "\uC190\uC0C1\uB41C CFB \uCEE8\uD14C\uC774\uB108 \u2014 lenient \uBAA8\uB4DC\uB85C \uBCF5\uAD6C", code: "LENIENT_CFB_RECOVERY" });
|
|
1928
2035
|
} catch (e11) {
|
|
1929
|
-
throw new (0,
|
|
2036
|
+
throw new (0, _chunkVLSATRNQcjs.KordocError)("CFB \uCEE8\uD14C\uC774\uB108 \uD30C\uC2F1 \uC2E4\uD328 (strict \uBC0F lenient \uBAA8\uB450)");
|
|
1930
2037
|
}
|
|
1931
2038
|
}
|
|
1932
2039
|
const findStream = (path) => {
|
|
1933
2040
|
if (cfb) {
|
|
1934
2041
|
const entry = CFB.find(cfb, path);
|
|
1935
|
-
return _optionalChain([entry, 'optionalAccess',
|
|
2042
|
+
return _optionalChain([entry, 'optionalAccess', _21 => _21.content]) ? Buffer.from(entry.content) : null;
|
|
1936
2043
|
}
|
|
1937
2044
|
return lenientCfb.findStream(path);
|
|
1938
2045
|
};
|
|
1939
2046
|
const headerData = findStream("/FileHeader");
|
|
1940
|
-
if (!headerData) throw new (0,
|
|
2047
|
+
if (!headerData) throw new (0, _chunkVLSATRNQcjs.KordocError)("FileHeader \uC2A4\uD2B8\uB9BC \uC5C6\uC74C");
|
|
1941
2048
|
const header = parseFileHeader(headerData);
|
|
1942
|
-
if (header.signature !== "HWP Document File") throw new (0,
|
|
1943
|
-
if (header.flags & FLAG_ENCRYPTED) throw new (0,
|
|
1944
|
-
if (header.flags & FLAG_DRM) throw new (0,
|
|
2049
|
+
if (header.signature !== "HWP Document File") throw new (0, _chunkVLSATRNQcjs.KordocError)("HWP \uC2DC\uADF8\uB2C8\uCC98 \uBD88\uC77C\uCE58");
|
|
2050
|
+
if (header.flags & FLAG_ENCRYPTED) throw new (0, _chunkVLSATRNQcjs.KordocError)("\uC554\uD638\uD654\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
2051
|
+
if (header.flags & FLAG_DRM) throw new (0, _chunkVLSATRNQcjs.KordocError)("DRM \uBCF4\uD638\uB41C HWP\uB294 \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4");
|
|
1945
2052
|
const compressed = (header.flags & FLAG_COMPRESSED) !== 0;
|
|
1946
2053
|
const distribution = (header.flags & FLAG_DISTRIBUTION) !== 0;
|
|
1947
2054
|
const metadata = {
|
|
@@ -1950,9 +2057,9 @@ function parseHwp5Document(buffer, options) {
|
|
|
1950
2057
|
if (cfb) extractHwp5Metadata(cfb, metadata);
|
|
1951
2058
|
const docInfo = cfb ? parseDocInfoStream(cfb, compressed) : parseDocInfoFromStream(findStream("/DocInfo"), compressed);
|
|
1952
2059
|
const sections = distribution ? cfb ? findViewTextSections(cfb, compressed) : findViewTextSectionsLenient(lenientCfb, compressed) : cfb ? findSections(cfb) : findSectionsLenient(lenientCfb, compressed);
|
|
1953
|
-
if (sections.length === 0) throw new (0,
|
|
2060
|
+
if (sections.length === 0) throw new (0, _chunkVLSATRNQcjs.KordocError)("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
1954
2061
|
metadata.pageCount = sections.length;
|
|
1955
|
-
const pageFilter = _optionalChain([options, 'optionalAccess',
|
|
2062
|
+
const pageFilter = _optionalChain([options, 'optionalAccess', _22 => _22.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, sections.length) : null;
|
|
1956
2063
|
const totalTarget = pageFilter ? pageFilter.size : sections.length;
|
|
1957
2064
|
const blocks = [];
|
|
1958
2065
|
const nestedTableCounter = { count: 0 };
|
|
@@ -1964,30 +2071,30 @@ function parseHwp5Document(buffer, options) {
|
|
|
1964
2071
|
const sectionData = sections[si];
|
|
1965
2072
|
const data = !distribution && compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
|
|
1966
2073
|
totalDecompressed += data.length;
|
|
1967
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2074
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkVLSATRNQcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
1968
2075
|
const records = readRecords(data);
|
|
1969
2076
|
const sectionBlocks = parseSection(records, docInfo, warnings, si + 1, nestedTableCounter);
|
|
1970
2077
|
blocks.push(...sectionBlocks);
|
|
1971
2078
|
parsedSections++;
|
|
1972
|
-
_optionalChain([options, 'optionalAccess',
|
|
2079
|
+
_optionalChain([options, 'optionalAccess', _23 => _23.onProgress, 'optionalCall', _24 => _24(parsedSections, totalTarget)]);
|
|
1973
2080
|
} catch (secErr) {
|
|
1974
|
-
if (secErr instanceof
|
|
2081
|
+
if (secErr instanceof _chunkVLSATRNQcjs.KordocError) throw secErr;
|
|
1975
2082
|
warnings.push({ page: si + 1, message: `\uC139\uC158 ${si + 1} \uD30C\uC2F1 \uC2E4\uD328: ${secErr instanceof Error ? secErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
1976
2083
|
}
|
|
1977
2084
|
}
|
|
1978
2085
|
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
1979
|
-
const flatBlocks =
|
|
2086
|
+
const flatBlocks = _chunkVLSATRNQcjs.flattenLayoutTables.call(void 0, blocks);
|
|
1980
2087
|
if (docInfo) {
|
|
1981
2088
|
detectHwp5Headings(flatBlocks, docInfo);
|
|
1982
2089
|
}
|
|
1983
2090
|
const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
1984
|
-
const markdown =
|
|
2091
|
+
const markdown = _chunkVLSATRNQcjs.blocksToMarkdown.call(void 0, flatBlocks);
|
|
1985
2092
|
return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
1986
2093
|
}
|
|
1987
2094
|
function parseDocInfoStream(cfb, compressed) {
|
|
1988
2095
|
try {
|
|
1989
2096
|
const entry = CFB.find(cfb, "/DocInfo");
|
|
1990
|
-
if (!_optionalChain([entry, 'optionalAccess',
|
|
2097
|
+
if (!_optionalChain([entry, 'optionalAccess', _25 => _25.content])) return null;
|
|
1991
2098
|
const data = compressed ? decompressStream(Buffer.from(entry.content)) : Buffer.from(entry.content);
|
|
1992
2099
|
const records = readRecords(data);
|
|
1993
2100
|
return parseDocInfo(records);
|
|
@@ -2010,7 +2117,7 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2010
2117
|
const name = (style.nameKo || style.name).toLowerCase();
|
|
2011
2118
|
if (name.includes("\uBC14\uD0D5") || name.includes("\uBCF8\uBB38") || name === "normal" || name === "body") {
|
|
2012
2119
|
const cs = docInfo.charShapes[style.charShapeId];
|
|
2013
|
-
if (_optionalChain([cs, 'optionalAccess',
|
|
2120
|
+
if (_optionalChain([cs, 'optionalAccess', _26 => _26.fontSize]) > 0) {
|
|
2014
2121
|
baseFontSize = cs.fontSize / 10;
|
|
2015
2122
|
break;
|
|
2016
2123
|
}
|
|
@@ -2019,7 +2126,7 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2019
2126
|
if (baseFontSize === 0) {
|
|
2020
2127
|
const sizeFreq = /* @__PURE__ */ new Map();
|
|
2021
2128
|
for (const b of blocks) {
|
|
2022
|
-
if (_optionalChain([b, 'access',
|
|
2129
|
+
if (_optionalChain([b, 'access', _27 => _27.style, 'optionalAccess', _28 => _28.fontSize])) {
|
|
2023
2130
|
sizeFreq.set(b.style.fontSize, (sizeFreq.get(b.style.fontSize) || 0) + 1);
|
|
2024
2131
|
}
|
|
2025
2132
|
}
|
|
@@ -2039,11 +2146,11 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2039
2146
|
if (text.length === 0 || text.length > 200) continue;
|
|
2040
2147
|
if (/^\d+$/.test(text)) continue;
|
|
2041
2148
|
let level = 0;
|
|
2042
|
-
if (_optionalChain([block, 'access',
|
|
2149
|
+
if (_optionalChain([block, 'access', _29 => _29.style, 'optionalAccess', _30 => _30.fontSize]) && baseFontSize > 0) {
|
|
2043
2150
|
const ratio = block.style.fontSize / baseFontSize;
|
|
2044
|
-
if (ratio >=
|
|
2045
|
-
else if (ratio >=
|
|
2046
|
-
else if (ratio >=
|
|
2151
|
+
if (ratio >= _chunkVLSATRNQcjs.HEADING_RATIO_H1) level = 1;
|
|
2152
|
+
else if (ratio >= _chunkVLSATRNQcjs.HEADING_RATIO_H2) level = 2;
|
|
2153
|
+
else if (ratio >= _chunkVLSATRNQcjs.HEADING_RATIO_H3) level = 3;
|
|
2047
2154
|
}
|
|
2048
2155
|
if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
|
|
2049
2156
|
if (level === 0) level = 2;
|
|
@@ -2059,7 +2166,7 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2059
2166
|
function extractHwp5Metadata(cfb, metadata) {
|
|
2060
2167
|
try {
|
|
2061
2168
|
const summaryEntry = CFB.find(cfb, "/HwpSummaryInformation") || CFB.find(cfb, "/SummaryInformation");
|
|
2062
|
-
if (!_optionalChain([summaryEntry, 'optionalAccess',
|
|
2169
|
+
if (!_optionalChain([summaryEntry, 'optionalAccess', _31 => _31.content])) return;
|
|
2063
2170
|
const data = Buffer.from(summaryEntry.content);
|
|
2064
2171
|
if (data.length < 48) return;
|
|
2065
2172
|
const numSets = data.readUInt32LE(24);
|
|
@@ -2092,7 +2199,7 @@ function findViewTextSections(cfb, compressed) {
|
|
|
2092
2199
|
const sections = [];
|
|
2093
2200
|
for (let i = 0; i < MAX_SECTIONS; i++) {
|
|
2094
2201
|
const entry = CFB.find(cfb, `/ViewText/Section${i}`);
|
|
2095
|
-
if (!_optionalChain([entry, 'optionalAccess',
|
|
2202
|
+
if (!_optionalChain([entry, 'optionalAccess', _32 => _32.content])) break;
|
|
2096
2203
|
try {
|
|
2097
2204
|
const decrypted = decryptViewText(Buffer.from(entry.content), compressed);
|
|
2098
2205
|
sections.push({ idx: i, content: decrypted });
|
|
@@ -2106,13 +2213,13 @@ function findSections(cfb) {
|
|
|
2106
2213
|
const sections = [];
|
|
2107
2214
|
for (let i = 0; i < MAX_SECTIONS; i++) {
|
|
2108
2215
|
const entry = CFB.find(cfb, `/BodyText/Section${i}`);
|
|
2109
|
-
if (!_optionalChain([entry, 'optionalAccess',
|
|
2216
|
+
if (!_optionalChain([entry, 'optionalAccess', _33 => _33.content])) break;
|
|
2110
2217
|
sections.push({ idx: i, content: Buffer.from(entry.content) });
|
|
2111
2218
|
}
|
|
2112
2219
|
if (sections.length === 0 && cfb.FileIndex) {
|
|
2113
2220
|
for (const entry of cfb.FileIndex) {
|
|
2114
2221
|
if (sections.length >= MAX_SECTIONS) break;
|
|
2115
|
-
if (_optionalChain([entry, 'access',
|
|
2222
|
+
if (_optionalChain([entry, 'access', _34 => _34.name, 'optionalAccess', _35 => _35.startsWith, 'call', _36 => _36("Section")]) && entry.content) {
|
|
2116
2223
|
const idx = parseInt(entry.name.replace("Section", ""), 10) || 0;
|
|
2117
2224
|
sections.push({ idx, content: Buffer.from(entry.content) });
|
|
2118
2225
|
}
|
|
@@ -2128,7 +2235,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2128
2235
|
if (!raw) break;
|
|
2129
2236
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2130
2237
|
totalDecompressed += content.length;
|
|
2131
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2238
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkVLSATRNQcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2132
2239
|
sections.push({ idx: i, content });
|
|
2133
2240
|
}
|
|
2134
2241
|
if (sections.length === 0) {
|
|
@@ -2140,7 +2247,7 @@ function findSectionsLenient(lcfb, compressed) {
|
|
|
2140
2247
|
if (raw) {
|
|
2141
2248
|
const content = compressed ? decompressStream(raw) : raw;
|
|
2142
2249
|
totalDecompressed += content.length;
|
|
2143
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2250
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkVLSATRNQcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2144
2251
|
sections.push({ idx, content });
|
|
2145
2252
|
}
|
|
2146
2253
|
}
|
|
@@ -2157,7 +2264,7 @@ function findViewTextSectionsLenient(lcfb, compressed) {
|
|
|
2157
2264
|
try {
|
|
2158
2265
|
const content = decryptViewText(raw, compressed);
|
|
2159
2266
|
totalDecompressed += content.length;
|
|
2160
|
-
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0,
|
|
2267
|
+
if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new (0, _chunkVLSATRNQcjs.KordocError)("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
|
|
2161
2268
|
sections.push({ idx: i, content });
|
|
2162
2269
|
} catch (e16) {
|
|
2163
2270
|
break;
|
|
@@ -2195,7 +2302,7 @@ function extractHwp5Images(cfb, blocks, compressed, warnings) {
|
|
|
2195
2302
|
const binDataRe = /\/BinData\/[Bb][Ii][Nn](\d{4})$/;
|
|
2196
2303
|
if (cfb.FileIndex) {
|
|
2197
2304
|
for (const entry of cfb.FileIndex) {
|
|
2198
|
-
if (!_optionalChain([entry, 'optionalAccess',
|
|
2305
|
+
if (!_optionalChain([entry, 'optionalAccess', _37 => _37.name]) || !entry.content) continue;
|
|
2199
2306
|
const match = entry.name.match(binDataRe);
|
|
2200
2307
|
if (!match) continue;
|
|
2201
2308
|
const idx = parseInt(match[1], 10);
|
|
@@ -2345,7 +2452,7 @@ function parseSection(records, docInfo, warnings, sectionNum, counter) {
|
|
|
2345
2452
|
if (url && blocks.length > 0) {
|
|
2346
2453
|
const lastBlock = blocks[blocks.length - 1];
|
|
2347
2454
|
if (lastBlock.type === "paragraph" && !lastBlock.href) {
|
|
2348
|
-
lastBlock.href = _nullishCoalesce(
|
|
2455
|
+
lastBlock.href = _nullishCoalesce(_chunkVLSATRNQcjs.sanitizeHref.call(void 0, url), () => ( void 0));
|
|
2349
2456
|
}
|
|
2350
2457
|
}
|
|
2351
2458
|
}
|
|
@@ -2463,8 +2570,8 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
2463
2570
|
if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
|
|
2464
2571
|
if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
|
|
2465
2572
|
if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
|
|
2466
|
-
rows = Math.min(rec.data.readUInt16LE(4),
|
|
2467
|
-
cols = Math.min(rec.data.readUInt16LE(6),
|
|
2573
|
+
rows = Math.min(rec.data.readUInt16LE(4), _chunkVLSATRNQcjs.MAX_ROWS);
|
|
2574
|
+
cols = Math.min(rec.data.readUInt16LE(6), _chunkVLSATRNQcjs.MAX_COLS);
|
|
2468
2575
|
}
|
|
2469
2576
|
if (rec.tagId === TAG_LIST_HEADER) {
|
|
2470
2577
|
const { cell, nextIdx } = parseCellBlock(records, i, tableLevel, counter);
|
|
@@ -2486,7 +2593,7 @@ function parseTableBlock(records, startIdx, counter) {
|
|
|
2486
2593
|
return { table: { rows, cols, cells: irCells, hasHeader: rows > 1 }, nextIdx: i };
|
|
2487
2594
|
}
|
|
2488
2595
|
const cellRows = arrangeCells(rows, cols, cells);
|
|
2489
|
-
return { table:
|
|
2596
|
+
return { table: _chunkVLSATRNQcjs.buildTable.call(void 0, cellRows), nextIdx: i };
|
|
2490
2597
|
}
|
|
2491
2598
|
function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
2492
2599
|
const rec = records[startIdx];
|
|
@@ -2501,8 +2608,8 @@ function parseCellBlock(records, startIdx, tableLevel, counter) {
|
|
|
2501
2608
|
rowAddr = rec.data.readUInt16LE(10);
|
|
2502
2609
|
const cs = rec.data.readUInt16LE(12);
|
|
2503
2610
|
const rs = rec.data.readUInt16LE(14);
|
|
2504
|
-
if (cs > 0) colSpan = Math.min(cs,
|
|
2505
|
-
if (rs > 0) rowSpan = Math.min(rs,
|
|
2611
|
+
if (cs > 0) colSpan = Math.min(cs, _chunkVLSATRNQcjs.MAX_COLS);
|
|
2612
|
+
if (rs > 0) rowSpan = Math.min(rs, _chunkVLSATRNQcjs.MAX_ROWS);
|
|
2506
2613
|
}
|
|
2507
2614
|
let i = startIdx + 1;
|
|
2508
2615
|
while (i < records.length) {
|
|
@@ -2601,10 +2708,10 @@ function getElements(parent, tagName) {
|
|
|
2601
2708
|
return result;
|
|
2602
2709
|
}
|
|
2603
2710
|
function getTextContent(el) {
|
|
2604
|
-
return _nullishCoalesce(_optionalChain([el, 'access',
|
|
2711
|
+
return _nullishCoalesce(_optionalChain([el, 'access', _38 => _38.textContent, 'optionalAccess', _39 => _39.trim, 'call', _40 => _40()]), () => ( ""));
|
|
2605
2712
|
}
|
|
2606
2713
|
function parseXml(text) {
|
|
2607
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
2714
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkVLSATRNQcjs.stripDtd.call(void 0, text), "text/xml");
|
|
2608
2715
|
}
|
|
2609
2716
|
function parseSharedStrings(xml) {
|
|
2610
2717
|
const doc = parseXml(xml);
|
|
@@ -2741,14 +2848,14 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2741
2848
|
const merge = mergeMap.get(key);
|
|
2742
2849
|
row.push({
|
|
2743
2850
|
text,
|
|
2744
|
-
colSpan: _nullishCoalesce(_optionalChain([merge, 'optionalAccess',
|
|
2745
|
-
rowSpan: _nullishCoalesce(_optionalChain([merge, 'optionalAccess',
|
|
2851
|
+
colSpan: _nullishCoalesce(_optionalChain([merge, 'optionalAccess', _41 => _41.colSpan]), () => ( 1)),
|
|
2852
|
+
rowSpan: _nullishCoalesce(_optionalChain([merge, 'optionalAccess', _42 => _42.rowSpan]), () => ( 1))
|
|
2746
2853
|
});
|
|
2747
2854
|
}
|
|
2748
2855
|
cellRows.push(row);
|
|
2749
2856
|
}
|
|
2750
2857
|
if (cellRows.length > 0) {
|
|
2751
|
-
const table =
|
|
2858
|
+
const table = _chunkVLSATRNQcjs.buildTable.call(void 0, cellRows);
|
|
2752
2859
|
if (table.rows > 0) {
|
|
2753
2860
|
blocks.push({ type: "table", table, pageNumber: sheetIndex + 1 });
|
|
2754
2861
|
}
|
|
@@ -2756,12 +2863,12 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
2756
2863
|
return blocks;
|
|
2757
2864
|
}
|
|
2758
2865
|
async function parseXlsxDocument(buffer, options) {
|
|
2759
|
-
|
|
2866
|
+
_chunkVLSATRNQcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE3);
|
|
2760
2867
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
2761
2868
|
const warnings = [];
|
|
2762
2869
|
const workbookFile = zip.file("xl/workbook.xml");
|
|
2763
2870
|
if (!workbookFile) {
|
|
2764
|
-
throw new (0,
|
|
2871
|
+
throw new (0, _chunkVLSATRNQcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 XLSX \uD30C\uC77C: xl/workbook.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2765
2872
|
}
|
|
2766
2873
|
let sharedStrings = [];
|
|
2767
2874
|
const ssFile = zip.file("xl/sharedStrings.xml");
|
|
@@ -2770,7 +2877,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2770
2877
|
}
|
|
2771
2878
|
const sheets = parseWorkbook(await workbookFile.async("text"));
|
|
2772
2879
|
if (sheets.length === 0) {
|
|
2773
|
-
throw new (0,
|
|
2880
|
+
throw new (0, _chunkVLSATRNQcjs.KordocError)("XLSX \uD30C\uC77C\uC5D0 \uC2DC\uD2B8\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
2774
2881
|
}
|
|
2775
2882
|
let relsMap = /* @__PURE__ */ new Map();
|
|
2776
2883
|
const relsFile = zip.file("xl/_rels/workbook.xml.rels");
|
|
@@ -2778,7 +2885,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2778
2885
|
relsMap = parseRels(await relsFile.async("text"));
|
|
2779
2886
|
}
|
|
2780
2887
|
let pageFilter = null;
|
|
2781
|
-
if (_optionalChain([options, 'optionalAccess',
|
|
2888
|
+
if (_optionalChain([options, 'optionalAccess', _43 => _43.pages])) {
|
|
2782
2889
|
const { parsePageRange: parsePageRange2 } = await Promise.resolve().then(() => _interopRequireWildcard(require("./page-range-3C7UGGEK.cjs")));
|
|
2783
2890
|
pageFilter = parsePageRange2(options.pages, sheets.length);
|
|
2784
2891
|
}
|
|
@@ -2787,7 +2894,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2787
2894
|
for (let i = 0; i < processedSheets; i++) {
|
|
2788
2895
|
if (pageFilter && !pageFilter.has(i + 1)) continue;
|
|
2789
2896
|
const sheet = sheets[i];
|
|
2790
|
-
_optionalChain([options, 'optionalAccess',
|
|
2897
|
+
_optionalChain([options, 'optionalAccess', _44 => _44.onProgress, 'optionalCall', _45 => _45(i + 1, processedSheets)]);
|
|
2791
2898
|
let sheetPath = relsMap.get(sheet.rId);
|
|
2792
2899
|
if (sheetPath) {
|
|
2793
2900
|
if (!sheetPath.startsWith("xl/") && !sheetPath.startsWith("/")) {
|
|
@@ -2842,7 +2949,7 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
2842
2949
|
} catch (e20) {
|
|
2843
2950
|
}
|
|
2844
2951
|
}
|
|
2845
|
-
const markdown =
|
|
2952
|
+
const markdown = _chunkVLSATRNQcjs.blocksToMarkdown.call(void 0, blocks);
|
|
2846
2953
|
return { markdown, blocks, metadata, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
2847
2954
|
}
|
|
2848
2955
|
|
|
@@ -2857,7 +2964,7 @@ function getChildElements(parent, localName3) {
|
|
|
2857
2964
|
const node = children[i];
|
|
2858
2965
|
if (node.nodeType === 1) {
|
|
2859
2966
|
const el = node;
|
|
2860
|
-
if (el.localName === localName3 || _optionalChain([el, 'access',
|
|
2967
|
+
if (el.localName === localName3 || _optionalChain([el, 'access', _46 => _46.tagName, 'optionalAccess', _47 => _47.endsWith, 'call', _48 => _48(`:${localName3}`)])) {
|
|
2861
2968
|
result.push(el);
|
|
2862
2969
|
}
|
|
2863
2970
|
}
|
|
@@ -2872,7 +2979,7 @@ function findElements(parent, localName3) {
|
|
|
2872
2979
|
const child = children[i];
|
|
2873
2980
|
if (child.nodeType === 1) {
|
|
2874
2981
|
const el = child;
|
|
2875
|
-
if (el.localName === localName3 || _optionalChain([el, 'access',
|
|
2982
|
+
if (el.localName === localName3 || _optionalChain([el, 'access', _49 => _49.tagName, 'optionalAccess', _50 => _50.endsWith, 'call', _51 => _51(`:${localName3}`)])) {
|
|
2876
2983
|
result.push(el);
|
|
2877
2984
|
}
|
|
2878
2985
|
walk(el);
|
|
@@ -2891,7 +2998,7 @@ function getAttr(el, localName3) {
|
|
|
2891
2998
|
return null;
|
|
2892
2999
|
}
|
|
2893
3000
|
function parseXml2(text) {
|
|
2894
|
-
return new (0, _xmldom.DOMParser)().parseFromString(
|
|
3001
|
+
return new (0, _xmldom.DOMParser)().parseFromString(_chunkVLSATRNQcjs.stripDtd.call(void 0, text), "text/xml");
|
|
2895
3002
|
}
|
|
2896
3003
|
function parseStyles(xml) {
|
|
2897
3004
|
const doc = parseXml2(xml);
|
|
@@ -3056,7 +3163,7 @@ function parseParagraph(p, styles, numbering, footnotes, rels) {
|
|
|
3056
3163
|
const text = parts.join("").trim();
|
|
3057
3164
|
if (!text) return null;
|
|
3058
3165
|
const style = styles.get(styleId);
|
|
3059
|
-
if (_optionalChain([style, 'optionalAccess',
|
|
3166
|
+
if (_optionalChain([style, 'optionalAccess', _52 => _52.outlineLevel]) !== void 0 && style.outlineLevel >= 0 && style.outlineLevel <= 5) {
|
|
3060
3167
|
return {
|
|
3061
3168
|
type: "heading",
|
|
3062
3169
|
text,
|
|
@@ -3065,8 +3172,8 @@ function parseParagraph(p, styles, numbering, footnotes, rels) {
|
|
|
3065
3172
|
}
|
|
3066
3173
|
if (numId && numId !== "0") {
|
|
3067
3174
|
const numDef = numbering.get(numId);
|
|
3068
|
-
const levelInfo = _optionalChain([numDef, 'optionalAccess',
|
|
3069
|
-
const listType = _optionalChain([levelInfo, 'optionalAccess',
|
|
3175
|
+
const levelInfo = _optionalChain([numDef, 'optionalAccess', _53 => _53.get, 'call', _54 => _54(ilvl)]);
|
|
3176
|
+
const listType = _optionalChain([levelInfo, 'optionalAccess', _55 => _55.numFmt]) === "bullet" ? "unordered" : "ordered";
|
|
3070
3177
|
return { type: "list", text, listType };
|
|
3071
3178
|
}
|
|
3072
3179
|
const block = { type: "paragraph", text };
|
|
@@ -3107,7 +3214,7 @@ function parseTable(tbl, styles, numbering, footnotes, rels) {
|
|
|
3107
3214
|
const pElements = getChildElements(tc, "p");
|
|
3108
3215
|
for (const p of pElements) {
|
|
3109
3216
|
const block = parseParagraph(p, styles, numbering, footnotes, rels);
|
|
3110
|
-
if (_optionalChain([block, 'optionalAccess',
|
|
3217
|
+
if (_optionalChain([block, 'optionalAccess', _56 => _56.text])) cellTexts.push(block.text);
|
|
3111
3218
|
}
|
|
3112
3219
|
row.push({ text: cellTexts.join("\n"), colSpan, rowSpan });
|
|
3113
3220
|
}
|
|
@@ -3120,7 +3227,7 @@ function parseTable(tbl, styles, numbering, footnotes, rels) {
|
|
|
3120
3227
|
if (!cell || cell.rowSpan === 0) continue;
|
|
3121
3228
|
let span = 1;
|
|
3122
3229
|
for (let nr = r + 1; nr < rows.length; nr++) {
|
|
3123
|
-
if (_optionalChain([rows, 'access',
|
|
3230
|
+
if (_optionalChain([rows, 'access', _57 => _57[nr], 'access', _58 => _58[c], 'optionalAccess', _59 => _59.rowSpan]) === 0) span++;
|
|
3124
3231
|
else break;
|
|
3125
3232
|
}
|
|
3126
3233
|
cell.rowSpan = span;
|
|
@@ -3164,7 +3271,7 @@ async function extractImages(zip, rels, doc) {
|
|
|
3164
3271
|
try {
|
|
3165
3272
|
const data = await imgFile.async("uint8array");
|
|
3166
3273
|
imgIdx++;
|
|
3167
|
-
const ext = _nullishCoalesce(_optionalChain([imgPath, 'access',
|
|
3274
|
+
const ext = _nullishCoalesce(_optionalChain([imgPath, 'access', _60 => _60.split, 'call', _61 => _61("."), 'access', _62 => _62.pop, 'call', _63 => _63(), 'optionalAccess', _64 => _64.toLowerCase, 'call', _65 => _65()]), () => ( "png"));
|
|
3168
3275
|
const mimeMap = {
|
|
3169
3276
|
png: "image/png",
|
|
3170
3277
|
jpg: "image/jpeg",
|
|
@@ -3184,12 +3291,12 @@ async function extractImages(zip, rels, doc) {
|
|
|
3184
3291
|
return { blocks, images };
|
|
3185
3292
|
}
|
|
3186
3293
|
async function parseDocxDocument(buffer, options) {
|
|
3187
|
-
|
|
3294
|
+
_chunkVLSATRNQcjs.precheckZipSize.call(void 0, buffer, MAX_DECOMPRESS_SIZE4);
|
|
3188
3295
|
const zip = await _jszip2.default.loadAsync(buffer);
|
|
3189
3296
|
const warnings = [];
|
|
3190
3297
|
const docFile = zip.file("word/document.xml");
|
|
3191
3298
|
if (!docFile) {
|
|
3192
|
-
throw new (0,
|
|
3299
|
+
throw new (0, _chunkVLSATRNQcjs.KordocError)("\uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 DOCX \uD30C\uC77C: word/document.xml\uC774 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3193
3300
|
}
|
|
3194
3301
|
let rels = /* @__PURE__ */ new Map();
|
|
3195
3302
|
const relsFile = zip.file("word/_rels/document.xml.rels");
|
|
@@ -3224,7 +3331,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3224
3331
|
const doc = parseXml2(docXml);
|
|
3225
3332
|
const body = findElements(doc, "body");
|
|
3226
3333
|
if (body.length === 0) {
|
|
3227
|
-
throw new (0,
|
|
3334
|
+
throw new (0, _chunkVLSATRNQcjs.KordocError)("DOCX \uBCF8\uBB38(w:body)\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3228
3335
|
}
|
|
3229
3336
|
const blocks = [];
|
|
3230
3337
|
const bodyEl = body[0];
|
|
@@ -3233,7 +3340,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3233
3340
|
const node = children[i];
|
|
3234
3341
|
if (node.nodeType !== 1) continue;
|
|
3235
3342
|
const el = node;
|
|
3236
|
-
const localName3 = _nullishCoalesce(el.localName, () => ( _optionalChain([el, 'access',
|
|
3343
|
+
const localName3 = _nullishCoalesce(el.localName, () => ( _optionalChain([el, 'access', _66 => _66.tagName, 'optionalAccess', _67 => _67.split, 'call', _68 => _68(":"), 'access', _69 => _69.pop, 'call', _70 => _70()])));
|
|
3237
3344
|
if (localName3 === "p") {
|
|
3238
3345
|
const block = parseParagraph(el, styles, numbering, footnotes, rels);
|
|
3239
3346
|
if (block) blocks.push(block);
|
|
@@ -3264,7 +3371,7 @@ async function parseDocxDocument(buffer, options) {
|
|
|
3264
3371
|
}
|
|
3265
3372
|
}
|
|
3266
3373
|
const outline = blocks.filter((b) => b.type === "heading").map((b) => ({ level: _nullishCoalesce(b.level, () => ( 2)), text: _nullishCoalesce(b.text, () => ( "")) }));
|
|
3267
|
-
const markdown =
|
|
3374
|
+
const markdown = _chunkVLSATRNQcjs.blocksToMarkdown.call(void 0, blocks);
|
|
3268
3375
|
return {
|
|
3269
3376
|
markdown,
|
|
3270
3377
|
blocks,
|
|
@@ -3287,7 +3394,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3287
3394
|
}
|
|
3288
3395
|
const text = new TextDecoder("utf-8").decode(buffer).replace(/^\uFEFF/, "");
|
|
3289
3396
|
const normalized = text.replace(/ /g, " ");
|
|
3290
|
-
const xml =
|
|
3397
|
+
const xml = _chunkVLSATRNQcjs.stripDtd.call(void 0, normalized);
|
|
3291
3398
|
const warnings = [];
|
|
3292
3399
|
const parser = new (0, _xmldom.DOMParser)({
|
|
3293
3400
|
onError: (_level, msg) => {
|
|
@@ -3315,7 +3422,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3315
3422
|
return { markdown: "", blocks: [], metadata, warnings };
|
|
3316
3423
|
}
|
|
3317
3424
|
const blocks = [];
|
|
3318
|
-
const pageFilter = _optionalChain([options, 'optionalAccess',
|
|
3425
|
+
const pageFilter = _optionalChain([options, 'optionalAccess', _71 => _71.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, countSections(body)) : null;
|
|
3319
3426
|
let sectionIdx = 0;
|
|
3320
3427
|
const children = body.childNodes;
|
|
3321
3428
|
for (let i = 0; i < children.length; i++) {
|
|
@@ -3327,7 +3434,7 @@ function parseHwpmlDocument(buffer, options) {
|
|
|
3327
3434
|
parseSection2(el, blocks, paraShapeMap, sectionIdx, warnings);
|
|
3328
3435
|
}
|
|
3329
3436
|
const outline = blocks.filter((b) => b.type === "heading" && b.text).map((b) => ({ level: _nullishCoalesce(b.level, () => ( 1)), text: b.text, pageNumber: b.pageNumber }));
|
|
3330
|
-
const markdown =
|
|
3437
|
+
const markdown = _chunkVLSATRNQcjs.blocksToMarkdown.call(void 0, blocks);
|
|
3331
3438
|
return {
|
|
3332
3439
|
markdown,
|
|
3333
3440
|
blocks,
|
|
@@ -3397,7 +3504,7 @@ function parseParagraph2(el, blocks, paraShapeMap, sectionNum) {
|
|
|
3397
3504
|
const shapeInfo = paraShapeMap.get(paraShapeId);
|
|
3398
3505
|
const text = extractParagraphText(el);
|
|
3399
3506
|
if (!text) return;
|
|
3400
|
-
if (_optionalChain([shapeInfo, 'optionalAccess',
|
|
3507
|
+
if (_optionalChain([shapeInfo, 'optionalAccess', _72 => _72.headingLevel]) != null) {
|
|
3401
3508
|
blocks.push({ type: "heading", text, level: shapeInfo.headingLevel, pageNumber: sectionNum });
|
|
3402
3509
|
} else {
|
|
3403
3510
|
blocks.push({ type: "paragraph", text, pageNumber: sectionNum });
|
|
@@ -3469,7 +3576,7 @@ function parseTable2(el, blocks, paraShapeMap, sectionNum, warnings) {
|
|
|
3469
3576
|
const cellRows = grid.map(
|
|
3470
3577
|
(row) => row.map((cell) => _nullishCoalesce(cell, () => ( { text: "", colSpan: 1, rowSpan: 1 })))
|
|
3471
3578
|
);
|
|
3472
|
-
const table =
|
|
3579
|
+
const table = _chunkVLSATRNQcjs.buildTable.call(void 0, cellRows);
|
|
3473
3580
|
blocks.push({ type: "table", table, pageNumber: sectionNum });
|
|
3474
3581
|
}
|
|
3475
3582
|
function extractCellText(cellEl) {
|
|
@@ -3761,7 +3868,7 @@ function fillFormFields(blocks, values) {
|
|
|
3761
3868
|
if (block.type !== "table" || !block.table) continue;
|
|
3762
3869
|
for (let r = 0; r < block.table.rows; r++) {
|
|
3763
3870
|
for (let c = 0; c < block.table.cols; c++) {
|
|
3764
|
-
const cell = _optionalChain([block, 'access',
|
|
3871
|
+
const cell = _optionalChain([block, 'access', _73 => _73.table, 'access', _74 => _74.cells, 'access', _75 => _75[r], 'optionalAccess', _76 => _76[c]]);
|
|
3765
3872
|
if (!cell) continue;
|
|
3766
3873
|
const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
|
|
3767
3874
|
if (result) {
|
|
@@ -3800,7 +3907,7 @@ function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
|
|
|
3800
3907
|
const matchKey = findMatchingKey(normalizedCellLabel, values);
|
|
3801
3908
|
if (matchKey === void 0) continue;
|
|
3802
3909
|
const newValue = values.get(matchKey);
|
|
3803
|
-
if (_optionalChain([patternFilledCells, 'optionalAccess',
|
|
3910
|
+
if (_optionalChain([patternFilledCells, 'optionalAccess', _77 => _77.has, 'call', _78 => _78(`${r},${c + 1}`)])) {
|
|
3804
3911
|
valueCell.text = newValue + " " + valueCell.text;
|
|
3805
3912
|
} else {
|
|
3806
3913
|
valueCell.text = newValue;
|
|
@@ -3870,7 +3977,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
3870
3977
|
const normalizedValues = normalizeValues(values);
|
|
3871
3978
|
const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
|
|
3872
3979
|
if (sectionFiles.length === 0) {
|
|
3873
|
-
throw new (0,
|
|
3980
|
+
throw new (0, _chunkVLSATRNQcjs.KordocError)("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
3874
3981
|
}
|
|
3875
3982
|
const xmlParser = new (0, _xmldom.DOMParser)();
|
|
3876
3983
|
const xmlSerializer = new (0, _xmldom.XMLSerializer)();
|
|
@@ -3878,7 +3985,7 @@ async function fillHwpx(hwpxBuffer, values) {
|
|
|
3878
3985
|
const zipEntry = zip.file(sectionPath);
|
|
3879
3986
|
if (!zipEntry) continue;
|
|
3880
3987
|
const rawXml = await zipEntry.async("text");
|
|
3881
|
-
const doc = xmlParser.parseFromString(
|
|
3988
|
+
const doc = xmlParser.parseFromString(_chunkVLSATRNQcjs.stripDtd.call(void 0, rawXml), "text/xml");
|
|
3882
3989
|
if (!doc.documentElement) continue;
|
|
3883
3990
|
let modified = false;
|
|
3884
3991
|
const tables = findAllElements(doc.documentElement, "tbl");
|
|
@@ -4736,16 +4843,17 @@ function diffTableCells(a, b) {
|
|
|
4736
4843
|
// src/index.ts
|
|
4737
4844
|
async function parse(input, options) {
|
|
4738
4845
|
let buffer;
|
|
4846
|
+
const opts = typeof input === "string" && !_optionalChain([options, 'optionalAccess', _79 => _79.filePath]) ? { ...options, filePath: input } : options;
|
|
4739
4847
|
if (typeof input === "string") {
|
|
4740
4848
|
try {
|
|
4741
4849
|
const buf = await _promises.readFile.call(void 0, input);
|
|
4742
|
-
buffer =
|
|
4850
|
+
buffer = _chunkVLSATRNQcjs.toArrayBuffer.call(void 0, buf);
|
|
4743
4851
|
} catch (err) {
|
|
4744
4852
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
4745
4853
|
return { success: false, fileType: "unknown", error: msg, code: "PARSE_ERROR" };
|
|
4746
4854
|
}
|
|
4747
4855
|
} else if (Buffer.isBuffer(input)) {
|
|
4748
|
-
buffer =
|
|
4856
|
+
buffer = _chunkVLSATRNQcjs.toArrayBuffer.call(void 0, input);
|
|
4749
4857
|
} else {
|
|
4750
4858
|
buffer = input;
|
|
4751
4859
|
}
|
|
@@ -4756,16 +4864,16 @@ async function parse(input, options) {
|
|
|
4756
4864
|
switch (format) {
|
|
4757
4865
|
case "hwpx": {
|
|
4758
4866
|
const zipFormat = await detectZipFormat(buffer);
|
|
4759
|
-
if (zipFormat === "xlsx") return parseXlsx(buffer,
|
|
4760
|
-
if (zipFormat === "docx") return parseDocx(buffer,
|
|
4761
|
-
return parseHwpx(buffer,
|
|
4867
|
+
if (zipFormat === "xlsx") return parseXlsx(buffer, opts);
|
|
4868
|
+
if (zipFormat === "docx") return parseDocx(buffer, opts);
|
|
4869
|
+
return parseHwpx(buffer, opts);
|
|
4762
4870
|
}
|
|
4763
4871
|
case "hwp":
|
|
4764
|
-
return parseHwp(buffer,
|
|
4872
|
+
return parseHwp(buffer, opts);
|
|
4765
4873
|
case "hwpml":
|
|
4766
|
-
return parseHwpml(buffer,
|
|
4874
|
+
return parseHwpml(buffer, opts);
|
|
4767
4875
|
case "pdf":
|
|
4768
|
-
return parsePdf(buffer,
|
|
4876
|
+
return parsePdf(buffer, opts);
|
|
4769
4877
|
default:
|
|
4770
4878
|
return { success: false, fileType: "unknown", error: "\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD30C\uC77C \uD615\uC2DD\uC785\uB2C8\uB2E4.", code: "UNSUPPORTED_FORMAT" };
|
|
4771
4879
|
}
|
|
@@ -4773,23 +4881,23 @@ async function parse(input, options) {
|
|
|
4773
4881
|
async function parseHwpx(buffer, options) {
|
|
4774
4882
|
try {
|
|
4775
4883
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseHwpxDocument(buffer, options);
|
|
4776
|
-
return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess',
|
|
4884
|
+
return { success: true, fileType: "hwpx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _80 => _80.length]) ? images : void 0 };
|
|
4777
4885
|
} catch (err) {
|
|
4778
|
-
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4886
|
+
return { success: false, fileType: "hwpx", error: err instanceof Error ? err.message : "HWPX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkVLSATRNQcjs.classifyError.call(void 0, err) };
|
|
4779
4887
|
}
|
|
4780
4888
|
}
|
|
4781
4889
|
async function parseHwp(buffer, options) {
|
|
4782
4890
|
try {
|
|
4783
4891
|
const { markdown, blocks, metadata, outline, warnings, images } = parseHwp5Document(Buffer.from(buffer), options);
|
|
4784
|
-
return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess',
|
|
4892
|
+
return { success: true, fileType: "hwp", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _81 => _81.length]) ? images : void 0 };
|
|
4785
4893
|
} catch (err) {
|
|
4786
|
-
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4894
|
+
return { success: false, fileType: "hwp", error: err instanceof Error ? err.message : "HWP \uD30C\uC2F1 \uC2E4\uD328", code: _chunkVLSATRNQcjs.classifyError.call(void 0, err) };
|
|
4787
4895
|
}
|
|
4788
4896
|
}
|
|
4789
4897
|
async function parsePdf(buffer, options) {
|
|
4790
4898
|
let parsePdfDocument;
|
|
4791
4899
|
try {
|
|
4792
|
-
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-
|
|
4900
|
+
const mod = await Promise.resolve().then(() => _interopRequireWildcard(require("./parser-STAOZMUC.cjs")));
|
|
4793
4901
|
parsePdfDocument = mod.parsePdfDocument;
|
|
4794
4902
|
} catch (e26) {
|
|
4795
4903
|
return {
|
|
@@ -4804,7 +4912,7 @@ async function parsePdf(buffer, options) {
|
|
|
4804
4912
|
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
|
|
4805
4913
|
} catch (err) {
|
|
4806
4914
|
const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
|
|
4807
|
-
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4915
|
+
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: _chunkVLSATRNQcjs.classifyError.call(void 0, err), isImageBased };
|
|
4808
4916
|
}
|
|
4809
4917
|
}
|
|
4810
4918
|
async function parseXlsx(buffer, options) {
|
|
@@ -4812,15 +4920,15 @@ async function parseXlsx(buffer, options) {
|
|
|
4812
4920
|
const { markdown, blocks, metadata, warnings } = await parseXlsxDocument(buffer, options);
|
|
4813
4921
|
return { success: true, fileType: "xlsx", markdown, blocks, metadata, warnings };
|
|
4814
4922
|
} catch (err) {
|
|
4815
|
-
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4923
|
+
return { success: false, fileType: "xlsx", error: err instanceof Error ? err.message : "XLSX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkVLSATRNQcjs.classifyError.call(void 0, err) };
|
|
4816
4924
|
}
|
|
4817
4925
|
}
|
|
4818
4926
|
async function parseDocx(buffer, options) {
|
|
4819
4927
|
try {
|
|
4820
4928
|
const { markdown, blocks, metadata, outline, warnings, images } = await parseDocxDocument(buffer, options);
|
|
4821
|
-
return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess',
|
|
4929
|
+
return { success: true, fileType: "docx", markdown, blocks, metadata, outline, warnings, images: _optionalChain([images, 'optionalAccess', _82 => _82.length]) ? images : void 0 };
|
|
4822
4930
|
} catch (err) {
|
|
4823
|
-
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4931
|
+
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: _chunkVLSATRNQcjs.classifyError.call(void 0, err) };
|
|
4824
4932
|
}
|
|
4825
4933
|
}
|
|
4826
4934
|
async function parseHwpml(buffer, options) {
|
|
@@ -4828,16 +4936,16 @@ async function parseHwpml(buffer, options) {
|
|
|
4828
4936
|
const { markdown, blocks, metadata, outline, warnings } = parseHwpmlDocument(buffer, options);
|
|
4829
4937
|
return { success: true, fileType: "hwpml", markdown, blocks, metadata, outline, warnings };
|
|
4830
4938
|
} catch (err) {
|
|
4831
|
-
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code:
|
|
4939
|
+
return { success: false, fileType: "hwpml", error: err instanceof Error ? err.message : "HWPML \uD30C\uC2F1 \uC2E4\uD328", code: _chunkVLSATRNQcjs.classifyError.call(void 0, err) };
|
|
4832
4940
|
}
|
|
4833
4941
|
}
|
|
4834
4942
|
async function fillForm(input, values, outputFormat = "markdown") {
|
|
4835
4943
|
let buffer;
|
|
4836
4944
|
if (typeof input === "string") {
|
|
4837
4945
|
const buf = await _promises.readFile.call(void 0, input);
|
|
4838
|
-
buffer =
|
|
4946
|
+
buffer = _chunkVLSATRNQcjs.toArrayBuffer.call(void 0, buf);
|
|
4839
4947
|
} else if (Buffer.isBuffer(input)) {
|
|
4840
|
-
buffer =
|
|
4948
|
+
buffer = _chunkVLSATRNQcjs.toArrayBuffer.call(void 0, input);
|
|
4841
4949
|
} else {
|
|
4842
4950
|
buffer = input;
|
|
4843
4951
|
}
|
|
@@ -4863,7 +4971,7 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
4863
4971
|
throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
|
|
4864
4972
|
}
|
|
4865
4973
|
const fill = fillFormFields(parsed.blocks, values);
|
|
4866
|
-
const markdown =
|
|
4974
|
+
const markdown = _chunkVLSATRNQcjs.blocksToMarkdown.call(void 0, fill.blocks);
|
|
4867
4975
|
if (outputFormat === "hwpx") {
|
|
4868
4976
|
const hwpxBuffer = await markdownToHwpx(markdown);
|
|
4869
4977
|
return { output: hwpxBuffer, format: "hwpx", fill };
|
|
@@ -4894,5 +5002,5 @@ async function fillForm(input, values, outputFormat = "markdown") {
|
|
|
4894
5002
|
|
|
4895
5003
|
|
|
4896
5004
|
|
|
4897
|
-
exports.VERSION =
|
|
5005
|
+
exports.VERSION = _chunkVLSATRNQcjs.VERSION; exports.blocksToMarkdown = _chunkVLSATRNQcjs.blocksToMarkdown; exports.compare = compare; exports.detectFormat = detectFormat; exports.detectZipFormat = detectZipFormat; exports.diffBlocks = diffBlocks; exports.extractFormFields = extractFormFields; exports.fillForm = fillForm; exports.fillFormFields = fillFormFields; exports.fillHwpx = fillHwpx; exports.isHwpxFile = isHwpxFile; exports.isLabelCell = isLabelCell; exports.isOldHwpFile = isOldHwpFile; exports.isPdfFile = isPdfFile; exports.isZipFile = isZipFile; exports.markdownToHwpx = markdownToHwpx; exports.parse = parse; exports.parseDocx = parseDocx; exports.parseHwp = parseHwp; exports.parseHwpml = parseHwpml; exports.parseHwpx = parseHwpx; exports.parsePdf = parsePdf; exports.parseXlsx = parseXlsx;
|
|
4898
5006
|
//# sourceMappingURL=index.cjs.map
|