kordoc 0.2.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -8
- package/dist/{chunk-KZMWHK72.js → chunk-6FDCSJYE.js} +28 -10
- package/dist/chunk-6FDCSJYE.js.map +1 -0
- package/dist/cli.js +2 -1
- package/dist/cli.js.map +1 -0
- package/dist/index.cjs +27 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +27 -10
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +8 -3
- package/dist/mcp.js.map +1 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -49,7 +49,7 @@ npm install kordoc
|
|
|
49
49
|
npm install pdfjs-dist
|
|
50
50
|
```
|
|
51
51
|
|
|
52
|
-
>
|
|
52
|
+
> `pdfjs-dist` is an optional peer dependency. Not needed for HWP/HWPX parsing.
|
|
53
53
|
|
|
54
54
|
## Usage
|
|
55
55
|
|
|
@@ -147,18 +147,21 @@ import type { IRBlock, IRTable, IRCell, CellContext } from "kordoc"
|
|
|
147
147
|
|
|
148
148
|
## Security
|
|
149
149
|
|
|
150
|
-
|
|
150
|
+
v1.0.0 production-grade security hardening:
|
|
151
151
|
|
|
152
|
-
- **ZIP bomb protection** — 100MB decompression limit, 500 entry cap
|
|
152
|
+
- **ZIP bomb protection** — Pre-scan declared uncompressed sizes, 100MB decompression limit, 500 entry cap
|
|
153
153
|
- **XXE/Billion Laughs prevention** — Internal DTD subsets fully stripped from HWPX XML
|
|
154
154
|
- **Decompression bomb guard** — `maxOutputLength` on HWP5 zlib streams, cumulative 100MB limit across sections
|
|
155
|
+
- **PDF resource limits** — MAX_PAGES=5,000, cumulative text size 100MB cap, `doc.destroy()` cleanup
|
|
156
|
+
- **HWP5 record cap** — Max 500,000 records per section, prevents memory exhaustion from crafted files
|
|
157
|
+
- **Table dimension clamping** — rows/cols read from HWP5 binary clamped to MAX_ROWS/MAX_COLS before allocation
|
|
155
158
|
- **colSpan/rowSpan clamping** — Crafted merge values clamped to grid bounds (MAX_COLS=200, MAX_ROWS=10,000)
|
|
156
|
-
- **
|
|
157
|
-
- **MCP
|
|
159
|
+
- **Path traversal guard** — Backslash normalization, `..`, absolute paths, Windows drive letters all rejected
|
|
160
|
+
- **MCP error sanitization** — Filesystem paths stripped from error messages returned to clients
|
|
161
|
+
- **MCP path restriction** — Only `.hwp`, `.hwpx`, `.pdf` extensions allowed, symlink resolution
|
|
158
162
|
- **File size limit** — 500MB max in MCP server and CLI
|
|
159
|
-
- **
|
|
160
|
-
- **
|
|
161
|
-
- **HWP5 section limit** — Max 100 sections to prevent infinite loop on corrupted files
|
|
163
|
+
- **HWP5 section limit** — Max 100 sections in both primary and fallback paths
|
|
164
|
+
- **HWP5 control char fix** — Character code 10 (footnote/endnote) now correctly handled
|
|
162
165
|
|
|
163
166
|
## How It Works
|
|
164
167
|
|
|
@@ -17,6 +17,7 @@ function isPdfFile(buffer) {
|
|
|
17
17
|
return b[0] === 37 && b[1] === 80 && b[2] === 68 && b[3] === 70;
|
|
18
18
|
}
|
|
19
19
|
function detectFormat(buffer) {
|
|
20
|
+
if (buffer.byteLength < 4) return "unknown";
|
|
20
21
|
if (isHwpxFile(buffer)) return "hwpx";
|
|
21
22
|
if (isOldHwpFile(buffer)) return "hwp";
|
|
22
23
|
if (isPdfFile(buffer)) return "pdf";
|
|
@@ -24,7 +25,7 @@ function detectFormat(buffer) {
|
|
|
24
25
|
}
|
|
25
26
|
|
|
26
27
|
// src/utils.ts
|
|
27
|
-
var VERSION = true ? "0.2.2" : "0.0.0-dev";
|
|
28
|
+
var VERSION = true ? "1.0.0" : "0.0.0-dev";
|
|
28
29
|
function toArrayBuffer(buf) {
|
|
29
30
|
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
30
31
|
}
|
|
@@ -179,6 +180,11 @@ async function parseHwpxDocument(buffer) {
|
|
|
179
180
|
} catch {
|
|
180
181
|
return extractFromBrokenZip(buffer);
|
|
181
182
|
}
|
|
183
|
+
let declaredTotal = 0;
|
|
184
|
+
zip.forEach((_, file) => {
|
|
185
|
+
declaredTotal += file._data?.uncompressedSize ?? 0;
|
|
186
|
+
});
|
|
187
|
+
if (declaredTotal > MAX_DECOMPRESS_SIZE) throw new Error("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
182
188
|
const sectionPaths = await resolveSectionPaths(zip);
|
|
183
189
|
if (sectionPaths.length === 0) throw new Error("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
184
190
|
let totalDecompressed = 0;
|
|
@@ -219,7 +225,8 @@ function extractFromBrokenZip(buffer) {
|
|
|
219
225
|
}
|
|
220
226
|
const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
|
|
221
227
|
const name = new TextDecoder().decode(nameBytes);
|
|
222
|
-
|
|
228
|
+
const normalizedName = name.replace(/\\/g, "/");
|
|
229
|
+
if (normalizedName.includes("..") || normalizedName.startsWith("/") || /^[A-Za-z]:/.test(normalizedName)) {
|
|
223
230
|
pos = fileStart + compSize;
|
|
224
231
|
continue;
|
|
225
232
|
}
|
|
@@ -250,7 +257,8 @@ function extractFromBrokenZip(buffer) {
|
|
|
250
257
|
async function resolveSectionPaths(zip) {
|
|
251
258
|
const manifestPaths = ["Contents/content.hpf", "content.hpf"];
|
|
252
259
|
for (const mp of manifestPaths) {
|
|
253
|
-
const file = zip.file(mp);
|
|
260
|
+
const mpLower = mp.toLowerCase();
|
|
261
|
+
const file = zip.file(mp) || Object.values(zip.files).find((f) => f.name.toLowerCase() === mpLower) || null;
|
|
254
262
|
if (!file) continue;
|
|
255
263
|
const xml = await file.async("text");
|
|
256
264
|
const paths = parseSectionPathsFromManifest(xml);
|
|
@@ -424,10 +432,11 @@ var CHAR_FIXED_NBSP = 24;
|
|
|
424
432
|
var FLAG_COMPRESSED = 1 << 0;
|
|
425
433
|
var FLAG_ENCRYPTED = 1 << 1;
|
|
426
434
|
var FLAG_DRM = 1 << 4;
|
|
435
|
+
var MAX_RECORDS = 5e5;
|
|
427
436
|
function readRecords(data) {
|
|
428
437
|
const records = [];
|
|
429
438
|
let offset = 0;
|
|
430
|
-
while (offset + 4 <= data.length) {
|
|
439
|
+
while (offset + 4 <= data.length && records.length < MAX_RECORDS) {
|
|
431
440
|
const header = data.readUInt32LE(offset);
|
|
432
441
|
offset += 4;
|
|
433
442
|
const tagId = header & 1023;
|
|
@@ -488,7 +497,7 @@ function extractText(data) {
|
|
|
488
497
|
break;
|
|
489
498
|
default:
|
|
490
499
|
if (ch >= 1 && ch <= 31) {
|
|
491
|
-
const isExt = ch >= 1 && ch <= 3 || ch >=
|
|
500
|
+
const isExt = ch >= 1 && ch <= 3 || ch >= 10 && ch <= 18 || ch >= 21 && ch <= 23;
|
|
492
501
|
const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
|
|
493
502
|
if ((isExt || isInline) && i + 14 <= data.length) i += 14;
|
|
494
503
|
} else if (ch >= 32) {
|
|
@@ -546,6 +555,7 @@ function findSections(cfb) {
|
|
|
546
555
|
}
|
|
547
556
|
if (sections.length === 0 && cfb.FileIndex) {
|
|
548
557
|
for (const entry of cfb.FileIndex) {
|
|
558
|
+
if (sections.length >= MAX_SECTIONS) break;
|
|
549
559
|
if (entry.name?.startsWith("Section") && entry.content) {
|
|
550
560
|
const idx = parseInt(entry.name.replace("Section", ""), 10) || 0;
|
|
551
561
|
sections.push({ idx, content: Buffer.from(entry.content) });
|
|
@@ -614,8 +624,8 @@ function parseTableBlock(records, startIdx) {
|
|
|
614
624
|
if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
|
|
615
625
|
if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
|
|
616
626
|
if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
|
|
617
|
-
rows = rec.data.readUInt16LE(4);
|
|
618
|
-
cols = rec.data.readUInt16LE(6);
|
|
627
|
+
rows = Math.min(rec.data.readUInt16LE(4), MAX_ROWS);
|
|
628
|
+
cols = Math.min(rec.data.readUInt16LE(6), MAX_COLS);
|
|
619
629
|
}
|
|
620
630
|
if (rec.tagId === TAG_LIST_HEADER) {
|
|
621
631
|
const { cell, nextIdx } = parseCellBlock(records, i, tableLevel);
|
|
@@ -677,6 +687,8 @@ function arrangeCells(rows, cols, cells) {
|
|
|
677
687
|
// src/pdf/parser.ts
|
|
678
688
|
import { createRequire as createRequire2 } from "module";
|
|
679
689
|
import { pathToFileURL } from "url";
|
|
690
|
+
var MAX_PAGES = 5e3;
|
|
691
|
+
var MAX_TOTAL_TEXT = 100 * 1024 * 1024;
|
|
680
692
|
var pdfjsModule = null;
|
|
681
693
|
async function loadPdfjs() {
|
|
682
694
|
if (pdfjsModule) return pdfjsModule;
|
|
@@ -719,15 +731,19 @@ async function parsePdfDocument(buffer) {
|
|
|
719
731
|
}
|
|
720
732
|
const pageTexts = [];
|
|
721
733
|
let totalChars = 0;
|
|
722
|
-
|
|
734
|
+
let totalTextBytes = 0;
|
|
735
|
+
const effectivePageCount = Math.min(pageCount, MAX_PAGES);
|
|
736
|
+
for (let i = 1; i <= effectivePageCount; i++) {
|
|
723
737
|
const page = await doc.getPage(i);
|
|
724
738
|
const textContent = await page.getTextContent();
|
|
725
739
|
const lines = groupTextItemsByLine(textContent.items);
|
|
726
740
|
const pageText = lines.join("\n");
|
|
727
741
|
totalChars += pageText.replace(/\s/g, "").length;
|
|
742
|
+
totalTextBytes += pageText.length * 2;
|
|
743
|
+
if (totalTextBytes > MAX_TOTAL_TEXT) throw new Error(`\uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uD06C\uAE30 \uCD08\uACFC (${MAX_TOTAL_TEXT / 1024 / 1024}MB \uC81C\uD55C)`);
|
|
728
744
|
pageTexts.push(pageText);
|
|
729
745
|
}
|
|
730
|
-
const avgCharsPerPage = totalChars / pageCount;
|
|
746
|
+
const avgCharsPerPage = totalChars / effectivePageCount;
|
|
731
747
|
if (avgCharsPerPage < 10) {
|
|
732
748
|
return {
|
|
733
749
|
success: false,
|
|
@@ -746,7 +762,8 @@ async function parsePdfDocument(buffer) {
|
|
|
746
762
|
}
|
|
747
763
|
}
|
|
748
764
|
markdown = reconstructTables(markdown);
|
|
749
|
-
|
|
765
|
+
const truncated = pageCount > MAX_PAGES;
|
|
766
|
+
return { success: true, fileType: "pdf", markdown, pageCount: effectivePageCount, isImageBased: false, ...truncated && { warning: `PDF\uAC00 ${pageCount}\uD398\uC774\uC9C0\uC774\uC9C0\uB9CC ${MAX_PAGES}\uD398\uC774\uC9C0\uAE4C\uC9C0\uB9CC \uCC98\uB9AC\uD588\uC2B5\uB2C8\uB2E4` } };
|
|
750
767
|
} finally {
|
|
751
768
|
await doc.destroy().catch(() => {
|
|
752
769
|
});
|
|
@@ -872,3 +889,4 @@ export {
|
|
|
872
889
|
toArrayBuffer,
|
|
873
890
|
parse
|
|
874
891
|
};
|
|
892
|
+
//# sourceMappingURL=chunk-6FDCSJYE.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/detect.ts","../src/utils.ts","../src/hwpx/parser.ts","../src/table/builder.ts","../src/hwp5/record.ts","../src/hwp5/parser.ts","../src/pdf/parser.ts","../src/index.ts"],"sourcesContent":["/** 매직 바이트 기반 파일 포맷 감지 */\n\nimport type { FileType } from \"./types.js\"\n\n/** 매직 바이트 뷰 생성 (복사 없이 view) */\nfunction magicBytes(buffer: ArrayBuffer): Uint8Array {\n return new Uint8Array(buffer, 0, Math.min(4, buffer.byteLength))\n}\n\n/** HWPX (ZIP 기반 한컴 문서): PK\\x03\\x04 */\nexport function isHwpxFile(buffer: ArrayBuffer): boolean {\n const b = magicBytes(buffer)\n return b[0] === 0x50 && b[1] === 0x4b && b[2] === 0x03 && b[3] === 0x04\n}\n\n/** HWP 5.x (OLE2 바이너리 한컴 문서): \\xD0\\xCF\\x11\\xE0 */\nexport function isOldHwpFile(buffer: ArrayBuffer): boolean {\n const b = magicBytes(buffer)\n return b[0] === 0xd0 && b[1] === 0xcf && b[2] === 0x11 && b[3] === 0xe0\n}\n\n/** PDF 문서: %PDF */\nexport function isPdfFile(buffer: ArrayBuffer): boolean {\n const b = magicBytes(buffer)\n return b[0] === 0x25 && b[1] === 0x50 && b[2] === 0x44 && b[3] === 0x46\n}\n\n/** 버퍼로부터 파일 포맷 감지 */\nexport function detectFormat(buffer: ArrayBuffer): FileType {\n if (buffer.byteLength < 4) return \"unknown\"\n if (isHwpxFile(buffer)) return \"hwpx\"\n if (isOldHwpFile(buffer)) return \"hwp\"\n if (isPdfFile(buffer)) return \"pdf\"\n return \"unknown\"\n}\n","/** kordoc 공용 유틸리티 */\n\n/** 빌드 타임에 tsup define으로 주입되는 버전 */\ndeclare const __KORDOC_VERSION__: string\nexport const VERSION: string = typeof __KORDOC_VERSION__ !== \"undefined\" ? 
__KORDOC_VERSION__ : \"0.0.0-dev\"\n\n/**\n * Node.js Buffer → ArrayBuffer 안전 변환\n * Buffer.buffer는 pool에서 할당된 공유 ArrayBuffer이므로 반드시 slice 필요\n */\nexport function toArrayBuffer(buf: Buffer): ArrayBuffer {\n return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength) as ArrayBuffer\n}\n","/**\n * HWPX 파서 — manifest 멀티섹션, colSpan/rowSpan, 중첩테이블\n *\n * lexdiff 기반 + edu-facility-ai 손상ZIP 복구\n */\n\nimport JSZip from \"jszip\"\nimport { inflateRawSync } from \"zlib\"\nimport { DOMParser } from \"@xmldom/xmldom\"\nimport { buildTable, convertTableToText, blocksToMarkdown, MAX_COLS, MAX_ROWS } from \"../table/builder.js\"\nimport type { CellContext, IRBlock } from \"../types.js\"\n\n/** 압축 해제 최대 크기 (100MB) — ZIP bomb 방지 */\nconst MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024\n/** 손상 ZIP 복구 시 최대 엔트리 수 */\nconst MAX_ZIP_ENTRIES = 500\n\n/** colSpan/rowSpan을 안전한 범위로 클램핑 */\nfunction clampSpan(val: number, max: number): number {\n return Math.max(1, Math.min(val, max))\n}\n\ninterface TableState { rows: CellContext[][]; currentRow: CellContext[]; cell: CellContext | null }\n\n/** XXE/Billion Laughs 방지 — DOCTYPE 제거 (내부 DTD 서브셋 포함) */\nfunction stripDtd(xml: string): string {\n return xml.replace(/<!DOCTYPE\\s[^[>]*(\\[[\\s\\S]*?\\])?\\s*>/gi, \"\")\n}\n\nexport async function parseHwpxDocument(buffer: ArrayBuffer): Promise<string> {\n let zip: JSZip\n\n try {\n zip = await JSZip.loadAsync(buffer)\n } catch {\n // ZIP Central Directory 손상 — Local File Header 스캔으로 폴백\n return extractFromBrokenZip(buffer)\n }\n\n // ZIP 전체 엔트리의 선언된 비압축 크기 합산 검증 (ZIP bomb 조기 감지)\n let declaredTotal = 0\n zip.forEach((_, file) => { declaredTotal += (file as unknown as { _data?: { uncompressedSize?: number } })._data?.uncompressedSize ?? 
0 })\n if (declaredTotal > MAX_DECOMPRESS_SIZE) throw new Error(\"ZIP 비압축 크기 초과 (ZIP bomb 의심)\")\n\n const sectionPaths = await resolveSectionPaths(zip)\n if (sectionPaths.length === 0) throw new Error(\"HWPX에서 섹션 파일을 찾을 수 없습니다\")\n\n let totalDecompressed = 0\n const blocks: IRBlock[] = []\n for (const path of sectionPaths) {\n const file = zip.file(path)\n if (!file) continue\n const xml = await file.async(\"text\")\n totalDecompressed += xml.length * 2 // UTF-16 추정\n if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new Error(\"ZIP 압축 해제 크기 초과 (ZIP bomb 의심)\")\n blocks.push(...parseSectionXml(xml))\n }\n return blocksToMarkdown(blocks)\n}\n\n// ─── 손상 ZIP 복구 (edu-facility-ai에서 포팅) ──────────\n\nfunction extractFromBrokenZip(buffer: ArrayBuffer): string {\n const data = new Uint8Array(buffer)\n const view = new DataView(buffer)\n let pos = 0\n const texts: string[] = []\n let totalDecompressed = 0\n let entryCount = 0\n\n while (pos < data.length - 30) {\n // PK\\x03\\x04 시그니처 확인\n if (data[pos] !== 0x50 || data[pos + 1] !== 0x4b || data[pos + 2] !== 0x03 || data[pos + 3] !== 0x04) break\n\n if (++entryCount > MAX_ZIP_ENTRIES) break\n\n const method = view.getUint16(pos + 8, true)\n const compSize = view.getUint32(pos + 18, true)\n const nameLen = view.getUint16(pos + 26, true)\n const extraLen = view.getUint16(pos + 28, true)\n\n // nameLen 상한 — 비정상 값에 의한 대규모 버퍼 할당 방지\n if (nameLen > 1024 || extraLen > 65535) { pos += 30 + nameLen + extraLen; continue }\n\n const fileStart = pos + 30 + nameLen + extraLen\n // 범위 초과 검증 — OOB 및 무한 루프 방지\n if (fileStart + compSize > data.length) break\n if (compSize === 0 && method !== 0) { pos = fileStart; continue }\n\n const nameBytes = data.slice(pos + 30, pos + 30 + nameLen)\n const name = new TextDecoder().decode(nameBytes)\n\n // 경로 순회 방지 — 상위 디렉토리 참조 및 절대 경로 차단\n const normalizedName = name.replace(/\\\\/g, \"/\")\n if (normalizedName.includes(\"..\") || normalizedName.startsWith(\"/\") || 
/^[A-Za-z]:/.test(normalizedName)) { pos = fileStart + compSize; continue }\n const fileData = data.slice(fileStart, fileStart + compSize)\n pos = fileStart + compSize\n\n if (!name.toLowerCase().includes(\"section\") || !name.endsWith(\".xml\")) continue\n\n try {\n let content: string\n if (method === 0) {\n content = new TextDecoder().decode(fileData)\n } else if (method === 8) {\n const decompressed = inflateRawSync(Buffer.from(fileData), { maxOutputLength: MAX_DECOMPRESS_SIZE })\n content = new TextDecoder().decode(decompressed)\n } else {\n continue\n }\n totalDecompressed += content.length * 2\n if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new Error(\"압축 해제 크기 초과\")\n const sectionText = blocksToMarkdown(parseSectionXml(content))\n if (sectionText) texts.push(sectionText)\n } catch {\n continue\n }\n }\n\n if (texts.length === 0) throw new Error(\"손상된 HWPX에서 섹션 데이터를 복구할 수 없습니다\")\n return texts.join(\"\\n\\n\")\n}\n\n// ─── Manifest 해석 ───────────────────────────────────\n\nasync function resolveSectionPaths(zip: JSZip): Promise<string[]> {\n const manifestPaths = [\"Contents/content.hpf\", \"content.hpf\"]\n for (const mp of manifestPaths) {\n const mpLower = mp.toLowerCase()\n const file = zip.file(mp) || Object.values(zip.files).find(f => f.name.toLowerCase() === mpLower) || null\n if (!file) continue\n const xml = await file.async(\"text\")\n const paths = parseSectionPathsFromManifest(xml)\n if (paths.length > 0) return paths\n }\n\n // fallback: section*.xml 직접 검색\n const sectionFiles = zip.file(/[Ss]ection\\d+\\.xml$/)\n return sectionFiles.map(f => f.name).sort()\n}\n\nfunction parseSectionPathsFromManifest(xml: string): string[] {\n const parser = new DOMParser()\n const doc = parser.parseFromString(stripDtd(xml), \"text/xml\")\n const items = doc.getElementsByTagName(\"opf:item\")\n const spine = doc.getElementsByTagName(\"opf:itemref\")\n\n const isSectionId = (id: string) => /^s/i.test(id) || id.toLowerCase().includes(\"section\")\n const 
idToHref = new Map<string, string>()\n for (let i = 0; i < items.length; i++) {\n const item = items[i]\n const id = item.getAttribute(\"id\") || \"\"\n let href = item.getAttribute(\"href\") || \"\"\n const mediaType = item.getAttribute(\"media-type\") || \"\"\n if (!isSectionId(id) && !mediaType.includes(\"xml\")) continue\n if (!href.startsWith(\"/\") && !href.startsWith(\"Contents/\") && isSectionId(id))\n href = \"Contents/\" + href\n idToHref.set(id, href)\n }\n\n if (spine.length > 0) {\n const ordered: string[] = []\n for (let i = 0; i < spine.length; i++) {\n const href = idToHref.get(spine[i].getAttribute(\"idref\") || \"\")\n if (href) ordered.push(href)\n }\n if (ordered.length > 0) return ordered\n }\n return Array.from(idToHref.entries())\n .filter(([id]) => isSectionId(id))\n .sort((a, b) => a[0].localeCompare(b[0]))\n .map(([, href]) => href)\n}\n\n// ─── 섹션 XML 파싱 ──────────────────────────────────\n\nfunction parseSectionXml(xml: string): IRBlock[] {\n const parser = new DOMParser()\n const doc = parser.parseFromString(stripDtd(xml), \"text/xml\")\n if (!doc.documentElement) return []\n\n const blocks: IRBlock[] = []\n walkSection(doc.documentElement, blocks, null, [])\n return blocks\n}\n\nfunction walkSection(\n node: Node, blocks: IRBlock[],\n tableCtx: TableState | null, tableStack: TableState[]\n): void {\n const children = node.childNodes\n if (!children) return\n\n for (let i = 0; i < children.length; i++) {\n const el = children[i] as Element\n if (el.nodeType !== 1) continue\n\n const tag = el.tagName || el.localName || \"\"\n const localTag = tag.replace(/^[^:]+:/, \"\")\n\n switch (localTag) {\n case \"tbl\": {\n if (tableCtx) tableStack.push(tableCtx)\n const newTable: TableState = { rows: [], currentRow: [], cell: null }\n walkSection(el, blocks, newTable, tableStack)\n\n if (newTable.rows.length > 0) {\n if (tableStack.length > 0) {\n const parentTable = tableStack.pop()!\n const nestedText = convertTableToText(newTable.rows)\n if 
(parentTable.cell) {\n parentTable.cell.text += (parentTable.cell.text ? \"\\n\" : \"\") + nestedText\n }\n tableCtx = parentTable\n } else {\n blocks.push({ type: \"table\", table: buildTable(newTable.rows) })\n tableCtx = null\n }\n } else {\n tableCtx = tableStack.length > 0 ? tableStack.pop()! : null\n }\n break\n }\n\n case \"tr\":\n if (tableCtx) {\n tableCtx.currentRow = []\n walkSection(el, blocks, tableCtx, tableStack)\n if (tableCtx.currentRow.length > 0) tableCtx.rows.push(tableCtx.currentRow)\n tableCtx.currentRow = []\n }\n break\n\n case \"tc\":\n if (tableCtx) {\n tableCtx.cell = { text: \"\", colSpan: 1, rowSpan: 1 }\n walkSection(el, blocks, tableCtx, tableStack)\n if (tableCtx.cell) {\n tableCtx.currentRow.push(tableCtx.cell)\n tableCtx.cell = null\n }\n }\n break\n\n case \"cellSpan\":\n if (tableCtx?.cell) {\n const cs = parseInt(el.getAttribute(\"colSpan\") || \"1\", 10)\n const rs = parseInt(el.getAttribute(\"rowSpan\") || \"1\", 10)\n tableCtx.cell.colSpan = clampSpan(cs, MAX_COLS)\n tableCtx.cell.rowSpan = clampSpan(rs, MAX_ROWS)\n }\n break\n\n case \"p\": {\n const text = extractParagraphText(el)\n if (text) {\n if (tableCtx?.cell) {\n tableCtx.cell.text += (tableCtx.cell.text ? 
\"\\n\" : \"\") + text\n } else if (!tableCtx) {\n blocks.push({ type: \"paragraph\", text })\n }\n }\n walkSection(el, blocks, tableCtx, tableStack)\n break\n }\n\n default:\n walkSection(el, blocks, tableCtx, tableStack)\n break\n }\n }\n}\n\nfunction extractParagraphText(para: Node): string {\n let text = \"\"\n const walk = (node: Node) => {\n const children = node.childNodes\n if (!children) return\n for (let i = 0; i < children.length; i++) {\n const child = children[i] as Element\n if (child.nodeType === 3) { text += child.textContent || \"\"; continue }\n if (child.nodeType !== 1) continue\n\n const tag = (child.tagName || child.localName || \"\").replace(/^[^:]+:/, \"\")\n switch (tag) {\n case \"t\": text += child.textContent || \"\"; break\n case \"tab\": text += \"\\t\"; break\n case \"br\":\n if ((child.getAttribute(\"type\") || \"line\") === \"line\") text += \"\\n\"\n break\n case \"fwSpace\": case \"hwSpace\": text += \" \"; break\n case \"tbl\": break // 테이블은 walkSection에서 처리\n default: walk(child); break\n }\n }\n }\n walk(para)\n return text.replace(/[ \\t]+/g, \" \").trim()\n}\n","/** 2-pass colSpan/rowSpan 테이블 빌더 및 Markdown 변환 */\n\nimport type { CellContext, IRBlock, IRCell, IRTable } from \"../types.js\"\n\n/** 테이블 열 수 상한 — 한국 공공문서 기준 충분한 값 */\nexport const MAX_COLS = 200\n/** 테이블 행 수 상한 — 메모리 폭주 방지 */\nexport const MAX_ROWS = 10000\n\nexport function buildTable(rows: CellContext[][]): IRTable {\n if (rows.length > MAX_ROWS) rows = rows.slice(0, MAX_ROWS)\n const numRows = rows.length\n\n // Pass 1: maxCols 계산 (sparse Set — 메모리 효율적)\n const tempOccupied = new Set<number>()\n let maxCols = 0\n\n for (let rowIdx = 0; rowIdx < numRows; rowIdx++) {\n let colIdx = 0\n for (const cell of rows[rowIdx]) {\n while (colIdx < MAX_COLS && tempOccupied.has(rowIdx * MAX_COLS + colIdx)) colIdx++\n if (colIdx >= MAX_COLS) break\n\n for (let r = rowIdx; r < Math.min(rowIdx + cell.rowSpan, numRows); r++) {\n for (let c = colIdx; c < Math.min(colIdx + 
cell.colSpan, MAX_COLS); c++) {\n tempOccupied.add(r * MAX_COLS + c)\n }\n }\n colIdx += cell.colSpan\n if (colIdx > maxCols) maxCols = colIdx\n }\n }\n tempOccupied.clear()\n\n if (maxCols === 0) return { rows: 0, cols: 0, cells: [], hasHeader: false }\n\n // Pass 2: 실제 배치\n const grid: IRCell[][] = Array.from({ length: numRows }, () =>\n Array.from({ length: maxCols }, () => ({ text: \"\", colSpan: 1, rowSpan: 1 }))\n )\n const occupied: boolean[][] = Array.from({ length: numRows }, () => Array(maxCols).fill(false))\n\n for (let rowIdx = 0; rowIdx < numRows; rowIdx++) {\n let colIdx = 0\n let cellIdx = 0\n\n while (colIdx < maxCols && cellIdx < rows[rowIdx].length) {\n while (colIdx < maxCols && occupied[rowIdx][colIdx]) colIdx++\n if (colIdx >= maxCols) break\n\n const cell = rows[rowIdx][cellIdx]\n grid[rowIdx][colIdx] = {\n text: cell.text.trim(),\n colSpan: cell.colSpan,\n rowSpan: cell.rowSpan,\n }\n\n for (let r = rowIdx; r < Math.min(rowIdx + cell.rowSpan, numRows); r++) {\n for (let c = colIdx; c < Math.min(colIdx + cell.colSpan, maxCols); c++) {\n occupied[r][c] = true\n }\n }\n\n colIdx += cell.colSpan\n cellIdx++\n }\n }\n\n return { rows: numRows, cols: maxCols, cells: grid, hasHeader: numRows > 1 }\n}\n\nexport function convertTableToText(rows: CellContext[][]): string {\n return rows\n .map(row =>\n row\n .map(c => c.text.trim().replace(/\\n/g, \" \"))\n .filter(Boolean)\n .join(\" | \")\n )\n .filter(Boolean)\n .join(\"\\n\")\n}\n\nexport function blocksToMarkdown(blocks: IRBlock[]): string {\n const lines: string[] = []\n\n for (let i = 0; i < blocks.length; i++) {\n const block = blocks[i]\n\n if (block.type === \"paragraph\" && block.text) {\n const text = block.text\n\n if (/^\\[별표\\s*\\d+/.test(text)) {\n const nextBlock = blocks[i + 1]\n if (nextBlock?.type === \"paragraph\" && nextBlock.text && /관련\\)?$/.test(nextBlock.text)) {\n lines.push(\"\", `## ${text} ${nextBlock.text}`, \"\")\n i++\n } else {\n lines.push(\"\", `## ${text}`, \"\")\n 
}\n continue\n }\n\n if (/^\\([^)]*조[^)]*관련\\)$/.test(text)) {\n lines.push(`*${text}*`, \"\")\n continue\n }\n\n lines.push(text)\n } else if (block.type === \"table\" && block.table) {\n lines.push(tableToMarkdown(block.table))\n }\n }\n\n return lines.join(\"\\n\").trim()\n}\n\nfunction tableToMarkdown(table: IRTable): string {\n if (table.rows === 0 || table.cols === 0) return \"\"\n\n const { cells, rows: numRows, cols: numCols } = table\n\n // 1행 1열 → 구조화된 텍스트\n if (numRows === 1 && numCols === 1) {\n const content = cells[0][0].text\n return content\n .split(/\\n/)\n .map(line => {\n const trimmed = line.trim()\n if (!trimmed) return \"\"\n if (/^\\d+\\.\\s/.test(trimmed)) return `**${trimmed}**`\n if (/^[가-힣]\\.\\s/.test(trimmed)) return ` ${trimmed}`\n return trimmed\n })\n .filter(Boolean)\n .join(\"\\n\")\n }\n\n // 병합 셀: 행/열 병합된 셀은 빈 칸으로\n const display: string[][] = Array.from({ length: numRows }, () => Array(numCols).fill(\"\"))\n const skip = new Set<string>()\n\n for (let r = 0; r < numRows; r++) {\n for (let c = 0; c < numCols; c++) {\n if (skip.has(`${r},${c}`)) continue\n const cell = cells[r][c]\n display[r][c] = cell.text.replace(/\\n/g, \"<br>\")\n\n for (let dr = 0; dr < cell.rowSpan; dr++) {\n for (let dc = 0; dc < cell.colSpan; dc++) {\n if (dr === 0 && dc === 0) continue\n if (r + dr < numRows && c + dc < numCols) {\n skip.add(`${r + dr},${c + dc}`)\n }\n }\n }\n }\n }\n\n // rowSpan에 의해 생긴 빈 placeholder 행만 제거 (내용이 동일한 실제 데이터 행은 유지)\n const uniqueRows: string[][] = []\n for (const row of display) {\n const isEmptyPlaceholder = row.every(cell => cell === \"\")\n if (!isEmptyPlaceholder) uniqueRows.push(row)\n }\n\n if (uniqueRows.length === 0) return \"\"\n\n const md: string[] = []\n md.push(\"| \" + uniqueRows[0].join(\" | \") + \" |\")\n md.push(\"| \" + uniqueRows[0].map(() => \"---\").join(\" | \") + \" |\")\n for (let i = 1; i < uniqueRows.length; i++) {\n md.push(\"| \" + uniqueRows[i].join(\" | \") + \" |\")\n }\n return 
md.join(\"\\n\")\n}\n","/** HWP 5.x 레코드 리더, UTF-16LE 텍스트 추출, 스트림 압축해제 */\n\nimport { inflateRawSync, inflateSync } from \"zlib\"\n\n// ─── 레코드 태그 상수 ────────────────────────────────\n\nexport const TAG_PARA_HEADER = 0x0042\nexport const TAG_PARA_TEXT = 0x0043\nexport const TAG_CTRL_HEADER = 0x0047\nexport const TAG_LIST_HEADER = 0x0048\nexport const TAG_TABLE = 0x004d\n\n// 특수 문자 코드 (UTF-16LE)\n// HWP 스펙에서 0x0000은 NUL이 아닌 줄바꿈(line break)으로 정의됨\nconst CHAR_LINE = 0x0000\nconst CHAR_PARA = 0x000d\nconst CHAR_TAB = 0x0009\nconst CHAR_HYPHEN = 0x001e\nconst CHAR_NBSP = 0x001f\nconst CHAR_FIXED_NBSP = 0x0018\n\n// FileHeader 플래그\nexport const FLAG_COMPRESSED = 1 << 0\nexport const FLAG_ENCRYPTED = 1 << 1\nexport const FLAG_DRM = 1 << 4\n\n// ─── 레코드 구조 ─────────────────────────────────────\n\nexport interface HwpRecord {\n tagId: number\n level: number\n size: number\n data: Buffer\n}\n\nexport interface HwpFileHeader {\n signature: string\n versionMajor: number\n flags: number\n}\n\n// ─── 레코드 리더 ─────────────────────────────────────\n\n/** 최대 레코드 수 — 비정상 파일에 의한 메모리 폭주 방지 */\nconst MAX_RECORDS = 500_000\n\nexport function readRecords(data: Buffer): HwpRecord[] {\n const records: HwpRecord[] = []\n let offset = 0\n\n while (offset + 4 <= data.length && records.length < MAX_RECORDS) {\n const header = data.readUInt32LE(offset)\n offset += 4\n\n const tagId = header & 0x3ff\n const level = (header >> 10) & 0x3ff\n let size = (header >> 20) & 0xfff\n\n // 확장 크기\n if (size === 0xfff) {\n if (offset + 4 > data.length) break\n size = data.readUInt32LE(offset)\n offset += 4\n }\n\n if (offset + size > data.length) break\n records.push({ tagId, level, size, data: data.subarray(offset, offset + size) })\n offset += size\n }\n\n return records\n}\n\n// ─── 스트림 압축 해제 ────────────────────────────────\n\n/** 압축 해제 최대 크기 (100MB) — decompression bomb 방지 */\nconst MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024\n\nexport function decompressStream(data: Buffer): Buffer {\n const opts = { 
maxOutputLength: MAX_DECOMPRESS_SIZE }\n if (data.length >= 2 && data[0] === 0x78) {\n try { return inflateSync(data, opts) } catch { /* fallback to raw */ }\n }\n return inflateRawSync(data, opts)\n}\n\n// ─── FileHeader 파싱 ─────────────────────────────────\n\nexport function parseFileHeader(data: Buffer): HwpFileHeader {\n if (data.length < 40) throw new Error(\"FileHeader가 너무 짧습니다 (최소 40바이트)\")\n const sig = data.subarray(0, 32).toString(\"utf8\").replace(/\\0+$/, \"\")\n return {\n signature: sig,\n versionMajor: data[35],\n flags: data.readUInt32LE(36),\n }\n}\n\n// ─── UTF-16LE 텍스트 추출 (21가지 제어문자 처리) ─────\n\nexport function extractText(data: Buffer): string {\n let result = \"\"\n let i = 0\n\n while (i + 1 < data.length) {\n const ch = data.readUInt16LE(i)\n i += 2\n\n switch (ch) {\n case CHAR_LINE: result += \"\\n\"; break\n case CHAR_PARA: break\n case CHAR_TAB: result += \"\\t\"; break\n case CHAR_HYPHEN: result += \"-\"; break\n case CHAR_NBSP: case CHAR_FIXED_NBSP: result += \" \"; break\n default:\n if (ch >= 0x0001 && ch <= 0x001f) {\n const isExt = (ch >= 1 && ch <= 3) || (ch >= 10 && ch <= 18) || (ch >= 21 && ch <= 23)\n const isInline = (ch >= 4 && ch <= 9) || (ch >= 19 && ch <= 20)\n if ((isExt || isInline) && i + 14 <= data.length) i += 14\n } else if (ch >= 0x0020) {\n // UTF-16 surrogate pair 처리 (BMP 외 문자: 이모지, CJK 확장 등)\n if (ch >= 0xd800 && ch <= 0xdbff && i + 1 < data.length) {\n const lo = data.readUInt16LE(i)\n if (lo >= 0xdc00 && lo <= 0xdfff) {\n i += 2\n const codePoint = ((ch - 0xd800) << 10) + (lo - 0xdc00) + 0x10000\n result += String.fromCodePoint(codePoint)\n break\n }\n }\n result += String.fromCharCode(ch)\n }\n break\n }\n }\n\n return result\n}\n","/** HWP 5.x 바이너리 파서 — OLE2 컨테이너 → 섹션 → Markdown */\n\nimport {\n readRecords, decompressStream, parseFileHeader, extractText,\n TAG_PARA_HEADER, TAG_PARA_TEXT, TAG_CTRL_HEADER, TAG_LIST_HEADER, TAG_TABLE,\n FLAG_COMPRESSED, FLAG_ENCRYPTED, FLAG_DRM,\n type HwpRecord,\n} from 
\"./record.js\"\nimport { buildTable, blocksToMarkdown, MAX_COLS, MAX_ROWS } from \"../table/builder.js\"\nimport type { CellContext, IRBlock } from \"../types.js\"\n\nimport { createRequire } from \"module\"\nconst require = createRequire(import.meta.url)\nconst CFB: CfbModule = require(\"cfb\")\n\ninterface CfbEntry { name?: string; content?: Buffer | Uint8Array }\ninterface CfbContainer { FileIndex?: CfbEntry[] }\ninterface CfbModule {\n parse(data: Buffer): CfbContainer\n find(cfb: CfbContainer, path: string): CfbEntry | null\n}\n\n/** 최대 섹션 수 — 비정상 파일에 의한 무한 루프 방지 */\nconst MAX_SECTIONS = 100\n/** 누적 압축 해제 최대 크기 (100MB) */\nconst MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024\n\nexport function parseHwp5Document(buffer: Buffer): string {\n const cfb = CFB.parse(buffer)\n\n const headerEntry = CFB.find(cfb, \"/FileHeader\")\n if (!headerEntry?.content) throw new Error(\"FileHeader 스트림 없음\")\n const header = parseFileHeader(Buffer.from(headerEntry.content))\n if (header.signature !== \"HWP Document File\") throw new Error(\"HWP 시그니처 불일치\")\n if (header.flags & FLAG_ENCRYPTED) throw new Error(\"암호화된 HWP는 지원하지 않습니다\")\n if (header.flags & FLAG_DRM) throw new Error(\"DRM 보호된 HWP는 지원하지 않습니다\")\n const compressed = (header.flags & FLAG_COMPRESSED) !== 0\n\n const sections = findSections(cfb)\n if (sections.length === 0) throw new Error(\"섹션 스트림을 찾을 수 없습니다\")\n\n const blocks: IRBlock[] = []\n let totalDecompressed = 0\n for (const sectionData of sections) {\n const data = compressed ? 
decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData)\n totalDecompressed += data.length\n if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new Error(\"총 압축 해제 크기 초과 (decompression bomb 의심)\")\n const records = readRecords(data)\n blocks.push(...parseSection(records))\n }\n\n return blocksToMarkdown(blocks)\n}\n\nfunction findSections(cfb: CfbContainer): Buffer[] {\n const sections: Array<{ idx: number; content: Buffer }> = []\n\n for (let i = 0; i < MAX_SECTIONS; i++) {\n const entry = CFB.find(cfb, `/BodyText/Section${i}`)\n if (!entry?.content) break\n sections.push({ idx: i, content: Buffer.from(entry.content) })\n }\n\n if (sections.length === 0 && cfb.FileIndex) {\n for (const entry of cfb.FileIndex) {\n if (sections.length >= MAX_SECTIONS) break\n if (entry.name?.startsWith(\"Section\") && entry.content) {\n const idx = parseInt(entry.name.replace(\"Section\", \"\"), 10) || 0\n sections.push({ idx, content: Buffer.from(entry.content) })\n }\n }\n }\n\n return sections.sort((a, b) => a.idx - b.idx).map(s => s.content)\n}\n\nfunction parseSection(records: HwpRecord[]): IRBlock[] {\n const blocks: IRBlock[] = []\n let i = 0\n\n while (i < records.length) {\n const rec = records[i]\n\n if (rec.tagId === TAG_PARA_HEADER && rec.level === 0) {\n const { paragraph, tables, nextIdx } = parseParagraphWithTables(records, i)\n if (paragraph) blocks.push({ type: \"paragraph\", text: paragraph })\n for (const t of tables) blocks.push({ type: \"table\", table: t })\n i = nextIdx\n continue\n }\n\n if (rec.tagId === TAG_CTRL_HEADER && rec.level <= 1 && rec.data.length >= 4) {\n const ctrlId = rec.data.subarray(0, 4).toString(\"ascii\")\n if (ctrlId === \" lbt\" || ctrlId === \"tbl \") {\n const { table, nextIdx } = parseTableBlock(records, i)\n if (table) blocks.push({ type: \"table\", table })\n i = nextIdx\n continue\n }\n }\n\n i++\n }\n\n return blocks\n}\n\nfunction parseParagraphWithTables(records: HwpRecord[], startIdx: number) {\n const startLevel 
= records[startIdx].level\n let text = \"\"\n const tables: ReturnType<typeof buildTable>[] = []\n let i = startIdx + 1\n\n while (i < records.length) {\n const rec = records[i]\n if (rec.tagId === TAG_PARA_HEADER && rec.level <= startLevel) break\n\n if (rec.tagId === TAG_PARA_TEXT) {\n text = extractText(rec.data)\n }\n\n if (rec.tagId === TAG_CTRL_HEADER && rec.data.length >= 4) {\n const ctrlId = rec.data.subarray(0, 4).toString(\"ascii\")\n if (ctrlId === \" lbt\" || ctrlId === \"tbl \") {\n const { table, nextIdx } = parseTableBlock(records, i)\n if (table) tables.push(table)\n i = nextIdx\n continue\n }\n }\n i++\n }\n\n const trimmed = text.trim()\n return { paragraph: trimmed || null, tables, nextIdx: i }\n}\n\nfunction parseTableBlock(records: HwpRecord[], startIdx: number) {\n const tableLevel = records[startIdx].level\n let i = startIdx + 1\n let rows = 0, cols = 0\n const cells: CellContext[] = []\n\n while (i < records.length) {\n const rec = records[i]\n if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break\n if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break\n\n if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {\n rows = Math.min(rec.data.readUInt16LE(4), MAX_ROWS)\n cols = Math.min(rec.data.readUInt16LE(6), MAX_COLS)\n }\n\n if (rec.tagId === TAG_LIST_HEADER) {\n const { cell, nextIdx } = parseCellBlock(records, i, tableLevel)\n if (cell) cells.push(cell)\n i = nextIdx\n continue\n }\n i++\n }\n\n if (rows === 0 || cols === 0 || cells.length === 0) return { table: null, nextIdx: i }\n\n const cellRows = arrangeCells(rows, cols, cells)\n return { table: buildTable(cellRows), nextIdx: i }\n}\n\nfunction parseCellBlock(records: HwpRecord[], startIdx: number, tableLevel: number) {\n const rec = records[startIdx]\n const cellLevel = rec.level\n const texts: string[] = []\n\n // LIST_HEADER에서 셀 병합 정보 추출\n // HWP5 셀 LIST_HEADER 구조: paraCount(u16) + flags(u32) + colAddr(u16) + rowAddr(u16) + colSpan(u16) + rowSpan(u16)\n 
let colSpan = 1\n let rowSpan = 1\n if (rec.data.length >= 14) {\n const cs = rec.data.readUInt16LE(10)\n const rs = rec.data.readUInt16LE(12)\n if (cs > 0) colSpan = Math.min(cs, MAX_COLS)\n if (rs > 0) rowSpan = Math.min(rs, MAX_ROWS)\n }\n\n let i = startIdx + 1\n\n while (i < records.length) {\n const r = records[i]\n if (r.tagId === TAG_LIST_HEADER && r.level <= cellLevel) break\n if (r.level <= tableLevel && (r.tagId === TAG_PARA_HEADER || r.tagId === TAG_CTRL_HEADER)) break\n\n if (r.tagId === TAG_PARA_TEXT) {\n const t = extractText(r.data).trim()\n if (t) texts.push(t)\n }\n i++\n }\n\n return { cell: { text: texts.join(\"\\n\"), colSpan, rowSpan } as CellContext, nextIdx: i }\n}\n\nfunction arrangeCells(rows: number, cols: number, cells: CellContext[]): CellContext[][] {\n const grid: (CellContext | null)[][] = Array.from({ length: rows }, () => Array(cols).fill(null))\n let cellIdx = 0\n\n for (let r = 0; r < rows && cellIdx < cells.length; r++) {\n for (let c = 0; c < cols && cellIdx < cells.length; c++) {\n if (grid[r][c] !== null) continue\n const cell = cells[cellIdx++]\n grid[r][c] = cell\n\n for (let dr = 0; dr < cell.rowSpan; dr++) {\n for (let dc = 0; dc < cell.colSpan; dc++) {\n if (dr === 0 && dc === 0) continue\n if (r + dr < rows && c + dc < cols)\n grid[r + dr][c + dc] = { text: \"\", colSpan: 1, rowSpan: 1 }\n }\n }\n }\n }\n\n return grid.map(row => row.map(c => c || { text: \"\", colSpan: 1, rowSpan: 1 }))\n}\n","/** PDF 텍스트 추출 (pdfjs-dist 기반 서버사이드 파싱) */\n\nimport type { ParseResult } from \"../types.js\"\n\n/** 최대 처리 페이지 수 — OOM 방지 */\nconst MAX_PAGES = 5000\n/** 누적 텍스트 최대 크기 (100MB) — 메모리 폭주 방지 */\nconst MAX_TOTAL_TEXT = 100 * 1024 * 1024\n\nimport { createRequire } from \"module\"\nimport { pathToFileURL } from \"url\"\n\n// pdfjs-dist는 external로 빌드됨 — 설치 안 되어 있으면 런타임에 잡힘\ninterface PdfjsModule {\n getDocument: (opts: Record<string, unknown>) => { promise: Promise<PdfjsDocument> }\n GlobalWorkerOptions: { workerSrc: string 
}\n}\ninterface PdfjsDocument {\n numPages: number\n getPage: (n: number) => Promise<PdfjsPage>\n destroy: () => Promise<void>\n}\ninterface PdfjsPage {\n getTextContent: () => Promise<{ items: PdfjsTextItem[] }>\n}\ninterface PdfjsTextItem {\n str: string\n transform: number[]\n width: number\n height: number\n}\n\nlet pdfjsModule: PdfjsModule | null = null\n\nasync function loadPdfjs(): Promise<PdfjsModule | null> {\n if (pdfjsModule) return pdfjsModule\n try {\n const mod = await import(\"pdfjs-dist/legacy/build/pdf.mjs\") as unknown as PdfjsModule\n // 워커 경로를 file:// URL로 설정 (Node.js ESM 환경 필수)\n const req = createRequire(import.meta.url)\n const workerPath = req.resolve(\"pdfjs-dist/legacy/build/pdf.worker.mjs\")\n mod.GlobalWorkerOptions.workerSrc = pathToFileURL(workerPath).href\n pdfjsModule = mod\n return mod\n } catch (err) {\n // import 실패 원인을 구분하여 반환\n const msg = err instanceof Error ? err.message : String(err)\n if (msg.includes(\"Cannot find\") || msg.includes(\"MODULE_NOT_FOUND\")) {\n return null // 미설치\n }\n throw new Error(`pdfjs-dist 로딩 실패: ${msg}`)\n }\n}\n\nexport async function parsePdfDocument(buffer: ArrayBuffer): Promise<ParseResult> {\n const pdfjs = await loadPdfjs()\n if (!pdfjs) {\n return {\n success: false,\n fileType: \"pdf\",\n pageCount: 0,\n error: \"pdfjs-dist가 설치되지 않았습니다. 
npm install pdfjs-dist\",\n }\n }\n\n const data = new Uint8Array(buffer)\n const doc = await pdfjs.getDocument({\n data,\n useSystemFonts: true,\n disableFontFace: true,\n isEvalSupported: false,\n }).promise\n\n try {\n const pageCount = doc.numPages\n if (pageCount === 0) {\n return { success: false, fileType: \"pdf\", pageCount: 0, error: \"PDF에 페이지가 없습니다.\" }\n }\n\n const pageTexts: string[] = []\n let totalChars = 0\n let totalTextBytes = 0\n const effectivePageCount = Math.min(pageCount, MAX_PAGES)\n\n for (let i = 1; i <= effectivePageCount; i++) {\n const page = await doc.getPage(i)\n const textContent = await page.getTextContent()\n const lines = groupTextItemsByLine(textContent.items)\n const pageText = lines.join(\"\\n\")\n totalChars += pageText.replace(/\\s/g, \"\").length\n totalTextBytes += pageText.length * 2\n if (totalTextBytes > MAX_TOTAL_TEXT) throw new Error(`텍스트 추출 크기 초과 (${MAX_TOTAL_TEXT / 1024 / 1024}MB 제한)`)\n pageTexts.push(pageText)\n }\n\n const avgCharsPerPage = totalChars / effectivePageCount\n if (avgCharsPerPage < 10) {\n return {\n success: false,\n fileType: \"pdf\",\n pageCount,\n isImageBased: true,\n error: `이미지 기반 PDF로 추정됩니다 (${pageCount}페이지, 추출 텍스트 ${totalChars}자).`,\n }\n }\n\n let markdown = \"\"\n for (let i = 0; i < pageTexts.length; i++) {\n const cleaned = cleanPdfText(pageTexts[i])\n if (cleaned.trim()) {\n if (i > 0 && markdown) markdown += \"\\n\\n\"\n markdown += cleaned\n }\n }\n\n markdown = reconstructTables(markdown)\n\n const truncated = pageCount > MAX_PAGES\n return { success: true, fileType: \"pdf\", markdown, pageCount: effectivePageCount, isImageBased: false, ...(truncated && { warning: `PDF가 ${pageCount}페이지이지만 ${MAX_PAGES}페이지까지만 처리했습니다` }) }\n } finally {\n await doc.destroy().catch(() => {})\n }\n}\n\n// ─── 텍스트 아이템 → 행 그룹핑 ──────────────────────\n\nfunction groupTextItemsByLine(items: PdfjsTextItem[]): string[] {\n if (items.length === 0) return []\n\n const textItems = items.filter(item => typeof 
item.str === \"string\" && item.str.trim() !== \"\")\n if (textItems.length === 0) return []\n\n textItems.sort((a, b) => {\n const yDiff = b.transform[5] - a.transform[5]\n if (Math.abs(yDiff) < 2) return a.transform[4] - b.transform[4]\n return yDiff\n })\n\n const lines: string[] = []\n let currentY = textItems[0].transform[5]\n let currentLine: { text: string; x: number; width: number }[] = []\n\n for (const item of textItems) {\n const y = item.transform[5]\n\n if (Math.abs(currentY - y) > Math.max(item.height * 0.5, 2)) {\n if (currentLine.length > 0) lines.push(mergeLineItems(currentLine))\n currentLine = []\n currentY = y\n }\n\n currentLine.push({ text: item.str, x: item.transform[4], width: item.width })\n }\n\n if (currentLine.length > 0) lines.push(mergeLineItems(currentLine))\n return lines\n}\n\nfunction mergeLineItems(items: { text: string; x: number; width: number }[]): string {\n if (items.length <= 1) return items[0]?.text || \"\"\n items.sort((a, b) => a.x - b.x)\n\n let result = items[0].text\n for (let i = 1; i < items.length; i++) {\n const gap = items[i].x - (items[i - 1].x + items[i - 1].width)\n if (gap > 15) result += \"\\t\"\n else if (gap > 3) result += \" \"\n result += items[i].text\n }\n return result\n}\n\nexport function cleanPdfText(text: string): string {\n return text\n .replace(/^[\\s]*[-–—]\\s*\\d+\\s*[-–—][\\s]*$/gm, \"\")\n .replace(/^\\s*\\d+\\s*\\/\\s*\\d+\\s*$/gm, \"\")\n .replace(/([가-힣·,\\-])\\n([가-힣(])/g, \"$1 $2\")\n .replace(/\\n{3,}/g, \"\\n\\n\")\n .trim()\n}\n\nfunction reconstructTables(text: string): string {\n const lines = text.split(\"\\n\")\n const result: string[] = []\n let tableBuffer: string[][] = []\n\n for (const line of lines) {\n if (line.includes(\"\\t\")) {\n tableBuffer.push(line.split(\"\\t\").map(c => c.trim()))\n } else {\n if (tableBuffer.length >= 2) result.push(formatAsMarkdownTable(tableBuffer))\n else if (tableBuffer.length === 1) result.push(tableBuffer[0].join(\" | \"))\n tableBuffer = 
[]\n result.push(line)\n }\n }\n\n if (tableBuffer.length >= 2) result.push(formatAsMarkdownTable(tableBuffer))\n else if (tableBuffer.length === 1) result.push(tableBuffer[0].join(\" | \"))\n\n return result.join(\"\\n\")\n}\n\nfunction formatAsMarkdownTable(rows: string[][]): string {\n const maxCols = Math.max(...rows.map(r => r.length))\n // defensive copy — 원본 배열 변경 방지\n const normalized = rows.map(r => {\n const copy = [...r]\n while (copy.length < maxCols) copy.push(\"\")\n return copy\n })\n\n const lines: string[] = []\n lines.push(\"| \" + normalized[0].join(\" | \") + \" |\")\n lines.push(\"| \" + normalized[0].map(() => \"---\").join(\" | \") + \" |\")\n for (let i = 1; i < normalized.length; i++) {\n lines.push(\"| \" + normalized[i].join(\" | \") + \" |\")\n }\n return lines.join(\"\\n\")\n}\n","/**\n * kordoc — 모두 파싱해버리겠다\n *\n * HWP, HWPX, PDF → Markdown 변환 통합 라이브러리\n */\n\nimport { detectFormat, isHwpxFile, isOldHwpFile, isPdfFile } from \"./detect.js\"\nimport { parseHwpxDocument } from \"./hwpx/parser.js\"\nimport { parseHwp5Document } from \"./hwp5/parser.js\"\nimport { parsePdfDocument } from \"./pdf/parser.js\"\nimport type { ParseResult } from \"./types.js\"\n\n// ─── 메인 API ────────────────────────────────────────\n\n/**\n * 파일 버퍼를 자동 감지하여 Markdown으로 변환\n *\n * @example\n * ```ts\n * import { parse } from \"kordoc\"\n * const result = await parse(buffer)\n * if (result.success) console.log(result.markdown)\n * ```\n */\nexport async function parse(buffer: ArrayBuffer): Promise<ParseResult> {\n if (!buffer || buffer.byteLength === 0) {\n return { success: false, fileType: \"unknown\", error: \"빈 버퍼이거나 유효하지 않은 입력입니다.\" }\n }\n const format = detectFormat(buffer)\n\n switch (format) {\n case \"hwpx\":\n return parseHwpx(buffer)\n case \"hwp\":\n return parseHwp(buffer)\n case \"pdf\":\n return parsePdf(buffer)\n default:\n return { success: false, fileType: \"unknown\", error: \"지원하지 않는 파일 형식입니다.\" }\n }\n}\n\n// ─── 포맷별 API 
──────────────────────────────────────\n\n/** HWPX 파일을 Markdown으로 변환 */\nexport async function parseHwpx(buffer: ArrayBuffer): Promise<ParseResult> {\n try {\n const markdown = await parseHwpxDocument(buffer)\n return { success: true, fileType: \"hwpx\", markdown }\n } catch (err) {\n return { success: false, fileType: \"hwpx\", error: err instanceof Error ? err.message : \"HWPX 파싱 실패\" }\n }\n}\n\n/** HWP 5.x 바이너리 파일을 Markdown으로 변환 */\nexport async function parseHwp(buffer: ArrayBuffer): Promise<ParseResult> {\n try {\n const markdown = parseHwp5Document(Buffer.from(buffer))\n return { success: true, fileType: \"hwp\", markdown }\n } catch (err) {\n return { success: false, fileType: \"hwp\", error: err instanceof Error ? err.message : \"HWP 파싱 실패\" }\n }\n}\n\n/** PDF 파일에서 텍스트를 추출하여 Markdown으로 변환 */\nexport async function parsePdf(buffer: ArrayBuffer): Promise<ParseResult> {\n try {\n return await parsePdfDocument(buffer)\n } catch (err) {\n return { success: false, fileType: \"pdf\", error: err instanceof Error ? 
err.message : \"PDF 파싱 실패\" }\n }\n}\n\n// ─── Re-exports ──────────────────────────────────────\n\nexport { detectFormat, isHwpxFile, isOldHwpFile, isPdfFile } from \"./detect.js\"\nexport type { ParseResult, ParseSuccess, ParseFailure, FileType, IRBlock, IRTable, IRCell, CellContext } from \"./types.js\"\nexport { buildTable, blocksToMarkdown, convertTableToText } from \"./table/builder.js\"\nexport { VERSION } from \"./utils.js\"\n"],"mappings":";;;AAKA,SAAS,WAAW,QAAiC;AACnD,SAAO,IAAI,WAAW,QAAQ,GAAG,KAAK,IAAI,GAAG,OAAO,UAAU,CAAC;AACjE;AAGO,SAAS,WAAW,QAA8B;AACvD,QAAM,IAAI,WAAW,MAAM;AAC3B,SAAO,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM,KAAQ,EAAE,CAAC,MAAM;AACrE;AAGO,SAAS,aAAa,QAA8B;AACzD,QAAM,IAAI,WAAW,MAAM;AAC3B,SAAO,EAAE,CAAC,MAAM,OAAQ,EAAE,CAAC,MAAM,OAAQ,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM;AACrE;AAGO,SAAS,UAAU,QAA8B;AACtD,QAAM,IAAI,WAAW,MAAM;AAC3B,SAAO,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM;AACrE;AAGO,SAAS,aAAa,QAA+B;AAC1D,MAAI,OAAO,aAAa,EAAG,QAAO;AAClC,MAAI,WAAW,MAAM,EAAG,QAAO;AAC/B,MAAI,aAAa,MAAM,EAAG,QAAO;AACjC,MAAI,UAAU,MAAM,EAAG,QAAO;AAC9B,SAAO;AACT;;;AC9BO,IAAM,UAAkB,OAA4C,UAAqB;AAMzF,SAAS,cAAc,KAA0B;AACtD,SAAO,IAAI,OAAO,MAAM,IAAI,YAAY,IAAI,aAAa,IAAI,UAAU;AACzE;;;ACNA,OAAO,WAAW;AAClB,SAAS,sBAAsB;AAC/B,SAAS,iBAAiB;;;ACHnB,IAAM,WAAW;AAEjB,IAAM,WAAW;AAEjB,SAAS,WAAW,MAAgC;AACzD,MAAI,KAAK,SAAS,SAAU,QAAO,KAAK,MAAM,GAAG,QAAQ;AACzD,QAAM,UAAU,KAAK;AAGrB,QAAM,eAAe,oBAAI,IAAY;AACrC,MAAI,UAAU;AAEd,WAAS,SAAS,GAAG,SAAS,SAAS,UAAU;AAC/C,QAAI,SAAS;AACb,eAAW,QAAQ,KAAK,MAAM,GAAG;AAC/B,aAAO,SAAS,YAAY,aAAa,IAAI,SAAS,WAAW,MAAM,EAAG;AAC1E,UAAI,UAAU,SAAU;AAExB,eAAS,IAAI,QAAQ,IAAI,KAAK,IAAI,SAAS,KAAK,SAAS,OAAO,GAAG,KAAK;AACtE,iBAAS,IAAI,QAAQ,IAAI,KAAK,IAAI,SAAS,KAAK,SAAS,QAAQ,GAAG,KAAK;AACvE,uBAAa,IAAI,IAAI,WAAW,CAAC;AAAA,QACnC;AAAA,MACF;AACA,gBAAU,KAAK;AACf,UAAI,SAAS,QAAS,WAAU;AAAA,IAClC;AAAA,EACF;AACA,eAAa,MAAM;AAEnB,MAAI,YAAY,EAAG,QAAO,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC,GAAG,WAAW,MAAM;AAG1E,QAAM,OAAmB,MAAM;AAAA,IAAK,EAAE,QAAQ,QAAQ;
AAAA,IAAG,MACvD,MAAM,KAAK,EAAE,QAAQ,QAAQ,GAAG,OAAO,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE,EAAE;AAAA,EAC9E;AACA,QAAM,WAAwB,MAAM,KAAK,EAAE,QAAQ,QAAQ,GAAG,MAAM,MAAM,OAAO,EAAE,KAAK,KAAK,CAAC;AAE9F,WAAS,SAAS,GAAG,SAAS,SAAS,UAAU;AAC/C,QAAI,SAAS;AACb,QAAI,UAAU;AAEd,WAAO,SAAS,WAAW,UAAU,KAAK,MAAM,EAAE,QAAQ;AACxD,aAAO,SAAS,WAAW,SAAS,MAAM,EAAE,MAAM,EAAG;AACrD,UAAI,UAAU,QAAS;AAEvB,YAAM,OAAO,KAAK,MAAM,EAAE,OAAO;AACjC,WAAK,MAAM,EAAE,MAAM,IAAI;AAAA,QACrB,MAAM,KAAK,KAAK,KAAK;AAAA,QACrB,SAAS,KAAK;AAAA,QACd,SAAS,KAAK;AAAA,MAChB;AAEA,eAAS,IAAI,QAAQ,IAAI,KAAK,IAAI,SAAS,KAAK,SAAS,OAAO,GAAG,KAAK;AACtE,iBAAS,IAAI,QAAQ,IAAI,KAAK,IAAI,SAAS,KAAK,SAAS,OAAO,GAAG,KAAK;AACtE,mBAAS,CAAC,EAAE,CAAC,IAAI;AAAA,QACnB;AAAA,MACF;AAEA,gBAAU,KAAK;AACf;AAAA,IACF;AAAA,EACF;AAEA,SAAO,EAAE,MAAM,SAAS,MAAM,SAAS,OAAO,MAAM,WAAW,UAAU,EAAE;AAC7E;AAEO,SAAS,mBAAmB,MAA+B;AAChE,SAAO,KACJ;AAAA,IAAI,SACH,IACG,IAAI,OAAK,EAAE,KAAK,KAAK,EAAE,QAAQ,OAAO,GAAG,CAAC,EAC1C,OAAO,OAAO,EACd,KAAK,KAAK;AAAA,EACf,EACC,OAAO,OAAO,EACd,KAAK,IAAI;AACd;AAEO,SAAS,iBAAiB,QAA2B;AAC1D,QAAM,QAAkB,CAAC;AAEzB,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,UAAM,QAAQ,OAAO,CAAC;AAEtB,QAAI,MAAM,SAAS,eAAe,MAAM,MAAM;AAC5C,YAAM,OAAO,MAAM;AAEnB,UAAI,cAAc,KAAK,IAAI,GAAG;AAC5B,cAAM,YAAY,OAAO,IAAI,CAAC;AAC9B,YAAI,WAAW,SAAS,eAAe,UAAU,QAAQ,SAAS,KAAK,UAAU,IAAI,GAAG;AACtF,gBAAM,KAAK,IAAI,MAAM,IAAI,IAAI,UAAU,IAAI,IAAI,EAAE;AACjD;AAAA,QACF,OAAO;AACL,gBAAM,KAAK,IAAI,MAAM,IAAI,IAAI,EAAE;AAAA,QACjC;AACA;AAAA,MACF;AAEA,UAAI,sBAAsB,KAAK,IAAI,GAAG;AACpC,cAAM,KAAK,IAAI,IAAI,KAAK,EAAE;AAC1B;AAAA,MACF;AAEA,YAAM,KAAK,IAAI;AAAA,IACjB,WAAW,MAAM,SAAS,WAAW,MAAM,OAAO;AAChD,YAAM,KAAK,gBAAgB,MAAM,KAAK,CAAC;AAAA,IACzC;AAAA,EACF;AAEA,SAAO,MAAM,KAAK,IAAI,EAAE,KAAK;AAC/B;AAEA,SAAS,gBAAgB,OAAwB;AAC/C,MAAI,MAAM,SAAS,KAAK,MAAM,SAAS,EAAG,QAAO;AAEjD,QAAM,EAAE,OAAO,MAAM,SAAS,MAAM,QAAQ,IAAI;AAGhD,MAAI,YAAY,KAAK,YAAY,GAAG;AAClC,UAAM,UAAU,MAAM,CAAC,EAAE,CAAC,EAAE;AAC5B,WAAO,QACJ,MAAM,IAAI,EACV,IAAI,UAAQ;AACX,YAAM,UAAU,KAAK,KAAK;AAC1B,UAAI,CAAC,QAAS,QAAO;AACrB,UAAI,WAAW,KAAK,OAAO,EAAG,QAAO,KAAK,OAAO;AACj
D,UAAI,aAAa,KAAK,OAAO,EAAG,QAAO,KAAK,OAAO;AACnD,aAAO;AAAA,IACT,CAAC,EACA,OAAO,OAAO,EACd,KAAK,IAAI;AAAA,EACd;AAGA,QAAM,UAAsB,MAAM,KAAK,EAAE,QAAQ,QAAQ,GAAG,MAAM,MAAM,OAAO,EAAE,KAAK,EAAE,CAAC;AACzF,QAAM,OAAO,oBAAI,IAAY;AAE7B,WAAS,IAAI,GAAG,IAAI,SAAS,KAAK;AAChC,aAAS,IAAI,GAAG,IAAI,SAAS,KAAK;AAChC,UAAI,KAAK,IAAI,GAAG,CAAC,IAAI,CAAC,EAAE,EAAG;AAC3B,YAAM,OAAO,MAAM,CAAC,EAAE,CAAC;AACvB,cAAQ,CAAC,EAAE,CAAC,IAAI,KAAK,KAAK,QAAQ,OAAO,MAAM;AAE/C,eAAS,KAAK,GAAG,KAAK,KAAK,SAAS,MAAM;AACxC,iBAAS,KAAK,GAAG,KAAK,KAAK,SAAS,MAAM;AACxC,cAAI,OAAO,KAAK,OAAO,EAAG;AAC1B,cAAI,IAAI,KAAK,WAAW,IAAI,KAAK,SAAS;AACxC,iBAAK,IAAI,GAAG,IAAI,EAAE,IAAI,IAAI,EAAE,EAAE;AAAA,UAChC;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,QAAM,aAAyB,CAAC;AAChC,aAAW,OAAO,SAAS;AACzB,UAAM,qBAAqB,IAAI,MAAM,UAAQ,SAAS,EAAE;AACxD,QAAI,CAAC,mBAAoB,YAAW,KAAK,GAAG;AAAA,EAC9C;AAEA,MAAI,WAAW,WAAW,EAAG,QAAO;AAEpC,QAAM,KAAe,CAAC;AACtB,KAAG,KAAK,OAAO,WAAW,CAAC,EAAE,KAAK,KAAK,IAAI,IAAI;AAC/C,KAAG,KAAK,OAAO,WAAW,CAAC,EAAE,IAAI,MAAM,KAAK,EAAE,KAAK,KAAK,IAAI,IAAI;AAChE,WAAS,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;AAC1C,OAAG,KAAK,OAAO,WAAW,CAAC,EAAE,KAAK,KAAK,IAAI,IAAI;AAAA,EACjD;AACA,SAAO,GAAG,KAAK,IAAI;AACrB;;;ADlKA,IAAM,sBAAsB,MAAM,OAAO;AAEzC,IAAM,kBAAkB;AAGxB,SAAS,UAAU,KAAa,KAAqB;AACnD,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,KAAK,GAAG,CAAC;AACvC;AAKA,SAAS,SAAS,KAAqB;AACrC,SAAO,IAAI,QAAQ,0CAA0C,EAAE;AACjE;AAEA,eAAsB,kBAAkB,QAAsC;AAC5E,MAAI;AAEJ,MAAI;AACF,UAAM,MAAM,MAAM,UAAU,MAAM;AAAA,EACpC,QAAQ;AAEN,WAAO,qBAAqB,MAAM;AAAA,EACpC;AAGA,MAAI,gBAAgB;AACpB,MAAI,QAAQ,CAAC,GAAG,SAAS;AAAE,qBAAkB,KAA8D,OAAO,oBAAoB;AAAA,EAAE,CAAC;AACzI,MAAI,gBAAgB,oBAAqB,OAAM,IAAI,MAAM,0EAA6B;AAEtF,QAAM,eAAe,MAAM,oBAAoB,GAAG;AAClD,MAAI,aAAa,WAAW,EAAG,OAAM,IAAI,MAAM,+FAAyB;AAExE,MAAI,oBAAoB;AACxB,QAAM,SAAoB,CAAC;AAC3B,aAAW,QAAQ,cAAc;AAC/B,UAAM,OAAO,IAAI,KAAK,IAAI;AAC1B,QAAI,CAAC,KAAM;AACX,UAAM,MAAM,MAAM,KAAK,MAAM,MAAM;AACnC,yBAAqB,IAAI,SAAS;AAClC,QAAI,oBAAoB,oBAAqB,OAAM,IAAI,MAAM,iFAA+B;AAC5F,WAAO,KAAK,GAAG,gBAAgB,GAAG,CAAC;AAAA,EACrC;AACA,SAAO,iBAAiB,MAAM;AAChC;AAIA,SAAS,qBAAqB,QAA6B;
AACzD,QAAM,OAAO,IAAI,WAAW,MAAM;AAClC,QAAM,OAAO,IAAI,SAAS,MAAM;AAChC,MAAI,MAAM;AACV,QAAM,QAAkB,CAAC;AACzB,MAAI,oBAAoB;AACxB,MAAI,aAAa;AAEjB,SAAO,MAAM,KAAK,SAAS,IAAI;AAE7B,QAAI,KAAK,GAAG,MAAM,MAAQ,KAAK,MAAM,CAAC,MAAM,MAAQ,KAAK,MAAM,CAAC,MAAM,KAAQ,KAAK,MAAM,CAAC,MAAM,EAAM;AAEtG,QAAI,EAAE,aAAa,gBAAiB;AAEpC,UAAM,SAAS,KAAK,UAAU,MAAM,GAAG,IAAI;AAC3C,UAAM,WAAW,KAAK,UAAU,MAAM,IAAI,IAAI;AAC9C,UAAM,UAAU,KAAK,UAAU,MAAM,IAAI,IAAI;AAC7C,UAAM,WAAW,KAAK,UAAU,MAAM,IAAI,IAAI;AAG9C,QAAI,UAAU,QAAQ,WAAW,OAAO;AAAE,aAAO,KAAK,UAAU;AAAU;AAAA,IAAS;AAEnF,UAAM,YAAY,MAAM,KAAK,UAAU;AAEvC,QAAI,YAAY,WAAW,KAAK,OAAQ;AACxC,QAAI,aAAa,KAAK,WAAW,GAAG;AAAE,YAAM;AAAW;AAAA,IAAS;AAEhE,UAAM,YAAY,KAAK,MAAM,MAAM,IAAI,MAAM,KAAK,OAAO;AACzD,UAAM,OAAO,IAAI,YAAY,EAAE,OAAO,SAAS;AAG/C,UAAM,iBAAiB,KAAK,QAAQ,OAAO,GAAG;AAC9C,QAAI,eAAe,SAAS,IAAI,KAAK,eAAe,WAAW,GAAG,KAAK,aAAa,KAAK,cAAc,GAAG;AAAE,YAAM,YAAY;AAAU;AAAA,IAAS;AACjJ,UAAM,WAAW,KAAK,MAAM,WAAW,YAAY,QAAQ;AAC3D,UAAM,YAAY;AAElB,QAAI,CAAC,KAAK,YAAY,EAAE,SAAS,SAAS,KAAK,CAAC,KAAK,SAAS,MAAM,EAAG;AAEvE,QAAI;AACF,UAAI;AACJ,UAAI,WAAW,GAAG;AAChB,kBAAU,IAAI,YAAY,EAAE,OAAO,QAAQ;AAAA,MAC7C,WAAW,WAAW,GAAG;AACvB,cAAM,eAAe,eAAe,OAAO,KAAK,QAAQ,GAAG,EAAE,iBAAiB,oBAAoB,CAAC;AACnG,kBAAU,IAAI,YAAY,EAAE,OAAO,YAAY;AAAA,MACjD,OAAO;AACL;AAAA,MACF;AACA,2BAAqB,QAAQ,SAAS;AACtC,UAAI,oBAAoB,oBAAqB,OAAM,IAAI,MAAM,qDAAa;AAC1E,YAAM,cAAc,iBAAiB,gBAAgB,OAAO,CAAC;AAC7D,UAAI,YAAa,OAAM,KAAK,WAAW;AAAA,IACzC,QAAQ;AACN;AAAA,IACF;AAAA,EACF;AAEA,MAAI,MAAM,WAAW,EAAG,OAAM,IAAI,MAAM,8HAA+B;AACvE,SAAO,MAAM,KAAK,MAAM;AAC1B;AAIA,eAAe,oBAAoB,KAA+B;AAChE,QAAM,gBAAgB,CAAC,wBAAwB,aAAa;AAC5D,aAAW,MAAM,eAAe;AAC9B,UAAM,UAAU,GAAG,YAAY;AAC/B,UAAM,OAAO,IAAI,KAAK,EAAE,KAAK,OAAO,OAAO,IAAI,KAAK,EAAE,KAAK,OAAK,EAAE,KAAK,YAAY,MAAM,OAAO,KAAK;AACrG,QAAI,CAAC,KAAM;AACX,UAAM,MAAM,MAAM,KAAK,MAAM,MAAM;AACnC,UAAM,QAAQ,8BAA8B,GAAG;AAC/C,QAAI,MAAM,SAAS,EAAG,QAAO;AAAA,EAC/B;AAGA,QAAM,eAAe,IAAI,KAAK,qBAAqB;AACnD,SAAO,aAAa,IAAI,OAAK,EAAE,IAAI,EAAE,KAAK;AAC5C;AAEA,SAAS,8BAA8B,KAAuB;AAC5D,QAAM,SAAS,IAAI,UAAU;AAC7B,QAAM,MAAM,OAAO,gBAAgB,SAAS,G
AAG,GAAG,UAAU;AAC5D,QAAM,QAAQ,IAAI,qBAAqB,UAAU;AACjD,QAAM,QAAQ,IAAI,qBAAqB,aAAa;AAEpD,QAAM,cAAc,CAAC,OAAe,MAAM,KAAK,EAAE,KAAK,GAAG,YAAY,EAAE,SAAS,SAAS;AACzF,QAAM,WAAW,oBAAI,IAAoB;AACzC,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,UAAM,OAAO,MAAM,CAAC;AACpB,UAAM,KAAK,KAAK,aAAa,IAAI,KAAK;AACtC,QAAI,OAAO,KAAK,aAAa,MAAM,KAAK;AACxC,UAAM,YAAY,KAAK,aAAa,YAAY,KAAK;AACrD,QAAI,CAAC,YAAY,EAAE,KAAK,CAAC,UAAU,SAAS,KAAK,EAAG;AACpD,QAAI,CAAC,KAAK,WAAW,GAAG,KAAK,CAAC,KAAK,WAAW,WAAW,KAAK,YAAY,EAAE;AAC1E,aAAO,cAAc;AACvB,aAAS,IAAI,IAAI,IAAI;AAAA,EACvB;AAEA,MAAI,MAAM,SAAS,GAAG;AACpB,UAAM,UAAoB,CAAC;AAC3B,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,YAAM,OAAO,SAAS,IAAI,MAAM,CAAC,EAAE,aAAa,OAAO,KAAK,EAAE;AAC9D,UAAI,KAAM,SAAQ,KAAK,IAAI;AAAA,IAC7B;AACA,QAAI,QAAQ,SAAS,EAAG,QAAO;AAAA,EACjC;AACA,SAAO,MAAM,KAAK,SAAS,QAAQ,CAAC,EACjC,OAAO,CAAC,CAAC,EAAE,MAAM,YAAY,EAAE,CAAC,EAChC,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,EACvC,IAAI,CAAC,CAAC,EAAE,IAAI,MAAM,IAAI;AAC3B;AAIA,SAAS,gBAAgB,KAAwB;AAC/C,QAAM,SAAS,IAAI,UAAU;AAC7B,QAAM,MAAM,OAAO,gBAAgB,SAAS,GAAG,GAAG,UAAU;AAC5D,MAAI,CAAC,IAAI,gBAAiB,QAAO,CAAC;AAElC,QAAM,SAAoB,CAAC;AAC3B,cAAY,IAAI,iBAAiB,QAAQ,MAAM,CAAC,CAAC;AACjD,SAAO;AACT;AAEA,SAAS,YACP,MAAY,QACZ,UAA6B,YACvB;AACN,QAAM,WAAW,KAAK;AACtB,MAAI,CAAC,SAAU;AAEf,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,KAAK,SAAS,CAAC;AACrB,QAAI,GAAG,aAAa,EAAG;AAEvB,UAAM,MAAM,GAAG,WAAW,GAAG,aAAa;AAC1C,UAAM,WAAW,IAAI,QAAQ,WAAW,EAAE;AAE1C,YAAQ,UAAU;AAAA,MAChB,KAAK,OAAO;AACV,YAAI,SAAU,YAAW,KAAK,QAAQ;AACtC,cAAM,WAAuB,EAAE,MAAM,CAAC,GAAG,YAAY,CAAC,GAAG,MAAM,KAAK;AACpE,oBAAY,IAAI,QAAQ,UAAU,UAAU;AAE5C,YAAI,SAAS,KAAK,SAAS,GAAG;AAC5B,cAAI,WAAW,SAAS,GAAG;AACzB,kBAAM,cAAc,WAAW,IAAI;AACnC,kBAAM,aAAa,mBAAmB,SAAS,IAAI;AACnD,gBAAI,YAAY,MAAM;AACpB,0BAAY,KAAK,SAAS,YAAY,KAAK,OAAO,OAAO,MAAM;AAAA,YACjE;AACA,uBAAW;AAAA,UACb,OAAO;AACL,mBAAO,KAAK,EAAE,MAAM,SAAS,OAAO,WAAW,SAAS,IAAI,EAAE,CAAC;AAC/D,uBAAW;AAAA,UACb;AAAA,QACF,OAAO;AACL,qBAAW,WAAW,SAAS,IAAI,WAAW,IAAI,IAAK;AAAA,QACzD;AACA;AAAA,MACF;AAAA,MAEA,KAAK;AACH,YAAI,UAAU;AA
CZ,mBAAS,aAAa,CAAC;AACvB,sBAAY,IAAI,QAAQ,UAAU,UAAU;AAC5C,cAAI,SAAS,WAAW,SAAS,EAAG,UAAS,KAAK,KAAK,SAAS,UAAU;AAC1E,mBAAS,aAAa,CAAC;AAAA,QACzB;AACA;AAAA,MAEF,KAAK;AACH,YAAI,UAAU;AACZ,mBAAS,OAAO,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE;AACnD,sBAAY,IAAI,QAAQ,UAAU,UAAU;AAC5C,cAAI,SAAS,MAAM;AACjB,qBAAS,WAAW,KAAK,SAAS,IAAI;AACtC,qBAAS,OAAO;AAAA,UAClB;AAAA,QACF;AACA;AAAA,MAEF,KAAK;AACH,YAAI,UAAU,MAAM;AAClB,gBAAM,KAAK,SAAS,GAAG,aAAa,SAAS,KAAK,KAAK,EAAE;AACzD,gBAAM,KAAK,SAAS,GAAG,aAAa,SAAS,KAAK,KAAK,EAAE;AACzD,mBAAS,KAAK,UAAU,UAAU,IAAI,QAAQ;AAC9C,mBAAS,KAAK,UAAU,UAAU,IAAI,QAAQ;AAAA,QAChD;AACA;AAAA,MAEF,KAAK,KAAK;AACR,cAAM,OAAO,qBAAqB,EAAE;AACpC,YAAI,MAAM;AACR,cAAI,UAAU,MAAM;AAClB,qBAAS,KAAK,SAAS,SAAS,KAAK,OAAO,OAAO,MAAM;AAAA,UAC3D,WAAW,CAAC,UAAU;AACpB,mBAAO,KAAK,EAAE,MAAM,aAAa,KAAK,CAAC;AAAA,UACzC;AAAA,QACF;AACA,oBAAY,IAAI,QAAQ,UAAU,UAAU;AAC5C;AAAA,MACF;AAAA,MAEA;AACE,oBAAY,IAAI,QAAQ,UAAU,UAAU;AAC5C;AAAA,IACJ;AAAA,EACF;AACF;AAEA,SAAS,qBAAqB,MAAoB;AAChD,MAAI,OAAO;AACX,QAAM,OAAO,CAAC,SAAe;AAC3B,UAAM,WAAW,KAAK;AACtB,QAAI,CAAC,SAAU;AACf,aAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,YAAM,QAAQ,SAAS,CAAC;AACxB,UAAI,MAAM,aAAa,GAAG;AAAE,gBAAQ,MAAM,eAAe;AAAI;AAAA,MAAS;AACtE,UAAI,MAAM,aAAa,EAAG;AAE1B,YAAM,OAAO,MAAM,WAAW,MAAM,aAAa,IAAI,QAAQ,WAAW,EAAE;AAC1E,cAAQ,KAAK;AAAA,QACX,KAAK;AAAK,kBAAQ,MAAM,eAAe;AAAI;AAAA,QAC3C,KAAK;AAAO,kBAAQ;AAAM;AAAA,QAC1B,KAAK;AACH,eAAK,MAAM,aAAa,MAAM,KAAK,YAAY,OAAQ,SAAQ;AAC/D;AAAA,QACF,KAAK;AAAA,QAAW,KAAK;AAAW,kBAAQ;AAAK;AAAA,QAC7C,KAAK;AAAO;AAAA;AAAA,QACZ;AAAS,eAAK,KAAK;AAAG;AAAA,MACxB;AAAA,IACF;AAAA,EACF;AACA,OAAK,IAAI;AACT,SAAO,KAAK,QAAQ,WAAW,GAAG,EAAE,KAAK;AAC3C;;;AExSA,SAAS,kBAAAA,iBAAgB,mBAAmB;AAIrC,IAAM,kBAAkB;AACxB,IAAM,gBAAgB;AACtB,IAAM,kBAAkB;AACxB,IAAM,kBAAkB;AACxB,IAAM,YAAY;AAIzB,IAAM,YAAY;AAClB,IAAM,YAAY;AAClB,IAAM,WAAW;AACjB,IAAM,cAAc;AACpB,IAAM,YAAY;AAClB,IAAM,kBAAkB;AAGjB,IAAM,kBAAkB,KAAK;AAC7B,IAAM,iBAAiB,KAAK;AAC5B,IAAM,WAAW,KAAK;AAoB7B,IAAM,cAAc;AAEb,SAAS,YAAY,MAA2B;AACrD,QAAM,UAAuB,CAAC;AAC9B,MAAI,SAAS;AAEb,SAAO,SAAS,KAAK,KAAK,UAAU,QAAQ,SAAS,aAAa;AAChE
,UAAM,SAAS,KAAK,aAAa,MAAM;AACvC,cAAU;AAEV,UAAM,QAAQ,SAAS;AACvB,UAAM,QAAS,UAAU,KAAM;AAC/B,QAAI,OAAQ,UAAU,KAAM;AAG5B,QAAI,SAAS,MAAO;AAClB,UAAI,SAAS,IAAI,KAAK,OAAQ;AAC9B,aAAO,KAAK,aAAa,MAAM;AAC/B,gBAAU;AAAA,IACZ;AAEA,QAAI,SAAS,OAAO,KAAK,OAAQ;AACjC,YAAQ,KAAK,EAAE,OAAO,OAAO,MAAM,MAAM,KAAK,SAAS,QAAQ,SAAS,IAAI,EAAE,CAAC;AAC/E,cAAU;AAAA,EACZ;AAEA,SAAO;AACT;AAKA,IAAMC,uBAAsB,MAAM,OAAO;AAElC,SAAS,iBAAiB,MAAsB;AACrD,QAAM,OAAO,EAAE,iBAAiBA,qBAAoB;AACpD,MAAI,KAAK,UAAU,KAAK,KAAK,CAAC,MAAM,KAAM;AACxC,QAAI;AAAE,aAAO,YAAY,MAAM,IAAI;AAAA,IAAE,QAAQ;AAAA,IAAwB;AAAA,EACvE;AACA,SAAOD,gBAAe,MAAM,IAAI;AAClC;AAIO,SAAS,gBAAgB,MAA6B;AAC3D,MAAI,KAAK,SAAS,GAAI,OAAM,IAAI,MAAM,4FAAgC;AACtE,QAAM,MAAM,KAAK,SAAS,GAAG,EAAE,EAAE,SAAS,MAAM,EAAE,QAAQ,QAAQ,EAAE;AACpE,SAAO;AAAA,IACL,WAAW;AAAA,IACX,cAAc,KAAK,EAAE;AAAA,IACrB,OAAO,KAAK,aAAa,EAAE;AAAA,EAC7B;AACF;AAIO,SAAS,YAAY,MAAsB;AAChD,MAAI,SAAS;AACb,MAAI,IAAI;AAER,SAAO,IAAI,IAAI,KAAK,QAAQ;AAC1B,UAAM,KAAK,KAAK,aAAa,CAAC;AAC9B,SAAK;AAEL,YAAQ,IAAI;AAAA,MACV,KAAK;AAAW,kBAAU;AAAM;AAAA,MAChC,KAAK;AAAW;AAAA,MAChB,KAAK;AAAU,kBAAU;AAAM;AAAA,MAC/B,KAAK;AAAa,kBAAU;AAAK;AAAA,MACjC,KAAK;AAAA,MAAW,KAAK;AAAiB,kBAAU;AAAK;AAAA,MACrD;AACE,YAAI,MAAM,KAAU,MAAM,IAAQ;AAChC,gBAAM,QAAS,MAAM,KAAK,MAAM,KAAO,MAAM,MAAM,MAAM,MAAQ,MAAM,MAAM,MAAM;AACnF,gBAAM,WAAY,MAAM,KAAK,MAAM,KAAO,MAAM,MAAM,MAAM;AAC5D,eAAK,SAAS,aAAa,IAAI,MAAM,KAAK,OAAQ,MAAK;AAAA,QACzD,WAAW,MAAM,IAAQ;AAEvB,cAAI,MAAM,SAAU,MAAM,SAAU,IAAI,IAAI,KAAK,QAAQ;AACvD,kBAAM,KAAK,KAAK,aAAa,CAAC;AAC9B,gBAAI,MAAM,SAAU,MAAM,OAAQ;AAChC,mBAAK;AACL,oBAAM,aAAc,KAAK,SAAW,OAAO,KAAK,SAAU;AAC1D,wBAAU,OAAO,cAAc,SAAS;AACxC;AAAA,YACF;AAAA,UACF;AACA,oBAAU,OAAO,aAAa,EAAE;AAAA,QAClC;AACA;AAAA,IACJ;AAAA,EACF;AAEA,SAAO;AACT;;;AC9HA,SAAS,qBAAqB;AAC9B,IAAME,WAAU,cAAc,YAAY,GAAG;AAC7C,IAAM,MAAiBA,SAAQ,KAAK;AAUpC,IAAM,eAAe;AAErB,IAAM,uBAAuB,MAAM,OAAO;AAEnC,SAAS,kBAAkB,QAAwB;AACxD,QAAM,MAAM,IAAI,MAAM,MAAM;AAE5B,QAAM,cAAc,IAAI,KAAK,KAAK,aAAa;AAC/C,MAAI,CAAC,aAAa,QAAS,OAAM,IAAI,MAAM,4CAAmB;AAC9D,QAAM,SAAS,gBAAgB,OAAO,KAAK,YAAY,OAAO,CAAC;AAC/D,MAAI,OAAO,cAAc
,oBAAqB,OAAM,IAAI,MAAM,iDAAc;AAC5E,MAAI,OAAO,QAAQ,eAAgB,OAAM,IAAI,MAAM,sFAAqB;AACxE,MAAI,OAAO,QAAQ,SAAU,OAAM,IAAI,MAAM,oFAAwB;AACrE,QAAM,cAAc,OAAO,QAAQ,qBAAqB;AAExD,QAAM,WAAW,aAAa,GAAG;AACjC,MAAI,SAAS,WAAW,EAAG,OAAM,IAAI,MAAM,oFAAmB;AAE9D,QAAM,SAAoB,CAAC;AAC3B,MAAI,oBAAoB;AACxB,aAAW,eAAe,UAAU;AAClC,UAAM,OAAO,aAAa,iBAAiB,OAAO,KAAK,WAAW,CAAC,IAAI,OAAO,KAAK,WAAW;AAC9F,yBAAqB,KAAK;AAC1B,QAAI,oBAAoB,qBAAsB,OAAM,IAAI,MAAM,8FAAuC;AACrG,UAAM,UAAU,YAAY,IAAI;AAChC,WAAO,KAAK,GAAG,aAAa,OAAO,CAAC;AAAA,EACtC;AAEA,SAAO,iBAAiB,MAAM;AAChC;AAEA,SAAS,aAAa,KAA6B;AACjD,QAAM,WAAoD,CAAC;AAE3D,WAAS,IAAI,GAAG,IAAI,cAAc,KAAK;AACrC,UAAM,QAAQ,IAAI,KAAK,KAAK,oBAAoB,CAAC,EAAE;AACnD,QAAI,CAAC,OAAO,QAAS;AACrB,aAAS,KAAK,EAAE,KAAK,GAAG,SAAS,OAAO,KAAK,MAAM,OAAO,EAAE,CAAC;AAAA,EAC/D;AAEA,MAAI,SAAS,WAAW,KAAK,IAAI,WAAW;AAC1C,eAAW,SAAS,IAAI,WAAW;AACjC,UAAI,SAAS,UAAU,aAAc;AACrC,UAAI,MAAM,MAAM,WAAW,SAAS,KAAK,MAAM,SAAS;AACtD,cAAM,MAAM,SAAS,MAAM,KAAK,QAAQ,WAAW,EAAE,GAAG,EAAE,KAAK;AAC/D,iBAAS,KAAK,EAAE,KAAK,SAAS,OAAO,KAAK,MAAM,OAAO,EAAE,CAAC;AAAA,MAC5D;AAAA,IACF;AAAA,EACF;AAEA,SAAO,SAAS,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,OAAK,EAAE,OAAO;AAClE;AAEA,SAAS,aAAa,SAAiC;AACrD,QAAM,SAAoB,CAAC;AAC3B,MAAI,IAAI;AAER,SAAO,IAAI,QAAQ,QAAQ;AACzB,UAAM,MAAM,QAAQ,CAAC;AAErB,QAAI,IAAI,UAAU,mBAAmB,IAAI,UAAU,GAAG;AACpD,YAAM,EAAE,WAAW,QAAQ,QAAQ,IAAI,yBAAyB,SAAS,CAAC;AAC1E,UAAI,UAAW,QAAO,KAAK,EAAE,MAAM,aAAa,MAAM,UAAU,CAAC;AACjE,iBAAW,KAAK,OAAQ,QAAO,KAAK,EAAE,MAAM,SAAS,OAAO,EAAE,CAAC;AAC/D,UAAI;AACJ;AAAA,IACF;AAEA,QAAI,IAAI,UAAU,mBAAmB,IAAI,SAAS,KAAK,IAAI,KAAK,UAAU,GAAG;AAC3E,YAAM,SAAS,IAAI,KAAK,SAAS,GAAG,CAAC,EAAE,SAAS,OAAO;AACvD,UAAI,WAAW,UAAU,WAAW,QAAQ;AAC1C,cAAM,EAAE,OAAO,QAAQ,IAAI,gBAAgB,SAAS,CAAC;AACrD,YAAI,MAAO,QAAO,KAAK,EAAE,MAAM,SAAS,MAAM,CAAC;AAC/C,YAAI;AACJ;AAAA,MACF;AAAA,IACF;AAEA;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,yBAAyB,SAAsB,UAAkB;AACxE,QAAM,aAAa,QAAQ,QAAQ,EAAE;AACrC,MAAI,OAAO;AACX,QAAM,SAA0C,CAAC;AACjD,MAAI,IAAI,WAAW;AAEnB,SAAO,IAAI,QAAQ,QAAQ;AACzB,UAAM,MAAM,QAAQ,CAAC;AACrB,QAAI,IAAI,UAAU,mBAAmB,IAAI,SA
AS,WAAY;AAE9D,QAAI,IAAI,UAAU,eAAe;AAC/B,aAAO,YAAY,IAAI,IAAI;AAAA,IAC7B;AAEA,QAAI,IAAI,UAAU,mBAAmB,IAAI,KAAK,UAAU,GAAG;AACzD,YAAM,SAAS,IAAI,KAAK,SAAS,GAAG,CAAC,EAAE,SAAS,OAAO;AACvD,UAAI,WAAW,UAAU,WAAW,QAAQ;AAC1C,cAAM,EAAE,OAAO,QAAQ,IAAI,gBAAgB,SAAS,CAAC;AACrD,YAAI,MAAO,QAAO,KAAK,KAAK;AAC5B,YAAI;AACJ;AAAA,MACF;AAAA,IACF;AACA;AAAA,EACF;AAEA,QAAM,UAAU,KAAK,KAAK;AAC1B,SAAO,EAAE,WAAW,WAAW,MAAM,QAAQ,SAAS,EAAE;AAC1D;AAEA,SAAS,gBAAgB,SAAsB,UAAkB;AAC/D,QAAM,aAAa,QAAQ,QAAQ,EAAE;AACrC,MAAI,IAAI,WAAW;AACnB,MAAI,OAAO,GAAG,OAAO;AACrB,QAAM,QAAuB,CAAC;AAE9B,SAAO,IAAI,QAAQ,QAAQ;AACzB,UAAM,MAAM,QAAQ,CAAC;AACrB,QAAI,IAAI,UAAU,mBAAmB,IAAI,SAAS,WAAY;AAC9D,QAAI,IAAI,UAAU,mBAAmB,IAAI,SAAS,WAAY;AAE9D,QAAI,IAAI,UAAU,aAAa,IAAI,KAAK,UAAU,GAAG;AACnD,aAAO,KAAK,IAAI,IAAI,KAAK,aAAa,CAAC,GAAG,QAAQ;AAClD,aAAO,KAAK,IAAI,IAAI,KAAK,aAAa,CAAC,GAAG,QAAQ;AAAA,IACpD;AAEA,QAAI,IAAI,UAAU,iBAAiB;AACjC,YAAM,EAAE,MAAM,QAAQ,IAAI,eAAe,SAAS,GAAG,UAAU;AAC/D,UAAI,KAAM,OAAM,KAAK,IAAI;AACzB,UAAI;AACJ;AAAA,IACF;AACA;AAAA,EACF;AAEA,MAAI,SAAS,KAAK,SAAS,KAAK,MAAM,WAAW,EAAG,QAAO,EAAE,OAAO,MAAM,SAAS,EAAE;AAErF,QAAM,WAAW,aAAa,MAAM,MAAM,KAAK;AAC/C,SAAO,EAAE,OAAO,WAAW,QAAQ,GAAG,SAAS,EAAE;AACnD;AAEA,SAAS,eAAe,SAAsB,UAAkB,YAAoB;AAClF,QAAM,MAAM,QAAQ,QAAQ;AAC5B,QAAM,YAAY,IAAI;AACtB,QAAM,QAAkB,CAAC;AAIzB,MAAI,UAAU;AACd,MAAI,UAAU;AACd,MAAI,IAAI,KAAK,UAAU,IAAI;AACzB,UAAM,KAAK,IAAI,KAAK,aAAa,EAAE;AACnC,UAAM,KAAK,IAAI,KAAK,aAAa,EAAE;AACnC,QAAI,KAAK,EAAG,WAAU,KAAK,IAAI,IAAI,QAAQ;AAC3C,QAAI,KAAK,EAAG,WAAU,KAAK,IAAI,IAAI,QAAQ;AAAA,EAC7C;AAEA,MAAI,IAAI,WAAW;AAEnB,SAAO,IAAI,QAAQ,QAAQ;AACzB,UAAM,IAAI,QAAQ,CAAC;AACnB,QAAI,EAAE,UAAU,mBAAmB,EAAE,SAAS,UAAW;AACzD,QAAI,EAAE,SAAS,eAAe,EAAE,UAAU,mBAAmB,EAAE,UAAU,iBAAkB;AAE3F,QAAI,EAAE,UAAU,eAAe;AAC7B,YAAM,IAAI,YAAY,EAAE,IAAI,EAAE,KAAK;AACnC,UAAI,EAAG,OAAM,KAAK,CAAC;AAAA,IACrB;AACA;AAAA,EACF;AAEA,SAAO,EAAE,MAAM,EAAE,MAAM,MAAM,KAAK,IAAI,GAAG,SAAS,QAAQ,GAAkB,SAAS,EAAE;AACzF;AAEA,SAAS,aAAa,MAAc,MAAc,OAAuC;AACvF,QAAM,OAAiC,MAAM,KAAK,EAAE,QAAQ,KAAK,GAAG,MAAM,MAAM,IAAI,EAAE,KAAK,IAAI,CAAC;AAChG,MAAI,UAAU;A
AEd,WAAS,IAAI,GAAG,IAAI,QAAQ,UAAU,MAAM,QAAQ,KAAK;AACvD,aAAS,IAAI,GAAG,IAAI,QAAQ,UAAU,MAAM,QAAQ,KAAK;AACvD,UAAI,KAAK,CAAC,EAAE,CAAC,MAAM,KAAM;AACzB,YAAM,OAAO,MAAM,SAAS;AAC5B,WAAK,CAAC,EAAE,CAAC,IAAI;AAEb,eAAS,KAAK,GAAG,KAAK,KAAK,SAAS,MAAM;AACxC,iBAAS,KAAK,GAAG,KAAK,KAAK,SAAS,MAAM;AACxC,cAAI,OAAO,KAAK,OAAO,EAAG;AAC1B,cAAI,IAAI,KAAK,QAAQ,IAAI,KAAK;AAC5B,iBAAK,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE;AAAA,QAC9D;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO,KAAK,IAAI,SAAO,IAAI,IAAI,OAAK,KAAK,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE,CAAC,CAAC;AAChF;;;ACrNA,SAAS,iBAAAC,sBAAqB;AAC9B,SAAS,qBAAqB;AAL9B,IAAM,YAAY;AAElB,IAAM,iBAAiB,MAAM,OAAO;AAyBpC,IAAI,cAAkC;AAEtC,eAAe,YAAyC;AACtD,MAAI,YAAa,QAAO;AACxB,MAAI;AACF,UAAM,MAAM,MAAM,OAAO,iCAAiC;AAE1D,UAAM,MAAMA,eAAc,YAAY,GAAG;AACzC,UAAM,aAAa,IAAI,QAAQ,wCAAwC;AACvE,QAAI,oBAAoB,YAAY,cAAc,UAAU,EAAE;AAC9D,kBAAc;AACd,WAAO;AAAA,EACT,SAAS,KAAK;AAEZ,UAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,QAAI,IAAI,SAAS,aAAa,KAAK,IAAI,SAAS,kBAAkB,GAAG;AACnE,aAAO;AAAA,IACT;AACA,UAAM,IAAI,MAAM,yCAAqB,GAAG,EAAE;AAAA,EAC5C;AACF;AAEA,eAAsB,iBAAiB,QAA2C;AAChF,QAAM,QAAQ,MAAM,UAAU;AAC9B,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,MACL,SAAS;AAAA,MACT,UAAU;AAAA,MACV,WAAW;AAAA,MACX,OAAO;AAAA,IACT;AAAA,EACF;AAEA,QAAM,OAAO,IAAI,WAAW,MAAM;AAClC,QAAM,MAAM,MAAM,MAAM,YAAY;AAAA,IAClC;AAAA,IACA,gBAAgB;AAAA,IAChB,iBAAiB;AAAA,IACjB,iBAAiB;AAAA,EACnB,CAAC,EAAE;AAEH,MAAI;AACF,UAAM,YAAY,IAAI;AACtB,QAAI,cAAc,GAAG;AACnB,aAAO,EAAE,SAAS,OAAO,UAAU,OAAO,WAAW,GAAG,OAAO,+DAAkB;AAAA,IACnF;AAEA,UAAM,YAAsB,CAAC;AAC7B,QAAI,aAAa;AACjB,QAAI,iBAAiB;AACrB,UAAM,qBAAqB,KAAK,IAAI,WAAW,SAAS;AAExD,aAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,YAAM,OAAO,MAAM,IAAI,QAAQ,CAAC;AAChC,YAAM,cAAc,MAAM,KAAK,eAAe;AAC9C,YAAM,QAAQ,qBAAqB,YAAY,KAAK;AACpD,YAAM,WAAW,MAAM,KAAK,IAAI;AAChC,oBAAc,SAAS,QAAQ,OAAO,EAAE,EAAE;AAC1C,wBAAkB,SAAS,SAAS;AACpC,UAAI,iBAAiB,eAAgB,OAAM,IAAI,MAAM,8DAAiB,iBAAiB,OAAO,IAAI,kBAAQ;AAC1G,gBAAU,KAAK,QAAQ;AAAA,IACzB;AAEA,UAAM,kBAAkB,aAAa;AACrC,QAAI,kBAAkB,IAAI;AACxB,aAAO;AAAA,QACL,SAAS;AAAA,QACT,UA
AU;AAAA,QACV;AAAA,QACA,cAAc;AAAA,QACd,OAAO,6EAAsB,SAAS,uDAAe,UAAU;AAAA,MACjE;AAAA,IACF;AAEA,QAAI,WAAW;AACf,aAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,YAAM,UAAU,aAAa,UAAU,CAAC,CAAC;AACzC,UAAI,QAAQ,KAAK,GAAG;AAClB,YAAI,IAAI,KAAK,SAAU,aAAY;AACnC,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,kBAAkB,QAAQ;AAErC,UAAM,YAAY,YAAY;AAC9B,WAAO,EAAE,SAAS,MAAM,UAAU,OAAO,UAAU,WAAW,oBAAoB,cAAc,OAAO,GAAI,aAAa,EAAE,SAAS,aAAQ,SAAS,wCAAU,SAAS,4EAAgB,EAAG;AAAA,EAC5L,UAAE;AACA,UAAM,IAAI,QAAQ,EAAE,MAAM,MAAM;AAAA,IAAC,CAAC;AAAA,EACpC;AACF;AAIA,SAAS,qBAAqB,OAAkC;AAC9D,MAAI,MAAM,WAAW,EAAG,QAAO,CAAC;AAEhC,QAAM,YAAY,MAAM,OAAO,UAAQ,OAAO,KAAK,QAAQ,YAAY,KAAK,IAAI,KAAK,MAAM,EAAE;AAC7F,MAAI,UAAU,WAAW,EAAG,QAAO,CAAC;AAEpC,YAAU,KAAK,CAAC,GAAG,MAAM;AACvB,UAAM,QAAQ,EAAE,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC;AAC5C,QAAI,KAAK,IAAI,KAAK,IAAI,EAAG,QAAO,EAAE,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC;AAC9D,WAAO;AAAA,EACT,CAAC;AAED,QAAM,QAAkB,CAAC;AACzB,MAAI,WAAW,UAAU,CAAC,EAAE,UAAU,CAAC;AACvC,MAAI,cAA4D,CAAC;AAEjE,aAAW,QAAQ,WAAW;AAC5B,UAAM,IAAI,KAAK,UAAU,CAAC;AAE1B,QAAI,KAAK,IAAI,WAAW,CAAC,IAAI,KAAK,IAAI,KAAK,SAAS,KAAK,CAAC,GAAG;AAC3D,UAAI,YAAY,SAAS,EAAG,OAAM,KAAK,eAAe,WAAW,CAAC;AAClE,oBAAc,CAAC;AACf,iBAAW;AAAA,IACb;AAEA,gBAAY,KAAK,EAAE,MAAM,KAAK,KAAK,GAAG,KAAK,UAAU,CAAC,GAAG,OAAO,KAAK,MAAM,CAAC;AAAA,EAC9E;AAEA,MAAI,YAAY,SAAS,EAAG,OAAM,KAAK,eAAe,WAAW,CAAC;AAClE,SAAO;AACT;AAEA,SAAS,eAAe,OAA6D;AACnF,MAAI,MAAM,UAAU,EAAG,QAAO,MAAM,CAAC,GAAG,QAAQ;AAChD,QAAM,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AAE9B,MAAI,SAAS,MAAM,CAAC,EAAE;AACtB,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,UAAM,MAAM,MAAM,CAAC,EAAE,KAAK,MAAM,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,CAAC,EAAE;AACxD,QAAI,MAAM,GAAI,WAAU;AAAA,aACf,MAAM,EAAG,WAAU;AAC5B,cAAU,MAAM,CAAC,EAAE;AAAA,EACrB;AACA,SAAO;AACT;AAEO,SAAS,aAAa,MAAsB;AACjD,SAAO,KACJ,QAAQ,qCAAqC,EAAE,EAC/C,QAAQ,4BAA4B,EAAE,EACtC,QAAQ,0BAA0B,OAAO,EACzC,QAAQ,WAAW,MAAM,EACzB,KAAK;AACV;AAEA,SAAS,kBAAkB,MAAsB;AAC/C,QAAM,QAAQ,KAAK,MAAM,IAAI;AAC7B,QAAM,SAAmB,CAAC;AAC1B,MAAI,cAA0B,CAAC;AAE/B,aAAW,QAAQ,OAAO;AACxB,QAAI,KAAK,SAAS,GAAI,GAAG;AACvB,kBAAY,KAAK,
KAAK,MAAM,GAAI,EAAE,IAAI,OAAK,EAAE,KAAK,CAAC,CAAC;AAAA,IACtD,OAAO;AACL,UAAI,YAAY,UAAU,EAAG,QAAO,KAAK,sBAAsB,WAAW,CAAC;AAAA,eAClE,YAAY,WAAW,EAAG,QAAO,KAAK,YAAY,CAAC,EAAE,KAAK,KAAK,CAAC;AACzE,oBAAc,CAAC;AACf,aAAO,KAAK,IAAI;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,YAAY,UAAU,EAAG,QAAO,KAAK,sBAAsB,WAAW,CAAC;AAAA,WAClE,YAAY,WAAW,EAAG,QAAO,KAAK,YAAY,CAAC,EAAE,KAAK,KAAK,CAAC;AAEzE,SAAO,OAAO,KAAK,IAAI;AACzB;AAEA,SAAS,sBAAsB,MAA0B;AACvD,QAAM,UAAU,KAAK,IAAI,GAAG,KAAK,IAAI,OAAK,EAAE,MAAM,CAAC;AAEnD,QAAM,aAAa,KAAK,IAAI,OAAK;AAC/B,UAAM,OAAO,CAAC,GAAG,CAAC;AAClB,WAAO,KAAK,SAAS,QAAS,MAAK,KAAK,EAAE;AAC1C,WAAO;AAAA,EACT,CAAC;AAED,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,OAAO,WAAW,CAAC,EAAE,KAAK,KAAK,IAAI,IAAI;AAClD,QAAM,KAAK,OAAO,WAAW,CAAC,EAAE,IAAI,MAAM,KAAK,EAAE,KAAK,KAAK,IAAI,IAAI;AACnE,WAAS,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;AAC1C,UAAM,KAAK,OAAO,WAAW,CAAC,EAAE,KAAK,KAAK,IAAI,IAAI;AAAA,EACpD;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;;;ACnMA,eAAsB,MAAM,QAA2C;AACrE,MAAI,CAAC,UAAU,OAAO,eAAe,GAAG;AACtC,WAAO,EAAE,SAAS,OAAO,UAAU,WAAW,OAAO,8GAAyB;AAAA,EAChF;AACA,QAAM,SAAS,aAAa,MAAM;AAElC,UAAQ,QAAQ;AAAA,IACd,KAAK;AACH,aAAO,UAAU,MAAM;AAAA,IACzB,KAAK;AACH,aAAO,SAAS,MAAM;AAAA,IACxB,KAAK;AACH,aAAO,SAAS,MAAM;AAAA,IACxB;AACE,aAAO,EAAE,SAAS,OAAO,UAAU,WAAW,OAAO,qFAAoB;AAAA,EAC7E;AACF;AAKA,eAAsB,UAAU,QAA2C;AACzE,MAAI;AACF,UAAM,WAAW,MAAM,kBAAkB,MAAM;AAC/C,WAAO,EAAE,SAAS,MAAM,UAAU,QAAQ,SAAS;AAAA,EACrD,SAAS,KAAK;AACZ,WAAO,EAAE,SAAS,OAAO,UAAU,QAAQ,OAAO,eAAe,QAAQ,IAAI,UAAU,iCAAa;AAAA,EACtG;AACF;AAGA,eAAsB,SAAS,QAA2C;AACxE,MAAI;AACF,UAAM,WAAW,kBAAkB,OAAO,KAAK,MAAM,CAAC;AACtD,WAAO,EAAE,SAAS,MAAM,UAAU,OAAO,SAAS;AAAA,EACpD,SAAS,KAAK;AACZ,WAAO,EAAE,SAAS,OAAO,UAAU,OAAO,OAAO,eAAe,QAAQ,IAAI,UAAU,gCAAY;AAAA,EACpG;AACF;AAGA,eAAsB,SAAS,QAA2C;AACxE,MAAI;AACF,WAAO,MAAM,iBAAiB,MAAM;AAAA,EACtC,SAAS,KAAK;AACZ,WAAO,EAAE,SAAS,OAAO,UAAU,OAAO,OAAO,eAAe,QAAQ,IAAI,UAAU,gCAAY;AAAA,EACpG;AACF;","names":["inflateRawSync","MAX_DECOMPRESS_SIZE","require","createRequire"]}
|
package/dist/cli.js
CHANGED
|
@@ -4,7 +4,7 @@ import {
|
|
|
4
4
|
detectFormat,
|
|
5
5
|
parse,
|
|
6
6
|
toArrayBuffer
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-6FDCSJYE.js";
|
|
8
8
|
|
|
9
9
|
// src/cli.ts
|
|
10
10
|
import { readFileSync, writeFileSync, mkdirSync, statSync } from "fs";
|
|
@@ -65,3 +65,4 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
|
|
|
65
65
|
}
|
|
66
66
|
});
|
|
67
67
|
program.parse();
|
|
68
|
+
//# sourceMappingURL=cli.js.map
|
package/dist/cli.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\n\nimport { readFileSync, writeFileSync, mkdirSync, statSync } from \"fs\"\nimport { basename, resolve } from \"path\"\nimport { Command } from \"commander\"\nimport { parse, detectFormat } from \"./index.js\"\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\n\nconst program = new Command()\n\nprogram\n .name(\"kordoc\")\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF → Markdown\")\n .version(VERSION)\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF)\")\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (files: string[], opts) => {\n for (const filePath of files) {\n const absPath = resolve(filePath)\n const fileName = basename(absPath)\n\n try {\n const fileSize = statSync(absPath).size\n if (fileSize > 500 * 1024 * 1024) {\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\n process.exitCode = 1\n continue\n }\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const format = detectFormat(arrayBuffer)\n\n if (!opts.silent) {\n process.stderr.write(`[kordoc] ${fileName} (${format}) ...`)\n }\n\n const result = await parse(arrayBuffer)\n\n if (!result.success) {\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${result.error}\\n`)\n process.exitCode = 1\n continue\n }\n\n if (!opts.silent) process.stderr.write(` OK\\n`)\n\n const output = opts.format === \"json\"\n ? 
JSON.stringify(result, null, 2)\n : result.markdown\n\n if (opts.output && files.length === 1) {\n writeFileSync(opts.output, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\n } else if (opts.outDir) {\n mkdirSync(opts.outDir, { recursive: true })\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\n } else {\n process.stdout.write(output + \"\\n\")\n }\n } catch (err) {\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${err instanceof Error ? err.message : err}\\n`)\n process.exitCode = 1\n }\n }\n })\n\nprogram.parse()\n"],"mappings":";;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,gBAAgB;AACjE,SAAS,UAAU,eAAe;AAClC,SAAS,eAAe;AAIxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,QAAQ,EACb,YAAY,+FAAwC,EACpD,QAAQ,OAAO,EACf,SAAS,cAAc,+DAA4B,EACnD,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAiB,SAAS;AACvC,aAAW,YAAY,OAAO;AAC5B,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AAEjC,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC7D;AAEA,YAAM,SAAS,MAAM,MAAM,WAAW;AAEtC,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAE9C,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B,OAAO;AAEX,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAAA,MAC/D,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CA
AC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AAAA,MAC3D,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAAA,IACF,SAAS,KAAK;AACZ,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,eAAe,QAAQ,IAAI,UAAU,GAAG;AAAA,CAAI;AACpG,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF,CAAC;AAEH,QAAQ,MAAM;","names":[]}
|
package/dist/index.cjs
CHANGED
|
@@ -62,6 +62,7 @@ function isPdfFile(buffer) {
|
|
|
62
62
|
return b[0] === 37 && b[1] === 80 && b[2] === 68 && b[3] === 70;
|
|
63
63
|
}
|
|
64
64
|
function detectFormat(buffer) {
|
|
65
|
+
if (buffer.byteLength < 4) return "unknown";
|
|
65
66
|
if (isHwpxFile(buffer)) return "hwpx";
|
|
66
67
|
if (isOldHwpFile(buffer)) return "hwp";
|
|
67
68
|
if (isPdfFile(buffer)) return "pdf";
|
|
@@ -218,6 +219,11 @@ async function parseHwpxDocument(buffer) {
|
|
|
218
219
|
} catch {
|
|
219
220
|
return extractFromBrokenZip(buffer);
|
|
220
221
|
}
|
|
222
|
+
let declaredTotal = 0;
|
|
223
|
+
zip.forEach((_, file) => {
|
|
224
|
+
declaredTotal += file._data?.uncompressedSize ?? 0;
|
|
225
|
+
});
|
|
226
|
+
if (declaredTotal > MAX_DECOMPRESS_SIZE) throw new Error("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
|
|
221
227
|
const sectionPaths = await resolveSectionPaths(zip);
|
|
222
228
|
if (sectionPaths.length === 0) throw new Error("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
223
229
|
let totalDecompressed = 0;
|
|
@@ -258,7 +264,8 @@ function extractFromBrokenZip(buffer) {
|
|
|
258
264
|
}
|
|
259
265
|
const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
|
|
260
266
|
const name = new TextDecoder().decode(nameBytes);
|
|
261
|
-
|
|
267
|
+
const normalizedName = name.replace(/\\/g, "/");
|
|
268
|
+
if (normalizedName.includes("..") || normalizedName.startsWith("/") || /^[A-Za-z]:/.test(normalizedName)) {
|
|
262
269
|
pos = fileStart + compSize;
|
|
263
270
|
continue;
|
|
264
271
|
}
|
|
@@ -289,7 +296,8 @@ function extractFromBrokenZip(buffer) {
|
|
|
289
296
|
async function resolveSectionPaths(zip) {
|
|
290
297
|
const manifestPaths = ["Contents/content.hpf", "content.hpf"];
|
|
291
298
|
for (const mp of manifestPaths) {
|
|
292
|
-
const
|
|
299
|
+
const mpLower = mp.toLowerCase();
|
|
300
|
+
const file = zip.file(mp) || Object.values(zip.files).find((f) => f.name.toLowerCase() === mpLower) || null;
|
|
293
301
|
if (!file) continue;
|
|
294
302
|
const xml = await file.async("text");
|
|
295
303
|
const paths = parseSectionPathsFromManifest(xml);
|
|
@@ -463,10 +471,11 @@ var CHAR_FIXED_NBSP = 24;
|
|
|
463
471
|
var FLAG_COMPRESSED = 1 << 0;
|
|
464
472
|
var FLAG_ENCRYPTED = 1 << 1;
|
|
465
473
|
var FLAG_DRM = 1 << 4;
|
|
474
|
+
var MAX_RECORDS = 5e5;
|
|
466
475
|
function readRecords(data) {
|
|
467
476
|
const records = [];
|
|
468
477
|
let offset = 0;
|
|
469
|
-
while (offset + 4 <= data.length) {
|
|
478
|
+
while (offset + 4 <= data.length && records.length < MAX_RECORDS) {
|
|
470
479
|
const header = data.readUInt32LE(offset);
|
|
471
480
|
offset += 4;
|
|
472
481
|
const tagId = header & 1023;
|
|
@@ -527,7 +536,7 @@ function extractText(data) {
|
|
|
527
536
|
break;
|
|
528
537
|
default:
|
|
529
538
|
if (ch >= 1 && ch <= 31) {
|
|
530
|
-
const isExt = ch >= 1 && ch <= 3 || ch >=
|
|
539
|
+
const isExt = ch >= 1 && ch <= 3 || ch >= 10 && ch <= 18 || ch >= 21 && ch <= 23;
|
|
531
540
|
const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
|
|
532
541
|
if ((isExt || isInline) && i + 14 <= data.length) i += 14;
|
|
533
542
|
} else if (ch >= 32) {
|
|
@@ -586,6 +595,7 @@ function findSections(cfb) {
|
|
|
586
595
|
}
|
|
587
596
|
if (sections.length === 0 && cfb.FileIndex) {
|
|
588
597
|
for (const entry of cfb.FileIndex) {
|
|
598
|
+
if (sections.length >= MAX_SECTIONS) break;
|
|
589
599
|
if (entry.name?.startsWith("Section") && entry.content) {
|
|
590
600
|
const idx = parseInt(entry.name.replace("Section", ""), 10) || 0;
|
|
591
601
|
sections.push({ idx, content: Buffer.from(entry.content) });
|
|
@@ -654,8 +664,8 @@ function parseTableBlock(records, startIdx) {
|
|
|
654
664
|
if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
|
|
655
665
|
if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
|
|
656
666
|
if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
|
|
657
|
-
rows = rec.data.readUInt16LE(4);
|
|
658
|
-
cols = rec.data.readUInt16LE(6);
|
|
667
|
+
rows = Math.min(rec.data.readUInt16LE(4), MAX_ROWS);
|
|
668
|
+
cols = Math.min(rec.data.readUInt16LE(6), MAX_COLS);
|
|
659
669
|
}
|
|
660
670
|
if (rec.tagId === TAG_LIST_HEADER) {
|
|
661
671
|
const { cell, nextIdx } = parseCellBlock(records, i, tableLevel);
|
|
@@ -718,6 +728,8 @@ function arrangeCells(rows, cols, cells) {
|
|
|
718
728
|
var import_module2 = require("module");
|
|
719
729
|
var import_url = require("url");
|
|
720
730
|
var import_meta2 = {};
|
|
731
|
+
var MAX_PAGES = 5e3;
|
|
732
|
+
var MAX_TOTAL_TEXT = 100 * 1024 * 1024;
|
|
721
733
|
var pdfjsModule = null;
|
|
722
734
|
async function loadPdfjs() {
|
|
723
735
|
if (pdfjsModule) return pdfjsModule;
|
|
@@ -760,15 +772,19 @@ async function parsePdfDocument(buffer) {
|
|
|
760
772
|
}
|
|
761
773
|
const pageTexts = [];
|
|
762
774
|
let totalChars = 0;
|
|
763
|
-
|
|
775
|
+
let totalTextBytes = 0;
|
|
776
|
+
const effectivePageCount = Math.min(pageCount, MAX_PAGES);
|
|
777
|
+
for (let i = 1; i <= effectivePageCount; i++) {
|
|
764
778
|
const page = await doc.getPage(i);
|
|
765
779
|
const textContent = await page.getTextContent();
|
|
766
780
|
const lines = groupTextItemsByLine(textContent.items);
|
|
767
781
|
const pageText = lines.join("\n");
|
|
768
782
|
totalChars += pageText.replace(/\s/g, "").length;
|
|
783
|
+
totalTextBytes += pageText.length * 2;
|
|
784
|
+
if (totalTextBytes > MAX_TOTAL_TEXT) throw new Error(`\uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uD06C\uAE30 \uCD08\uACFC (${MAX_TOTAL_TEXT / 1024 / 1024}MB \uC81C\uD55C)`);
|
|
769
785
|
pageTexts.push(pageText);
|
|
770
786
|
}
|
|
771
|
-
const avgCharsPerPage = totalChars /
|
|
787
|
+
const avgCharsPerPage = totalChars / effectivePageCount;
|
|
772
788
|
if (avgCharsPerPage < 10) {
|
|
773
789
|
return {
|
|
774
790
|
success: false,
|
|
@@ -787,7 +803,8 @@ async function parsePdfDocument(buffer) {
|
|
|
787
803
|
}
|
|
788
804
|
}
|
|
789
805
|
markdown = reconstructTables(markdown);
|
|
790
|
-
|
|
806
|
+
const truncated = pageCount > MAX_PAGES;
|
|
807
|
+
return { success: true, fileType: "pdf", markdown, pageCount: effectivePageCount, isImageBased: false, ...truncated && { warning: `PDF\uAC00 ${pageCount}\uD398\uC774\uC9C0\uC774\uC9C0\uB9CC ${MAX_PAGES}\uD398\uC774\uC9C0\uAE4C\uC9C0\uB9CC \uCC98\uB9AC\uD588\uC2B5\uB2C8\uB2E4` } };
|
|
791
808
|
} finally {
|
|
792
809
|
await doc.destroy().catch(() => {
|
|
793
810
|
});
|
|
@@ -867,7 +884,7 @@ function formatAsMarkdownTable(rows) {
|
|
|
867
884
|
}
|
|
868
885
|
|
|
869
886
|
// src/utils.ts
|
|
870
|
-
var VERSION = true ? "0.
|
|
887
|
+
var VERSION = true ? "1.0.0" : "0.0.0-dev";
|
|
871
888
|
|
|
872
889
|
// src/index.ts
|
|
873
890
|
async function parse(buffer) {
|