kordoc 0.2.2 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -49,7 +49,7 @@ npm install kordoc
49
49
  npm install pdfjs-dist
50
50
  ```
51
51
 
52
- > **Since v0.2.1**, `pdfjs-dist` is an optional peer dependency. Not needed for HWP/HWPX parsing.
52
+ > `pdfjs-dist` is an optional peer dependency. Not needed for HWP/HWPX parsing.
53
53
 
54
54
  ## Usage
55
55
 
@@ -147,18 +147,21 @@ import type { IRBlock, IRTable, IRCell, CellContext } from "kordoc"
147
147
 
148
148
  ## Security
149
149
 
150
- v0.2.2 security hardening (cumulative since v0.2.1):
150
+ v1.0.0 production-grade security hardening:
151
151
 
152
- - **ZIP bomb protection** — 100MB decompression limit, 500 entry cap
152
+ - **ZIP bomb protection** — Up-front entry count validation (500-entry cap), 100MB decompression limit
153
153
  - **XXE/Billion Laughs prevention** — Internal DTD subsets fully stripped from HWPX XML
154
154
  - **Decompression bomb guard** — `maxOutputLength` on HWP5 zlib streams, cumulative 100MB limit across sections
155
+ - **PDF resource limits** — At most MAX_PAGES=5,000 pages processed (remaining pages skipped with a warning), cumulative extracted text capped at 100MB, `doc.destroy()` cleanup
156
+ - **HWP5 record cap** — Max 500,000 records per section, prevents memory exhaustion from crafted files
157
+ - **Table dimension clamping** — rows/cols read from HWP5 binary clamped to MAX_ROWS/MAX_COLS before allocation
155
158
  - **colSpan/rowSpan clamping** — Crafted merge values clamped to grid bounds (MAX_COLS=200, MAX_ROWS=10,000)
156
- - **Broken ZIP path traversal guard** — `..` and absolute path entries rejected, filename length capped
157
- - **MCP path restriction** — Only `.hwp`, `.hwpx`, `.pdf` extensions allowed
159
+ - **Path traversal guard** — Backslash normalization, `..`, absolute paths, Windows drive letters all rejected
160
+ - **MCP error sanitization** — Allowlist-based error filtering, unknown errors return generic message
161
+ - **MCP path restriction** — Only `.hwp`, `.hwpx`, `.pdf` extensions allowed, symlink resolution
158
162
  - **File size limit** — 500MB max in MCP server and CLI
159
- - **PDF resource cleanup** — `doc.destroy()` prevents WASM memory leaks
160
- - **Table memory guard** — Sparse Set-based allocation in Pass 1, 10,000 row cap
161
- - **HWP5 section limit** — Max 100 sections to prevent infinite loop on corrupted files
163
+ - **HWP5 section limit** — Max 100 sections in both primary and fallback paths
164
+ - **HWP5 control char fix** — Character code 10 (footnote/endnote) now correctly handled
162
165
 
163
166
  ## How It Works
164
167
 
@@ -17,6 +17,7 @@ function isPdfFile(buffer) {
17
17
  return b[0] === 37 && b[1] === 80 && b[2] === 68 && b[3] === 70;
18
18
  }
19
19
  function detectFormat(buffer) {
20
+ if (buffer.byteLength < 4) return "unknown";
20
21
  if (isHwpxFile(buffer)) return "hwpx";
21
22
  if (isOldHwpFile(buffer)) return "hwp";
22
23
  if (isPdfFile(buffer)) return "pdf";
@@ -24,7 +25,7 @@ function detectFormat(buffer) {
24
25
  }
25
26
 
26
27
  // src/utils.ts
27
- var VERSION = true ? "0.2.2" : "0.0.0-dev";
28
+ var VERSION = true ? "1.0.1" : "0.0.0-dev";
28
29
  function toArrayBuffer(buf) {
29
30
  return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
30
31
  }
@@ -179,6 +180,11 @@ async function parseHwpxDocument(buffer) {
179
180
  } catch {
180
181
  return extractFromBrokenZip(buffer);
181
182
  }
183
+ let entryCount = 0;
184
+ zip.forEach(() => {
185
+ entryCount++;
186
+ });
187
+ if (entryCount > MAX_ZIP_ENTRIES) throw new Error("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
182
188
  const sectionPaths = await resolveSectionPaths(zip);
183
189
  if (sectionPaths.length === 0) throw new Error("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
184
190
  let totalDecompressed = 0;
@@ -219,7 +225,8 @@ function extractFromBrokenZip(buffer) {
219
225
  }
220
226
  const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
221
227
  const name = new TextDecoder().decode(nameBytes);
222
- if (name.includes("..") || name.startsWith("/")) {
228
+ const normalizedName = name.replace(/\\/g, "/");
229
+ if (normalizedName.includes("..") || normalizedName.startsWith("/") || /^[A-Za-z]:/.test(normalizedName)) {
223
230
  pos = fileStart + compSize;
224
231
  continue;
225
232
  }
@@ -250,7 +257,8 @@ function extractFromBrokenZip(buffer) {
250
257
  async function resolveSectionPaths(zip) {
251
258
  const manifestPaths = ["Contents/content.hpf", "content.hpf"];
252
259
  for (const mp of manifestPaths) {
253
- const file = zip.file(new RegExp(`^${mp.replace(/\./g, "\\.")}$`, "i"))[0];
260
+ const mpLower = mp.toLowerCase();
261
+ const file = zip.file(mp) || Object.values(zip.files).find((f) => f.name.toLowerCase() === mpLower) || null;
254
262
  if (!file) continue;
255
263
  const xml = await file.async("text");
256
264
  const paths = parseSectionPathsFromManifest(xml);
@@ -424,10 +432,11 @@ var CHAR_FIXED_NBSP = 24;
424
432
  var FLAG_COMPRESSED = 1 << 0;
425
433
  var FLAG_ENCRYPTED = 1 << 1;
426
434
  var FLAG_DRM = 1 << 4;
435
+ var MAX_RECORDS = 5e5;
427
436
  function readRecords(data) {
428
437
  const records = [];
429
438
  let offset = 0;
430
- while (offset + 4 <= data.length) {
439
+ while (offset + 4 <= data.length && records.length < MAX_RECORDS) {
431
440
  const header = data.readUInt32LE(offset);
432
441
  offset += 4;
433
442
  const tagId = header & 1023;
@@ -488,7 +497,7 @@ function extractText(data) {
488
497
  break;
489
498
  default:
490
499
  if (ch >= 1 && ch <= 31) {
491
- const isExt = ch >= 1 && ch <= 3 || ch >= 11 && ch <= 18 || ch >= 21 && ch <= 23;
500
+ const isExt = ch >= 1 && ch <= 3 || ch >= 10 && ch <= 18 || ch >= 21 && ch <= 23;
492
501
  const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
493
502
  if ((isExt || isInline) && i + 14 <= data.length) i += 14;
494
503
  } else if (ch >= 32) {
@@ -546,6 +555,7 @@ function findSections(cfb) {
546
555
  }
547
556
  if (sections.length === 0 && cfb.FileIndex) {
548
557
  for (const entry of cfb.FileIndex) {
558
+ if (sections.length >= MAX_SECTIONS) break;
549
559
  if (entry.name?.startsWith("Section") && entry.content) {
550
560
  const idx = parseInt(entry.name.replace("Section", ""), 10) || 0;
551
561
  sections.push({ idx, content: Buffer.from(entry.content) });
@@ -614,8 +624,8 @@ function parseTableBlock(records, startIdx) {
614
624
  if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
615
625
  if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
616
626
  if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
617
- rows = rec.data.readUInt16LE(4);
618
- cols = rec.data.readUInt16LE(6);
627
+ rows = Math.min(rec.data.readUInt16LE(4), MAX_ROWS);
628
+ cols = Math.min(rec.data.readUInt16LE(6), MAX_COLS);
619
629
  }
620
630
  if (rec.tagId === TAG_LIST_HEADER) {
621
631
  const { cell, nextIdx } = parseCellBlock(records, i, tableLevel);
@@ -677,6 +687,8 @@ function arrangeCells(rows, cols, cells) {
677
687
  // src/pdf/parser.ts
678
688
  import { createRequire as createRequire2 } from "module";
679
689
  import { pathToFileURL } from "url";
690
+ var MAX_PAGES = 5e3;
691
+ var MAX_TOTAL_TEXT = 100 * 1024 * 1024;
680
692
  var pdfjsModule = null;
681
693
  async function loadPdfjs() {
682
694
  if (pdfjsModule) return pdfjsModule;
@@ -719,15 +731,19 @@ async function parsePdfDocument(buffer) {
719
731
  }
720
732
  const pageTexts = [];
721
733
  let totalChars = 0;
722
- for (let i = 1; i <= pageCount; i++) {
734
+ let totalTextBytes = 0;
735
+ const effectivePageCount = Math.min(pageCount, MAX_PAGES);
736
+ for (let i = 1; i <= effectivePageCount; i++) {
723
737
  const page = await doc.getPage(i);
724
738
  const textContent = await page.getTextContent();
725
739
  const lines = groupTextItemsByLine(textContent.items);
726
740
  const pageText = lines.join("\n");
727
741
  totalChars += pageText.replace(/\s/g, "").length;
742
+ totalTextBytes += pageText.length * 2;
743
+ if (totalTextBytes > MAX_TOTAL_TEXT) throw new Error(`\uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uD06C\uAE30 \uCD08\uACFC (${MAX_TOTAL_TEXT / 1024 / 1024}MB \uC81C\uD55C)`);
728
744
  pageTexts.push(pageText);
729
745
  }
730
- const avgCharsPerPage = totalChars / pageCount;
746
+ const avgCharsPerPage = totalChars / effectivePageCount;
731
747
  if (avgCharsPerPage < 10) {
732
748
  return {
733
749
  success: false,
@@ -746,7 +762,8 @@ async function parsePdfDocument(buffer) {
746
762
  }
747
763
  }
748
764
  markdown = reconstructTables(markdown);
749
- return { success: true, fileType: "pdf", markdown, pageCount, isImageBased: false };
765
+ const truncated = pageCount > MAX_PAGES;
766
+ return { success: true, fileType: "pdf", markdown, pageCount: effectivePageCount, isImageBased: false, ...truncated && { warning: `PDF\uAC00 ${pageCount}\uD398\uC774\uC9C0\uC774\uC9C0\uB9CC ${MAX_PAGES}\uD398\uC774\uC9C0\uAE4C\uC9C0\uB9CC \uCC98\uB9AC\uD588\uC2B5\uB2C8\uB2E4` } };
750
767
  } finally {
751
768
  await doc.destroy().catch(() => {
752
769
  });
@@ -872,3 +889,4 @@ export {
872
889
  toArrayBuffer,
873
890
  parse
874
891
  };
892
+ //# sourceMappingURL=chunk-KT5X6QUZ.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/detect.ts","../src/utils.ts","../src/hwpx/parser.ts","../src/table/builder.ts","../src/hwp5/record.ts","../src/hwp5/parser.ts","../src/pdf/parser.ts","../src/index.ts"],"sourcesContent":["/** 매직 바이트 기반 파일 포맷 감지 */\n\nimport type { FileType } from \"./types.js\"\n\n/** 매직 바이트 뷰 생성 (복사 없이 view) */\nfunction magicBytes(buffer: ArrayBuffer): Uint8Array {\n return new Uint8Array(buffer, 0, Math.min(4, buffer.byteLength))\n}\n\n/** HWPX (ZIP 기반 한컴 문서): PK\\x03\\x04 */\nexport function isHwpxFile(buffer: ArrayBuffer): boolean {\n const b = magicBytes(buffer)\n return b[0] === 0x50 && b[1] === 0x4b && b[2] === 0x03 && b[3] === 0x04\n}\n\n/** HWP 5.x (OLE2 바이너리 한컴 문서): \\xD0\\xCF\\x11\\xE0 */\nexport function isOldHwpFile(buffer: ArrayBuffer): boolean {\n const b = magicBytes(buffer)\n return b[0] === 0xd0 && b[1] === 0xcf && b[2] === 0x11 && b[3] === 0xe0\n}\n\n/** PDF 문서: %PDF */\nexport function isPdfFile(buffer: ArrayBuffer): boolean {\n const b = magicBytes(buffer)\n return b[0] === 0x25 && b[1] === 0x50 && b[2] === 0x44 && b[3] === 0x46\n}\n\n/** 버퍼로부터 파일 포맷 감지 */\nexport function detectFormat(buffer: ArrayBuffer): FileType {\n if (buffer.byteLength < 4) return \"unknown\"\n if (isHwpxFile(buffer)) return \"hwpx\"\n if (isOldHwpFile(buffer)) return \"hwp\"\n if (isPdfFile(buffer)) return \"pdf\"\n return \"unknown\"\n}\n","/** kordoc 공용 유틸리티 */\n\n/** 빌드 타임에 tsup define으로 주입되는 버전 */\ndeclare const __KORDOC_VERSION__: string\nexport const VERSION: string = typeof __KORDOC_VERSION__ !== \"undefined\" ? 
__KORDOC_VERSION__ : \"0.0.0-dev\"\n\n/**\n * Node.js Buffer → ArrayBuffer 안전 변환\n * Buffer.buffer는 pool에서 할당된 공유 ArrayBuffer이므로 반드시 slice 필요\n */\nexport function toArrayBuffer(buf: Buffer): ArrayBuffer {\n return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength) as ArrayBuffer\n}\n","/**\n * HWPX 파서 — manifest 멀티섹션, colSpan/rowSpan, 중첩테이블\n *\n * lexdiff 기반 + edu-facility-ai 손상ZIP 복구\n */\n\nimport JSZip from \"jszip\"\nimport { inflateRawSync } from \"zlib\"\nimport { DOMParser } from \"@xmldom/xmldom\"\nimport { buildTable, convertTableToText, blocksToMarkdown, MAX_COLS, MAX_ROWS } from \"../table/builder.js\"\nimport type { CellContext, IRBlock } from \"../types.js\"\n\n/** 압축 해제 최대 크기 (100MB) — ZIP bomb 방지 */\nconst MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024\n/** 손상 ZIP 복구 시 최대 엔트리 수 */\nconst MAX_ZIP_ENTRIES = 500\n\n/** colSpan/rowSpan을 안전한 범위로 클램핑 */\nfunction clampSpan(val: number, max: number): number {\n return Math.max(1, Math.min(val, max))\n}\n\ninterface TableState { rows: CellContext[][]; currentRow: CellContext[]; cell: CellContext | null }\n\n/** XXE/Billion Laughs 방지 — DOCTYPE 제거 (내부 DTD 서브셋 포함) */\nfunction stripDtd(xml: string): string {\n return xml.replace(/<!DOCTYPE\\s[^[>]*(\\[[\\s\\S]*?\\])?\\s*>/gi, \"\")\n}\n\nexport async function parseHwpxDocument(buffer: ArrayBuffer): Promise<string> {\n let zip: JSZip\n\n try {\n zip = await JSZip.loadAsync(buffer)\n } catch {\n // ZIP Central Directory 손상 — Local File Header 스캔으로 폴백\n return extractFromBrokenZip(buffer)\n }\n\n // ZIP 전체 엔트리 수 검증 — 비정상 파일에 의한 자원 낭비 방지\n let entryCount = 0\n zip.forEach(() => { entryCount++ })\n if (entryCount > MAX_ZIP_ENTRIES) throw new Error(\"ZIP 엔트리 수 초과 (ZIP bomb 의심)\")\n\n const sectionPaths = await resolveSectionPaths(zip)\n if (sectionPaths.length === 0) throw new Error(\"HWPX에서 섹션 파일을 찾을 수 없습니다\")\n\n let totalDecompressed = 0\n const blocks: IRBlock[] = []\n for (const path of sectionPaths) {\n const file = zip.file(path)\n if (!file) 
continue\n const xml = await file.async(\"text\")\n totalDecompressed += xml.length * 2 // UTF-16 추정\n if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new Error(\"ZIP 압축 해제 크기 초과 (ZIP bomb 의심)\")\n blocks.push(...parseSectionXml(xml))\n }\n return blocksToMarkdown(blocks)\n}\n\n// ─── 손상 ZIP 복구 (edu-facility-ai에서 포팅) ──────────\n\nfunction extractFromBrokenZip(buffer: ArrayBuffer): string {\n const data = new Uint8Array(buffer)\n const view = new DataView(buffer)\n let pos = 0\n const texts: string[] = []\n let totalDecompressed = 0\n let entryCount = 0\n\n while (pos < data.length - 30) {\n // PK\\x03\\x04 시그니처 확인\n if (data[pos] !== 0x50 || data[pos + 1] !== 0x4b || data[pos + 2] !== 0x03 || data[pos + 3] !== 0x04) break\n\n if (++entryCount > MAX_ZIP_ENTRIES) break\n\n const method = view.getUint16(pos + 8, true)\n const compSize = view.getUint32(pos + 18, true)\n const nameLen = view.getUint16(pos + 26, true)\n const extraLen = view.getUint16(pos + 28, true)\n\n // nameLen 상한 — 비정상 값에 의한 대규모 버퍼 할당 방지\n if (nameLen > 1024 || extraLen > 65535) { pos += 30 + nameLen + extraLen; continue }\n\n const fileStart = pos + 30 + nameLen + extraLen\n // 범위 초과 검증 — OOB 및 무한 루프 방지\n if (fileStart + compSize > data.length) break\n if (compSize === 0 && method !== 0) { pos = fileStart; continue }\n\n const nameBytes = data.slice(pos + 30, pos + 30 + nameLen)\n const name = new TextDecoder().decode(nameBytes)\n\n // 경로 순회 방지 — 상위 디렉토리 참조 및 절대 경로 차단\n const normalizedName = name.replace(/\\\\/g, \"/\")\n if (normalizedName.includes(\"..\") || normalizedName.startsWith(\"/\") || /^[A-Za-z]:/.test(normalizedName)) { pos = fileStart + compSize; continue }\n const fileData = data.slice(fileStart, fileStart + compSize)\n pos = fileStart + compSize\n\n if (!name.toLowerCase().includes(\"section\") || !name.endsWith(\".xml\")) continue\n\n try {\n let content: string\n if (method === 0) {\n content = new TextDecoder().decode(fileData)\n } else if (method === 8) {\n const 
decompressed = inflateRawSync(Buffer.from(fileData), { maxOutputLength: MAX_DECOMPRESS_SIZE })\n content = new TextDecoder().decode(decompressed)\n } else {\n continue\n }\n totalDecompressed += content.length * 2\n if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new Error(\"압축 해제 크기 초과\")\n const sectionText = blocksToMarkdown(parseSectionXml(content))\n if (sectionText) texts.push(sectionText)\n } catch {\n continue\n }\n }\n\n if (texts.length === 0) throw new Error(\"손상된 HWPX에서 섹션 데이터를 복구할 수 없습니다\")\n return texts.join(\"\\n\\n\")\n}\n\n// ─── Manifest 해석 ───────────────────────────────────\n\nasync function resolveSectionPaths(zip: JSZip): Promise<string[]> {\n const manifestPaths = [\"Contents/content.hpf\", \"content.hpf\"]\n for (const mp of manifestPaths) {\n const mpLower = mp.toLowerCase()\n const file = zip.file(mp) || Object.values(zip.files).find(f => f.name.toLowerCase() === mpLower) || null\n if (!file) continue\n const xml = await file.async(\"text\")\n const paths = parseSectionPathsFromManifest(xml)\n if (paths.length > 0) return paths\n }\n\n // fallback: section*.xml 직접 검색\n const sectionFiles = zip.file(/[Ss]ection\\d+\\.xml$/)\n return sectionFiles.map(f => f.name).sort()\n}\n\nfunction parseSectionPathsFromManifest(xml: string): string[] {\n const parser = new DOMParser()\n const doc = parser.parseFromString(stripDtd(xml), \"text/xml\")\n const items = doc.getElementsByTagName(\"opf:item\")\n const spine = doc.getElementsByTagName(\"opf:itemref\")\n\n const isSectionId = (id: string) => /^s/i.test(id) || id.toLowerCase().includes(\"section\")\n const idToHref = new Map<string, string>()\n for (let i = 0; i < items.length; i++) {\n const item = items[i]\n const id = item.getAttribute(\"id\") || \"\"\n let href = item.getAttribute(\"href\") || \"\"\n const mediaType = item.getAttribute(\"media-type\") || \"\"\n if (!isSectionId(id) && !mediaType.includes(\"xml\")) continue\n if (!href.startsWith(\"/\") && !href.startsWith(\"Contents/\") && 
isSectionId(id))\n href = \"Contents/\" + href\n idToHref.set(id, href)\n }\n\n if (spine.length > 0) {\n const ordered: string[] = []\n for (let i = 0; i < spine.length; i++) {\n const href = idToHref.get(spine[i].getAttribute(\"idref\") || \"\")\n if (href) ordered.push(href)\n }\n if (ordered.length > 0) return ordered\n }\n return Array.from(idToHref.entries())\n .filter(([id]) => isSectionId(id))\n .sort((a, b) => a[0].localeCompare(b[0]))\n .map(([, href]) => href)\n}\n\n// ─── 섹션 XML 파싱 ──────────────────────────────────\n\nfunction parseSectionXml(xml: string): IRBlock[] {\n const parser = new DOMParser()\n const doc = parser.parseFromString(stripDtd(xml), \"text/xml\")\n if (!doc.documentElement) return []\n\n const blocks: IRBlock[] = []\n walkSection(doc.documentElement, blocks, null, [])\n return blocks\n}\n\nfunction walkSection(\n node: Node, blocks: IRBlock[],\n tableCtx: TableState | null, tableStack: TableState[]\n): void {\n const children = node.childNodes\n if (!children) return\n\n for (let i = 0; i < children.length; i++) {\n const el = children[i] as Element\n if (el.nodeType !== 1) continue\n\n const tag = el.tagName || el.localName || \"\"\n const localTag = tag.replace(/^[^:]+:/, \"\")\n\n switch (localTag) {\n case \"tbl\": {\n if (tableCtx) tableStack.push(tableCtx)\n const newTable: TableState = { rows: [], currentRow: [], cell: null }\n walkSection(el, blocks, newTable, tableStack)\n\n if (newTable.rows.length > 0) {\n if (tableStack.length > 0) {\n const parentTable = tableStack.pop()!\n const nestedText = convertTableToText(newTable.rows)\n if (parentTable.cell) {\n parentTable.cell.text += (parentTable.cell.text ? \"\\n\" : \"\") + nestedText\n }\n tableCtx = parentTable\n } else {\n blocks.push({ type: \"table\", table: buildTable(newTable.rows) })\n tableCtx = null\n }\n } else {\n tableCtx = tableStack.length > 0 ? tableStack.pop()! 
: null\n }\n break\n }\n\n case \"tr\":\n if (tableCtx) {\n tableCtx.currentRow = []\n walkSection(el, blocks, tableCtx, tableStack)\n if (tableCtx.currentRow.length > 0) tableCtx.rows.push(tableCtx.currentRow)\n tableCtx.currentRow = []\n }\n break\n\n case \"tc\":\n if (tableCtx) {\n tableCtx.cell = { text: \"\", colSpan: 1, rowSpan: 1 }\n walkSection(el, blocks, tableCtx, tableStack)\n if (tableCtx.cell) {\n tableCtx.currentRow.push(tableCtx.cell)\n tableCtx.cell = null\n }\n }\n break\n\n case \"cellSpan\":\n if (tableCtx?.cell) {\n const cs = parseInt(el.getAttribute(\"colSpan\") || \"1\", 10)\n const rs = parseInt(el.getAttribute(\"rowSpan\") || \"1\", 10)\n tableCtx.cell.colSpan = clampSpan(cs, MAX_COLS)\n tableCtx.cell.rowSpan = clampSpan(rs, MAX_ROWS)\n }\n break\n\n case \"p\": {\n const text = extractParagraphText(el)\n if (text) {\n if (tableCtx?.cell) {\n tableCtx.cell.text += (tableCtx.cell.text ? \"\\n\" : \"\") + text\n } else if (!tableCtx) {\n blocks.push({ type: \"paragraph\", text })\n }\n }\n walkSection(el, blocks, tableCtx, tableStack)\n break\n }\n\n default:\n walkSection(el, blocks, tableCtx, tableStack)\n break\n }\n }\n}\n\nfunction extractParagraphText(para: Node): string {\n let text = \"\"\n const walk = (node: Node) => {\n const children = node.childNodes\n if (!children) return\n for (let i = 0; i < children.length; i++) {\n const child = children[i] as Element\n if (child.nodeType === 3) { text += child.textContent || \"\"; continue }\n if (child.nodeType !== 1) continue\n\n const tag = (child.tagName || child.localName || \"\").replace(/^[^:]+:/, \"\")\n switch (tag) {\n case \"t\": text += child.textContent || \"\"; break\n case \"tab\": text += \"\\t\"; break\n case \"br\":\n if ((child.getAttribute(\"type\") || \"line\") === \"line\") text += \"\\n\"\n break\n case \"fwSpace\": case \"hwSpace\": text += \" \"; break\n case \"tbl\": break // 테이블은 walkSection에서 처리\n default: walk(child); break\n }\n }\n }\n walk(para)\n return 
text.replace(/[ \\t]+/g, \" \").trim()\n}\n","/** 2-pass colSpan/rowSpan 테이블 빌더 및 Markdown 변환 */\n\nimport type { CellContext, IRBlock, IRCell, IRTable } from \"../types.js\"\n\n/** 테이블 열 수 상한 — 한국 공공문서 기준 충분한 값 */\nexport const MAX_COLS = 200\n/** 테이블 행 수 상한 — 메모리 폭주 방지 */\nexport const MAX_ROWS = 10000\n\nexport function buildTable(rows: CellContext[][]): IRTable {\n if (rows.length > MAX_ROWS) rows = rows.slice(0, MAX_ROWS)\n const numRows = rows.length\n\n // Pass 1: maxCols 계산 (sparse Set — 메모리 효율적)\n const tempOccupied = new Set<number>()\n let maxCols = 0\n\n for (let rowIdx = 0; rowIdx < numRows; rowIdx++) {\n let colIdx = 0\n for (const cell of rows[rowIdx]) {\n while (colIdx < MAX_COLS && tempOccupied.has(rowIdx * MAX_COLS + colIdx)) colIdx++\n if (colIdx >= MAX_COLS) break\n\n for (let r = rowIdx; r < Math.min(rowIdx + cell.rowSpan, numRows); r++) {\n for (let c = colIdx; c < Math.min(colIdx + cell.colSpan, MAX_COLS); c++) {\n tempOccupied.add(r * MAX_COLS + c)\n }\n }\n colIdx += cell.colSpan\n if (colIdx > maxCols) maxCols = colIdx\n }\n }\n tempOccupied.clear()\n\n if (maxCols === 0) return { rows: 0, cols: 0, cells: [], hasHeader: false }\n\n // Pass 2: 실제 배치\n const grid: IRCell[][] = Array.from({ length: numRows }, () =>\n Array.from({ length: maxCols }, () => ({ text: \"\", colSpan: 1, rowSpan: 1 }))\n )\n const occupied: boolean[][] = Array.from({ length: numRows }, () => Array(maxCols).fill(false))\n\n for (let rowIdx = 0; rowIdx < numRows; rowIdx++) {\n let colIdx = 0\n let cellIdx = 0\n\n while (colIdx < maxCols && cellIdx < rows[rowIdx].length) {\n while (colIdx < maxCols && occupied[rowIdx][colIdx]) colIdx++\n if (colIdx >= maxCols) break\n\n const cell = rows[rowIdx][cellIdx]\n grid[rowIdx][colIdx] = {\n text: cell.text.trim(),\n colSpan: cell.colSpan,\n rowSpan: cell.rowSpan,\n }\n\n for (let r = rowIdx; r < Math.min(rowIdx + cell.rowSpan, numRows); r++) {\n for (let c = colIdx; c < Math.min(colIdx + cell.colSpan, maxCols); c++) {\n 
occupied[r][c] = true\n }\n }\n\n colIdx += cell.colSpan\n cellIdx++\n }\n }\n\n return { rows: numRows, cols: maxCols, cells: grid, hasHeader: numRows > 1 }\n}\n\nexport function convertTableToText(rows: CellContext[][]): string {\n return rows\n .map(row =>\n row\n .map(c => c.text.trim().replace(/\\n/g, \" \"))\n .filter(Boolean)\n .join(\" | \")\n )\n .filter(Boolean)\n .join(\"\\n\")\n}\n\nexport function blocksToMarkdown(blocks: IRBlock[]): string {\n const lines: string[] = []\n\n for (let i = 0; i < blocks.length; i++) {\n const block = blocks[i]\n\n if (block.type === \"paragraph\" && block.text) {\n const text = block.text\n\n if (/^\\[별표\\s*\\d+/.test(text)) {\n const nextBlock = blocks[i + 1]\n if (nextBlock?.type === \"paragraph\" && nextBlock.text && /관련\\)?$/.test(nextBlock.text)) {\n lines.push(\"\", `## ${text} ${nextBlock.text}`, \"\")\n i++\n } else {\n lines.push(\"\", `## ${text}`, \"\")\n }\n continue\n }\n\n if (/^\\([^)]*조[^)]*관련\\)$/.test(text)) {\n lines.push(`*${text}*`, \"\")\n continue\n }\n\n lines.push(text)\n } else if (block.type === \"table\" && block.table) {\n lines.push(tableToMarkdown(block.table))\n }\n }\n\n return lines.join(\"\\n\").trim()\n}\n\nfunction tableToMarkdown(table: IRTable): string {\n if (table.rows === 0 || table.cols === 0) return \"\"\n\n const { cells, rows: numRows, cols: numCols } = table\n\n // 1행 1열 → 구조화된 텍스트\n if (numRows === 1 && numCols === 1) {\n const content = cells[0][0].text\n return content\n .split(/\\n/)\n .map(line => {\n const trimmed = line.trim()\n if (!trimmed) return \"\"\n if (/^\\d+\\.\\s/.test(trimmed)) return `**${trimmed}**`\n if (/^[가-힣]\\.\\s/.test(trimmed)) return ` ${trimmed}`\n return trimmed\n })\n .filter(Boolean)\n .join(\"\\n\")\n }\n\n // 병합 셀: 행/열 병합된 셀은 빈 칸으로\n const display: string[][] = Array.from({ length: numRows }, () => Array(numCols).fill(\"\"))\n const skip = new Set<string>()\n\n for (let r = 0; r < numRows; r++) {\n for (let c = 0; c < numCols; c++) {\n if 
(skip.has(`${r},${c}`)) continue\n const cell = cells[r][c]\n display[r][c] = cell.text.replace(/\\n/g, \"<br>\")\n\n for (let dr = 0; dr < cell.rowSpan; dr++) {\n for (let dc = 0; dc < cell.colSpan; dc++) {\n if (dr === 0 && dc === 0) continue\n if (r + dr < numRows && c + dc < numCols) {\n skip.add(`${r + dr},${c + dc}`)\n }\n }\n }\n }\n }\n\n // rowSpan에 의해 생긴 빈 placeholder 행만 제거 (내용이 동일한 실제 데이터 행은 유지)\n const uniqueRows: string[][] = []\n for (const row of display) {\n const isEmptyPlaceholder = row.every(cell => cell === \"\")\n if (!isEmptyPlaceholder) uniqueRows.push(row)\n }\n\n if (uniqueRows.length === 0) return \"\"\n\n const md: string[] = []\n md.push(\"| \" + uniqueRows[0].join(\" | \") + \" |\")\n md.push(\"| \" + uniqueRows[0].map(() => \"---\").join(\" | \") + \" |\")\n for (let i = 1; i < uniqueRows.length; i++) {\n md.push(\"| \" + uniqueRows[i].join(\" | \") + \" |\")\n }\n return md.join(\"\\n\")\n}\n","/** HWP 5.x 레코드 리더, UTF-16LE 텍스트 추출, 스트림 압축해제 */\n\nimport { inflateRawSync, inflateSync } from \"zlib\"\n\n// ─── 레코드 태그 상수 ────────────────────────────────\n\nexport const TAG_PARA_HEADER = 0x0042\nexport const TAG_PARA_TEXT = 0x0043\nexport const TAG_CTRL_HEADER = 0x0047\nexport const TAG_LIST_HEADER = 0x0048\nexport const TAG_TABLE = 0x004d\n\n// 특수 문자 코드 (UTF-16LE)\n// HWP 스펙에서 0x0000은 NUL이 아닌 줄바꿈(line break)으로 정의됨\nconst CHAR_LINE = 0x0000\nconst CHAR_PARA = 0x000d\nconst CHAR_TAB = 0x0009\nconst CHAR_HYPHEN = 0x001e\nconst CHAR_NBSP = 0x001f\nconst CHAR_FIXED_NBSP = 0x0018\n\n// FileHeader 플래그\nexport const FLAG_COMPRESSED = 1 << 0\nexport const FLAG_ENCRYPTED = 1 << 1\nexport const FLAG_DRM = 1 << 4\n\n// ─── 레코드 구조 ─────────────────────────────────────\n\nexport interface HwpRecord {\n tagId: number\n level: number\n size: number\n data: Buffer\n}\n\nexport interface HwpFileHeader {\n signature: string\n versionMajor: number\n flags: number\n}\n\n// ─── 레코드 리더 ─────────────────────────────────────\n\n/** 최대 레코드 수 — 비정상 파일에 의한 메모리 폭주 방지 
*/\nconst MAX_RECORDS = 500_000\n\nexport function readRecords(data: Buffer): HwpRecord[] {\n const records: HwpRecord[] = []\n let offset = 0\n\n while (offset + 4 <= data.length && records.length < MAX_RECORDS) {\n const header = data.readUInt32LE(offset)\n offset += 4\n\n const tagId = header & 0x3ff\n const level = (header >> 10) & 0x3ff\n let size = (header >> 20) & 0xfff\n\n // 확장 크기\n if (size === 0xfff) {\n if (offset + 4 > data.length) break\n size = data.readUInt32LE(offset)\n offset += 4\n }\n\n if (offset + size > data.length) break\n records.push({ tagId, level, size, data: data.subarray(offset, offset + size) })\n offset += size\n }\n\n return records\n}\n\n// ─── 스트림 압축 해제 ────────────────────────────────\n\n/** 압축 해제 최대 크기 (100MB) — decompression bomb 방지 */\nconst MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024\n\nexport function decompressStream(data: Buffer): Buffer {\n const opts = { maxOutputLength: MAX_DECOMPRESS_SIZE }\n if (data.length >= 2 && data[0] === 0x78) {\n try { return inflateSync(data, opts) } catch { /* fallback to raw */ }\n }\n return inflateRawSync(data, opts)\n}\n\n// ─── FileHeader 파싱 ─────────────────────────────────\n\nexport function parseFileHeader(data: Buffer): HwpFileHeader {\n if (data.length < 40) throw new Error(\"FileHeader가 너무 짧습니다 (최소 40바이트)\")\n const sig = data.subarray(0, 32).toString(\"utf8\").replace(/\\0+$/, \"\")\n return {\n signature: sig,\n versionMajor: data[35],\n flags: data.readUInt32LE(36),\n }\n}\n\n// ─── UTF-16LE 텍스트 추출 (21가지 제어문자 처리) ─────\n\nexport function extractText(data: Buffer): string {\n let result = \"\"\n let i = 0\n\n while (i + 1 < data.length) {\n const ch = data.readUInt16LE(i)\n i += 2\n\n switch (ch) {\n case CHAR_LINE: result += \"\\n\"; break\n case CHAR_PARA: break\n case CHAR_TAB: result += \"\\t\"; break\n case CHAR_HYPHEN: result += \"-\"; break\n case CHAR_NBSP: case CHAR_FIXED_NBSP: result += \" \"; break\n default:\n if (ch >= 0x0001 && ch <= 0x001f) {\n const isExt = (ch >= 1 
&& ch <= 3) || (ch >= 10 && ch <= 18) || (ch >= 21 && ch <= 23)\n const isInline = (ch >= 4 && ch <= 9) || (ch >= 19 && ch <= 20)\n if ((isExt || isInline) && i + 14 <= data.length) i += 14\n } else if (ch >= 0x0020) {\n // UTF-16 surrogate pair 처리 (BMP 외 문자: 이모지, CJK 확장 등)\n if (ch >= 0xd800 && ch <= 0xdbff && i + 1 < data.length) {\n const lo = data.readUInt16LE(i)\n if (lo >= 0xdc00 && lo <= 0xdfff) {\n i += 2\n const codePoint = ((ch - 0xd800) << 10) + (lo - 0xdc00) + 0x10000\n result += String.fromCodePoint(codePoint)\n break\n }\n }\n result += String.fromCharCode(ch)\n }\n break\n }\n }\n\n return result\n}\n","/** HWP 5.x 바이너리 파서 — OLE2 컨테이너 → 섹션 → Markdown */\n\nimport {\n readRecords, decompressStream, parseFileHeader, extractText,\n TAG_PARA_HEADER, TAG_PARA_TEXT, TAG_CTRL_HEADER, TAG_LIST_HEADER, TAG_TABLE,\n FLAG_COMPRESSED, FLAG_ENCRYPTED, FLAG_DRM,\n type HwpRecord,\n} from \"./record.js\"\nimport { buildTable, blocksToMarkdown, MAX_COLS, MAX_ROWS } from \"../table/builder.js\"\nimport type { CellContext, IRBlock } from \"../types.js\"\n\nimport { createRequire } from \"module\"\nconst require = createRequire(import.meta.url)\nconst CFB: CfbModule = require(\"cfb\")\n\ninterface CfbEntry { name?: string; content?: Buffer | Uint8Array }\ninterface CfbContainer { FileIndex?: CfbEntry[] }\ninterface CfbModule {\n parse(data: Buffer): CfbContainer\n find(cfb: CfbContainer, path: string): CfbEntry | null\n}\n\n/** 최대 섹션 수 — 비정상 파일에 의한 무한 루프 방지 */\nconst MAX_SECTIONS = 100\n/** 누적 압축 해제 최대 크기 (100MB) */\nconst MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024\n\nexport function parseHwp5Document(buffer: Buffer): string {\n const cfb = CFB.parse(buffer)\n\n const headerEntry = CFB.find(cfb, \"/FileHeader\")\n if (!headerEntry?.content) throw new Error(\"FileHeader 스트림 없음\")\n const header = parseFileHeader(Buffer.from(headerEntry.content))\n if (header.signature !== \"HWP Document File\") throw new Error(\"HWP 시그니처 불일치\")\n if (header.flags & FLAG_ENCRYPTED) throw 
new Error(\"암호화된 HWP는 지원하지 않습니다\")\n if (header.flags & FLAG_DRM) throw new Error(\"DRM 보호된 HWP는 지원하지 않습니다\")\n const compressed = (header.flags & FLAG_COMPRESSED) !== 0\n\n const sections = findSections(cfb)\n if (sections.length === 0) throw new Error(\"섹션 스트림을 찾을 수 없습니다\")\n\n const blocks: IRBlock[] = []\n let totalDecompressed = 0\n for (const sectionData of sections) {\n const data = compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData)\n totalDecompressed += data.length\n if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new Error(\"총 압축 해제 크기 초과 (decompression bomb 의심)\")\n const records = readRecords(data)\n blocks.push(...parseSection(records))\n }\n\n return blocksToMarkdown(blocks)\n}\n\nfunction findSections(cfb: CfbContainer): Buffer[] {\n const sections: Array<{ idx: number; content: Buffer }> = []\n\n for (let i = 0; i < MAX_SECTIONS; i++) {\n const entry = CFB.find(cfb, `/BodyText/Section${i}`)\n if (!entry?.content) break\n sections.push({ idx: i, content: Buffer.from(entry.content) })\n }\n\n if (sections.length === 0 && cfb.FileIndex) {\n for (const entry of cfb.FileIndex) {\n if (sections.length >= MAX_SECTIONS) break\n if (entry.name?.startsWith(\"Section\") && entry.content) {\n const idx = parseInt(entry.name.replace(\"Section\", \"\"), 10) || 0\n sections.push({ idx, content: Buffer.from(entry.content) })\n }\n }\n }\n\n return sections.sort((a, b) => a.idx - b.idx).map(s => s.content)\n}\n\nfunction parseSection(records: HwpRecord[]): IRBlock[] {\n const blocks: IRBlock[] = []\n let i = 0\n\n while (i < records.length) {\n const rec = records[i]\n\n if (rec.tagId === TAG_PARA_HEADER && rec.level === 0) {\n const { paragraph, tables, nextIdx } = parseParagraphWithTables(records, i)\n if (paragraph) blocks.push({ type: \"paragraph\", text: paragraph })\n for (const t of tables) blocks.push({ type: \"table\", table: t })\n i = nextIdx\n continue\n }\n\n if (rec.tagId === TAG_CTRL_HEADER && rec.level <= 1 && 
rec.data.length >= 4) {\n const ctrlId = rec.data.subarray(0, 4).toString(\"ascii\")\n if (ctrlId === \" lbt\" || ctrlId === \"tbl \") {\n const { table, nextIdx } = parseTableBlock(records, i)\n if (table) blocks.push({ type: \"table\", table })\n i = nextIdx\n continue\n }\n }\n\n i++\n }\n\n return blocks\n}\n\nfunction parseParagraphWithTables(records: HwpRecord[], startIdx: number) {\n const startLevel = records[startIdx].level\n let text = \"\"\n const tables: ReturnType<typeof buildTable>[] = []\n let i = startIdx + 1\n\n while (i < records.length) {\n const rec = records[i]\n if (rec.tagId === TAG_PARA_HEADER && rec.level <= startLevel) break\n\n if (rec.tagId === TAG_PARA_TEXT) {\n text = extractText(rec.data)\n }\n\n if (rec.tagId === TAG_CTRL_HEADER && rec.data.length >= 4) {\n const ctrlId = rec.data.subarray(0, 4).toString(\"ascii\")\n if (ctrlId === \" lbt\" || ctrlId === \"tbl \") {\n const { table, nextIdx } = parseTableBlock(records, i)\n if (table) tables.push(table)\n i = nextIdx\n continue\n }\n }\n i++\n }\n\n const trimmed = text.trim()\n return { paragraph: trimmed || null, tables, nextIdx: i }\n}\n\nfunction parseTableBlock(records: HwpRecord[], startIdx: number) {\n const tableLevel = records[startIdx].level\n let i = startIdx + 1\n let rows = 0, cols = 0\n const cells: CellContext[] = []\n\n while (i < records.length) {\n const rec = records[i]\n if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break\n if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break\n\n if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {\n rows = Math.min(rec.data.readUInt16LE(4), MAX_ROWS)\n cols = Math.min(rec.data.readUInt16LE(6), MAX_COLS)\n }\n\n if (rec.tagId === TAG_LIST_HEADER) {\n const { cell, nextIdx } = parseCellBlock(records, i, tableLevel)\n if (cell) cells.push(cell)\n i = nextIdx\n continue\n }\n i++\n }\n\n if (rows === 0 || cols === 0 || cells.length === 0) return { table: null, nextIdx: i }\n\n const cellRows = 
arrangeCells(rows, cols, cells)\n return { table: buildTable(cellRows), nextIdx: i }\n}\n\nfunction parseCellBlock(records: HwpRecord[], startIdx: number, tableLevel: number) {\n const rec = records[startIdx]\n const cellLevel = rec.level\n const texts: string[] = []\n\n // LIST_HEADER에서 셀 병합 정보 추출\n // HWP5 셀 LIST_HEADER 구조: paraCount(u16) + flags(u32) + colAddr(u16) + rowAddr(u16) + colSpan(u16) + rowSpan(u16)\n let colSpan = 1\n let rowSpan = 1\n if (rec.data.length >= 14) {\n const cs = rec.data.readUInt16LE(10)\n const rs = rec.data.readUInt16LE(12)\n if (cs > 0) colSpan = Math.min(cs, MAX_COLS)\n if (rs > 0) rowSpan = Math.min(rs, MAX_ROWS)\n }\n\n let i = startIdx + 1\n\n while (i < records.length) {\n const r = records[i]\n if (r.tagId === TAG_LIST_HEADER && r.level <= cellLevel) break\n if (r.level <= tableLevel && (r.tagId === TAG_PARA_HEADER || r.tagId === TAG_CTRL_HEADER)) break\n\n if (r.tagId === TAG_PARA_TEXT) {\n const t = extractText(r.data).trim()\n if (t) texts.push(t)\n }\n i++\n }\n\n return { cell: { text: texts.join(\"\\n\"), colSpan, rowSpan } as CellContext, nextIdx: i }\n}\n\nfunction arrangeCells(rows: number, cols: number, cells: CellContext[]): CellContext[][] {\n const grid: (CellContext | null)[][] = Array.from({ length: rows }, () => Array(cols).fill(null))\n let cellIdx = 0\n\n for (let r = 0; r < rows && cellIdx < cells.length; r++) {\n for (let c = 0; c < cols && cellIdx < cells.length; c++) {\n if (grid[r][c] !== null) continue\n const cell = cells[cellIdx++]\n grid[r][c] = cell\n\n for (let dr = 0; dr < cell.rowSpan; dr++) {\n for (let dc = 0; dc < cell.colSpan; dc++) {\n if (dr === 0 && dc === 0) continue\n if (r + dr < rows && c + dc < cols)\n grid[r + dr][c + dc] = { text: \"\", colSpan: 1, rowSpan: 1 }\n }\n }\n }\n }\n\n return grid.map(row => row.map(c => c || { text: \"\", colSpan: 1, rowSpan: 1 }))\n}\n","/** PDF 텍스트 추출 (pdfjs-dist 기반 서버사이드 파싱) */\n\nimport type { ParseResult } from \"../types.js\"\n\n/** 최대 처리 페이지 수 — 
OOM 방지 */\nconst MAX_PAGES = 5000\n/** 누적 텍스트 최대 크기 (100MB) — 메모리 폭주 방지 */\nconst MAX_TOTAL_TEXT = 100 * 1024 * 1024\n\nimport { createRequire } from \"module\"\nimport { pathToFileURL } from \"url\"\n\n// pdfjs-dist는 external로 빌드됨 — 설치 안 되어 있으면 런타임에 잡힘\ninterface PdfjsModule {\n getDocument: (opts: Record<string, unknown>) => { promise: Promise<PdfjsDocument> }\n GlobalWorkerOptions: { workerSrc: string }\n}\ninterface PdfjsDocument {\n numPages: number\n getPage: (n: number) => Promise<PdfjsPage>\n destroy: () => Promise<void>\n}\ninterface PdfjsPage {\n getTextContent: () => Promise<{ items: PdfjsTextItem[] }>\n}\ninterface PdfjsTextItem {\n str: string\n transform: number[]\n width: number\n height: number\n}\n\nlet pdfjsModule: PdfjsModule | null = null\n\nasync function loadPdfjs(): Promise<PdfjsModule | null> {\n if (pdfjsModule) return pdfjsModule\n try {\n const mod = await import(\"pdfjs-dist/legacy/build/pdf.mjs\") as unknown as PdfjsModule\n // 워커 경로를 file:// URL로 설정 (Node.js ESM 환경 필수)\n const req = createRequire(import.meta.url)\n const workerPath = req.resolve(\"pdfjs-dist/legacy/build/pdf.worker.mjs\")\n mod.GlobalWorkerOptions.workerSrc = pathToFileURL(workerPath).href\n pdfjsModule = mod\n return mod\n } catch (err) {\n // import 실패 원인을 구분하여 반환\n const msg = err instanceof Error ? err.message : String(err)\n if (msg.includes(\"Cannot find\") || msg.includes(\"MODULE_NOT_FOUND\")) {\n return null // 미설치\n }\n throw new Error(`pdfjs-dist 로딩 실패: ${msg}`)\n }\n}\n\nexport async function parsePdfDocument(buffer: ArrayBuffer): Promise<ParseResult> {\n const pdfjs = await loadPdfjs()\n if (!pdfjs) {\n return {\n success: false,\n fileType: \"pdf\",\n pageCount: 0,\n error: \"pdfjs-dist가 설치되지 않았습니다. 
npm install pdfjs-dist\",\n }\n }\n\n const data = new Uint8Array(buffer)\n const doc = await pdfjs.getDocument({\n data,\n useSystemFonts: true,\n disableFontFace: true,\n isEvalSupported: false,\n }).promise\n\n try {\n const pageCount = doc.numPages\n if (pageCount === 0) {\n return { success: false, fileType: \"pdf\", pageCount: 0, error: \"PDF에 페이지가 없습니다.\" }\n }\n\n const pageTexts: string[] = []\n let totalChars = 0\n let totalTextBytes = 0\n const effectivePageCount = Math.min(pageCount, MAX_PAGES)\n\n for (let i = 1; i <= effectivePageCount; i++) {\n const page = await doc.getPage(i)\n const textContent = await page.getTextContent()\n const lines = groupTextItemsByLine(textContent.items)\n const pageText = lines.join(\"\\n\")\n totalChars += pageText.replace(/\\s/g, \"\").length\n totalTextBytes += pageText.length * 2\n if (totalTextBytes > MAX_TOTAL_TEXT) throw new Error(`텍스트 추출 크기 초과 (${MAX_TOTAL_TEXT / 1024 / 1024}MB 제한)`)\n pageTexts.push(pageText)\n }\n\n const avgCharsPerPage = totalChars / effectivePageCount\n if (avgCharsPerPage < 10) {\n return {\n success: false,\n fileType: \"pdf\",\n pageCount,\n isImageBased: true,\n error: `이미지 기반 PDF로 추정됩니다 (${pageCount}페이지, 추출 텍스트 ${totalChars}자).`,\n }\n }\n\n let markdown = \"\"\n for (let i = 0; i < pageTexts.length; i++) {\n const cleaned = cleanPdfText(pageTexts[i])\n if (cleaned.trim()) {\n if (i > 0 && markdown) markdown += \"\\n\\n\"\n markdown += cleaned\n }\n }\n\n markdown = reconstructTables(markdown)\n\n const truncated = pageCount > MAX_PAGES\n return { success: true, fileType: \"pdf\", markdown, pageCount: effectivePageCount, isImageBased: false, ...(truncated && { warning: `PDF가 ${pageCount}페이지이지만 ${MAX_PAGES}페이지까지만 처리했습니다` }) }\n } finally {\n await doc.destroy().catch(() => {})\n }\n}\n\n// ─── 텍스트 아이템 → 행 그룹핑 ──────────────────────\n\nfunction groupTextItemsByLine(items: PdfjsTextItem[]): string[] {\n if (items.length === 0) return []\n\n const textItems = items.filter(item => typeof 
item.str === \"string\" && item.str.trim() !== \"\")\n if (textItems.length === 0) return []\n\n textItems.sort((a, b) => {\n const yDiff = b.transform[5] - a.transform[5]\n if (Math.abs(yDiff) < 2) return a.transform[4] - b.transform[4]\n return yDiff\n })\n\n const lines: string[] = []\n let currentY = textItems[0].transform[5]\n let currentLine: { text: string; x: number; width: number }[] = []\n\n for (const item of textItems) {\n const y = item.transform[5]\n\n if (Math.abs(currentY - y) > Math.max(item.height * 0.5, 2)) {\n if (currentLine.length > 0) lines.push(mergeLineItems(currentLine))\n currentLine = []\n currentY = y\n }\n\n currentLine.push({ text: item.str, x: item.transform[4], width: item.width })\n }\n\n if (currentLine.length > 0) lines.push(mergeLineItems(currentLine))\n return lines\n}\n\nfunction mergeLineItems(items: { text: string; x: number; width: number }[]): string {\n if (items.length <= 1) return items[0]?.text || \"\"\n items.sort((a, b) => a.x - b.x)\n\n let result = items[0].text\n for (let i = 1; i < items.length; i++) {\n const gap = items[i].x - (items[i - 1].x + items[i - 1].width)\n if (gap > 15) result += \"\\t\"\n else if (gap > 3) result += \" \"\n result += items[i].text\n }\n return result\n}\n\nexport function cleanPdfText(text: string): string {\n return text\n .replace(/^[\\s]*[-–—]\\s*\\d+\\s*[-–—][\\s]*$/gm, \"\")\n .replace(/^\\s*\\d+\\s*\\/\\s*\\d+\\s*$/gm, \"\")\n .replace(/([가-힣·,\\-])\\n([가-힣(])/g, \"$1 $2\")\n .replace(/\\n{3,}/g, \"\\n\\n\")\n .trim()\n}\n\nfunction reconstructTables(text: string): string {\n const lines = text.split(\"\\n\")\n const result: string[] = []\n let tableBuffer: string[][] = []\n\n for (const line of lines) {\n if (line.includes(\"\\t\")) {\n tableBuffer.push(line.split(\"\\t\").map(c => c.trim()))\n } else {\n if (tableBuffer.length >= 2) result.push(formatAsMarkdownTable(tableBuffer))\n else if (tableBuffer.length === 1) result.push(tableBuffer[0].join(\" | \"))\n tableBuffer = 
[]\n result.push(line)\n }\n }\n\n if (tableBuffer.length >= 2) result.push(formatAsMarkdownTable(tableBuffer))\n else if (tableBuffer.length === 1) result.push(tableBuffer[0].join(\" | \"))\n\n return result.join(\"\\n\")\n}\n\nfunction formatAsMarkdownTable(rows: string[][]): string {\n const maxCols = Math.max(...rows.map(r => r.length))\n // defensive copy — 원본 배열 변경 방지\n const normalized = rows.map(r => {\n const copy = [...r]\n while (copy.length < maxCols) copy.push(\"\")\n return copy\n })\n\n const lines: string[] = []\n lines.push(\"| \" + normalized[0].join(\" | \") + \" |\")\n lines.push(\"| \" + normalized[0].map(() => \"---\").join(\" | \") + \" |\")\n for (let i = 1; i < normalized.length; i++) {\n lines.push(\"| \" + normalized[i].join(\" | \") + \" |\")\n }\n return lines.join(\"\\n\")\n}\n","/**\n * kordoc — 모두 파싱해버리겠다\n *\n * HWP, HWPX, PDF → Markdown 변환 통합 라이브러리\n */\n\nimport { detectFormat, isHwpxFile, isOldHwpFile, isPdfFile } from \"./detect.js\"\nimport { parseHwpxDocument } from \"./hwpx/parser.js\"\nimport { parseHwp5Document } from \"./hwp5/parser.js\"\nimport { parsePdfDocument } from \"./pdf/parser.js\"\nimport type { ParseResult } from \"./types.js\"\n\n// ─── 메인 API ────────────────────────────────────────\n\n/**\n * 파일 버퍼를 자동 감지하여 Markdown으로 변환\n *\n * @example\n * ```ts\n * import { parse } from \"kordoc\"\n * const result = await parse(buffer)\n * if (result.success) console.log(result.markdown)\n * ```\n */\nexport async function parse(buffer: ArrayBuffer): Promise<ParseResult> {\n if (!buffer || buffer.byteLength === 0) {\n return { success: false, fileType: \"unknown\", error: \"빈 버퍼이거나 유효하지 않은 입력입니다.\" }\n }\n const format = detectFormat(buffer)\n\n switch (format) {\n case \"hwpx\":\n return parseHwpx(buffer)\n case \"hwp\":\n return parseHwp(buffer)\n case \"pdf\":\n return parsePdf(buffer)\n default:\n return { success: false, fileType: \"unknown\", error: \"지원하지 않는 파일 형식입니다.\" }\n }\n}\n\n// ─── 포맷별 API 
──────────────────────────────────────\n\n/** HWPX 파일을 Markdown으로 변환 */\nexport async function parseHwpx(buffer: ArrayBuffer): Promise<ParseResult> {\n try {\n const markdown = await parseHwpxDocument(buffer)\n return { success: true, fileType: \"hwpx\", markdown }\n } catch (err) {\n return { success: false, fileType: \"hwpx\", error: err instanceof Error ? err.message : \"HWPX 파싱 실패\" }\n }\n}\n\n/** HWP 5.x 바이너리 파일을 Markdown으로 변환 */\nexport async function parseHwp(buffer: ArrayBuffer): Promise<ParseResult> {\n try {\n const markdown = parseHwp5Document(Buffer.from(buffer))\n return { success: true, fileType: \"hwp\", markdown }\n } catch (err) {\n return { success: false, fileType: \"hwp\", error: err instanceof Error ? err.message : \"HWP 파싱 실패\" }\n }\n}\n\n/** PDF 파일에서 텍스트를 추출하여 Markdown으로 변환 */\nexport async function parsePdf(buffer: ArrayBuffer): Promise<ParseResult> {\n try {\n return await parsePdfDocument(buffer)\n } catch (err) {\n return { success: false, fileType: \"pdf\", error: err instanceof Error ? 
err.message : \"PDF 파싱 실패\" }\n }\n}\n\n// ─── Re-exports ──────────────────────────────────────\n\nexport { detectFormat, isHwpxFile, isOldHwpFile, isPdfFile } from \"./detect.js\"\nexport type { ParseResult, ParseSuccess, ParseFailure, FileType, IRBlock, IRTable, IRCell, CellContext } from \"./types.js\"\nexport { buildTable, blocksToMarkdown, convertTableToText } from \"./table/builder.js\"\nexport { VERSION } from \"./utils.js\"\n"],"mappings":";;;AAKA,SAAS,WAAW,QAAiC;AACnD,SAAO,IAAI,WAAW,QAAQ,GAAG,KAAK,IAAI,GAAG,OAAO,UAAU,CAAC;AACjE;AAGO,SAAS,WAAW,QAA8B;AACvD,QAAM,IAAI,WAAW,MAAM;AAC3B,SAAO,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM,KAAQ,EAAE,CAAC,MAAM;AACrE;AAGO,SAAS,aAAa,QAA8B;AACzD,QAAM,IAAI,WAAW,MAAM;AAC3B,SAAO,EAAE,CAAC,MAAM,OAAQ,EAAE,CAAC,MAAM,OAAQ,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM;AACrE;AAGO,SAAS,UAAU,QAA8B;AACtD,QAAM,IAAI,WAAW,MAAM;AAC3B,SAAO,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM,MAAQ,EAAE,CAAC,MAAM;AACrE;AAGO,SAAS,aAAa,QAA+B;AAC1D,MAAI,OAAO,aAAa,EAAG,QAAO;AAClC,MAAI,WAAW,MAAM,EAAG,QAAO;AAC/B,MAAI,aAAa,MAAM,EAAG,QAAO;AACjC,MAAI,UAAU,MAAM,EAAG,QAAO;AAC9B,SAAO;AACT;;;AC9BO,IAAM,UAAkB,OAA4C,UAAqB;AAMzF,SAAS,cAAc,KAA0B;AACtD,SAAO,IAAI,OAAO,MAAM,IAAI,YAAY,IAAI,aAAa,IAAI,UAAU;AACzE;;;ACNA,OAAO,WAAW;AAClB,SAAS,sBAAsB;AAC/B,SAAS,iBAAiB;;;ACHnB,IAAM,WAAW;AAEjB,IAAM,WAAW;AAEjB,SAAS,WAAW,MAAgC;AACzD,MAAI,KAAK,SAAS,SAAU,QAAO,KAAK,MAAM,GAAG,QAAQ;AACzD,QAAM,UAAU,KAAK;AAGrB,QAAM,eAAe,oBAAI,IAAY;AACrC,MAAI,UAAU;AAEd,WAAS,SAAS,GAAG,SAAS,SAAS,UAAU;AAC/C,QAAI,SAAS;AACb,eAAW,QAAQ,KAAK,MAAM,GAAG;AAC/B,aAAO,SAAS,YAAY,aAAa,IAAI,SAAS,WAAW,MAAM,EAAG;AAC1E,UAAI,UAAU,SAAU;AAExB,eAAS,IAAI,QAAQ,IAAI,KAAK,IAAI,SAAS,KAAK,SAAS,OAAO,GAAG,KAAK;AACtE,iBAAS,IAAI,QAAQ,IAAI,KAAK,IAAI,SAAS,KAAK,SAAS,QAAQ,GAAG,KAAK;AACvE,uBAAa,IAAI,IAAI,WAAW,CAAC;AAAA,QACnC;AAAA,MACF;AACA,gBAAU,KAAK;AACf,UAAI,SAAS,QAAS,WAAU;AAAA,IAClC;AAAA,EACF;AACA,eAAa,MAAM;AAEnB,MAAI,YAAY,EAAG,QAAO,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC,GAAG,WAAW,MAAM;AAG1E,QAAM,OAAmB,MAAM;AAAA,IAAK,EAAE,QAAQ,QAAQ;
AAAA,IAAG,MACvD,MAAM,KAAK,EAAE,QAAQ,QAAQ,GAAG,OAAO,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE,EAAE;AAAA,EAC9E;AACA,QAAM,WAAwB,MAAM,KAAK,EAAE,QAAQ,QAAQ,GAAG,MAAM,MAAM,OAAO,EAAE,KAAK,KAAK,CAAC;AAE9F,WAAS,SAAS,GAAG,SAAS,SAAS,UAAU;AAC/C,QAAI,SAAS;AACb,QAAI,UAAU;AAEd,WAAO,SAAS,WAAW,UAAU,KAAK,MAAM,EAAE,QAAQ;AACxD,aAAO,SAAS,WAAW,SAAS,MAAM,EAAE,MAAM,EAAG;AACrD,UAAI,UAAU,QAAS;AAEvB,YAAM,OAAO,KAAK,MAAM,EAAE,OAAO;AACjC,WAAK,MAAM,EAAE,MAAM,IAAI;AAAA,QACrB,MAAM,KAAK,KAAK,KAAK;AAAA,QACrB,SAAS,KAAK;AAAA,QACd,SAAS,KAAK;AAAA,MAChB;AAEA,eAAS,IAAI,QAAQ,IAAI,KAAK,IAAI,SAAS,KAAK,SAAS,OAAO,GAAG,KAAK;AACtE,iBAAS,IAAI,QAAQ,IAAI,KAAK,IAAI,SAAS,KAAK,SAAS,OAAO,GAAG,KAAK;AACtE,mBAAS,CAAC,EAAE,CAAC,IAAI;AAAA,QACnB;AAAA,MACF;AAEA,gBAAU,KAAK;AACf;AAAA,IACF;AAAA,EACF;AAEA,SAAO,EAAE,MAAM,SAAS,MAAM,SAAS,OAAO,MAAM,WAAW,UAAU,EAAE;AAC7E;AAEO,SAAS,mBAAmB,MAA+B;AAChE,SAAO,KACJ;AAAA,IAAI,SACH,IACG,IAAI,OAAK,EAAE,KAAK,KAAK,EAAE,QAAQ,OAAO,GAAG,CAAC,EAC1C,OAAO,OAAO,EACd,KAAK,KAAK;AAAA,EACf,EACC,OAAO,OAAO,EACd,KAAK,IAAI;AACd;AAEO,SAAS,iBAAiB,QAA2B;AAC1D,QAAM,QAAkB,CAAC;AAEzB,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,UAAM,QAAQ,OAAO,CAAC;AAEtB,QAAI,MAAM,SAAS,eAAe,MAAM,MAAM;AAC5C,YAAM,OAAO,MAAM;AAEnB,UAAI,cAAc,KAAK,IAAI,GAAG;AAC5B,cAAM,YAAY,OAAO,IAAI,CAAC;AAC9B,YAAI,WAAW,SAAS,eAAe,UAAU,QAAQ,SAAS,KAAK,UAAU,IAAI,GAAG;AACtF,gBAAM,KAAK,IAAI,MAAM,IAAI,IAAI,UAAU,IAAI,IAAI,EAAE;AACjD;AAAA,QACF,OAAO;AACL,gBAAM,KAAK,IAAI,MAAM,IAAI,IAAI,EAAE;AAAA,QACjC;AACA;AAAA,MACF;AAEA,UAAI,sBAAsB,KAAK,IAAI,GAAG;AACpC,cAAM,KAAK,IAAI,IAAI,KAAK,EAAE;AAC1B;AAAA,MACF;AAEA,YAAM,KAAK,IAAI;AAAA,IACjB,WAAW,MAAM,SAAS,WAAW,MAAM,OAAO;AAChD,YAAM,KAAK,gBAAgB,MAAM,KAAK,CAAC;AAAA,IACzC;AAAA,EACF;AAEA,SAAO,MAAM,KAAK,IAAI,EAAE,KAAK;AAC/B;AAEA,SAAS,gBAAgB,OAAwB;AAC/C,MAAI,MAAM,SAAS,KAAK,MAAM,SAAS,EAAG,QAAO;AAEjD,QAAM,EAAE,OAAO,MAAM,SAAS,MAAM,QAAQ,IAAI;AAGhD,MAAI,YAAY,KAAK,YAAY,GAAG;AAClC,UAAM,UAAU,MAAM,CAAC,EAAE,CAAC,EAAE;AAC5B,WAAO,QACJ,MAAM,IAAI,EACV,IAAI,UAAQ;AACX,YAAM,UAAU,KAAK,KAAK;AAC1B,UAAI,CAAC,QAAS,QAAO;AACrB,UAAI,WAAW,KAAK,OAAO,EAAG,QAAO,KAAK,OAAO;AACj
D,UAAI,aAAa,KAAK,OAAO,EAAG,QAAO,KAAK,OAAO;AACnD,aAAO;AAAA,IACT,CAAC,EACA,OAAO,OAAO,EACd,KAAK,IAAI;AAAA,EACd;AAGA,QAAM,UAAsB,MAAM,KAAK,EAAE,QAAQ,QAAQ,GAAG,MAAM,MAAM,OAAO,EAAE,KAAK,EAAE,CAAC;AACzF,QAAM,OAAO,oBAAI,IAAY;AAE7B,WAAS,IAAI,GAAG,IAAI,SAAS,KAAK;AAChC,aAAS,IAAI,GAAG,IAAI,SAAS,KAAK;AAChC,UAAI,KAAK,IAAI,GAAG,CAAC,IAAI,CAAC,EAAE,EAAG;AAC3B,YAAM,OAAO,MAAM,CAAC,EAAE,CAAC;AACvB,cAAQ,CAAC,EAAE,CAAC,IAAI,KAAK,KAAK,QAAQ,OAAO,MAAM;AAE/C,eAAS,KAAK,GAAG,KAAK,KAAK,SAAS,MAAM;AACxC,iBAAS,KAAK,GAAG,KAAK,KAAK,SAAS,MAAM;AACxC,cAAI,OAAO,KAAK,OAAO,EAAG;AAC1B,cAAI,IAAI,KAAK,WAAW,IAAI,KAAK,SAAS;AACxC,iBAAK,IAAI,GAAG,IAAI,EAAE,IAAI,IAAI,EAAE,EAAE;AAAA,UAChC;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAGA,QAAM,aAAyB,CAAC;AAChC,aAAW,OAAO,SAAS;AACzB,UAAM,qBAAqB,IAAI,MAAM,UAAQ,SAAS,EAAE;AACxD,QAAI,CAAC,mBAAoB,YAAW,KAAK,GAAG;AAAA,EAC9C;AAEA,MAAI,WAAW,WAAW,EAAG,QAAO;AAEpC,QAAM,KAAe,CAAC;AACtB,KAAG,KAAK,OAAO,WAAW,CAAC,EAAE,KAAK,KAAK,IAAI,IAAI;AAC/C,KAAG,KAAK,OAAO,WAAW,CAAC,EAAE,IAAI,MAAM,KAAK,EAAE,KAAK,KAAK,IAAI,IAAI;AAChE,WAAS,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;AAC1C,OAAG,KAAK,OAAO,WAAW,CAAC,EAAE,KAAK,KAAK,IAAI,IAAI;AAAA,EACjD;AACA,SAAO,GAAG,KAAK,IAAI;AACrB;;;ADlKA,IAAM,sBAAsB,MAAM,OAAO;AAEzC,IAAM,kBAAkB;AAGxB,SAAS,UAAU,KAAa,KAAqB;AACnD,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,KAAK,GAAG,CAAC;AACvC;AAKA,SAAS,SAAS,KAAqB;AACrC,SAAO,IAAI,QAAQ,0CAA0C,EAAE;AACjE;AAEA,eAAsB,kBAAkB,QAAsC;AAC5E,MAAI;AAEJ,MAAI;AACF,UAAM,MAAM,MAAM,UAAU,MAAM;AAAA,EACpC,QAAQ;AAEN,WAAO,qBAAqB,MAAM;AAAA,EACpC;AAGA,MAAI,aAAa;AACjB,MAAI,QAAQ,MAAM;AAAE;AAAA,EAAa,CAAC;AAClC,MAAI,aAAa,gBAAiB,OAAM,IAAI,MAAM,oEAA4B;AAE9E,QAAM,eAAe,MAAM,oBAAoB,GAAG;AAClD,MAAI,aAAa,WAAW,EAAG,OAAM,IAAI,MAAM,+FAAyB;AAExE,MAAI,oBAAoB;AACxB,QAAM,SAAoB,CAAC;AAC3B,aAAW,QAAQ,cAAc;AAC/B,UAAM,OAAO,IAAI,KAAK,IAAI;AAC1B,QAAI,CAAC,KAAM;AACX,UAAM,MAAM,MAAM,KAAK,MAAM,MAAM;AACnC,yBAAqB,IAAI,SAAS;AAClC,QAAI,oBAAoB,oBAAqB,OAAM,IAAI,MAAM,iFAA+B;AAC5F,WAAO,KAAK,GAAG,gBAAgB,GAAG,CAAC;AAAA,EACrC;AACA,SAAO,iBAAiB,MAAM;AAChC;AAIA,SAAS,qBAAqB,QAA6B;AACzD,QAAM,OAAO,IAAI,WAAW,MAAM;AAClC,QA
AM,OAAO,IAAI,SAAS,MAAM;AAChC,MAAI,MAAM;AACV,QAAM,QAAkB,CAAC;AACzB,MAAI,oBAAoB;AACxB,MAAI,aAAa;AAEjB,SAAO,MAAM,KAAK,SAAS,IAAI;AAE7B,QAAI,KAAK,GAAG,MAAM,MAAQ,KAAK,MAAM,CAAC,MAAM,MAAQ,KAAK,MAAM,CAAC,MAAM,KAAQ,KAAK,MAAM,CAAC,MAAM,EAAM;AAEtG,QAAI,EAAE,aAAa,gBAAiB;AAEpC,UAAM,SAAS,KAAK,UAAU,MAAM,GAAG,IAAI;AAC3C,UAAM,WAAW,KAAK,UAAU,MAAM,IAAI,IAAI;AAC9C,UAAM,UAAU,KAAK,UAAU,MAAM,IAAI,IAAI;AAC7C,UAAM,WAAW,KAAK,UAAU,MAAM,IAAI,IAAI;AAG9C,QAAI,UAAU,QAAQ,WAAW,OAAO;AAAE,aAAO,KAAK,UAAU;AAAU;AAAA,IAAS;AAEnF,UAAM,YAAY,MAAM,KAAK,UAAU;AAEvC,QAAI,YAAY,WAAW,KAAK,OAAQ;AACxC,QAAI,aAAa,KAAK,WAAW,GAAG;AAAE,YAAM;AAAW;AAAA,IAAS;AAEhE,UAAM,YAAY,KAAK,MAAM,MAAM,IAAI,MAAM,KAAK,OAAO;AACzD,UAAM,OAAO,IAAI,YAAY,EAAE,OAAO,SAAS;AAG/C,UAAM,iBAAiB,KAAK,QAAQ,OAAO,GAAG;AAC9C,QAAI,eAAe,SAAS,IAAI,KAAK,eAAe,WAAW,GAAG,KAAK,aAAa,KAAK,cAAc,GAAG;AAAE,YAAM,YAAY;AAAU;AAAA,IAAS;AACjJ,UAAM,WAAW,KAAK,MAAM,WAAW,YAAY,QAAQ;AAC3D,UAAM,YAAY;AAElB,QAAI,CAAC,KAAK,YAAY,EAAE,SAAS,SAAS,KAAK,CAAC,KAAK,SAAS,MAAM,EAAG;AAEvE,QAAI;AACF,UAAI;AACJ,UAAI,WAAW,GAAG;AAChB,kBAAU,IAAI,YAAY,EAAE,OAAO,QAAQ;AAAA,MAC7C,WAAW,WAAW,GAAG;AACvB,cAAM,eAAe,eAAe,OAAO,KAAK,QAAQ,GAAG,EAAE,iBAAiB,oBAAoB,CAAC;AACnG,kBAAU,IAAI,YAAY,EAAE,OAAO,YAAY;AAAA,MACjD,OAAO;AACL;AAAA,MACF;AACA,2BAAqB,QAAQ,SAAS;AACtC,UAAI,oBAAoB,oBAAqB,OAAM,IAAI,MAAM,qDAAa;AAC1E,YAAM,cAAc,iBAAiB,gBAAgB,OAAO,CAAC;AAC7D,UAAI,YAAa,OAAM,KAAK,WAAW;AAAA,IACzC,QAAQ;AACN;AAAA,IACF;AAAA,EACF;AAEA,MAAI,MAAM,WAAW,EAAG,OAAM,IAAI,MAAM,8HAA+B;AACvE,SAAO,MAAM,KAAK,MAAM;AAC1B;AAIA,eAAe,oBAAoB,KAA+B;AAChE,QAAM,gBAAgB,CAAC,wBAAwB,aAAa;AAC5D,aAAW,MAAM,eAAe;AAC9B,UAAM,UAAU,GAAG,YAAY;AAC/B,UAAM,OAAO,IAAI,KAAK,EAAE,KAAK,OAAO,OAAO,IAAI,KAAK,EAAE,KAAK,OAAK,EAAE,KAAK,YAAY,MAAM,OAAO,KAAK;AACrG,QAAI,CAAC,KAAM;AACX,UAAM,MAAM,MAAM,KAAK,MAAM,MAAM;AACnC,UAAM,QAAQ,8BAA8B,GAAG;AAC/C,QAAI,MAAM,SAAS,EAAG,QAAO;AAAA,EAC/B;AAGA,QAAM,eAAe,IAAI,KAAK,qBAAqB;AACnD,SAAO,aAAa,IAAI,OAAK,EAAE,IAAI,EAAE,KAAK;AAC5C;AAEA,SAAS,8BAA8B,KAAuB;AAC5D,QAAM,SAAS,IAAI,UAAU;AAC7B,QAAM,MAAM,OAAO,gBAAgB,SAAS,GAAG,GAAG,UAAU;AAC5D,QAAM,QAAQ,IAAI,qBAA
qB,UAAU;AACjD,QAAM,QAAQ,IAAI,qBAAqB,aAAa;AAEpD,QAAM,cAAc,CAAC,OAAe,MAAM,KAAK,EAAE,KAAK,GAAG,YAAY,EAAE,SAAS,SAAS;AACzF,QAAM,WAAW,oBAAI,IAAoB;AACzC,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,UAAM,OAAO,MAAM,CAAC;AACpB,UAAM,KAAK,KAAK,aAAa,IAAI,KAAK;AACtC,QAAI,OAAO,KAAK,aAAa,MAAM,KAAK;AACxC,UAAM,YAAY,KAAK,aAAa,YAAY,KAAK;AACrD,QAAI,CAAC,YAAY,EAAE,KAAK,CAAC,UAAU,SAAS,KAAK,EAAG;AACpD,QAAI,CAAC,KAAK,WAAW,GAAG,KAAK,CAAC,KAAK,WAAW,WAAW,KAAK,YAAY,EAAE;AAC1E,aAAO,cAAc;AACvB,aAAS,IAAI,IAAI,IAAI;AAAA,EACvB;AAEA,MAAI,MAAM,SAAS,GAAG;AACpB,UAAM,UAAoB,CAAC;AAC3B,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,YAAM,OAAO,SAAS,IAAI,MAAM,CAAC,EAAE,aAAa,OAAO,KAAK,EAAE;AAC9D,UAAI,KAAM,SAAQ,KAAK,IAAI;AAAA,IAC7B;AACA,QAAI,QAAQ,SAAS,EAAG,QAAO;AAAA,EACjC;AACA,SAAO,MAAM,KAAK,SAAS,QAAQ,CAAC,EACjC,OAAO,CAAC,CAAC,EAAE,MAAM,YAAY,EAAE,CAAC,EAChC,KAAK,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,EACvC,IAAI,CAAC,CAAC,EAAE,IAAI,MAAM,IAAI;AAC3B;AAIA,SAAS,gBAAgB,KAAwB;AAC/C,QAAM,SAAS,IAAI,UAAU;AAC7B,QAAM,MAAM,OAAO,gBAAgB,SAAS,GAAG,GAAG,UAAU;AAC5D,MAAI,CAAC,IAAI,gBAAiB,QAAO,CAAC;AAElC,QAAM,SAAoB,CAAC;AAC3B,cAAY,IAAI,iBAAiB,QAAQ,MAAM,CAAC,CAAC;AACjD,SAAO;AACT;AAEA,SAAS,YACP,MAAY,QACZ,UAA6B,YACvB;AACN,QAAM,WAAW,KAAK;AACtB,MAAI,CAAC,SAAU;AAEf,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,KAAK,SAAS,CAAC;AACrB,QAAI,GAAG,aAAa,EAAG;AAEvB,UAAM,MAAM,GAAG,WAAW,GAAG,aAAa;AAC1C,UAAM,WAAW,IAAI,QAAQ,WAAW,EAAE;AAE1C,YAAQ,UAAU;AAAA,MAChB,KAAK,OAAO;AACV,YAAI,SAAU,YAAW,KAAK,QAAQ;AACtC,cAAM,WAAuB,EAAE,MAAM,CAAC,GAAG,YAAY,CAAC,GAAG,MAAM,KAAK;AACpE,oBAAY,IAAI,QAAQ,UAAU,UAAU;AAE5C,YAAI,SAAS,KAAK,SAAS,GAAG;AAC5B,cAAI,WAAW,SAAS,GAAG;AACzB,kBAAM,cAAc,WAAW,IAAI;AACnC,kBAAM,aAAa,mBAAmB,SAAS,IAAI;AACnD,gBAAI,YAAY,MAAM;AACpB,0BAAY,KAAK,SAAS,YAAY,KAAK,OAAO,OAAO,MAAM;AAAA,YACjE;AACA,uBAAW;AAAA,UACb,OAAO;AACL,mBAAO,KAAK,EAAE,MAAM,SAAS,OAAO,WAAW,SAAS,IAAI,EAAE,CAAC;AAC/D,uBAAW;AAAA,UACb;AAAA,QACF,OAAO;AACL,qBAAW,WAAW,SAAS,IAAI,WAAW,IAAI,IAAK;AAAA,QACzD;AACA;AAAA,MACF;AAAA,MAEA,KAAK;AACH,YAAI,UAAU;AACZ,mBAAS,aAAa,CAAC;AACvB,sBAAY,IAAI,QAA
Q,UAAU,UAAU;AAC5C,cAAI,SAAS,WAAW,SAAS,EAAG,UAAS,KAAK,KAAK,SAAS,UAAU;AAC1E,mBAAS,aAAa,CAAC;AAAA,QACzB;AACA;AAAA,MAEF,KAAK;AACH,YAAI,UAAU;AACZ,mBAAS,OAAO,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE;AACnD,sBAAY,IAAI,QAAQ,UAAU,UAAU;AAC5C,cAAI,SAAS,MAAM;AACjB,qBAAS,WAAW,KAAK,SAAS,IAAI;AACtC,qBAAS,OAAO;AAAA,UAClB;AAAA,QACF;AACA;AAAA,MAEF,KAAK;AACH,YAAI,UAAU,MAAM;AAClB,gBAAM,KAAK,SAAS,GAAG,aAAa,SAAS,KAAK,KAAK,EAAE;AACzD,gBAAM,KAAK,SAAS,GAAG,aAAa,SAAS,KAAK,KAAK,EAAE;AACzD,mBAAS,KAAK,UAAU,UAAU,IAAI,QAAQ;AAC9C,mBAAS,KAAK,UAAU,UAAU,IAAI,QAAQ;AAAA,QAChD;AACA;AAAA,MAEF,KAAK,KAAK;AACR,cAAM,OAAO,qBAAqB,EAAE;AACpC,YAAI,MAAM;AACR,cAAI,UAAU,MAAM;AAClB,qBAAS,KAAK,SAAS,SAAS,KAAK,OAAO,OAAO,MAAM;AAAA,UAC3D,WAAW,CAAC,UAAU;AACpB,mBAAO,KAAK,EAAE,MAAM,aAAa,KAAK,CAAC;AAAA,UACzC;AAAA,QACF;AACA,oBAAY,IAAI,QAAQ,UAAU,UAAU;AAC5C;AAAA,MACF;AAAA,MAEA;AACE,oBAAY,IAAI,QAAQ,UAAU,UAAU;AAC5C;AAAA,IACJ;AAAA,EACF;AACF;AAEA,SAAS,qBAAqB,MAAoB;AAChD,MAAI,OAAO;AACX,QAAM,OAAO,CAAC,SAAe;AAC3B,UAAM,WAAW,KAAK;AACtB,QAAI,CAAC,SAAU;AACf,aAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,YAAM,QAAQ,SAAS,CAAC;AACxB,UAAI,MAAM,aAAa,GAAG;AAAE,gBAAQ,MAAM,eAAe;AAAI;AAAA,MAAS;AACtE,UAAI,MAAM,aAAa,EAAG;AAE1B,YAAM,OAAO,MAAM,WAAW,MAAM,aAAa,IAAI,QAAQ,WAAW,EAAE;AAC1E,cAAQ,KAAK;AAAA,QACX,KAAK;AAAK,kBAAQ,MAAM,eAAe;AAAI;AAAA,QAC3C,KAAK;AAAO,kBAAQ;AAAM;AAAA,QAC1B,KAAK;AACH,eAAK,MAAM,aAAa,MAAM,KAAK,YAAY,OAAQ,SAAQ;AAC/D;AAAA,QACF,KAAK;AAAA,QAAW,KAAK;AAAW,kBAAQ;AAAK;AAAA,QAC7C,KAAK;AAAO;AAAA;AAAA,QACZ;AAAS,eAAK,KAAK;AAAG;AAAA,MACxB;AAAA,IACF;AAAA,EACF;AACA,OAAK,IAAI;AACT,SAAO,KAAK,QAAQ,WAAW,GAAG,EAAE,KAAK;AAC3C;;;AExSA,SAAS,kBAAAA,iBAAgB,mBAAmB;AAIrC,IAAM,kBAAkB;AACxB,IAAM,gBAAgB;AACtB,IAAM,kBAAkB;AACxB,IAAM,kBAAkB;AACxB,IAAM,YAAY;AAIzB,IAAM,YAAY;AAClB,IAAM,YAAY;AAClB,IAAM,WAAW;AACjB,IAAM,cAAc;AACpB,IAAM,YAAY;AAClB,IAAM,kBAAkB;AAGjB,IAAM,kBAAkB,KAAK;AAC7B,IAAM,iBAAiB,KAAK;AAC5B,IAAM,WAAW,KAAK;AAoB7B,IAAM,cAAc;AAEb,SAAS,YAAY,MAA2B;AACrD,QAAM,UAAuB,CAAC;AAC9B,MAAI,SAAS;AAEb,SAAO,SAAS,KAAK,KAAK,UAAU,QAAQ,SAAS,aAAa;AAChE,UAAM,SAAS,KAAK,aAAa,MAAM;AACvC,cAAU;AA
EV,UAAM,QAAQ,SAAS;AACvB,UAAM,QAAS,UAAU,KAAM;AAC/B,QAAI,OAAQ,UAAU,KAAM;AAG5B,QAAI,SAAS,MAAO;AAClB,UAAI,SAAS,IAAI,KAAK,OAAQ;AAC9B,aAAO,KAAK,aAAa,MAAM;AAC/B,gBAAU;AAAA,IACZ;AAEA,QAAI,SAAS,OAAO,KAAK,OAAQ;AACjC,YAAQ,KAAK,EAAE,OAAO,OAAO,MAAM,MAAM,KAAK,SAAS,QAAQ,SAAS,IAAI,EAAE,CAAC;AAC/E,cAAU;AAAA,EACZ;AAEA,SAAO;AACT;AAKA,IAAMC,uBAAsB,MAAM,OAAO;AAElC,SAAS,iBAAiB,MAAsB;AACrD,QAAM,OAAO,EAAE,iBAAiBA,qBAAoB;AACpD,MAAI,KAAK,UAAU,KAAK,KAAK,CAAC,MAAM,KAAM;AACxC,QAAI;AAAE,aAAO,YAAY,MAAM,IAAI;AAAA,IAAE,QAAQ;AAAA,IAAwB;AAAA,EACvE;AACA,SAAOD,gBAAe,MAAM,IAAI;AAClC;AAIO,SAAS,gBAAgB,MAA6B;AAC3D,MAAI,KAAK,SAAS,GAAI,OAAM,IAAI,MAAM,4FAAgC;AACtE,QAAM,MAAM,KAAK,SAAS,GAAG,EAAE,EAAE,SAAS,MAAM,EAAE,QAAQ,QAAQ,EAAE;AACpE,SAAO;AAAA,IACL,WAAW;AAAA,IACX,cAAc,KAAK,EAAE;AAAA,IACrB,OAAO,KAAK,aAAa,EAAE;AAAA,EAC7B;AACF;AAIO,SAAS,YAAY,MAAsB;AAChD,MAAI,SAAS;AACb,MAAI,IAAI;AAER,SAAO,IAAI,IAAI,KAAK,QAAQ;AAC1B,UAAM,KAAK,KAAK,aAAa,CAAC;AAC9B,SAAK;AAEL,YAAQ,IAAI;AAAA,MACV,KAAK;AAAW,kBAAU;AAAM;AAAA,MAChC,KAAK;AAAW;AAAA,MAChB,KAAK;AAAU,kBAAU;AAAM;AAAA,MAC/B,KAAK;AAAa,kBAAU;AAAK;AAAA,MACjC,KAAK;AAAA,MAAW,KAAK;AAAiB,kBAAU;AAAK;AAAA,MACrD;AACE,YAAI,MAAM,KAAU,MAAM,IAAQ;AAChC,gBAAM,QAAS,MAAM,KAAK,MAAM,KAAO,MAAM,MAAM,MAAM,MAAQ,MAAM,MAAM,MAAM;AACnF,gBAAM,WAAY,MAAM,KAAK,MAAM,KAAO,MAAM,MAAM,MAAM;AAC5D,eAAK,SAAS,aAAa,IAAI,MAAM,KAAK,OAAQ,MAAK;AAAA,QACzD,WAAW,MAAM,IAAQ;AAEvB,cAAI,MAAM,SAAU,MAAM,SAAU,IAAI,IAAI,KAAK,QAAQ;AACvD,kBAAM,KAAK,KAAK,aAAa,CAAC;AAC9B,gBAAI,MAAM,SAAU,MAAM,OAAQ;AAChC,mBAAK;AACL,oBAAM,aAAc,KAAK,SAAW,OAAO,KAAK,SAAU;AAC1D,wBAAU,OAAO,cAAc,SAAS;AACxC;AAAA,YACF;AAAA,UACF;AACA,oBAAU,OAAO,aAAa,EAAE;AAAA,QAClC;AACA;AAAA,IACJ;AAAA,EACF;AAEA,SAAO;AACT;;;AC9HA,SAAS,qBAAqB;AAC9B,IAAME,WAAU,cAAc,YAAY,GAAG;AAC7C,IAAM,MAAiBA,SAAQ,KAAK;AAUpC,IAAM,eAAe;AAErB,IAAM,uBAAuB,MAAM,OAAO;AAEnC,SAAS,kBAAkB,QAAwB;AACxD,QAAM,MAAM,IAAI,MAAM,MAAM;AAE5B,QAAM,cAAc,IAAI,KAAK,KAAK,aAAa;AAC/C,MAAI,CAAC,aAAa,QAAS,OAAM,IAAI,MAAM,4CAAmB;AAC9D,QAAM,SAAS,gBAAgB,OAAO,KAAK,YAAY,OAAO,CAAC;AAC/D,MAAI,OAAO,cAAc,oBAAqB,OAAM,IAAI,MAAM,iDAAc;AAC5E,MAAI
,OAAO,QAAQ,eAAgB,OAAM,IAAI,MAAM,sFAAqB;AACxE,MAAI,OAAO,QAAQ,SAAU,OAAM,IAAI,MAAM,oFAAwB;AACrE,QAAM,cAAc,OAAO,QAAQ,qBAAqB;AAExD,QAAM,WAAW,aAAa,GAAG;AACjC,MAAI,SAAS,WAAW,EAAG,OAAM,IAAI,MAAM,oFAAmB;AAE9D,QAAM,SAAoB,CAAC;AAC3B,MAAI,oBAAoB;AACxB,aAAW,eAAe,UAAU;AAClC,UAAM,OAAO,aAAa,iBAAiB,OAAO,KAAK,WAAW,CAAC,IAAI,OAAO,KAAK,WAAW;AAC9F,yBAAqB,KAAK;AAC1B,QAAI,oBAAoB,qBAAsB,OAAM,IAAI,MAAM,8FAAuC;AACrG,UAAM,UAAU,YAAY,IAAI;AAChC,WAAO,KAAK,GAAG,aAAa,OAAO,CAAC;AAAA,EACtC;AAEA,SAAO,iBAAiB,MAAM;AAChC;AAEA,SAAS,aAAa,KAA6B;AACjD,QAAM,WAAoD,CAAC;AAE3D,WAAS,IAAI,GAAG,IAAI,cAAc,KAAK;AACrC,UAAM,QAAQ,IAAI,KAAK,KAAK,oBAAoB,CAAC,EAAE;AACnD,QAAI,CAAC,OAAO,QAAS;AACrB,aAAS,KAAK,EAAE,KAAK,GAAG,SAAS,OAAO,KAAK,MAAM,OAAO,EAAE,CAAC;AAAA,EAC/D;AAEA,MAAI,SAAS,WAAW,KAAK,IAAI,WAAW;AAC1C,eAAW,SAAS,IAAI,WAAW;AACjC,UAAI,SAAS,UAAU,aAAc;AACrC,UAAI,MAAM,MAAM,WAAW,SAAS,KAAK,MAAM,SAAS;AACtD,cAAM,MAAM,SAAS,MAAM,KAAK,QAAQ,WAAW,EAAE,GAAG,EAAE,KAAK;AAC/D,iBAAS,KAAK,EAAE,KAAK,SAAS,OAAO,KAAK,MAAM,OAAO,EAAE,CAAC;AAAA,MAC5D;AAAA,IACF;AAAA,EACF;AAEA,SAAO,SAAS,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,OAAK,EAAE,OAAO;AAClE;AAEA,SAAS,aAAa,SAAiC;AACrD,QAAM,SAAoB,CAAC;AAC3B,MAAI,IAAI;AAER,SAAO,IAAI,QAAQ,QAAQ;AACzB,UAAM,MAAM,QAAQ,CAAC;AAErB,QAAI,IAAI,UAAU,mBAAmB,IAAI,UAAU,GAAG;AACpD,YAAM,EAAE,WAAW,QAAQ,QAAQ,IAAI,yBAAyB,SAAS,CAAC;AAC1E,UAAI,UAAW,QAAO,KAAK,EAAE,MAAM,aAAa,MAAM,UAAU,CAAC;AACjE,iBAAW,KAAK,OAAQ,QAAO,KAAK,EAAE,MAAM,SAAS,OAAO,EAAE,CAAC;AAC/D,UAAI;AACJ;AAAA,IACF;AAEA,QAAI,IAAI,UAAU,mBAAmB,IAAI,SAAS,KAAK,IAAI,KAAK,UAAU,GAAG;AAC3E,YAAM,SAAS,IAAI,KAAK,SAAS,GAAG,CAAC,EAAE,SAAS,OAAO;AACvD,UAAI,WAAW,UAAU,WAAW,QAAQ;AAC1C,cAAM,EAAE,OAAO,QAAQ,IAAI,gBAAgB,SAAS,CAAC;AACrD,YAAI,MAAO,QAAO,KAAK,EAAE,MAAM,SAAS,MAAM,CAAC;AAC/C,YAAI;AACJ;AAAA,MACF;AAAA,IACF;AAEA;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,yBAAyB,SAAsB,UAAkB;AACxE,QAAM,aAAa,QAAQ,QAAQ,EAAE;AACrC,MAAI,OAAO;AACX,QAAM,SAA0C,CAAC;AACjD,MAAI,IAAI,WAAW;AAEnB,SAAO,IAAI,QAAQ,QAAQ;AACzB,UAAM,MAAM,QAAQ,CAAC;AACrB,QAAI,IAAI,UAAU,mBAAmB,IAAI,SAAS,WAAY;AAE9D,QAAI,IAAI,UAAU,eAAe;AAC/B
,aAAO,YAAY,IAAI,IAAI;AAAA,IAC7B;AAEA,QAAI,IAAI,UAAU,mBAAmB,IAAI,KAAK,UAAU,GAAG;AACzD,YAAM,SAAS,IAAI,KAAK,SAAS,GAAG,CAAC,EAAE,SAAS,OAAO;AACvD,UAAI,WAAW,UAAU,WAAW,QAAQ;AAC1C,cAAM,EAAE,OAAO,QAAQ,IAAI,gBAAgB,SAAS,CAAC;AACrD,YAAI,MAAO,QAAO,KAAK,KAAK;AAC5B,YAAI;AACJ;AAAA,MACF;AAAA,IACF;AACA;AAAA,EACF;AAEA,QAAM,UAAU,KAAK,KAAK;AAC1B,SAAO,EAAE,WAAW,WAAW,MAAM,QAAQ,SAAS,EAAE;AAC1D;AAEA,SAAS,gBAAgB,SAAsB,UAAkB;AAC/D,QAAM,aAAa,QAAQ,QAAQ,EAAE;AACrC,MAAI,IAAI,WAAW;AACnB,MAAI,OAAO,GAAG,OAAO;AACrB,QAAM,QAAuB,CAAC;AAE9B,SAAO,IAAI,QAAQ,QAAQ;AACzB,UAAM,MAAM,QAAQ,CAAC;AACrB,QAAI,IAAI,UAAU,mBAAmB,IAAI,SAAS,WAAY;AAC9D,QAAI,IAAI,UAAU,mBAAmB,IAAI,SAAS,WAAY;AAE9D,QAAI,IAAI,UAAU,aAAa,IAAI,KAAK,UAAU,GAAG;AACnD,aAAO,KAAK,IAAI,IAAI,KAAK,aAAa,CAAC,GAAG,QAAQ;AAClD,aAAO,KAAK,IAAI,IAAI,KAAK,aAAa,CAAC,GAAG,QAAQ;AAAA,IACpD;AAEA,QAAI,IAAI,UAAU,iBAAiB;AACjC,YAAM,EAAE,MAAM,QAAQ,IAAI,eAAe,SAAS,GAAG,UAAU;AAC/D,UAAI,KAAM,OAAM,KAAK,IAAI;AACzB,UAAI;AACJ;AAAA,IACF;AACA;AAAA,EACF;AAEA,MAAI,SAAS,KAAK,SAAS,KAAK,MAAM,WAAW,EAAG,QAAO,EAAE,OAAO,MAAM,SAAS,EAAE;AAErF,QAAM,WAAW,aAAa,MAAM,MAAM,KAAK;AAC/C,SAAO,EAAE,OAAO,WAAW,QAAQ,GAAG,SAAS,EAAE;AACnD;AAEA,SAAS,eAAe,SAAsB,UAAkB,YAAoB;AAClF,QAAM,MAAM,QAAQ,QAAQ;AAC5B,QAAM,YAAY,IAAI;AACtB,QAAM,QAAkB,CAAC;AAIzB,MAAI,UAAU;AACd,MAAI,UAAU;AACd,MAAI,IAAI,KAAK,UAAU,IAAI;AACzB,UAAM,KAAK,IAAI,KAAK,aAAa,EAAE;AACnC,UAAM,KAAK,IAAI,KAAK,aAAa,EAAE;AACnC,QAAI,KAAK,EAAG,WAAU,KAAK,IAAI,IAAI,QAAQ;AAC3C,QAAI,KAAK,EAAG,WAAU,KAAK,IAAI,IAAI,QAAQ;AAAA,EAC7C;AAEA,MAAI,IAAI,WAAW;AAEnB,SAAO,IAAI,QAAQ,QAAQ;AACzB,UAAM,IAAI,QAAQ,CAAC;AACnB,QAAI,EAAE,UAAU,mBAAmB,EAAE,SAAS,UAAW;AACzD,QAAI,EAAE,SAAS,eAAe,EAAE,UAAU,mBAAmB,EAAE,UAAU,iBAAkB;AAE3F,QAAI,EAAE,UAAU,eAAe;AAC7B,YAAM,IAAI,YAAY,EAAE,IAAI,EAAE,KAAK;AACnC,UAAI,EAAG,OAAM,KAAK,CAAC;AAAA,IACrB;AACA;AAAA,EACF;AAEA,SAAO,EAAE,MAAM,EAAE,MAAM,MAAM,KAAK,IAAI,GAAG,SAAS,QAAQ,GAAkB,SAAS,EAAE;AACzF;AAEA,SAAS,aAAa,MAAc,MAAc,OAAuC;AACvF,QAAM,OAAiC,MAAM,KAAK,EAAE,QAAQ,KAAK,GAAG,MAAM,MAAM,IAAI,EAAE,KAAK,IAAI,CAAC;AAChG,MAAI,UAAU;AAEd,WAAS,IAAI,GAAG,IAAI,QAAQ,UAAU,MAAM,
QAAQ,KAAK;AACvD,aAAS,IAAI,GAAG,IAAI,QAAQ,UAAU,MAAM,QAAQ,KAAK;AACvD,UAAI,KAAK,CAAC,EAAE,CAAC,MAAM,KAAM;AACzB,YAAM,OAAO,MAAM,SAAS;AAC5B,WAAK,CAAC,EAAE,CAAC,IAAI;AAEb,eAAS,KAAK,GAAG,KAAK,KAAK,SAAS,MAAM;AACxC,iBAAS,KAAK,GAAG,KAAK,KAAK,SAAS,MAAM;AACxC,cAAI,OAAO,KAAK,OAAO,EAAG;AAC1B,cAAI,IAAI,KAAK,QAAQ,IAAI,KAAK;AAC5B,iBAAK,IAAI,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE;AAAA,QAC9D;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO,KAAK,IAAI,SAAO,IAAI,IAAI,OAAK,KAAK,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE,CAAC,CAAC;AAChF;;;ACrNA,SAAS,iBAAAC,sBAAqB;AAC9B,SAAS,qBAAqB;AAL9B,IAAM,YAAY;AAElB,IAAM,iBAAiB,MAAM,OAAO;AAyBpC,IAAI,cAAkC;AAEtC,eAAe,YAAyC;AACtD,MAAI,YAAa,QAAO;AACxB,MAAI;AACF,UAAM,MAAM,MAAM,OAAO,iCAAiC;AAE1D,UAAM,MAAMA,eAAc,YAAY,GAAG;AACzC,UAAM,aAAa,IAAI,QAAQ,wCAAwC;AACvE,QAAI,oBAAoB,YAAY,cAAc,UAAU,EAAE;AAC9D,kBAAc;AACd,WAAO;AAAA,EACT,SAAS,KAAK;AAEZ,UAAM,MAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,QAAI,IAAI,SAAS,aAAa,KAAK,IAAI,SAAS,kBAAkB,GAAG;AACnE,aAAO;AAAA,IACT;AACA,UAAM,IAAI,MAAM,yCAAqB,GAAG,EAAE;AAAA,EAC5C;AACF;AAEA,eAAsB,iBAAiB,QAA2C;AAChF,QAAM,QAAQ,MAAM,UAAU;AAC9B,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,MACL,SAAS;AAAA,MACT,UAAU;AAAA,MACV,WAAW;AAAA,MACX,OAAO;AAAA,IACT;AAAA,EACF;AAEA,QAAM,OAAO,IAAI,WAAW,MAAM;AAClC,QAAM,MAAM,MAAM,MAAM,YAAY;AAAA,IAClC;AAAA,IACA,gBAAgB;AAAA,IAChB,iBAAiB;AAAA,IACjB,iBAAiB;AAAA,EACnB,CAAC,EAAE;AAEH,MAAI;AACF,UAAM,YAAY,IAAI;AACtB,QAAI,cAAc,GAAG;AACnB,aAAO,EAAE,SAAS,OAAO,UAAU,OAAO,WAAW,GAAG,OAAO,+DAAkB;AAAA,IACnF;AAEA,UAAM,YAAsB,CAAC;AAC7B,QAAI,aAAa;AACjB,QAAI,iBAAiB;AACrB,UAAM,qBAAqB,KAAK,IAAI,WAAW,SAAS;AAExD,aAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,YAAM,OAAO,MAAM,IAAI,QAAQ,CAAC;AAChC,YAAM,cAAc,MAAM,KAAK,eAAe;AAC9C,YAAM,QAAQ,qBAAqB,YAAY,KAAK;AACpD,YAAM,WAAW,MAAM,KAAK,IAAI;AAChC,oBAAc,SAAS,QAAQ,OAAO,EAAE,EAAE;AAC1C,wBAAkB,SAAS,SAAS;AACpC,UAAI,iBAAiB,eAAgB,OAAM,IAAI,MAAM,8DAAiB,iBAAiB,OAAO,IAAI,kBAAQ;AAC1G,gBAAU,KAAK,QAAQ;AAAA,IACzB;AAEA,UAAM,kBAAkB,aAAa;AACrC,QAAI,kBAAkB,IAAI;AACxB,aAAO;AAAA,QACL,SAAS;AAAA,QACT,UAAU;AAAA,QACV;AAAA,QACA,cAAc;AAAA,QACd,O
AAO,6EAAsB,SAAS,uDAAe,UAAU;AAAA,MACjE;AAAA,IACF;AAEA,QAAI,WAAW;AACf,aAAS,IAAI,GAAG,IAAI,UAAU,QAAQ,KAAK;AACzC,YAAM,UAAU,aAAa,UAAU,CAAC,CAAC;AACzC,UAAI,QAAQ,KAAK,GAAG;AAClB,YAAI,IAAI,KAAK,SAAU,aAAY;AACnC,oBAAY;AAAA,MACd;AAAA,IACF;AAEA,eAAW,kBAAkB,QAAQ;AAErC,UAAM,YAAY,YAAY;AAC9B,WAAO,EAAE,SAAS,MAAM,UAAU,OAAO,UAAU,WAAW,oBAAoB,cAAc,OAAO,GAAI,aAAa,EAAE,SAAS,aAAQ,SAAS,wCAAU,SAAS,4EAAgB,EAAG;AAAA,EAC5L,UAAE;AACA,UAAM,IAAI,QAAQ,EAAE,MAAM,MAAM;AAAA,IAAC,CAAC;AAAA,EACpC;AACF;AAIA,SAAS,qBAAqB,OAAkC;AAC9D,MAAI,MAAM,WAAW,EAAG,QAAO,CAAC;AAEhC,QAAM,YAAY,MAAM,OAAO,UAAQ,OAAO,KAAK,QAAQ,YAAY,KAAK,IAAI,KAAK,MAAM,EAAE;AAC7F,MAAI,UAAU,WAAW,EAAG,QAAO,CAAC;AAEpC,YAAU,KAAK,CAAC,GAAG,MAAM;AACvB,UAAM,QAAQ,EAAE,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC;AAC5C,QAAI,KAAK,IAAI,KAAK,IAAI,EAAG,QAAO,EAAE,UAAU,CAAC,IAAI,EAAE,UAAU,CAAC;AAC9D,WAAO;AAAA,EACT,CAAC;AAED,QAAM,QAAkB,CAAC;AACzB,MAAI,WAAW,UAAU,CAAC,EAAE,UAAU,CAAC;AACvC,MAAI,cAA4D,CAAC;AAEjE,aAAW,QAAQ,WAAW;AAC5B,UAAM,IAAI,KAAK,UAAU,CAAC;AAE1B,QAAI,KAAK,IAAI,WAAW,CAAC,IAAI,KAAK,IAAI,KAAK,SAAS,KAAK,CAAC,GAAG;AAC3D,UAAI,YAAY,SAAS,EAAG,OAAM,KAAK,eAAe,WAAW,CAAC;AAClE,oBAAc,CAAC;AACf,iBAAW;AAAA,IACb;AAEA,gBAAY,KAAK,EAAE,MAAM,KAAK,KAAK,GAAG,KAAK,UAAU,CAAC,GAAG,OAAO,KAAK,MAAM,CAAC;AAAA,EAC9E;AAEA,MAAI,YAAY,SAAS,EAAG,OAAM,KAAK,eAAe,WAAW,CAAC;AAClE,SAAO;AACT;AAEA,SAAS,eAAe,OAA6D;AACnF,MAAI,MAAM,UAAU,EAAG,QAAO,MAAM,CAAC,GAAG,QAAQ;AAChD,QAAM,KAAK,CAAC,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;AAE9B,MAAI,SAAS,MAAM,CAAC,EAAE;AACtB,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,UAAM,MAAM,MAAM,CAAC,EAAE,KAAK,MAAM,IAAI,CAAC,EAAE,IAAI,MAAM,IAAI,CAAC,EAAE;AACxD,QAAI,MAAM,GAAI,WAAU;AAAA,aACf,MAAM,EAAG,WAAU;AAC5B,cAAU,MAAM,CAAC,EAAE;AAAA,EACrB;AACA,SAAO;AACT;AAEO,SAAS,aAAa,MAAsB;AACjD,SAAO,KACJ,QAAQ,qCAAqC,EAAE,EAC/C,QAAQ,4BAA4B,EAAE,EACtC,QAAQ,0BAA0B,OAAO,EACzC,QAAQ,WAAW,MAAM,EACzB,KAAK;AACV;AAEA,SAAS,kBAAkB,MAAsB;AAC/C,QAAM,QAAQ,KAAK,MAAM,IAAI;AAC7B,QAAM,SAAmB,CAAC;AAC1B,MAAI,cAA0B,CAAC;AAE/B,aAAW,QAAQ,OAAO;AACxB,QAAI,KAAK,SAAS,GAAI,GAAG;AACvB,kBAAY,KAAK,KAAK,MAAM,GAAI,EAAE,IAAI,OAAK,EAAE,KAAK
,CAAC,CAAC;AAAA,IACtD,OAAO;AACL,UAAI,YAAY,UAAU,EAAG,QAAO,KAAK,sBAAsB,WAAW,CAAC;AAAA,eAClE,YAAY,WAAW,EAAG,QAAO,KAAK,YAAY,CAAC,EAAE,KAAK,KAAK,CAAC;AACzE,oBAAc,CAAC;AACf,aAAO,KAAK,IAAI;AAAA,IAClB;AAAA,EACF;AAEA,MAAI,YAAY,UAAU,EAAG,QAAO,KAAK,sBAAsB,WAAW,CAAC;AAAA,WAClE,YAAY,WAAW,EAAG,QAAO,KAAK,YAAY,CAAC,EAAE,KAAK,KAAK,CAAC;AAEzE,SAAO,OAAO,KAAK,IAAI;AACzB;AAEA,SAAS,sBAAsB,MAA0B;AACvD,QAAM,UAAU,KAAK,IAAI,GAAG,KAAK,IAAI,OAAK,EAAE,MAAM,CAAC;AAEnD,QAAM,aAAa,KAAK,IAAI,OAAK;AAC/B,UAAM,OAAO,CAAC,GAAG,CAAC;AAClB,WAAO,KAAK,SAAS,QAAS,MAAK,KAAK,EAAE;AAC1C,WAAO;AAAA,EACT,CAAC;AAED,QAAM,QAAkB,CAAC;AACzB,QAAM,KAAK,OAAO,WAAW,CAAC,EAAE,KAAK,KAAK,IAAI,IAAI;AAClD,QAAM,KAAK,OAAO,WAAW,CAAC,EAAE,IAAI,MAAM,KAAK,EAAE,KAAK,KAAK,IAAI,IAAI;AACnE,WAAS,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;AAC1C,UAAM,KAAK,OAAO,WAAW,CAAC,EAAE,KAAK,KAAK,IAAI,IAAI;AAAA,EACpD;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;;;ACnMA,eAAsB,MAAM,QAA2C;AACrE,MAAI,CAAC,UAAU,OAAO,eAAe,GAAG;AACtC,WAAO,EAAE,SAAS,OAAO,UAAU,WAAW,OAAO,8GAAyB;AAAA,EAChF;AACA,QAAM,SAAS,aAAa,MAAM;AAElC,UAAQ,QAAQ;AAAA,IACd,KAAK;AACH,aAAO,UAAU,MAAM;AAAA,IACzB,KAAK;AACH,aAAO,SAAS,MAAM;AAAA,IACxB,KAAK;AACH,aAAO,SAAS,MAAM;AAAA,IACxB;AACE,aAAO,EAAE,SAAS,OAAO,UAAU,WAAW,OAAO,qFAAoB;AAAA,EAC7E;AACF;AAKA,eAAsB,UAAU,QAA2C;AACzE,MAAI;AACF,UAAM,WAAW,MAAM,kBAAkB,MAAM;AAC/C,WAAO,EAAE,SAAS,MAAM,UAAU,QAAQ,SAAS;AAAA,EACrD,SAAS,KAAK;AACZ,WAAO,EAAE,SAAS,OAAO,UAAU,QAAQ,OAAO,eAAe,QAAQ,IAAI,UAAU,iCAAa;AAAA,EACtG;AACF;AAGA,eAAsB,SAAS,QAA2C;AACxE,MAAI;AACF,UAAM,WAAW,kBAAkB,OAAO,KAAK,MAAM,CAAC;AACtD,WAAO,EAAE,SAAS,MAAM,UAAU,OAAO,SAAS;AAAA,EACpD,SAAS,KAAK;AACZ,WAAO,EAAE,SAAS,OAAO,UAAU,OAAO,OAAO,eAAe,QAAQ,IAAI,UAAU,gCAAY;AAAA,EACpG;AACF;AAGA,eAAsB,SAAS,QAA2C;AACxE,MAAI;AACF,WAAO,MAAM,iBAAiB,MAAM;AAAA,EACtC,SAAS,KAAK;AACZ,WAAO,EAAE,SAAS,OAAO,UAAU,OAAO,OAAO,eAAe,QAAQ,IAAI,UAAU,gCAAY;AAAA,EACpG;AACF;","names":["inflateRawSync","MAX_DECOMPRESS_SIZE","require","createRequire"]}
package/dist/cli.js CHANGED
@@ -4,7 +4,7 @@ import {
4
4
  detectFormat,
5
5
  parse,
6
6
  toArrayBuffer
7
- } from "./chunk-KZMWHK72.js";
7
+ } from "./chunk-KT5X6QUZ.js";
8
8
 
9
9
  // src/cli.ts
10
10
  import { readFileSync, writeFileSync, mkdirSync, statSync } from "fs";
@@ -65,3 +65,4 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
65
65
  }
66
66
  });
67
67
  program.parse();
68
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\n\nimport { readFileSync, writeFileSync, mkdirSync, statSync } from \"fs\"\nimport { basename, resolve } from \"path\"\nimport { Command } from \"commander\"\nimport { parse, detectFormat } from \"./index.js\"\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\n\nconst program = new Command()\n\nprogram\n .name(\"kordoc\")\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF → Markdown\")\n .version(VERSION)\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF)\")\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (files: string[], opts) => {\n for (const filePath of files) {\n const absPath = resolve(filePath)\n const fileName = basename(absPath)\n\n try {\n const fileSize = statSync(absPath).size\n if (fileSize > 500 * 1024 * 1024) {\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\n process.exitCode = 1\n continue\n }\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const format = detectFormat(arrayBuffer)\n\n if (!opts.silent) {\n process.stderr.write(`[kordoc] ${fileName} (${format}) ...`)\n }\n\n const result = await parse(arrayBuffer)\n\n if (!result.success) {\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${result.error}\\n`)\n process.exitCode = 1\n continue\n }\n\n if (!opts.silent) process.stderr.write(` OK\\n`)\n\n const output = opts.format === \"json\"\n ? 
JSON.stringify(result, null, 2)\n : result.markdown\n\n if (opts.output && files.length === 1) {\n writeFileSync(opts.output, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\n } else if (opts.outDir) {\n mkdirSync(opts.outDir, { recursive: true })\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\n } else {\n process.stdout.write(output + \"\\n\")\n }\n } catch (err) {\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${err instanceof Error ? err.message : err}\\n`)\n process.exitCode = 1\n }\n }\n })\n\nprogram.parse()\n"],"mappings":";;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,gBAAgB;AACjE,SAAS,UAAU,eAAe;AAClC,SAAS,eAAe;AAIxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,QAAQ,EACb,YAAY,+FAAwC,EACpD,QAAQ,OAAO,EACf,SAAS,cAAc,+DAA4B,EACnD,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAiB,SAAS;AACvC,aAAW,YAAY,OAAO;AAC5B,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AAEjC,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC7D;AAEA,YAAM,SAAS,MAAM,MAAM,WAAW;AAEtC,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAE9C,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B,OAAO;AAEX,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAAA,MAC/D,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CA
AC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AAAA,MAC3D,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAAA,IACF,SAAS,KAAK;AACZ,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,eAAe,QAAQ,IAAI,UAAU,GAAG;AAAA,CAAI;AACpG,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF,CAAC;AAEH,QAAQ,MAAM;","names":[]}
package/dist/index.cjs CHANGED
@@ -62,6 +62,7 @@ function isPdfFile(buffer) {
62
62
  return b[0] === 37 && b[1] === 80 && b[2] === 68 && b[3] === 70;
63
63
  }
64
64
  function detectFormat(buffer) {
65
+ if (buffer.byteLength < 4) return "unknown";
65
66
  if (isHwpxFile(buffer)) return "hwpx";
66
67
  if (isOldHwpFile(buffer)) return "hwp";
67
68
  if (isPdfFile(buffer)) return "pdf";
@@ -218,6 +219,11 @@ async function parseHwpxDocument(buffer) {
218
219
  } catch {
219
220
  return extractFromBrokenZip(buffer);
220
221
  }
222
+ let entryCount = 0;
223
+ zip.forEach(() => {
224
+ entryCount++;
225
+ });
226
+ if (entryCount > MAX_ZIP_ENTRIES) throw new Error("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
221
227
  const sectionPaths = await resolveSectionPaths(zip);
222
228
  if (sectionPaths.length === 0) throw new Error("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
223
229
  let totalDecompressed = 0;
@@ -258,7 +264,8 @@ function extractFromBrokenZip(buffer) {
258
264
  }
259
265
  const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
260
266
  const name = new TextDecoder().decode(nameBytes);
261
- if (name.includes("..") || name.startsWith("/")) {
267
+ const normalizedName = name.replace(/\\/g, "/");
268
+ if (normalizedName.includes("..") || normalizedName.startsWith("/") || /^[A-Za-z]:/.test(normalizedName)) {
262
269
  pos = fileStart + compSize;
263
270
  continue;
264
271
  }
@@ -289,7 +296,8 @@ function extractFromBrokenZip(buffer) {
289
296
  async function resolveSectionPaths(zip) {
290
297
  const manifestPaths = ["Contents/content.hpf", "content.hpf"];
291
298
  for (const mp of manifestPaths) {
292
- const file = zip.file(new RegExp(`^${mp.replace(/\./g, "\\.")}$`, "i"))[0];
299
+ const mpLower = mp.toLowerCase();
300
+ const file = zip.file(mp) || Object.values(zip.files).find((f) => f.name.toLowerCase() === mpLower) || null;
293
301
  if (!file) continue;
294
302
  const xml = await file.async("text");
295
303
  const paths = parseSectionPathsFromManifest(xml);
@@ -463,10 +471,11 @@ var CHAR_FIXED_NBSP = 24;
463
471
  var FLAG_COMPRESSED = 1 << 0;
464
472
  var FLAG_ENCRYPTED = 1 << 1;
465
473
  var FLAG_DRM = 1 << 4;
474
+ var MAX_RECORDS = 5e5;
466
475
  function readRecords(data) {
467
476
  const records = [];
468
477
  let offset = 0;
469
- while (offset + 4 <= data.length) {
478
+ while (offset + 4 <= data.length && records.length < MAX_RECORDS) {
470
479
  const header = data.readUInt32LE(offset);
471
480
  offset += 4;
472
481
  const tagId = header & 1023;
@@ -527,7 +536,7 @@ function extractText(data) {
527
536
  break;
528
537
  default:
529
538
  if (ch >= 1 && ch <= 31) {
530
- const isExt = ch >= 1 && ch <= 3 || ch >= 11 && ch <= 18 || ch >= 21 && ch <= 23;
539
+ const isExt = ch >= 1 && ch <= 3 || ch >= 10 && ch <= 18 || ch >= 21 && ch <= 23;
531
540
  const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
532
541
  if ((isExt || isInline) && i + 14 <= data.length) i += 14;
533
542
  } else if (ch >= 32) {
@@ -586,6 +595,7 @@ function findSections(cfb) {
586
595
  }
587
596
  if (sections.length === 0 && cfb.FileIndex) {
588
597
  for (const entry of cfb.FileIndex) {
598
+ if (sections.length >= MAX_SECTIONS) break;
589
599
  if (entry.name?.startsWith("Section") && entry.content) {
590
600
  const idx = parseInt(entry.name.replace("Section", ""), 10) || 0;
591
601
  sections.push({ idx, content: Buffer.from(entry.content) });
@@ -654,8 +664,8 @@ function parseTableBlock(records, startIdx) {
654
664
  if (rec.tagId === TAG_PARA_HEADER && rec.level <= tableLevel) break;
655
665
  if (rec.tagId === TAG_CTRL_HEADER && rec.level <= tableLevel) break;
656
666
  if (rec.tagId === TAG_TABLE && rec.data.length >= 8) {
657
- rows = rec.data.readUInt16LE(4);
658
- cols = rec.data.readUInt16LE(6);
667
+ rows = Math.min(rec.data.readUInt16LE(4), MAX_ROWS);
668
+ cols = Math.min(rec.data.readUInt16LE(6), MAX_COLS);
659
669
  }
660
670
  if (rec.tagId === TAG_LIST_HEADER) {
661
671
  const { cell, nextIdx } = parseCellBlock(records, i, tableLevel);
@@ -718,6 +728,8 @@ function arrangeCells(rows, cols, cells) {
718
728
  var import_module2 = require("module");
719
729
  var import_url = require("url");
720
730
  var import_meta2 = {};
731
+ var MAX_PAGES = 5e3;
732
+ var MAX_TOTAL_TEXT = 100 * 1024 * 1024;
721
733
  var pdfjsModule = null;
722
734
  async function loadPdfjs() {
723
735
  if (pdfjsModule) return pdfjsModule;
@@ -760,15 +772,19 @@ async function parsePdfDocument(buffer) {
760
772
  }
761
773
  const pageTexts = [];
762
774
  let totalChars = 0;
763
- for (let i = 1; i <= pageCount; i++) {
775
+ let totalTextBytes = 0;
776
+ const effectivePageCount = Math.min(pageCount, MAX_PAGES);
777
+ for (let i = 1; i <= effectivePageCount; i++) {
764
778
  const page = await doc.getPage(i);
765
779
  const textContent = await page.getTextContent();
766
780
  const lines = groupTextItemsByLine(textContent.items);
767
781
  const pageText = lines.join("\n");
768
782
  totalChars += pageText.replace(/\s/g, "").length;
783
+ totalTextBytes += pageText.length * 2;
784
+ if (totalTextBytes > MAX_TOTAL_TEXT) throw new Error(`\uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uD06C\uAE30 \uCD08\uACFC (${MAX_TOTAL_TEXT / 1024 / 1024}MB \uC81C\uD55C)`);
769
785
  pageTexts.push(pageText);
770
786
  }
771
- const avgCharsPerPage = totalChars / pageCount;
787
+ const avgCharsPerPage = totalChars / effectivePageCount;
772
788
  if (avgCharsPerPage < 10) {
773
789
  return {
774
790
  success: false,
@@ -787,7 +803,8 @@ async function parsePdfDocument(buffer) {
787
803
  }
788
804
  }
789
805
  markdown = reconstructTables(markdown);
790
- return { success: true, fileType: "pdf", markdown, pageCount, isImageBased: false };
806
+ const truncated = pageCount > MAX_PAGES;
807
+ return { success: true, fileType: "pdf", markdown, pageCount: effectivePageCount, isImageBased: false, ...truncated && { warning: `PDF\uAC00 ${pageCount}\uD398\uC774\uC9C0\uC774\uC9C0\uB9CC ${MAX_PAGES}\uD398\uC774\uC9C0\uAE4C\uC9C0\uB9CC \uCC98\uB9AC\uD588\uC2B5\uB2C8\uB2E4` } };
791
808
  } finally {
792
809
  await doc.destroy().catch(() => {
793
810
  });
@@ -867,7 +884,7 @@ function formatAsMarkdownTable(rows) {
867
884
  }
868
885
 
869
886
  // src/utils.ts
870
- var VERSION = true ? "0.2.2" : "0.0.0-dev";
887
+ var VERSION = true ? "1.0.1" : "0.0.0-dev";
871
888
 
872
889
  // src/index.ts
873
890
  async function parse(buffer) {