kordoc 2.5.2 → 2.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +450 -431
- package/dist/chunk-4NWDJGAU.js +18955 -0
- package/dist/chunk-4NWDJGAU.js.map +1 -0
- package/dist/{chunk-NKKLA43G.js → chunk-4SK2PDMQ.js} +14 -3
- package/dist/chunk-4SK2PDMQ.js.map +1 -0
- package/dist/{chunk-24NKFRB4.js → chunk-LB7E2KDF.js} +14 -3
- package/dist/chunk-LB7E2KDF.js.map +1 -0
- package/dist/chunk-MEPHGCPQ.js +266 -0
- package/dist/chunk-MEPHGCPQ.js.map +1 -0
- package/dist/chunk-MOL7MDBG.js +0 -0
- package/dist/chunk-MUOQXDZ4.cjs.map +1 -1
- package/dist/{chunk-Z65OQP3H.cjs → chunk-Y476BOHI.cjs} +14 -3
- package/dist/chunk-Y476BOHI.cjs.map +1 -0
- package/dist/cli.js +60 -5
- package/dist/cli.js.map +1 -1
- package/dist/{detect-I7YIS4Q6.js → detect-RI2MQ33K.js} +6 -2
- package/dist/formula-3AQUUIRF.js +1151 -0
- package/dist/formula-3AQUUIRF.js.map +1 -0
- package/dist/formula-JCNF43NE.js +1153 -0
- package/dist/formula-JCNF43NE.js.map +1 -0
- package/dist/formula-XGG6ZP42.cjs +1151 -0
- package/dist/formula-XGG6ZP42.cjs.map +1 -0
- package/dist/index.cjs +14706 -450
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +73 -2
- package/dist/index.d.ts +73 -2
- package/dist/index.js +14583 -327
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +5 -5
- package/dist/mcp.js.map +1 -1
- package/dist/page-range-3C7UGGEK.cjs.map +1 -1
- package/dist/page-range-737B4EZW.js +0 -0
- package/dist/{parser-AZYPOKAR.cjs → parser-7OFQ67QL.cjs} +160 -28
- package/dist/parser-7OFQ67QL.cjs.map +1 -0
- package/dist/{parser-BQKQOIJU.js → parser-DJCMY3OO.js} +136 -4
- package/dist/parser-DJCMY3OO.js.map +1 -0
- package/dist/{parser-FRROKAB7.js → parser-QMMQ7Y7R.js} +136 -4
- package/dist/parser-QMMQ7Y7R.js.map +1 -0
- package/dist/{provider-WPIYEALY.js → provider-2SEHU2FM.js} +1 -1
- package/dist/provider-2SEHU2FM.js.map +1 -0
- package/dist/{provider-7H4CPZYS.js → provider-AKROB7WQ.js} +1 -1
- package/dist/provider-AKROB7WQ.js.map +1 -0
- package/dist/{provider-YN2SSK4X.cjs → provider-SNONEZNW.cjs} +1 -1
- package/dist/provider-SNONEZNW.cjs.map +1 -0
- package/dist/setup-57FB3LSP.js +0 -0
- package/dist/{watch-ZJAUWUAE.js → watch-FVMVIZ5Q.js} +4 -4
- package/dist/watch-FVMVIZ5Q.js.map +1 -0
- package/package.json +98 -77
- package/dist/chunk-24NKFRB4.js.map +0 -1
- package/dist/chunk-2CAJSQK5.js +0 -5052
- package/dist/chunk-2CAJSQK5.js.map +0 -1
- package/dist/chunk-M3E3C5GS.js +0 -59
- package/dist/chunk-M3E3C5GS.js.map +0 -1
- package/dist/chunk-NKKLA43G.js.map +0 -1
- package/dist/chunk-Z65OQP3H.cjs.map +0 -1
- package/dist/parser-AZYPOKAR.cjs.map +0 -1
- package/dist/parser-BQKQOIJU.js.map +0 -1
- package/dist/parser-FRROKAB7.js.map +0 -1
- package/dist/provider-7H4CPZYS.js.map +0 -1
- package/dist/provider-WPIYEALY.js.map +0 -1
- package/dist/provider-YN2SSK4X.cjs.map +0 -1
- package/dist/watch-ZJAUWUAE.js.map +0 -1
- /package/dist/{detect-I7YIS4Q6.js.map → detect-RI2MQ33K.js.map} +0 -0
package/dist/mcp.js
CHANGED
|
@@ -8,18 +8,18 @@ import {
|
|
|
8
8
|
fillHwpx,
|
|
9
9
|
markdownToHwpx,
|
|
10
10
|
parse
|
|
11
|
-
} from "./chunk-
|
|
11
|
+
} from "./chunk-4NWDJGAU.js";
|
|
12
12
|
import {
|
|
13
13
|
detectFormat,
|
|
14
14
|
detectZipFormat
|
|
15
|
-
} from "./chunk-
|
|
15
|
+
} from "./chunk-MEPHGCPQ.js";
|
|
16
16
|
import {
|
|
17
17
|
KordocError,
|
|
18
18
|
VERSION,
|
|
19
19
|
blocksToMarkdown,
|
|
20
20
|
sanitizeError,
|
|
21
21
|
toArrayBuffer
|
|
22
|
-
} from "./chunk-
|
|
22
|
+
} from "./chunk-4SK2PDMQ.js";
|
|
23
23
|
import "./chunk-MOL7MDBG.js";
|
|
24
24
|
|
|
25
25
|
// src/mcp.ts
|
|
@@ -178,7 +178,7 @@ server.tool(
|
|
|
178
178
|
let metadata;
|
|
179
179
|
let effectiveFormat = format;
|
|
180
180
|
if (format === "hwpx") {
|
|
181
|
-
const { detectZipFormat: detectZipFormat2 } = await import("./detect-
|
|
181
|
+
const { detectZipFormat: detectZipFormat2 } = await import("./detect-RI2MQ33K.js");
|
|
182
182
|
const zipFormat = await detectZipFormat2(buffer);
|
|
183
183
|
if (zipFormat === "xlsx" || zipFormat === "docx") effectiveFormat = zipFormat;
|
|
184
184
|
}
|
|
@@ -191,7 +191,7 @@ server.tool(
|
|
|
191
191
|
break;
|
|
192
192
|
case "pdf":
|
|
193
193
|
try {
|
|
194
|
-
const { extractPdfMetadataOnly } = await import("./parser-
|
|
194
|
+
const { extractPdfMetadataOnly } = await import("./parser-QMMQ7Y7R.js");
|
|
195
195
|
metadata = await extractPdfMetadataOnly(buffer);
|
|
196
196
|
} catch {
|
|
197
197
|
metadata = void 0;
|
package/dist/mcp.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/mcp.ts"],"sourcesContent":["/** kordoc MCP 서버 — Claude/Cursor에서 문서 파싱 도구로 사용 */\r\n\r\nimport { McpServer } from \"@modelcontextprotocol/sdk/server/mcp.js\"\r\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\"\r\nimport { z } from \"zod\"\r\nimport { readFileSync, writeFileSync, realpathSync, openSync, readSync, closeSync, statSync, mkdirSync } from \"fs\"\r\nimport { resolve, isAbsolute, extname, dirname } from \"path\"\r\nimport { parse, detectFormat, detectZipFormat, blocksToMarkdown, compare, extractFormFields, fillFormFields, markdownToHwpx, fillHwpx } from \"./index.js\"\r\nimport { VERSION, toArrayBuffer, sanitizeError, KordocError } from \"./utils.js\"\r\nimport { extractHwp5MetadataOnly } from \"./hwp5/parser.js\"\r\nimport { extractHwpxMetadataOnly } from \"./hwpx/parser.js\"\r\n// pdfjs-dist는 optional — dynamic import로 지연 로드\r\n// import { extractPdfMetadataOnly } from \"./pdf/parser.js\"\r\n\r\n/** 허용 파일 확장자 */\r\nconst ALLOWED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\r\n/** 최대 파일 크기 (500MB) */\r\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\r\n\r\n/** 경로 정규화 및 보안 검증 */\r\nfunction safePath(filePath: string): string {\r\n if (!filePath) throw new KordocError(\"파일 경로가 비어있습니다\")\r\n const resolved = resolve(filePath)\r\n let real: string\r\n try {\r\n real = realpathSync(resolved)\r\n } catch (err: any) {\r\n if (err?.code === \"ENOENT\") throw new KordocError(`파일을 찾을 수 없습니다: ${resolved}`)\r\n if (err?.code === \"EACCES\" || err?.code === \"EPERM\") throw new KordocError(`파일 접근 권한이 없습니다: ${resolved}`)\r\n throw new KordocError(`경로 처리 오류 [${err?.code ?? 
\"UNKNOWN\"}]`)\r\n }\r\n if (!isAbsolute(real)) throw new KordocError(\"절대 경로만 허용됩니다\")\r\n const ext = extname(real).toLowerCase()\r\n if (!ALLOWED_EXTENSIONS.has(ext)) throw new KordocError(`지원하지 않는 확장자입니다: ${ext} (허용: ${[...ALLOWED_EXTENSIONS].join(\", \")})`)\r\n return real\r\n}\r\n\r\n/** 최대 파일 크기 — metadata 전용 (50MB, 전체 파싱보다 보수적) */\r\nconst MAX_METADATA_FILE_SIZE = 50 * 1024 * 1024\r\n\r\n/** 파일 읽기 + 크기 검증 공통 로직 */\r\nfunction readValidatedFile(filePath: string, maxSize = MAX_FILE_SIZE): { buffer: ArrayBuffer; resolved: string } {\r\n const resolved = safePath(filePath)\r\n let fileSize: number\r\n try {\r\n fileSize = statSync(resolved).size\r\n } catch (err: any) {\r\n throw new KordocError(`파일 상태 읽기 실패 [${err?.code ?? \"UNKNOWN\"}]: ${resolved}`)\r\n }\r\n if (fileSize > maxSize) {\r\n throw new KordocError(`파일이 너무 큽니다: ${(fileSize / 1024 / 1024).toFixed(1)}MB (최대 ${maxSize / 1024 / 1024}MB)`)\r\n }\r\n let raw: Buffer\r\n try {\r\n raw = readFileSync(resolved)\r\n } catch (err: any) {\r\n throw new KordocError(`파일 읽기 실패 [${err?.code ?? \"UNKNOWN\"}]: ${resolved}`)\r\n }\r\n return { buffer: toArrayBuffer(raw), resolved }\r\n}\r\n\r\n/** 파일 헤더(16바이트)만 읽어 포맷 감지 — 전체 파일 로드 불필요 */\r\nfunction detectFormatFromHeader(resolved: string): ReturnType<typeof detectFormat> {\r\n const fd = openSync(resolved, \"r\")\r\n try {\r\n const headerBuf = Buffer.alloc(16)\r\n readSync(fd, headerBuf, 0, 16, 0)\r\n return detectFormat(toArrayBuffer(headerBuf))\r\n } finally {\r\n closeSync(fd)\r\n }\r\n}\r\n\r\nconst server = new McpServer({\r\n name: \"kordoc\",\r\n version: VERSION,\r\n})\r\n\r\n// ─── 도구: parse_document ────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_document\",\r\n \"한국 문서 파일(HWP, HWPX, PDF, XLSX, DOCX)을 마크다운으로 변환합니다. 
파일 경로를 입력하면 포맷을 자동 감지하여 텍스트를 추출합니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const format = detectFormat(buffer)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const result = await parse(buffer)\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const meta = [\r\n `포맷: ${result.fileType.toUpperCase()}`,\r\n result.pageCount ? `페이지: ${result.pageCount}` : null,\r\n result.metadata?.title ? `제목: ${result.metadata.title}` : null,\r\n result.metadata?.author ? `작성자: ${result.metadata.author}` : null,\r\n result.isImageBased ? \"이미지 기반 PDF (텍스트 추출 불가)\" : null,\r\n ].filter(Boolean).join(\" | \")\r\n\r\n // outline/warnings 부가 정보 추가\r\n const parts: string[] = [`[${meta}]`]\r\n\r\n if (result.outline && result.outline.length > 0) {\r\n const outlineText = result.outline.map(o => `${\" \".repeat(o.level - 1)}- ${o.text}`).join(\"\\n\")\r\n parts.push(`\\n📑 문서 구조:\\n${outlineText}`)\r\n }\r\n\r\n if (result.warnings && result.warnings.length > 0) {\r\n const warnText = result.warnings.map(w => `- [p${w.page || \"?\"}] ${w.message}`).join(\"\\n\")\r\n parts.push(`\\n⚠️ 경고:\\n${warnText}`)\r\n }\r\n\r\n parts.push(`\\n\\n${result.markdown}`)\r\n\r\n return {\r\n content: [{ type: \"text\", text: parts.join(\"\") }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: detect_format ─────────────────────────────\r\n\r\nserver.tool(\r\n \"detect_format\",\r\n \"파일의 포맷을 매직 바이트로 감지합니다 (hwpx, hwp, pdf, unknown).\",\r\n {\r\n file_path: z.string().min(1).describe(\"감지할 
파일의 절대 경로\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const resolved = safePath(file_path)\r\n const format = detectFormatFromHeader(resolved)\r\n return {\r\n content: [{ type: \"text\", text: `${file_path}: ${format}` }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_metadata ────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_metadata\",\r\n \"문서의 메타데이터(제목, 작성자, 날짜 등)만 빠르게 추출합니다. 전체 파싱 없이 헤더/매니페스트만 읽습니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"메타데이터를 추출할 문서 파일의 절대 경로\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const resolved = safePath(file_path)\r\n const format = detectFormatFromHeader(resolved)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n // metadata 전용 크기 제한 (50MB)\r\n const { buffer } = readValidatedFile(file_path, MAX_METADATA_FILE_SIZE)\r\n\r\n let metadata\r\n // ZIP 기반 포맷(hwpx)은 내부 구조로 세분화 (XLSX/DOCX 구분)\r\n let effectiveFormat = format\r\n if (format === \"hwpx\") {\r\n const { detectZipFormat } = await import(\"./detect.js\")\r\n const zipFormat = await detectZipFormat(buffer)\r\n if (zipFormat === \"xlsx\" || zipFormat === \"docx\") effectiveFormat = zipFormat as any\r\n }\r\n switch (effectiveFormat) {\r\n case \"hwp\":\r\n metadata = extractHwp5MetadataOnly(Buffer.from(buffer))\r\n break\r\n case \"hwpx\":\r\n metadata = await extractHwpxMetadataOnly(buffer)\r\n break\r\n case \"pdf\":\r\n try {\r\n const { extractPdfMetadataOnly } = await import(\"./pdf/parser.js\")\r\n metadata = await extractPdfMetadataOnly(buffer)\r\n } catch {\r\n metadata = undefined // pdfjs-dist 미설치 시 metadata 생략\r\n }\r\n break\r\n case \"xlsx\":\r\n case \"docx\": {\r\n // XLSX/DOCX는 전용 metadata 추출기가 없으므로 전체 파싱 후 metadata 반환\r\n const result = await parse(buffer)\r\n metadata = 
result.success ? result.metadata : undefined\r\n break\r\n }\r\n }\r\n\r\n return {\r\n content: [{ type: \"text\", text: JSON.stringify({ format, ...metadata }, null, 2) }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_pages ──────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_pages\",\r\n \"문서의 특정 페이지/섹션 범위만 파싱합니다. PDF는 정확한 페이지, HWP/HWPX는 섹션 단위 근사치입니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\r\n pages: z.string().min(1).describe(\"페이지 범위 (예: '1-3', '1,3,5-7')\"),\r\n },\r\n async ({ file_path, pages }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const format = detectFormat(buffer)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const result = await parse(buffer, { pages })\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const meta = [\r\n `포맷: ${result.fileType.toUpperCase()}`,\r\n `범위: ${pages}`,\r\n result.pageCount ? `페이지: ${result.pageCount}` : null,\r\n ].filter(Boolean).join(\" | \")\r\n\r\n return {\r\n content: [{ type: \"text\", text: `[${meta}]\\n\\n${result.markdown}` }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_table ──────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_table\",\r\n \"문서에서 N번째 테이블만 추출합니다 (0-based index). 
테이블이 없거나 인덱스 범위를 초과하면 오류를 반환합니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\r\n table_index: z.number().int().min(0).describe(\"추출할 테이블 인덱스 (0부터 시작)\"),\r\n },\r\n async ({ file_path, table_index }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const format = detectFormat(buffer)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const result = await parse(buffer)\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const tableBlocks = result.blocks.filter(b => b.type === \"table\" && b.table)\r\n if (tableBlocks.length === 0) {\r\n return {\r\n content: [{ type: \"text\", text: `문서에 테이블이 없습니다.` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n if (table_index >= tableBlocks.length) {\r\n return {\r\n content: [{ type: \"text\", text: `테이블 인덱스 초과: ${table_index} (총 ${tableBlocks.length}개 테이블)` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const tableBlock = tableBlocks[table_index]\r\n const tableMarkdown = blocksToMarkdown([tableBlock])\r\n\r\n return {\r\n content: [{ type: \"text\", text: `[테이블 #${table_index} / 총 ${tableBlocks.length}개]\\n\\n${tableMarkdown}` }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: compare_documents ─────────────────────────\r\n\r\nserver.tool(\r\n \"compare_documents\",\r\n \"두 한국 문서 파일을 비교하여 추가/삭제/변경된 블록을 표시합니다. 신구대조표 생성에 활용됩니다. 
크로스 포맷(HWP↔HWPX) 비교 가능.\",\r\n {\r\n file_path_a: z.string().min(1).describe(\"비교 원본 문서의 절대 경로\"),\r\n file_path_b: z.string().min(1).describe(\"비교 대상 문서의 절대 경로\"),\r\n },\r\n async ({ file_path_a, file_path_b }) => {\r\n try {\r\n const { buffer: bufA } = readValidatedFile(file_path_a)\r\n const { buffer: bufB } = readValidatedFile(file_path_b)\r\n\r\n const result = await compare(bufA, bufB)\r\n const { stats, diffs } = result\r\n\r\n const lines: string[] = [\r\n `## 문서 비교 결과`,\r\n `추가: ${stats.added} | 삭제: ${stats.removed} | 변경: ${stats.modified} | 동일: ${stats.unchanged}`,\r\n \"\",\r\n ]\r\n\r\n for (const d of diffs) {\r\n const prefix = d.type === \"added\" ? \"+\" : d.type === \"removed\" ? \"-\" : d.type === \"modified\" ? \"~\" : \" \"\r\n const text = d.after?.text || d.before?.text || (d.after?.table ? \"[테이블]\" : d.before?.table ? \"[테이블]\" : \"\")\r\n const sim = d.similarity !== undefined ? ` (${(d.similarity * 100).toFixed(0)}%)` : \"\"\r\n lines.push(`${prefix} ${text.substring(0, 200)}${sim}`)\r\n }\r\n\r\n return {\r\n content: [{ type: \"text\", text: lines.join(\"\\n\") }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_form ───────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_form\",\r\n \"한국 서식 문서에서 레이블-값 쌍을 구조화된 JSON으로 추출합니다. 
양식/서식 문서에 최적화.\",\r\n {\r\n file_path: z.string().min(1).describe(\"서식 문서 파일의 절대 경로\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const result = await parse(buffer)\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const form = extractFormFields(result.blocks)\r\n return {\r\n content: [{ type: \"text\", text: JSON.stringify(form, null, 2) }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: fill_form ───────────────────────────────\r\n\r\nserver.tool(\r\n \"fill_form\",\r\n \"한국 서식 문서의 빈칸을 채워서 새 문서로 출력합니다. hwpx-preserve를 사용하면 원본 서식(테두리, 폰트, 병합 등)을 100% 유지합니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"서식 템플릿 문서의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\r\n fields: z.record(z.string(), z.string()).describe(\"채울 필드 맵 (라벨 → 값). 예: {\\\"성명\\\": \\\"홍길동\\\", \\\"전화번호\\\": \\\"010-1234-5678\\\"}\"),\r\n output_format: z.enum([\"markdown\", \"hwpx\", \"hwpx-preserve\"]).default(\"hwpx-preserve\").describe(\"출력 포맷: hwpx-preserve (원본 스타일 보존, HWPX 전용), hwpx (새 HWPX 생성), markdown\"),\r\n output_path: z.string().optional().describe(\"출력 파일 저장 경로 (선택). 지정 시 파일로 저장, 미지정 시 텍스트로 반환\"),\r\n },\r\n async ({ file_path, fields, output_format, output_path }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n\r\n // ─── hwpx-preserve: 원본 ZIP 직접 수정 (스타일 보존) ───\r\n if (output_format === \"hwpx-preserve\") {\r\n const format = detectFormat(buffer)\r\n let isHwpx = format === \"hwpx\"\r\n if (isHwpx) {\r\n const zipFormat = await detectZipFormat(buffer)\r\n isHwpx = zipFormat === \"hwpx\"\r\n }\r\n if (!isHwpx) {\r\n return {\r\n content: [{ type: \"text\", text: `hwpx-preserve는 HWPX 파일만 지원합니다 (감지된 포맷: ${format}). 
hwpx 또는 markdown을 사용하세요.` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const hwpxResult = await fillHwpx(buffer, fields)\r\n const summary = [\r\n `채워진 필드: ${hwpxResult.filled.length}개 (원본 스타일 보존)`,\r\n hwpxResult.unmatched.length > 0 ? `매칭 실패: ${hwpxResult.unmatched.join(\", \")}` : null,\r\n ].filter(Boolean).join(\" | \")\r\n\r\n const filledList = hwpxResult.filled.map(f => ` - ${f.label}: ${f.value}`).join(\"\\n\")\r\n\r\n if (output_path) {\r\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\r\n writeFileSync(resolve(output_path), Buffer.from(hwpxResult.buffer))\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\n채워진 필드:\\n${filledList}\\n\\nHWPX 파일 저장 (원본 서식 유지): ${resolve(output_path)}` }],\r\n }\r\n }\r\n\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\n채워진 필드:\\n${filledList}\\n\\n⚠️ output_path를 지정하면 원본 서식이 유지된 HWPX 파일로 저장됩니다.` }],\r\n }\r\n }\r\n\r\n // ─── 일반 경로: parse → fill → output ───\r\n const result = await parse(buffer)\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const formInfo = extractFormFields(result.blocks)\r\n const fillResult = fillFormFields(result.blocks, fields)\r\n\r\n if (fillResult.filled.length === 0 && formInfo.fields.length === 0) {\r\n return {\r\n content: [{ type: \"text\", text: `서식 필드를 찾을 수 없습니다. 일반 문서이거나 서식 패턴이 감지되지 않았습니다.` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const markdown = blocksToMarkdown(fillResult.blocks)\r\n const summary = [\r\n `채워진 필드: ${fillResult.filled.length}개`,\r\n fillResult.unmatched.length > 0 ? `매칭 실패: ${fillResult.unmatched.join(\", \")}` : null,\r\n formInfo.fields.length > 0 ? 
`서식 필드: ${formInfo.fields.length}개 (확신도 ${(formInfo.confidence * 100).toFixed(0)}%)` : null,\r\n ].filter(Boolean).join(\" | \")\r\n\r\n if (output_format === \"hwpx\") {\r\n const hwpxBuffer = await markdownToHwpx(markdown)\r\n if (output_path) {\r\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\r\n writeFileSync(resolve(output_path), Buffer.from(hwpxBuffer))\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\nHWPX 파일 저장: ${resolve(output_path)}` }],\r\n }\r\n }\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\n⚠️ output_path를 지정하면 HWPX 파일로 저장됩니다. 미리보기:\\n\\n${markdown}` }],\r\n }\r\n }\r\n\r\n // markdown\r\n if (output_path) {\r\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\r\n writeFileSync(resolve(output_path), markdown, \"utf-8\")\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\n마크다운 파일 저장: ${resolve(output_path)}\\n\\n${markdown}` }],\r\n }\r\n }\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\n${markdown}` }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 서버 시작 ───────────────────────────────────────\r\n\r\nasync function main() {\r\n const transport = new StdioServerTransport()\r\n await server.connect(transport)\r\n}\r\n\r\nmain().catch((err) => { console.error(err); process.exit(1) 
})\r\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;AAEA,SAAS,iBAAiB;AAC1B,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAClB,SAAS,cAAc,eAAe,cAAc,UAAU,UAAU,WAAW,UAAU,iBAAiB;AAC9G,SAAS,SAAS,YAAY,SAAS,eAAe;AAStD,IAAM,qBAAqB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAE9E,IAAM,gBAAgB,MAAM,OAAO;AAGnC,SAAS,SAAS,UAA0B;AAC1C,MAAI,CAAC,SAAU,OAAM,IAAI,YAAY,sEAAe;AACpD,QAAM,WAAW,QAAQ,QAAQ;AACjC,MAAI;AACJ,MAAI;AACF,WAAO,aAAa,QAAQ;AAAA,EAC9B,SAAS,KAAU;AACjB,QAAI,KAAK,SAAS,SAAU,OAAM,IAAI,YAAY,oEAAkB,QAAQ,EAAE;AAC9E,QAAI,KAAK,SAAS,YAAY,KAAK,SAAS,QAAS,OAAM,IAAI,YAAY,0EAAmB,QAAQ,EAAE;AACxG,UAAM,IAAI,YAAY,2CAAa,KAAK,QAAQ,SAAS,GAAG;AAAA,EAC9D;AACA,MAAI,CAAC,WAAW,IAAI,EAAG,OAAM,IAAI,YAAY,gEAAc;AAC3D,QAAM,MAAM,QAAQ,IAAI,EAAE,YAAY;AACtC,MAAI,CAAC,mBAAmB,IAAI,GAAG,EAAG,OAAM,IAAI,YAAY,+EAAmB,GAAG,mBAAS,CAAC,GAAG,kBAAkB,EAAE,KAAK,IAAI,CAAC,GAAG;AAC5H,SAAO;AACT;AAGA,IAAM,yBAAyB,KAAK,OAAO;AAG3C,SAAS,kBAAkB,UAAkB,UAAU,eAA0D;AAC/G,QAAM,WAAW,SAAS,QAAQ;AAClC,MAAI;AACJ,MAAI;AACF,eAAW,SAAS,QAAQ,EAAE;AAAA,EAChC,SAAS,KAAU;AACjB,UAAM,IAAI,YAAY,wDAAgB,KAAK,QAAQ,SAAS,MAAM,QAAQ,EAAE;AAAA,EAC9E;AACA,MAAI,WAAW,SAAS;AACtB,UAAM,IAAI,YAAY,wDAAgB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC,oBAAU,UAAU,OAAO,IAAI,KAAK;AAAA,EAC9G;AACA,MAAI;AACJ,MAAI;AACF,UAAM,aAAa,QAAQ;AAAA,EAC7B,SAAS,KAAU;AACjB,UAAM,IAAI,YAAY,2CAAa,KAAK,QAAQ,SAAS,MAAM,QAAQ,EAAE;AAAA,EAC3E;AACA,SAAO,EAAE,QAAQ,cAAc,GAAG,GAAG,SAAS;AAChD;AAGA,SAAS,uBAAuB,UAAmD;AACjF,QAAM,KAAK,SAAS,UAAU,GAAG;AACjC,MAAI;AACF,UAAM,YAAY,OAAO,MAAM,EAAE;AACjC,aAAS,IAAI,WAAW,GAAG,IAAI,CAAC;AAChC,WAAO,aAAa,cAAc,SAAS,CAAC;AAAA,EAC9C,UAAE;AACA,cAAU,EAAE;AAAA,EACd;AACF;AAEA,IAAM,SAAS,IAAI,UAAU;AAAA,EAC3B,MAAM;AAAA,EACN,SAAS;AACX,CAAC;AAID,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,EACvF;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI
,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,QAChD,OAAO,UAAU,QAAQ,iBAAO,OAAO,SAAS,KAAK,KAAK;AAAA,QAC1D,OAAO,UAAU,SAAS,uBAAQ,OAAO,SAAS,MAAM,KAAK;AAAA,QAC7D,OAAO,eAAe,uFAA2B;AAAA,MACnD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAG5B,YAAM,QAAkB,CAAC,IAAI,IAAI,GAAG;AAEpC,UAAI,OAAO,WAAW,OAAO,QAAQ,SAAS,GAAG;AAC/C,cAAM,cAAc,OAAO,QAAQ,IAAI,OAAK,GAAG,KAAK,OAAO,EAAE,QAAQ,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,KAAK,IAAI;AAC/F,cAAM,KAAK;AAAA;AAAA,EAAgB,WAAW,EAAE;AAAA,MAC1C;AAEA,UAAI,OAAO,YAAY,OAAO,SAAS,SAAS,GAAG;AACjD,cAAM,WAAW,OAAO,SAAS,IAAI,OAAK,OAAO,EAAE,QAAQ,GAAG,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,IAAI;AACzF,cAAM,KAAK;AAAA;AAAA,EAAa,QAAQ,EAAE;AAAA,MACpC;AAEA,YAAM,KAAK;AAAA;AAAA,EAAO,OAAO,QAAQ,EAAE;AAEnC,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,EAAE,EAAE,CAAC;AAAA,MAClD;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,iEAAe;AAAA,EACvD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAC9C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,MAAM,GAAG,CAAC;AAAA,MAC7D;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mHAAyB;AAAA,EACjE;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAE9C,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAGA,YAAM,EAAE,OAAO,IAAI,kBAAkB,WAAW,sBAAsB;AAEtE,UAAI;AAEJ,UAAI,kBAAkB;AACtB,UAAI,WAAW,QAAQ;AACrB,cAAM,EAAE,
iBAAAA,iBAAgB,IAAI,MAAM,OAAO,sBAAa;AACtD,cAAM,YAAY,MAAMA,iBAAgB,MAAM;AAC9C,YAAI,cAAc,UAAU,cAAc,OAAQ,mBAAkB;AAAA,MACtE;AACA,cAAQ,iBAAiB;AAAA,QACvB,KAAK;AACH,qBAAW,wBAAwB,OAAO,KAAK,MAAM,CAAC;AACtD;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,wBAAwB,MAAM;AAC/C;AAAA,QACF,KAAK;AACH,cAAI;AACF,kBAAM,EAAE,uBAAuB,IAAI,MAAM,OAAO,sBAAiB;AACjE,uBAAW,MAAM,uBAAuB,MAAM;AAAA,UAChD,QAAQ;AACN,uBAAW;AAAA,UACb;AACA;AAAA,QACF,KAAK;AAAA,QACL,KAAK,QAAQ;AAEX,gBAAM,SAAS,MAAM,MAAM,MAAM;AACjC,qBAAW,OAAO,UAAU,OAAO,WAAW;AAC9C;AAAA,QACF;AAAA,MACF;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,EAAE,QAAQ,GAAG,SAAS,GAAG,MAAM,CAAC,EAAE,CAAC;AAAA,MACpF;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4DAA8B;AAAA,EAClE;AAAA,EACA,OAAO,EAAE,WAAW,MAAM,MAAM;AAC9B,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,QAAQ,EAAE,MAAM,CAAC;AAE5C,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,iBAAO,KAAK;AAAA,QACZ,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,MAClD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,IAAI;AAAA;AAAA,EAAQ,OAAO,QAAQ,GAAG,CAAC;AAAA,MACrE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,SAAS,uFAAsB;AAAA,EACtE;AAAA,EACA,OAAO,EAAE,WAAW,YAAY
,MAAM;AACpC,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,cAAc,OAAO,OAAO,OAAO,OAAK,EAAE,SAAS,WAAW,EAAE,KAAK;AAC3E,UAAI,YAAY,WAAW,GAAG;AAC5B,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wEAAiB,CAAC;AAAA,UAClD,SAAS;AAAA,QACX;AAAA,MACF;AAEA,UAAI,eAAe,YAAY,QAAQ;AACrC,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,uDAAe,WAAW,YAAO,YAAY,MAAM,6BAAS,CAAC;AAAA,UAC7F,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,aAAa,YAAY,WAAW;AAC1C,YAAM,gBAAgB,iBAAiB,CAAC,UAAU,CAAC;AAEnD,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wBAAS,WAAW,aAAQ,YAAY,MAAM;AAAA;AAAA,EAAS,aAAa,GAAG,CAAC;AAAA,MAC1G;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,IACzD,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EAC3D;AAAA,EACA,OAAO,EAAE,aAAa,YAAY,MAAM;AACtC,QAAI;AACF,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AACtD,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AAEtD,YAAM,SAAS,MAAM,QAAQ,MAAM,IAAI;AACvC,YAAM,EAAE,OAAO,MAAM,IAAI;AAEzB,YAAM,QAAkB;AAAA,QACtB;AAAA,QACA,iBAAO,MAAM,KAAK,oBAAU,MAAM,OAAO,oBAAU,MAAM,QAAQ,oBAAU,MAAM,SAAS;AAAA,QAC1F;AAAA,MACF;AAEA,iBAAW,KAAK,OAAO;AACrB,cAAM,SAAS,EAAE,SAAS,UAAU,MAAM,EAAE,SAAS,YAAY,MAAM,EAAE,SAAS,aAAa,MAAM;AACrG,cAAM,OAAO,EAAE,OAAO,QAAQ,EAAE,QAAQ,SAAS,EAAE,OAAO,QAAQ,yBAAU,EAAE,QAAQ,QAAQ,yBAAU;AACxG,cAAM,MAAM,EAAE,eAAe,SAAY,MAAM,EAAE,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AACpF,cAAM,KAAK,GAAG,MAAM,IAAI,KAAK,UAAU,GAAG,GAAG,CAAC,GAAG,GAAG,EAAE;AAAA,MACxD;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,IAAI,EAAE,CAAC;AAAA,MACpD;AAAA,IACF,SAAS,KAAK;AACZ,a
AAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EACzD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO,kBAAkB,OAAO,MAAM;AAC5C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,MAAM,MAAM,CAAC,EAAE,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,IACrF,QAAQ,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAAE,SAAS,4JAAqE;AAAA,IACvH,eAAe,EAAE,KAAK,CAAC,YAAY,QAAQ,eAAe,CAAC,EAAE,QAAQ,eAAe,EAAE,SAAS,uJAAuE;AAAA,IACtK,aAAa,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0LAA8C;AAAA,EAC5F;AAAA,EACA,OAAO,EAAE,WAAW,QAAQ,eAAe,YAAY,MAAM;AAC3D,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAG9C,UAAI,kBAAkB,iBAAiB;AACrC,cAAM,SAAS,aAAa,MAAM;AAClC,YAAI,SAAS,WAAW;AACxB,YAAI,QAAQ;AACV,gBAAM,YAAY,MAAM,gBAAgB,MAAM;AAC9C,mBAAS,cAAc;AAAA,QACzB;AACA,YAAI,CAAC,QAAQ;AACX,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,gHAA0C,MAAM,sEAA8B,CAAC;AAAA,YAC/G,SAAS;AAAA,UACX;AAAA,QACF;AAEA,cAAM,aAAa,MAAM,SAAS,QAAQ,MAAM;AAChD,cAAMC,WAAU;AAAA,UACd,oCAAW,WAAW,OAAO,MAAM;AAAA,UACnC,WAAW,UAAU,SAAS,IAAI,8BAAU,WAAW,UAAU,KAAK,IAAI,CAAC,KAAK;AAAA,QAClF,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,cAAM,aAAa,WAAW,OAAO,IAAI,OAAK,OAAO,EAAE,KAAK,KAAK,EAAE,KAAK,EAAE,EAAE,KAAK,IAAI;AAErF,YAAI,aAAa;AACf,oBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,wBAAc,QAAQ,WAAW,GAAG,OAAO,KAAK,WAAW,MAAM,CAAC;AAClE,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAIA,QAAO;AAAA;AAAA;AAAA,EAAiB,UAAU;AAAA;AAAA,2EAA8B,QAAQ,WAAW,CAAC,GAAG,CAAC;AAAA,UAC9H;AAAA
,QACF;AAEA,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAIA,QAAO;AAAA;AAAA;AAAA,EAAiB,UAAU;AAAA;AAAA,oKAAsD,CAAC;AAAA,QAC/H;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,MAAM,MAAM;AACjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,WAAW,kBAAkB,OAAO,MAAM;AAChD,YAAM,aAAa,eAAe,OAAO,QAAQ,MAAM;AAEvD,UAAI,WAAW,OAAO,WAAW,KAAK,SAAS,OAAO,WAAW,GAAG;AAClE,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,qNAAgD,CAAC;AAAA,UACjF,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,WAAW,iBAAiB,WAAW,MAAM;AACnD,YAAM,UAAU;AAAA,QACd,oCAAW,WAAW,OAAO,MAAM;AAAA,QACnC,WAAW,UAAU,SAAS,IAAI,8BAAU,WAAW,UAAU,KAAK,IAAI,CAAC,KAAK;AAAA,QAChF,SAAS,OAAO,SAAS,IAAI,8BAAU,SAAS,OAAO,MAAM,+BAAW,SAAS,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AAAA,MACtH,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,UAAI,kBAAkB,QAAQ;AAC5B,cAAM,aAAa,MAAM,eAAe,QAAQ;AAChD,YAAI,aAAa;AACf,oBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,wBAAc,QAAQ,WAAW,GAAG,OAAO,KAAK,UAAU,CAAC;AAC3D,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,kCAAoB,QAAQ,WAAW,CAAC,GAAG,CAAC;AAAA,UACzF;AAAA,QACF;AACA,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA;AAAA;AAAA,EAAsD,QAAQ,GAAG,CAAC;AAAA,QAC/G;AAAA,MACF;AAGA,UAAI,aAAa;AACf,kBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,sBAAc,QAAQ,WAAW,GAAG,UAAU,OAAO;AACrD,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,sDAAoB,QAAQ,WAAW,CAAC;AAAA;AAAA,EAAO,QAAQ,GAAG,CAAC;AAAA,QACxG;AAAA,MACF;AACA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,EAAQ,QAAQ,GAAG,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,eAAe,OAAO;AACpB,QAAM,YAAY,IAAI,qBAAqB;AAC3C,QAAM,OAAO,QAAQ,SAAS;AAChC;AAEA,KAAK,EAAE,MAAM,CAAC,QAAQ;AAAE,UAAQ,MAAM,GAAG;AAAG,UAAQ,KAAK,CAAC;AAAE,CAAC;","names":["detectZipFormat","summary"]}
|
|
1
|
+
{"version":3,"sources":["../src/mcp.ts"],"sourcesContent":["/** kordoc MCP 서버 — Claude/Cursor에서 문서 파싱 도구로 사용 */\n\nimport { McpServer } from \"@modelcontextprotocol/sdk/server/mcp.js\"\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\"\nimport { z } from \"zod\"\nimport { readFileSync, writeFileSync, realpathSync, openSync, readSync, closeSync, statSync, mkdirSync } from \"fs\"\nimport { resolve, isAbsolute, extname, dirname } from \"path\"\nimport { parse, detectFormat, detectZipFormat, blocksToMarkdown, compare, extractFormFields, fillFormFields, markdownToHwpx, fillHwpx } from \"./index.js\"\nimport { VERSION, toArrayBuffer, sanitizeError, KordocError } from \"./utils.js\"\nimport { extractHwp5MetadataOnly } from \"./hwp5/parser.js\"\nimport { extractHwpxMetadataOnly } from \"./hwpx/parser.js\"\n// pdfjs-dist는 optional — dynamic import로 지연 로드\n// import { extractPdfMetadataOnly } from \"./pdf/parser.js\"\n\n/** 허용 파일 확장자 */\nconst ALLOWED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\n/** 최대 파일 크기 (500MB) */\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\n\n/** 경로 정규화 및 보안 검증 */\nfunction safePath(filePath: string): string {\n if (!filePath) throw new KordocError(\"파일 경로가 비어있습니다\")\n const resolved = resolve(filePath)\n let real: string\n try {\n real = realpathSync(resolved)\n } catch (err: any) {\n if (err?.code === \"ENOENT\") throw new KordocError(`파일을 찾을 수 없습니다: ${resolved}`)\n if (err?.code === \"EACCES\" || err?.code === \"EPERM\") throw new KordocError(`파일 접근 권한이 없습니다: ${resolved}`)\n throw new KordocError(`경로 처리 오류 [${err?.code ?? 
\"UNKNOWN\"}]`)\n }\n if (!isAbsolute(real)) throw new KordocError(\"절대 경로만 허용됩니다\")\n const ext = extname(real).toLowerCase()\n if (!ALLOWED_EXTENSIONS.has(ext)) throw new KordocError(`지원하지 않는 확장자입니다: ${ext} (허용: ${[...ALLOWED_EXTENSIONS].join(\", \")})`)\n return real\n}\n\n/** 최대 파일 크기 — metadata 전용 (50MB, 전체 파싱보다 보수적) */\nconst MAX_METADATA_FILE_SIZE = 50 * 1024 * 1024\n\n/** 파일 읽기 + 크기 검증 공통 로직 */\nfunction readValidatedFile(filePath: string, maxSize = MAX_FILE_SIZE): { buffer: ArrayBuffer; resolved: string } {\n const resolved = safePath(filePath)\n let fileSize: number\n try {\n fileSize = statSync(resolved).size\n } catch (err: any) {\n throw new KordocError(`파일 상태 읽기 실패 [${err?.code ?? \"UNKNOWN\"}]: ${resolved}`)\n }\n if (fileSize > maxSize) {\n throw new KordocError(`파일이 너무 큽니다: ${(fileSize / 1024 / 1024).toFixed(1)}MB (최대 ${maxSize / 1024 / 1024}MB)`)\n }\n let raw: Buffer\n try {\n raw = readFileSync(resolved)\n } catch (err: any) {\n throw new KordocError(`파일 읽기 실패 [${err?.code ?? \"UNKNOWN\"}]: ${resolved}`)\n }\n return { buffer: toArrayBuffer(raw), resolved }\n}\n\n/** 파일 헤더(16바이트)만 읽어 포맷 감지 — 전체 파일 로드 불필요 */\nfunction detectFormatFromHeader(resolved: string): ReturnType<typeof detectFormat> {\n const fd = openSync(resolved, \"r\")\n try {\n const headerBuf = Buffer.alloc(16)\n readSync(fd, headerBuf, 0, 16, 0)\n return detectFormat(toArrayBuffer(headerBuf))\n } finally {\n closeSync(fd)\n }\n}\n\nconst server = new McpServer({\n name: \"kordoc\",\n version: VERSION,\n})\n\n// ─── 도구: parse_document ────────────────────────────\n\nserver.tool(\n \"parse_document\",\n \"한국 문서 파일(HWP, HWPX, PDF, XLSX, DOCX)을 마크다운으로 변환합니다. 
파일 경로를 입력하면 포맷을 자동 감지하여 텍스트를 추출합니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\n },\n async ({ file_path }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const meta = [\n `포맷: ${result.fileType.toUpperCase()}`,\n result.pageCount ? `페이지: ${result.pageCount}` : null,\n result.metadata?.title ? `제목: ${result.metadata.title}` : null,\n result.metadata?.author ? `작성자: ${result.metadata.author}` : null,\n result.isImageBased ? \"이미지 기반 PDF (텍스트 추출 불가)\" : null,\n ].filter(Boolean).join(\" | \")\n\n // outline/warnings 부가 정보 추가\n const parts: string[] = [`[${meta}]`]\n\n if (result.outline && result.outline.length > 0) {\n const outlineText = result.outline.map(o => `${\" \".repeat(o.level - 1)}- ${o.text}`).join(\"\\n\")\n parts.push(`\\n📑 문서 구조:\\n${outlineText}`)\n }\n\n if (result.warnings && result.warnings.length > 0) {\n const warnText = result.warnings.map(w => `- [p${w.page || \"?\"}] ${w.message}`).join(\"\\n\")\n parts.push(`\\n⚠️ 경고:\\n${warnText}`)\n }\n\n parts.push(`\\n\\n${result.markdown}`)\n\n return {\n content: [{ type: \"text\", text: parts.join(\"\") }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: detect_format ─────────────────────────────\n\nserver.tool(\n \"detect_format\",\n \"파일의 포맷을 매직 바이트로 감지합니다 (hwpx, hwp, pdf, unknown).\",\n {\n file_path: z.string().min(1).describe(\"감지할 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const resolved = safePath(file_path)\n const format = 
detectFormatFromHeader(resolved)\n return {\n content: [{ type: \"text\", text: `${file_path}: ${format}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_metadata ────────────────────────────\n\nserver.tool(\n \"parse_metadata\",\n \"문서의 메타데이터(제목, 작성자, 날짜 등)만 빠르게 추출합니다. 전체 파싱 없이 헤더/매니페스트만 읽습니다.\",\n {\n file_path: z.string().min(1).describe(\"메타데이터를 추출할 문서 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const resolved = safePath(file_path)\n const format = detectFormatFromHeader(resolved)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n // metadata 전용 크기 제한 (50MB)\n const { buffer } = readValidatedFile(file_path, MAX_METADATA_FILE_SIZE)\n\n let metadata\n // ZIP 기반 포맷(hwpx)은 내부 구조로 세분화 (XLSX/DOCX 구분)\n let effectiveFormat = format\n if (format === \"hwpx\") {\n const { detectZipFormat } = await import(\"./detect.js\")\n const zipFormat = await detectZipFormat(buffer)\n if (zipFormat === \"xlsx\" || zipFormat === \"docx\") effectiveFormat = zipFormat as any\n }\n switch (effectiveFormat) {\n case \"hwp\":\n metadata = extractHwp5MetadataOnly(Buffer.from(buffer))\n break\n case \"hwpx\":\n metadata = await extractHwpxMetadataOnly(buffer)\n break\n case \"pdf\":\n try {\n const { extractPdfMetadataOnly } = await import(\"./pdf/parser.js\")\n metadata = await extractPdfMetadataOnly(buffer)\n } catch {\n metadata = undefined // pdfjs-dist 미설치 시 metadata 생략\n }\n break\n case \"xlsx\":\n case \"docx\": {\n // XLSX/DOCX는 전용 metadata 추출기가 없으므로 전체 파싱 후 metadata 반환\n const result = await parse(buffer)\n metadata = result.success ? 
result.metadata : undefined\n break\n }\n }\n\n return {\n content: [{ type: \"text\", text: JSON.stringify({ format, ...metadata }, null, 2) }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_pages ──────────────────────────────\n\nserver.tool(\n \"parse_pages\",\n \"문서의 특정 페이지/섹션 범위만 파싱합니다. PDF는 정확한 페이지, HWP/HWPX는 섹션 단위 근사치입니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\n pages: z.string().min(1).describe(\"페이지 범위 (예: '1-3', '1,3,5-7')\"),\n },\n async ({ file_path, pages }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer, { pages })\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const meta = [\n `포맷: ${result.fileType.toUpperCase()}`,\n `범위: ${pages}`,\n result.pageCount ? `페이지: ${result.pageCount}` : null,\n ].filter(Boolean).join(\" | \")\n\n return {\n content: [{ type: \"text\", text: `[${meta}]\\n\\n${result.markdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_table ──────────────────────────────\n\nserver.tool(\n \"parse_table\",\n \"문서에서 N번째 테이블만 추출합니다 (0-based index). 
테이블이 없거나 인덱스 범위를 초과하면 오류를 반환합니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\n table_index: z.number().int().min(0).describe(\"추출할 테이블 인덱스 (0부터 시작)\"),\n },\n async ({ file_path, table_index }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const tableBlocks = result.blocks.filter(b => b.type === \"table\" && b.table)\n if (tableBlocks.length === 0) {\n return {\n content: [{ type: \"text\", text: `문서에 테이블이 없습니다.` }],\n isError: true,\n }\n }\n\n if (table_index >= tableBlocks.length) {\n return {\n content: [{ type: \"text\", text: `테이블 인덱스 초과: ${table_index} (총 ${tableBlocks.length}개 테이블)` }],\n isError: true,\n }\n }\n\n const tableBlock = tableBlocks[table_index]\n const tableMarkdown = blocksToMarkdown([tableBlock])\n\n return {\n content: [{ type: \"text\", text: `[테이블 #${table_index} / 총 ${tableBlocks.length}개]\\n\\n${tableMarkdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: compare_documents ─────────────────────────\n\nserver.tool(\n \"compare_documents\",\n \"두 한국 문서 파일을 비교하여 추가/삭제/변경된 블록을 표시합니다. 신구대조표 생성에 활용됩니다. 
크로스 포맷(HWP↔HWPX) 비교 가능.\",\n {\n file_path_a: z.string().min(1).describe(\"비교 원본 문서의 절대 경로\"),\n file_path_b: z.string().min(1).describe(\"비교 대상 문서의 절대 경로\"),\n },\n async ({ file_path_a, file_path_b }) => {\n try {\n const { buffer: bufA } = readValidatedFile(file_path_a)\n const { buffer: bufB } = readValidatedFile(file_path_b)\n\n const result = await compare(bufA, bufB)\n const { stats, diffs } = result\n\n const lines: string[] = [\n `## 문서 비교 결과`,\n `추가: ${stats.added} | 삭제: ${stats.removed} | 변경: ${stats.modified} | 동일: ${stats.unchanged}`,\n \"\",\n ]\n\n for (const d of diffs) {\n const prefix = d.type === \"added\" ? \"+\" : d.type === \"removed\" ? \"-\" : d.type === \"modified\" ? \"~\" : \" \"\n const text = d.after?.text || d.before?.text || (d.after?.table ? \"[테이블]\" : d.before?.table ? \"[테이블]\" : \"\")\n const sim = d.similarity !== undefined ? ` (${(d.similarity * 100).toFixed(0)}%)` : \"\"\n lines.push(`${prefix} ${text.substring(0, 200)}${sim}`)\n }\n\n return {\n content: [{ type: \"text\", text: lines.join(\"\\n\") }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_form ───────────────────────────────\n\nserver.tool(\n \"parse_form\",\n \"한국 서식 문서에서 레이블-값 쌍을 구조화된 JSON으로 추출합니다. 
양식/서식 문서에 최적화.\",\n {\n file_path: z.string().min(1).describe(\"서식 문서 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\n isError: true,\n }\n }\n\n const form = extractFormFields(result.blocks)\n return {\n content: [{ type: \"text\", text: JSON.stringify(form, null, 2) }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: fill_form ───────────────────────────────\n\nserver.tool(\n \"fill_form\",\n \"한국 서식 문서의 빈칸을 채워서 새 문서로 출력합니다. hwpx-preserve를 사용하면 원본 서식(테두리, 폰트, 병합 등)을 100% 유지합니다.\",\n {\n file_path: z.string().min(1).describe(\"서식 템플릿 문서의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\n fields: z.record(z.string(), z.string()).describe(\"채울 필드 맵 (라벨 → 값). 예: {\\\"성명\\\": \\\"홍길동\\\", \\\"전화번호\\\": \\\"010-1234-5678\\\"}\"),\n output_format: z.enum([\"markdown\", \"hwpx\", \"hwpx-preserve\"]).default(\"hwpx-preserve\").describe(\"출력 포맷: hwpx-preserve (원본 스타일 보존, HWPX 전용), hwpx (새 HWPX 생성), markdown\"),\n output_path: z.string().optional().describe(\"출력 파일 저장 경로 (선택). 지정 시 파일로 저장, 미지정 시 텍스트로 반환\"),\n },\n async ({ file_path, fields, output_format, output_path }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n\n // ─── hwpx-preserve: 원본 ZIP 직접 수정 (스타일 보존) ───\n if (output_format === \"hwpx-preserve\") {\n const format = detectFormat(buffer)\n let isHwpx = format === \"hwpx\"\n if (isHwpx) {\n const zipFormat = await detectZipFormat(buffer)\n isHwpx = zipFormat === \"hwpx\"\n }\n if (!isHwpx) {\n return {\n content: [{ type: \"text\", text: `hwpx-preserve는 HWPX 파일만 지원합니다 (감지된 포맷: ${format}). 
hwpx 또는 markdown을 사용하세요.` }],\n isError: true,\n }\n }\n\n const hwpxResult = await fillHwpx(buffer, fields)\n const summary = [\n `채워진 필드: ${hwpxResult.filled.length}개 (원본 스타일 보존)`,\n hwpxResult.unmatched.length > 0 ? `매칭 실패: ${hwpxResult.unmatched.join(\", \")}` : null,\n ].filter(Boolean).join(\" | \")\n\n const filledList = hwpxResult.filled.map(f => ` - ${f.label}: ${f.value}`).join(\"\\n\")\n\n if (output_path) {\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\n writeFileSync(resolve(output_path), Buffer.from(hwpxResult.buffer))\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\n채워진 필드:\\n${filledList}\\n\\nHWPX 파일 저장 (원본 서식 유지): ${resolve(output_path)}` }],\n }\n }\n\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\n채워진 필드:\\n${filledList}\\n\\n⚠️ output_path를 지정하면 원본 서식이 유지된 HWPX 파일로 저장됩니다.` }],\n }\n }\n\n // ─── 일반 경로: parse → fill → output ───\n const result = await parse(buffer)\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\n isError: true,\n }\n }\n\n const formInfo = extractFormFields(result.blocks)\n const fillResult = fillFormFields(result.blocks, fields)\n\n if (fillResult.filled.length === 0 && formInfo.fields.length === 0) {\n return {\n content: [{ type: \"text\", text: `서식 필드를 찾을 수 없습니다. 일반 문서이거나 서식 패턴이 감지되지 않았습니다.` }],\n isError: true,\n }\n }\n\n const markdown = blocksToMarkdown(fillResult.blocks)\n const summary = [\n `채워진 필드: ${fillResult.filled.length}개`,\n fillResult.unmatched.length > 0 ? `매칭 실패: ${fillResult.unmatched.join(\", \")}` : null,\n formInfo.fields.length > 0 ? 
`서식 필드: ${formInfo.fields.length}개 (확신도 ${(formInfo.confidence * 100).toFixed(0)}%)` : null,\n ].filter(Boolean).join(\" | \")\n\n if (output_format === \"hwpx\") {\n const hwpxBuffer = await markdownToHwpx(markdown)\n if (output_path) {\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\n writeFileSync(resolve(output_path), Buffer.from(hwpxBuffer))\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\nHWPX 파일 저장: ${resolve(output_path)}` }],\n }\n }\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\n⚠️ output_path를 지정하면 HWPX 파일로 저장됩니다. 미리보기:\\n\\n${markdown}` }],\n }\n }\n\n // markdown\n if (output_path) {\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\n writeFileSync(resolve(output_path), markdown, \"utf-8\")\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\n마크다운 파일 저장: ${resolve(output_path)}\\n\\n${markdown}` }],\n }\n }\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\n${markdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 서버 시작 ───────────────────────────────────────\n\nasync function main() {\n const transport = new StdioServerTransport()\n await server.connect(transport)\n}\n\nmain().catch((err) => { console.error(err); process.exit(1) 
})\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;AAEA,SAAS,iBAAiB;AAC1B,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAClB,SAAS,cAAc,eAAe,cAAc,UAAU,UAAU,WAAW,UAAU,iBAAiB;AAC9G,SAAS,SAAS,YAAY,SAAS,eAAe;AAStD,IAAM,qBAAqB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAE9E,IAAM,gBAAgB,MAAM,OAAO;AAGnC,SAAS,SAAS,UAA0B;AAC1C,MAAI,CAAC,SAAU,OAAM,IAAI,YAAY,sEAAe;AACpD,QAAM,WAAW,QAAQ,QAAQ;AACjC,MAAI;AACJ,MAAI;AACF,WAAO,aAAa,QAAQ;AAAA,EAC9B,SAAS,KAAU;AACjB,QAAI,KAAK,SAAS,SAAU,OAAM,IAAI,YAAY,oEAAkB,QAAQ,EAAE;AAC9E,QAAI,KAAK,SAAS,YAAY,KAAK,SAAS,QAAS,OAAM,IAAI,YAAY,0EAAmB,QAAQ,EAAE;AACxG,UAAM,IAAI,YAAY,2CAAa,KAAK,QAAQ,SAAS,GAAG;AAAA,EAC9D;AACA,MAAI,CAAC,WAAW,IAAI,EAAG,OAAM,IAAI,YAAY,gEAAc;AAC3D,QAAM,MAAM,QAAQ,IAAI,EAAE,YAAY;AACtC,MAAI,CAAC,mBAAmB,IAAI,GAAG,EAAG,OAAM,IAAI,YAAY,+EAAmB,GAAG,mBAAS,CAAC,GAAG,kBAAkB,EAAE,KAAK,IAAI,CAAC,GAAG;AAC5H,SAAO;AACT;AAGA,IAAM,yBAAyB,KAAK,OAAO;AAG3C,SAAS,kBAAkB,UAAkB,UAAU,eAA0D;AAC/G,QAAM,WAAW,SAAS,QAAQ;AAClC,MAAI;AACJ,MAAI;AACF,eAAW,SAAS,QAAQ,EAAE;AAAA,EAChC,SAAS,KAAU;AACjB,UAAM,IAAI,YAAY,wDAAgB,KAAK,QAAQ,SAAS,MAAM,QAAQ,EAAE;AAAA,EAC9E;AACA,MAAI,WAAW,SAAS;AACtB,UAAM,IAAI,YAAY,wDAAgB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC,oBAAU,UAAU,OAAO,IAAI,KAAK;AAAA,EAC9G;AACA,MAAI;AACJ,MAAI;AACF,UAAM,aAAa,QAAQ;AAAA,EAC7B,SAAS,KAAU;AACjB,UAAM,IAAI,YAAY,2CAAa,KAAK,QAAQ,SAAS,MAAM,QAAQ,EAAE;AAAA,EAC3E;AACA,SAAO,EAAE,QAAQ,cAAc,GAAG,GAAG,SAAS;AAChD;AAGA,SAAS,uBAAuB,UAAmD;AACjF,QAAM,KAAK,SAAS,UAAU,GAAG;AACjC,MAAI;AACF,UAAM,YAAY,OAAO,MAAM,EAAE;AACjC,aAAS,IAAI,WAAW,GAAG,IAAI,CAAC;AAChC,WAAO,aAAa,cAAc,SAAS,CAAC;AAAA,EAC9C,UAAE;AACA,cAAU,EAAE;AAAA,EACd;AACF;AAEA,IAAM,SAAS,IAAI,UAAU;AAAA,EAC3B,MAAM;AAAA,EACN,SAAS;AACX,CAAC;AAID,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,EACvF;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,C
AAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,QAChD,OAAO,UAAU,QAAQ,iBAAO,OAAO,SAAS,KAAK,KAAK;AAAA,QAC1D,OAAO,UAAU,SAAS,uBAAQ,OAAO,SAAS,MAAM,KAAK;AAAA,QAC7D,OAAO,eAAe,uFAA2B;AAAA,MACnD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAG5B,YAAM,QAAkB,CAAC,IAAI,IAAI,GAAG;AAEpC,UAAI,OAAO,WAAW,OAAO,QAAQ,SAAS,GAAG;AAC/C,cAAM,cAAc,OAAO,QAAQ,IAAI,OAAK,GAAG,KAAK,OAAO,EAAE,QAAQ,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,KAAK,IAAI;AAC/F,cAAM,KAAK;AAAA;AAAA,EAAgB,WAAW,EAAE;AAAA,MAC1C;AAEA,UAAI,OAAO,YAAY,OAAO,SAAS,SAAS,GAAG;AACjD,cAAM,WAAW,OAAO,SAAS,IAAI,OAAK,OAAO,EAAE,QAAQ,GAAG,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,IAAI;AACzF,cAAM,KAAK;AAAA;AAAA,EAAa,QAAQ,EAAE;AAAA,MACpC;AAEA,YAAM,KAAK;AAAA;AAAA,EAAO,OAAO,QAAQ,EAAE;AAEnC,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,EAAE,EAAE,CAAC;AAAA,MAClD;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,iEAAe;AAAA,EACvD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAC9C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,MAAM,GAAG,CAAC;AAAA,MAC7D;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mHAAyB;AAAA,EACjE;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAE9C,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAGA,YAAM,EAAE,OAAO,IAAI,kBAAkB,WAAW,sBAAsB;AAEtE,UAAI;AAEJ,UAAI,kBAAkB;AACtB,UAAI,WAAW,QAAQ;AACrB,cAAM,EAAE,iB
AAAA,iBAAgB,IAAI,MAAM,OAAO,sBAAa;AACtD,cAAM,YAAY,MAAMA,iBAAgB,MAAM;AAC9C,YAAI,cAAc,UAAU,cAAc,OAAQ,mBAAkB;AAAA,MACtE;AACA,cAAQ,iBAAiB;AAAA,QACvB,KAAK;AACH,qBAAW,wBAAwB,OAAO,KAAK,MAAM,CAAC;AACtD;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,wBAAwB,MAAM;AAC/C;AAAA,QACF,KAAK;AACH,cAAI;AACF,kBAAM,EAAE,uBAAuB,IAAI,MAAM,OAAO,sBAAiB;AACjE,uBAAW,MAAM,uBAAuB,MAAM;AAAA,UAChD,QAAQ;AACN,uBAAW;AAAA,UACb;AACA;AAAA,QACF,KAAK;AAAA,QACL,KAAK,QAAQ;AAEX,gBAAM,SAAS,MAAM,MAAM,MAAM;AACjC,qBAAW,OAAO,UAAU,OAAO,WAAW;AAC9C;AAAA,QACF;AAAA,MACF;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,EAAE,QAAQ,GAAG,SAAS,GAAG,MAAM,CAAC,EAAE,CAAC;AAAA,MACpF;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4DAA8B;AAAA,EAClE;AAAA,EACA,OAAO,EAAE,WAAW,MAAM,MAAM;AAC9B,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,QAAQ,EAAE,MAAM,CAAC;AAE5C,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,iBAAO,KAAK;AAAA,QACZ,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,MAClD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,IAAI;AAAA;AAAA,EAAQ,OAAO,QAAQ,GAAG,CAAC;AAAA,MACrE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,SAAS,uFAAsB;AAAA,EACtE;AAAA,EACA,OAAO,EAAE,WAAW,YAAY,M
AAM;AACpC,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,cAAc,OAAO,OAAO,OAAO,OAAK,EAAE,SAAS,WAAW,EAAE,KAAK;AAC3E,UAAI,YAAY,WAAW,GAAG;AAC5B,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wEAAiB,CAAC;AAAA,UAClD,SAAS;AAAA,QACX;AAAA,MACF;AAEA,UAAI,eAAe,YAAY,QAAQ;AACrC,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,uDAAe,WAAW,YAAO,YAAY,MAAM,6BAAS,CAAC;AAAA,UAC7F,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,aAAa,YAAY,WAAW;AAC1C,YAAM,gBAAgB,iBAAiB,CAAC,UAAU,CAAC;AAEnD,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wBAAS,WAAW,aAAQ,YAAY,MAAM;AAAA;AAAA,EAAS,aAAa,GAAG,CAAC;AAAA,MAC1G;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,IACzD,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EAC3D;AAAA,EACA,OAAO,EAAE,aAAa,YAAY,MAAM;AACtC,QAAI;AACF,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AACtD,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AAEtD,YAAM,SAAS,MAAM,QAAQ,MAAM,IAAI;AACvC,YAAM,EAAE,OAAO,MAAM,IAAI;AAEzB,YAAM,QAAkB;AAAA,QACtB;AAAA,QACA,iBAAO,MAAM,KAAK,oBAAU,MAAM,OAAO,oBAAU,MAAM,QAAQ,oBAAU,MAAM,SAAS;AAAA,QAC1F;AAAA,MACF;AAEA,iBAAW,KAAK,OAAO;AACrB,cAAM,SAAS,EAAE,SAAS,UAAU,MAAM,EAAE,SAAS,YAAY,MAAM,EAAE,SAAS,aAAa,MAAM;AACrG,cAAM,OAAO,EAAE,OAAO,QAAQ,EAAE,QAAQ,SAAS,EAAE,OAAO,QAAQ,yBAAU,EAAE,QAAQ,QAAQ,yBAAU;AACxG,cAAM,MAAM,EAAE,eAAe,SAAY,MAAM,EAAE,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AACpF,cAAM,KAAK,GAAG,MAAM,IAAI,KAAK,UAAU,GAAG,GAAG,CAAC,GAAG,GAAG,EAAE;AAAA,MACxD;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,IAAI,EAAE,CAAC;AAAA,MACpD;AAAA,IACF,SAAS,KAAK;AACZ,aAA
O;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EACzD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO,kBAAkB,OAAO,MAAM;AAC5C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,MAAM,MAAM,CAAC,EAAE,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,IACrF,QAAQ,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAAE,SAAS,4JAAqE;AAAA,IACvH,eAAe,EAAE,KAAK,CAAC,YAAY,QAAQ,eAAe,CAAC,EAAE,QAAQ,eAAe,EAAE,SAAS,uJAAuE;AAAA,IACtK,aAAa,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0LAA8C;AAAA,EAC5F;AAAA,EACA,OAAO,EAAE,WAAW,QAAQ,eAAe,YAAY,MAAM;AAC3D,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAG9C,UAAI,kBAAkB,iBAAiB;AACrC,cAAM,SAAS,aAAa,MAAM;AAClC,YAAI,SAAS,WAAW;AACxB,YAAI,QAAQ;AACV,gBAAM,YAAY,MAAM,gBAAgB,MAAM;AAC9C,mBAAS,cAAc;AAAA,QACzB;AACA,YAAI,CAAC,QAAQ;AACX,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,gHAA0C,MAAM,sEAA8B,CAAC;AAAA,YAC/G,SAAS;AAAA,UACX;AAAA,QACF;AAEA,cAAM,aAAa,MAAM,SAAS,QAAQ,MAAM;AAChD,cAAMC,WAAU;AAAA,UACd,oCAAW,WAAW,OAAO,MAAM;AAAA,UACnC,WAAW,UAAU,SAAS,IAAI,8BAAU,WAAW,UAAU,KAAK,IAAI,CAAC,KAAK;AAAA,QAClF,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,cAAM,aAAa,WAAW,OAAO,IAAI,OAAK,OAAO,EAAE,KAAK,KAAK,EAAE,KAAK,EAAE,EAAE,KAAK,IAAI;AAErF,YAAI,aAAa;AACf,oBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,wBAAc,QAAQ,WAAW,GAAG,OAAO,KAAK,WAAW,MAAM,CAAC;AAClE,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAIA,QAAO;AAAA;AAAA;AAAA,EAAiB,UAAU;AAAA;AAAA,2EAA8B,QAAQ,WAAW,CAAC,GAAG,CAAC;AAAA,UAC9H;AAAA,Q
ACF;AAEA,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAIA,QAAO;AAAA;AAAA;AAAA,EAAiB,UAAU;AAAA;AAAA,oKAAsD,CAAC;AAAA,QAC/H;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,MAAM,MAAM;AACjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,WAAW,kBAAkB,OAAO,MAAM;AAChD,YAAM,aAAa,eAAe,OAAO,QAAQ,MAAM;AAEvD,UAAI,WAAW,OAAO,WAAW,KAAK,SAAS,OAAO,WAAW,GAAG;AAClE,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,qNAAgD,CAAC;AAAA,UACjF,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,WAAW,iBAAiB,WAAW,MAAM;AACnD,YAAM,UAAU;AAAA,QACd,oCAAW,WAAW,OAAO,MAAM;AAAA,QACnC,WAAW,UAAU,SAAS,IAAI,8BAAU,WAAW,UAAU,KAAK,IAAI,CAAC,KAAK;AAAA,QAChF,SAAS,OAAO,SAAS,IAAI,8BAAU,SAAS,OAAO,MAAM,+BAAW,SAAS,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AAAA,MACtH,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,UAAI,kBAAkB,QAAQ;AAC5B,cAAM,aAAa,MAAM,eAAe,QAAQ;AAChD,YAAI,aAAa;AACf,oBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,wBAAc,QAAQ,WAAW,GAAG,OAAO,KAAK,UAAU,CAAC;AAC3D,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,kCAAoB,QAAQ,WAAW,CAAC,GAAG,CAAC;AAAA,UACzF;AAAA,QACF;AACA,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA;AAAA;AAAA,EAAsD,QAAQ,GAAG,CAAC;AAAA,QAC/G;AAAA,MACF;AAGA,UAAI,aAAa;AACf,kBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,sBAAc,QAAQ,WAAW,GAAG,UAAU,OAAO;AACrD,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,sDAAoB,QAAQ,WAAW,CAAC;AAAA;AAAA,EAAO,QAAQ,GAAG,CAAC;AAAA,QACxG;AAAA,MACF;AACA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,EAAQ,QAAQ,GAAG,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,eAAe,OAAO;AACpB,QAAM,YAAY,IAAI,qBAAqB;AAC3C,QAAM,OAAO,QAAQ,SAAS;AAChC;AAEA,KAAK,EAAE,MAAM,CAAC,QAAQ;AAAE,UAAQ,MAAM,GAAG;AAAG,UAAQ,KAAK,CAAC;AAAE,CAAC;","names":["detectZipFormat","summary"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["
|
|
1
|
+
{"version":3,"sources":["/Users/mong-e/workspace/kordoc/dist/page-range-3C7UGGEK.cjs"],"names":[],"mappings":"AAAA;AACE;AACF,wDAA6B;AAC7B;AACE;AACF,0DAAC","file":"/Users/mong-e/workspace/kordoc/dist/page-range-3C7UGGEK.cjs"}
|
|
File without changes
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
var
|
|
9
|
+
var _chunkY476BOHIcjs = require('./chunk-Y476BOHI.cjs');
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -1179,7 +1179,7 @@ async function loadPdfWithTimeout(buffer) {
|
|
|
1179
1179
|
new Promise((_, reject) => {
|
|
1180
1180
|
timer = setTimeout(() => {
|
|
1181
1181
|
loadingTask.destroy();
|
|
1182
|
-
reject(new (0,
|
|
1182
|
+
reject(new (0, _chunkY476BOHIcjs.KordocError)("PDF \uB85C\uB529 \uD0C0\uC784\uC544\uC6C3 (30\uCD08 \uCD08\uACFC)"));
|
|
1183
1183
|
}, PDF_LOAD_TIMEOUT_MS);
|
|
1184
1184
|
})
|
|
1185
1185
|
]);
|
|
@@ -1188,10 +1188,11 @@ async function loadPdfWithTimeout(buffer) {
|
|
|
1188
1188
|
}
|
|
1189
1189
|
}
|
|
1190
1190
|
async function parsePdfDocument(buffer, options) {
|
|
1191
|
+
const formulaBuffer = _optionalChain([options, 'optionalAccess', _10 => _10.formulaOcr]) ? buffer.slice(0) : null;
|
|
1191
1192
|
const doc = await loadPdfWithTimeout(buffer);
|
|
1192
1193
|
try {
|
|
1193
1194
|
const pageCount = doc.numPages;
|
|
1194
|
-
if (pageCount === 0) throw new (0,
|
|
1195
|
+
if (pageCount === 0) throw new (0, _chunkY476BOHIcjs.KordocError)("PDF\uC5D0 \uD398\uC774\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
1195
1196
|
const metadata = { pageCount };
|
|
1196
1197
|
await extractPdfMetadata(doc, metadata);
|
|
1197
1198
|
const blocks = [];
|
|
@@ -1199,7 +1200,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1199
1200
|
let totalChars = 0;
|
|
1200
1201
|
let totalTextBytes = 0;
|
|
1201
1202
|
const effectivePageCount = Math.min(pageCount, MAX_PAGES);
|
|
1202
|
-
const pageFilter = _optionalChain([options, 'optionalAccess',
|
|
1203
|
+
const pageFilter = _optionalChain([options, 'optionalAccess', _11 => _11.pages]) ? _chunkMUOQXDZ4cjs.parsePageRange.call(void 0, options.pages, effectivePageCount) : null;
|
|
1203
1204
|
const totalTarget = pageFilter ? pageFilter.size : effectivePageCount;
|
|
1204
1205
|
const fontSizeFreq = /* @__PURE__ */ new Map();
|
|
1205
1206
|
const pageHeights = /* @__PURE__ */ new Map();
|
|
@@ -1228,19 +1229,19 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1228
1229
|
totalChars += t.replace(/\s/g, "").length;
|
|
1229
1230
|
totalTextBytes += t.length * 2;
|
|
1230
1231
|
}
|
|
1231
|
-
if (totalTextBytes > MAX_TOTAL_TEXT) throw new (0,
|
|
1232
|
+
if (totalTextBytes > MAX_TOTAL_TEXT) throw new (0, _chunkY476BOHIcjs.KordocError)("\uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uD06C\uAE30 \uCD08\uACFC");
|
|
1232
1233
|
parsedPages++;
|
|
1233
|
-
_optionalChain([options, 'optionalAccess',
|
|
1234
|
+
_optionalChain([options, 'optionalAccess', _12 => _12.onProgress, 'optionalCall', _13 => _13(parsedPages, totalTarget)]);
|
|
1234
1235
|
} catch (pageErr) {
|
|
1235
|
-
if (pageErr instanceof
|
|
1236
|
+
if (pageErr instanceof _chunkY476BOHIcjs.KordocError) throw pageErr;
|
|
1236
1237
|
warnings.push({ page: i, message: `\uD398\uC774\uC9C0 ${i} \uD30C\uC2F1 \uC2E4\uD328: ${pageErr instanceof Error ? pageErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
1237
1238
|
}
|
|
1238
1239
|
}
|
|
1239
1240
|
const parsedPageCount = parsedPages || (pageFilter ? pageFilter.size : effectivePageCount);
|
|
1240
1241
|
if (totalChars / Math.max(parsedPageCount, 1) < 10) {
|
|
1241
|
-
if (_optionalChain([options, 'optionalAccess',
|
|
1242
|
+
if (_optionalChain([options, 'optionalAccess', _14 => _14.ocr])) {
|
|
1242
1243
|
try {
|
|
1243
|
-
const { ocrPages } = await Promise.resolve().then(() => _interopRequireWildcard(require("./provider-
|
|
1244
|
+
const { ocrPages } = await Promise.resolve().then(() => _interopRequireWildcard(require("./provider-SNONEZNW.cjs")));
|
|
1244
1245
|
const ocrBlocks = await ocrPages(doc, options.ocr, pageFilter, effectivePageCount);
|
|
1245
1246
|
if (ocrBlocks.length > 0) {
|
|
1246
1247
|
const ocrMarkdown = ocrBlocks.map((b) => b.text || "").filter(Boolean).join("\n\n");
|
|
@@ -1249,21 +1250,31 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1249
1250
|
} catch (e2) {
|
|
1250
1251
|
}
|
|
1251
1252
|
}
|
|
1252
|
-
throw Object.assign(new (0,
|
|
1253
|
+
throw Object.assign(new (0, _chunkY476BOHIcjs.KordocError)(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`), { isImageBased: true });
|
|
1253
1254
|
}
|
|
1254
|
-
if (_optionalChain([options, 'optionalAccess',
|
|
1255
|
+
if (_optionalChain([options, 'optionalAccess', _15 => _15.removeHeaderFooter]) !== false && parsedPageCount >= 3) {
|
|
1255
1256
|
const removed = removeHeaderFooterBlocks(blocks, pageHeights, warnings);
|
|
1256
1257
|
for (let ri = removed.length - 1; ri >= 0; ri--) {
|
|
1257
1258
|
blocks.splice(removed[ri], 1);
|
|
1258
1259
|
}
|
|
1259
1260
|
}
|
|
1261
|
+
if (_optionalChain([options, 'optionalAccess', _16 => _16.formulaOcr]) && formulaBuffer) {
|
|
1262
|
+
try {
|
|
1263
|
+
await applyFormulaOcr(formulaBuffer, blocks, pageFilter, effectivePageCount, warnings, options.onProgress);
|
|
1264
|
+
} catch (e) {
|
|
1265
|
+
warnings.push({
|
|
1266
|
+
message: `\uC218\uC2DD OCR \uC2E4\uD328: ${e instanceof Error ? e.message : String(e)}`,
|
|
1267
|
+
code: "PARTIAL_PARSE"
|
|
1268
|
+
});
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
1260
1271
|
const medianFontSize = computeMedianFontSizeFromFreq(fontSizeFreq);
|
|
1261
1272
|
if (medianFontSize > 0) {
|
|
1262
1273
|
detectHeadings(blocks, medianFontSize);
|
|
1263
1274
|
}
|
|
1264
1275
|
detectMarkerHeadings(blocks);
|
|
1265
1276
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
1266
|
-
let markdown = cleanPdfText(
|
|
1277
|
+
let markdown = cleanPdfText(_chunkY476BOHIcjs.blocksToMarkdown.call(void 0, blocks));
|
|
1267
1278
|
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0 };
|
|
1268
1279
|
} finally {
|
|
1269
1280
|
await doc.destroy().catch(() => {
|
|
@@ -1273,7 +1284,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1273
1284
|
async function extractPdfMetadata(doc, metadata) {
|
|
1274
1285
|
try {
|
|
1275
1286
|
const result = await doc.getMetadata();
|
|
1276
|
-
if (!_optionalChain([result, 'optionalAccess',
|
|
1287
|
+
if (!_optionalChain([result, 'optionalAccess', _17 => _17.info])) return;
|
|
1277
1288
|
const info = result.info;
|
|
1278
1289
|
if (typeof info.Title === "string" && info.Title.trim()) metadata.title = info.Title.trim();
|
|
1279
1290
|
if (typeof info.Author === "string" && info.Author.trim()) metadata.author = info.Author.trim();
|
|
@@ -1336,15 +1347,15 @@ function computeMedianFontSizeFromFreq(freq) {
|
|
|
1336
1347
|
}
|
|
1337
1348
|
function detectHeadings(blocks, medianFontSize) {
|
|
1338
1349
|
for (const block of blocks) {
|
|
1339
|
-
if (block.type !== "paragraph" || !block.text || !_optionalChain([block, 'access',
|
|
1350
|
+
if (block.type !== "paragraph" || !block.text || !_optionalChain([block, 'access', _18 => _18.style, 'optionalAccess', _19 => _19.fontSize])) continue;
|
|
1340
1351
|
const text = block.text.trim();
|
|
1341
1352
|
if (text.length === 0 || text.length > 200) continue;
|
|
1342
1353
|
if (/^\d+$/.test(text)) continue;
|
|
1343
1354
|
const ratio = block.style.fontSize / medianFontSize;
|
|
1344
1355
|
let level = 0;
|
|
1345
|
-
if (ratio >=
|
|
1346
|
-
else if (ratio >=
|
|
1347
|
-
else if (ratio >=
|
|
1356
|
+
if (ratio >= _chunkY476BOHIcjs.HEADING_RATIO_H1) level = 1;
|
|
1357
|
+
else if (ratio >= _chunkY476BOHIcjs.HEADING_RATIO_H2) level = 2;
|
|
1358
|
+
else if (ratio >= _chunkY476BOHIcjs.HEADING_RATIO_H3) level = 3;
|
|
1348
1359
|
if (level > 0) {
|
|
1349
1360
|
block.type = "heading";
|
|
1350
1361
|
block.level = level;
|
|
@@ -1404,7 +1415,7 @@ function detectMarkerHeadings(blocks) {
|
|
|
1404
1415
|
block.level = 4;
|
|
1405
1416
|
continue;
|
|
1406
1417
|
}
|
|
1407
|
-
if (/^[가-힣]{2,6}$/.test(text) && _optionalChain([block, 'access',
|
|
1418
|
+
if (/^[가-힣]{2,6}$/.test(text) && _optionalChain([block, 'access', _20 => _20.style, 'optionalAccess', _21 => _21.fontSize])) {
|
|
1408
1419
|
const prev = blocks[i - 1];
|
|
1409
1420
|
const next = blocks[i + 1];
|
|
1410
1421
|
const prevIsStructural = !prev || prev.type === "table" || prev.type === "heading" || prev.type === "separator";
|
|
@@ -1592,7 +1603,7 @@ function extractBlocksWithGrids(items, pageNum, grids, horizontals, verticals) {
|
|
|
1592
1603
|
}
|
|
1593
1604
|
if (remaining.length > 0) {
|
|
1594
1605
|
const allY = remaining.map((i) => i.y);
|
|
1595
|
-
const pageH =
|
|
1606
|
+
const pageH = _chunkY476BOHIcjs.safeMax.call(void 0, allY) - _chunkY476BOHIcjs.safeMin.call(void 0, allY);
|
|
1596
1607
|
const groups = xyCutOrder(remaining, Math.max(15, pageH * 0.03));
|
|
1597
1608
|
const textBlocks = [];
|
|
1598
1609
|
for (const group of groups) {
|
|
@@ -1680,7 +1691,7 @@ function extractPageBlocksFallback(items, pageNum) {
|
|
|
1680
1691
|
blocks.push({ type: "paragraph", text: tableText, pageNumber: pageNum, bbox, style: dominantStyle(items) });
|
|
1681
1692
|
} else {
|
|
1682
1693
|
const allY = items.map((i) => i.y);
|
|
1683
|
-
const pageHeight =
|
|
1694
|
+
const pageHeight = _chunkY476BOHIcjs.safeMax.call(void 0, allY) - _chunkY476BOHIcjs.safeMin.call(void 0, allY);
|
|
1684
1695
|
const gapThreshold = Math.max(15, pageHeight * 0.03);
|
|
1685
1696
|
const orderedGroups = xyCutOrder(items, gapThreshold);
|
|
1686
1697
|
for (const group of orderedGroups) {
|
|
@@ -1729,7 +1740,7 @@ function dominantStyle(items) {
|
|
|
1729
1740
|
}
|
|
1730
1741
|
}
|
|
1731
1742
|
if (dominantSize === 0) return void 0;
|
|
1732
|
-
const fontName = _optionalChain([items, 'access',
|
|
1743
|
+
const fontName = _optionalChain([items, 'access', _22 => _22.find, 'call', _23 => _23((i) => i.fontSize === dominantSize), 'optionalAccess', _24 => _24.fontName]) || void 0;
|
|
1733
1744
|
return { fontSize: dominantSize, fontName };
|
|
1734
1745
|
}
|
|
1735
1746
|
function normalizeItems(rawItems) {
|
|
@@ -1827,14 +1838,14 @@ function isProseSpread(items) {
|
|
|
1827
1838
|
for (let i = 1; i < sorted.length; i++) {
|
|
1828
1839
|
gaps.push(sorted[i].x - (sorted[i - 1].x + sorted[i - 1].w));
|
|
1829
1840
|
}
|
|
1830
|
-
const maxGap =
|
|
1841
|
+
const maxGap = _chunkY476BOHIcjs.safeMax.call(void 0, gaps);
|
|
1831
1842
|
const avgLen = items.reduce((s, i) => s + i.text.length, 0) / items.length;
|
|
1832
1843
|
return maxGap < 40 && avgLen < 5;
|
|
1833
1844
|
}
|
|
1834
1845
|
function detectColumns(yLines) {
|
|
1835
1846
|
const allItems = yLines.flat();
|
|
1836
1847
|
if (allItems.length === 0) return null;
|
|
1837
|
-
const pageWidth =
|
|
1848
|
+
const pageWidth = _chunkY476BOHIcjs.safeMax.call(void 0, allItems.map((i) => i.x + i.w)) - _chunkY476BOHIcjs.safeMin.call(void 0, allItems.map((i) => i.x));
|
|
1838
1849
|
if (pageWidth < 100) return null;
|
|
1839
1850
|
let bigoLineIdx = -1;
|
|
1840
1851
|
for (let i = 0; i < yLines.length; i++) {
|
|
@@ -2034,7 +2045,7 @@ function buildGridTable(lines, columns) {
|
|
|
2034
2045
|
return md.join("\n");
|
|
2035
2046
|
}
|
|
2036
2047
|
function mergeLineSimple(items) {
|
|
2037
|
-
if (items.length <= 1) return _optionalChain([items, 'access',
|
|
2048
|
+
if (items.length <= 1) return _optionalChain([items, 'access', _25 => _25[0], 'optionalAccess', _26 => _26.text]) || "";
|
|
2038
2049
|
const sorted = [...items].sort((a, b) => a.x - b.x);
|
|
2039
2050
|
const isEvenSpaced = detectEvenSpacedItems(sorted);
|
|
2040
2051
|
let result = sorted[0].text;
|
|
@@ -2071,7 +2082,10 @@ function mergeLineSimple(items) {
|
|
|
2071
2082
|
/**
 * Post-processes text/Markdown extracted from a PDF.
 *
 * Steps, in order:
 *  1. strip page-number artefacts,
 *  2. pass the result through `mergeKoreanLines`,
 *  3. run `collapseEvenSpacing` on every line except table separator rows
 *     (lines starting with "| ---") and lines wholly wrapped in `$`/`$$`,
 *  4. join a bullet marker's first two spaced Hangul syllables, squeeze runs
 *     of three or more newlines down to two, and trim.
 *
 * @param {string} text Raw extracted text.
 * @returns {string} The cleaned text.
 */
function cleanPdfText(text) {
  const withoutPageNumbers = text
    // A bare 1-4 digit number on the very first line of the text.
    .replace(/^\d{1,4}\n/, "")
    // Dash-decorated number lines such as "- 3 -".
    .replace(/^[\s]*[-–—]\s*[-–—]?\d+[-–—]?[\s]*[-–—]?[\s]*$/gm, "")
    // "3 / 10" style lines.
    .replace(/^\s*\d+\s*\/\s*\d+\s*$/gm, "")
    // A bare 1-4 digit number alone on an inner line.
    .replace(/\n\d{1,4}\n/g, "\n")
    // A bare 1-4 digit number at the very end of the text.
    .replace(/\n\d{1,4}$/, "")
    // A Markdown heading whose whole content is a 1-4 digit number.
    .replace(/^#{1,6}\s*\d{1,4}\s*$/gm, "");

  const merged = mergeKoreanLines(withoutPageNumbers);

  const spacingCollapsed = merged.replace(/^(?!\| ---).*$/gm, (line) => {
    // Lines that are entirely a `$…$` / `$$…$$` span are returned untouched so
    // that collapseEvenSpacing never rewrites formula content.
    if (/^\s*\${1,2}.+\${1,2}\s*$/.test(line)) return line;
    return collapseEvenSpacing(line);
  });

  return spacingCollapsed
    // Marker + two Hangul syllables separated by whitespace -> "marker XY".
    .replace(/([□■◆○●▶ㅇ])\s+([가-힣])\s+([가-힣])/g, "$1 $2$3")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
|
|
2076
2090
|
function startsWithMarker(line) {
|
|
2077
2091
|
const t = line.trimStart();
|
|
@@ -2194,7 +2208,7 @@ function removeHeaderFooterBlocks(blocks, pageHeights, warnings) {
|
|
|
2194
2208
|
const bottomEntries = [];
|
|
2195
2209
|
for (let bi = 0; bi < blocks.length; bi++) {
|
|
2196
2210
|
const b = blocks[bi];
|
|
2197
|
-
if (!b.bbox || !b.pageNumber || !_optionalChain([b, 'access',
|
|
2211
|
+
if (!b.bbox || !b.pageNumber || !_optionalChain([b, 'access', _27 => _27.text, 'optionalAccess', _28 => _28.trim, 'call', _29 => _29()])) continue;
|
|
2198
2212
|
const ph = pageHeights.get(b.bbox.page) || pageHeights.get(b.pageNumber);
|
|
2199
2213
|
if (!ph) continue;
|
|
2200
2214
|
const blockTop = ph - (b.bbox.y + b.bbox.height);
|
|
@@ -2217,7 +2231,7 @@ function removeHeaderFooterBlocks(blocks, pageHeights, warnings) {
|
|
|
2217
2231
|
}
|
|
2218
2232
|
const repeatedPatterns = /* @__PURE__ */ new Set();
|
|
2219
2233
|
for (const [p, count] of patternCount) {
|
|
2220
|
-
if (count >= MIN_REPEAT && (_nullishCoalesce(_optionalChain([patternPages, 'access',
|
|
2234
|
+
if (count >= MIN_REPEAT && (_nullishCoalesce(_optionalChain([patternPages, 'access', _30 => _30.get, 'call', _31 => _31(p), 'optionalAccess', _32 => _32.size]), () => ( 0))) >= MIN_REPEAT) {
|
|
2221
2235
|
repeatedPatterns.add(p);
|
|
2222
2236
|
}
|
|
2223
2237
|
}
|
|
@@ -2274,9 +2288,127 @@ function mergeKoreanLines(text) {
|
|
|
2274
2288
|
}
|
|
2275
2289
|
return result.join("\n");
|
|
2276
2290
|
}
|
|
2291
|
+
/**
 * Runs the optional formula-OCR pipeline on a PDF and merges the recognised
 * formulas into `blocks`, which is mutated in place.
 *
 * For each page returned by the pipeline:
 *  1. every region with non-blank LaTeX becomes a paragraph block whose text
 *     is `$…$` (or `$$…$$` when `kind === "display"`), with its bbox scaled
 *     by pdf/rendered size and its y axis flipped against `pdfHeight`;
 *  2. existing non-table blocks on that page whose bbox is covered at least
 *     60% by one formula region are removed as duplicates;
 *  3. each formula block is inserted before the first block on the page
 *     whose vertical centre has a smaller y than the formula's centre.
 *
 * Model-download progress and a final summary are written to stderr.
 * Errors are not caught here; they propagate to the caller.
 *
 * @param {*} buffer PDF data, handed unchanged to `pipeline.runOnBuffer`.
 * @param {Array<object>} blocks Parsed blocks; modified in place.
 * @param {*} pageFilter Handed unchanged to `pipeline.runOnBuffer`.
 * @param {*} effectivePageCount Currently unused.
 * @param {*} warnings Currently unused.
 * @param {*} _onProgress Currently unused.
 * @returns {Promise<void>}
 */
async function applyFormulaOcr(buffer, blocks, pageFilter, effectivePageCount, warnings, _onProgress) {
  // Minimum share of a block's own area that a formula must cover for the
  // block to be dropped as a duplicate.
  const OVERLAP_THRESHOLD = 0.6;

  // Loaded lazily so the formula module is only required when this runs.
  const formulaMod = await Promise.resolve().then(() => _interopRequireWildcard(require("./formula-XGG6ZP42.cjs")));
  const { FormulaPipeline, ensureFormulaModels } = formulaMod;

  await ensureFormulaModels((p) => {
    if (p.phase === "download" && p.total) {
      const pct = Math.floor(p.downloaded / p.total * 100);
      // "\r" rewrites the same terminal line while the download progresses.
      process.stderr.write(`\r[kordoc-formula] ${p.spec.name} ${pct}% (${formatMb(p.downloaded)}/${formatMb(p.total)})`);
      if (p.downloaded >= p.total) process.stderr.write("\n");
    } else if (p.phase === "verify") {
      process.stderr.write(`[kordoc-formula] ${p.spec.name} SHA-256 \uAC80\uC99D \uC911...\n`);
    } else if (p.phase === "done") {
      process.stderr.write(`[kordoc-formula] ${p.spec.name} \uC900\uBE44 \uC644\uB8CC\n`);
    }
    // Any other phase (e.g. "skip") produces no output.
  });

  const pipeline = await FormulaPipeline.create();
  try {
    const pagesResult = await pipeline.runOnBuffer(buffer, pageFilter);
    if (pagesResult.length === 0) return;

    let insertedCount = 0;
    let removedDupCount = 0;

    for (const page of pagesResult) {
      const { pageNumber, pdfHeight } = page;
      // Falls back to 0.5 when the rendered size is missing or non-positive.
      const scaleX = page.renderedWidth > 0 ? page.pdfWidth / page.renderedWidth : 0.5;
      const scaleY = page.renderedHeight > 0 ? page.pdfHeight / page.renderedHeight : 0.5;

      // --- 1. Build formula blocks in PDF coordinates -----------------------
      const candidates = [];
      for (const r of page.regions) {
        if (!r.latex || !r.latex.trim()) continue;
        const wrapped = r.kind === "display" ? `$$${r.latex}$$` : `$${r.latex}$`;
        const x1 = r.bbox.x1 * scaleX;
        const x2 = r.bbox.x2 * scaleX;
        // The y axis is flipped against pdfHeight.
        // NOTE(review): assumes region bboxes are top-left-origin — confirm.
        const yTop = pdfHeight - r.bbox.y1 * scaleY;
        const yBottom = pdfHeight - r.bbox.y2 * scaleY;
        candidates.push({
          block: {
            type: "paragraph",
            text: wrapped,
            pageNumber,
            bbox: { page: pageNumber, x: x1, y: yBottom, width: x2 - x1, height: yTop - yBottom }
          },
          pdfBbox: { x1, x2, yTop, yBottom },
          centerY: (yTop + yBottom) / 2
        });
      }
      if (candidates.length === 0) continue;

      // --- 2. Drop existing blocks that a formula region mostly covers ------
      const indicesToRemove = /* @__PURE__ */ new Set();
      for (let i = 0; i < blocks.length; i++) {
        const b = blocks[i];
        if (b.pageNumber !== pageNumber) continue;
        if (b.type === "table") continue;
        if (!b.bbox || b.bbox.width <= 0 || b.bbox.height <= 0) continue;
        const blockArea = b.bbox.width * b.bbox.height;
        if (blockArea <= 0) continue;
        for (const c of candidates) {
          const ox1 = Math.max(b.bbox.x, c.pdfBbox.x1);
          const ox2 = Math.min(b.bbox.x + b.bbox.width, c.pdfBbox.x2);
          const oy1 = Math.max(b.bbox.y, c.pdfBbox.yBottom);
          const oy2 = Math.min(b.bbox.y + b.bbox.height, c.pdfBbox.yTop);
          const interArea = Math.max(0, ox2 - ox1) * Math.max(0, oy2 - oy1);
          if (interArea / blockArea >= OVERLAP_THRESHOLD) {
            indicesToRemove.add(i);
            break;
          }
        }
      }
      if (indicesToRemove.size > 0) {
        // Splice from the highest index down so earlier indices stay valid.
        const sorted = [...indicesToRemove].sort((a, b) => b - a);
        for (const idx of sorted) blocks.splice(idx, 1);
        removedDupCount += indicesToRemove.size;
      }

      // --- 3. Insert formulas, largest centre-y first -----------------------
      candidates.sort((a, b) => b.centerY - a.centerY);
      for (const c of candidates) {
        let insertIdx = -1;
        let pageLastIdx = -1;
        let nextPageIdx = -1;
        for (let i = 0; i < blocks.length; i++) {
          const b = blocks[i];
          if (b.pageNumber !== pageNumber) {
            // Remember where the first later page starts; used only when this
            // page has no blocks at all.
            if (nextPageIdx === -1 && typeof b.pageNumber === "number" && b.pageNumber > pageNumber) {
              nextPageIdx = i;
            }
            continue;
          }
          pageLastIdx = i;
          if (!b.bbox) continue;
          const blockCenter = b.bbox.y + b.bbox.height / 2;
          if (blockCenter < c.centerY) {
            insertIdx = i;
            break;
          }
        }
        if (insertIdx !== -1) {
          blocks.splice(insertIdx, 0, c.block);
        } else if (pageLastIdx !== -1) {
          blocks.splice(pageLastIdx + 1, 0, c.block);
        } else if (nextPageIdx !== -1) {
          // The page has no blocks left (none parsed, or all removed above).
          // Insert ahead of the next page's blocks instead of appending to the
          // end of the document, so page order is kept.
          // NOTE(review): assumes `blocks` is ordered by pageNumber — confirm.
          blocks.splice(nextPageIdx, 0, c.block);
        } else {
          blocks.push(c.block);
        }
        insertedCount++;
      }
    }

    if (insertedCount > 0 || removedDupCount > 0) {
      process.stderr.write(
        `[kordoc-formula] ${insertedCount}\uAC1C \uC218\uC2DD \uC0BD\uC785, ${removedDupCount}\uAC1C \uC911\uBCF5 block \uC81C\uAC70 (${pagesResult.length}\uAC1C \uD398\uC774\uC9C0)\n`
      );
    }
  } finally {
    // Best-effort cleanup: a failing destroy() must not mask the real outcome.
    await pipeline.destroy().catch(() => {
    });
  }
}
|
|
2406
|
+
/**
 * Formats a byte count as mebibytes with one decimal place.
 *
 * @param {number} bytes Size in bytes.
 * @returns {string} For example "1.5MB" for 1572864.
 */
function formatMb(bytes) {
  const megabytes = bytes / 1024 / 1024;
  return `${megabytes.toFixed(1)}MB`;
}
|
|
2277
2409
|
|
|
2278
2410
|
|
|
2279
2411
|
|
|
2280
2412
|
|
|
2281
2413
|
exports.cleanPdfText = cleanPdfText; exports.extractPdfMetadataOnly = extractPdfMetadataOnly; exports.parsePdfDocument = parsePdfDocument;
|
|
2282
|
-
//# sourceMappingURL=parser-
|
|
2414
|
+
//# sourceMappingURL=parser-7OFQ67QL.cjs.map
|