kordoc 2.8.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +469 -450
- package/dist/{chunk-3QA624ON.js → chunk-M24KMDAR.js} +6 -6
- package/dist/chunk-M24KMDAR.js.map +1 -0
- package/dist/{chunk-5CJGKKMZ.js → chunk-MEPHGCPQ.js} +1 -1
- package/dist/chunk-MEPHGCPQ.js.map +1 -0
- package/dist/chunk-MOL7MDBG.js +0 -0
- package/dist/chunk-MUOQXDZ4.cjs.map +1 -1
- package/dist/{chunk-HXWPJPRO.cjs → chunk-QB7CS534.cjs} +2 -2
- package/dist/chunk-QB7CS534.cjs.map +1 -0
- package/dist/{chunk-DLQY6FJH.js → chunk-RXZLTACX.js} +2 -2
- package/dist/chunk-RXZLTACX.js.map +1 -0
- package/dist/{chunk-XSF3N6GU.js → chunk-SJ5TPMBT.js} +2 -2
- package/dist/chunk-SJ5TPMBT.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/cli.js.map +1 -1
- package/dist/{detect-PJZMUL2Z.js → detect-RI2MQ33K.js} +2 -2
- package/dist/formula-JCNF43NE.js +0 -0
- package/dist/formula-XGG6ZP42.cjs.map +1 -1
- package/dist/index.cjs +99 -99
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +28 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.js +4 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +5 -5
- package/dist/mcp.js.map +1 -1
- package/dist/page-range-3C7UGGEK.cjs.map +1 -1
- package/dist/page-range-737B4EZW.js +0 -0
- package/dist/{parser-LKF6PGPD.cjs → parser-EL5YETUA.cjs} +159 -19
- package/dist/parser-EL5YETUA.cjs.map +1 -0
- package/dist/{parser-ZQQM6J7T.js → parser-OMPBVEFU.js} +146 -6
- package/dist/parser-OMPBVEFU.js.map +1 -0
- package/dist/{parser-UCO6WPUW.js → parser-XBYGROQB.js} +146 -6
- package/dist/parser-XBYGROQB.js.map +1 -0
- package/dist/{provider-WPIYEALY.js → provider-2SEHU2FM.js} +1 -1
- package/dist/provider-2SEHU2FM.js.map +1 -0
- package/dist/{provider-7H4CPZYS.js → provider-AKROB7WQ.js} +1 -1
- package/dist/provider-AKROB7WQ.js.map +1 -0
- package/dist/{provider-YN2SSK4X.cjs → provider-SNONEZNW.cjs} +1 -1
- package/dist/provider-SNONEZNW.cjs.map +1 -0
- package/dist/setup-57FB3LSP.js +0 -0
- package/dist/{watch-MRHNFJPC.js → watch-ULLLK7ID.js} +4 -4
- package/dist/watch-ULLLK7ID.js.map +1 -0
- package/package.json +98 -98
- package/dist/chunk-3QA624ON.js.map +0 -1
- package/dist/chunk-5CJGKKMZ.js.map +0 -1
- package/dist/chunk-DLQY6FJH.js.map +0 -1
- package/dist/chunk-HXWPJPRO.cjs.map +0 -1
- package/dist/chunk-XSF3N6GU.js.map +0 -1
- package/dist/parser-LKF6PGPD.cjs.map +0 -1
- package/dist/parser-UCO6WPUW.js.map +0 -1
- package/dist/parser-ZQQM6J7T.js.map +0 -1
- package/dist/provider-7H4CPZYS.js.map +0 -1
- package/dist/provider-WPIYEALY.js.map +0 -1
- package/dist/provider-YN2SSK4X.cjs.map +0 -1
- package/dist/watch-MRHNFJPC.js.map +0 -1
- package/dist/{detect-PJZMUL2Z.js.map → detect-RI2MQ33K.js.map} +0 -0
package/dist/mcp.js
CHANGED
|
@@ -8,18 +8,18 @@ import {
|
|
|
8
8
|
fillHwpx,
|
|
9
9
|
markdownToHwpx,
|
|
10
10
|
parse
|
|
11
|
-
} from "./chunk-3QA624ON.js";
|
|
11
|
+
} from "./chunk-M24KMDAR.js";
|
|
12
12
|
import {
|
|
13
13
|
detectFormat,
|
|
14
14
|
detectZipFormat
|
|
15
|
-
} from "./chunk-5CJGKKMZ.js";
|
|
15
|
+
} from "./chunk-MEPHGCPQ.js";
|
|
16
16
|
import {
|
|
17
17
|
KordocError,
|
|
18
18
|
VERSION,
|
|
19
19
|
blocksToMarkdown,
|
|
20
20
|
sanitizeError,
|
|
21
21
|
toArrayBuffer
|
|
22
|
-
} from "./chunk-XSF3N6GU.js";
|
|
22
|
+
} from "./chunk-SJ5TPMBT.js";
|
|
23
23
|
import "./chunk-MOL7MDBG.js";
|
|
24
24
|
|
|
25
25
|
// src/mcp.ts
|
|
@@ -178,7 +178,7 @@ server.tool(
|
|
|
178
178
|
let metadata;
|
|
179
179
|
let effectiveFormat = format;
|
|
180
180
|
if (format === "hwpx") {
|
|
181
|
-
const { detectZipFormat: detectZipFormat2 } = await import("./detect-PJZMUL2Z.js");
|
|
181
|
+
const { detectZipFormat: detectZipFormat2 } = await import("./detect-RI2MQ33K.js");
|
|
182
182
|
const zipFormat = await detectZipFormat2(buffer);
|
|
183
183
|
if (zipFormat === "xlsx" || zipFormat === "docx") effectiveFormat = zipFormat;
|
|
184
184
|
}
|
|
@@ -191,7 +191,7 @@ server.tool(
|
|
|
191
191
|
break;
|
|
192
192
|
case "pdf":
|
|
193
193
|
try {
|
|
194
|
-
const { extractPdfMetadataOnly } = await import("./parser-UCO6WPUW.js");
|
|
194
|
+
const { extractPdfMetadataOnly } = await import("./parser-XBYGROQB.js");
|
|
195
195
|
metadata = await extractPdfMetadataOnly(buffer);
|
|
196
196
|
} catch {
|
|
197
197
|
metadata = void 0;
|
package/dist/mcp.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/mcp.ts"],"sourcesContent":["/** kordoc MCP 서버 — Claude/Cursor에서 문서 파싱 도구로 사용 */\r\n\r\nimport { McpServer } from \"@modelcontextprotocol/sdk/server/mcp.js\"\r\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\"\r\nimport { z } from \"zod\"\r\nimport { readFileSync, writeFileSync, realpathSync, openSync, readSync, closeSync, statSync, mkdirSync } from \"fs\"\r\nimport { resolve, isAbsolute, extname, dirname } from \"path\"\r\nimport { parse, detectFormat, detectZipFormat, blocksToMarkdown, compare, extractFormFields, fillFormFields, markdownToHwpx, fillHwpx } from \"./index.js\"\r\nimport { VERSION, toArrayBuffer, sanitizeError, KordocError } from \"./utils.js\"\r\nimport { extractHwp5MetadataOnly } from \"./hwp5/parser.js\"\r\nimport { extractHwpxMetadataOnly } from \"./hwpx/parser.js\"\r\n// pdfjs-dist는 optional — dynamic import로 지연 로드\r\n// import { extractPdfMetadataOnly } from \"./pdf/parser.js\"\r\n\r\n/** 허용 파일 확장자 */\r\nconst ALLOWED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\r\n/** 최대 파일 크기 (500MB) */\r\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\r\n\r\n/** 경로 정규화 및 보안 검증 */\r\nfunction safePath(filePath: string): string {\r\n if (!filePath) throw new KordocError(\"파일 경로가 비어있습니다\")\r\n const resolved = resolve(filePath)\r\n let real: string\r\n try {\r\n real = realpathSync(resolved)\r\n } catch (err: any) {\r\n if (err?.code === \"ENOENT\") throw new KordocError(`파일을 찾을 수 없습니다: ${resolved}`)\r\n if (err?.code === \"EACCES\" || err?.code === \"EPERM\") throw new KordocError(`파일 접근 권한이 없습니다: ${resolved}`)\r\n throw new KordocError(`경로 처리 오류 [${err?.code ?? 
\"UNKNOWN\"}]`)\r\n }\r\n if (!isAbsolute(real)) throw new KordocError(\"절대 경로만 허용됩니다\")\r\n const ext = extname(real).toLowerCase()\r\n if (!ALLOWED_EXTENSIONS.has(ext)) throw new KordocError(`지원하지 않는 확장자입니다: ${ext} (허용: ${[...ALLOWED_EXTENSIONS].join(\", \")})`)\r\n return real\r\n}\r\n\r\n/** 최대 파일 크기 — metadata 전용 (50MB, 전체 파싱보다 보수적) */\r\nconst MAX_METADATA_FILE_SIZE = 50 * 1024 * 1024\r\n\r\n/** 파일 읽기 + 크기 검증 공통 로직 */\r\nfunction readValidatedFile(filePath: string, maxSize = MAX_FILE_SIZE): { buffer: ArrayBuffer; resolved: string } {\r\n const resolved = safePath(filePath)\r\n let fileSize: number\r\n try {\r\n fileSize = statSync(resolved).size\r\n } catch (err: any) {\r\n throw new KordocError(`파일 상태 읽기 실패 [${err?.code ?? \"UNKNOWN\"}]: ${resolved}`)\r\n }\r\n if (fileSize > maxSize) {\r\n throw new KordocError(`파일이 너무 큽니다: ${(fileSize / 1024 / 1024).toFixed(1)}MB (최대 ${maxSize / 1024 / 1024}MB)`)\r\n }\r\n let raw: Buffer\r\n try {\r\n raw = readFileSync(resolved)\r\n } catch (err: any) {\r\n throw new KordocError(`파일 읽기 실패 [${err?.code ?? \"UNKNOWN\"}]: ${resolved}`)\r\n }\r\n return { buffer: toArrayBuffer(raw), resolved }\r\n}\r\n\r\n/** 파일 헤더(16바이트)만 읽어 포맷 감지 — 전체 파일 로드 불필요 */\r\nfunction detectFormatFromHeader(resolved: string): ReturnType<typeof detectFormat> {\r\n const fd = openSync(resolved, \"r\")\r\n try {\r\n const headerBuf = Buffer.alloc(16)\r\n readSync(fd, headerBuf, 0, 16, 0)\r\n return detectFormat(toArrayBuffer(headerBuf))\r\n } finally {\r\n closeSync(fd)\r\n }\r\n}\r\n\r\nconst server = new McpServer({\r\n name: \"kordoc\",\r\n version: VERSION,\r\n})\r\n\r\n// ─── 도구: parse_document ────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_document\",\r\n \"한국 문서 파일(HWP, HWPX, PDF, XLSX, DOCX)을 마크다운으로 변환합니다. 
파일 경로를 입력하면 포맷을 자동 감지하여 텍스트를 추출합니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const format = detectFormat(buffer)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const result = await parse(buffer)\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const meta = [\r\n `포맷: ${result.fileType.toUpperCase()}`,\r\n result.pageCount ? `페이지: ${result.pageCount}` : null,\r\n result.metadata?.title ? `제목: ${result.metadata.title}` : null,\r\n result.metadata?.author ? `작성자: ${result.metadata.author}` : null,\r\n result.isImageBased ? \"이미지 기반 PDF (텍스트 추출 불가)\" : null,\r\n ].filter(Boolean).join(\" | \")\r\n\r\n // outline/warnings 부가 정보 추가\r\n const parts: string[] = [`[${meta}]`]\r\n\r\n if (result.outline && result.outline.length > 0) {\r\n const outlineText = result.outline.map(o => `${\" \".repeat(o.level - 1)}- ${o.text}`).join(\"\\n\")\r\n parts.push(`\\n📑 문서 구조:\\n${outlineText}`)\r\n }\r\n\r\n if (result.warnings && result.warnings.length > 0) {\r\n const warnText = result.warnings.map(w => `- [p${w.page || \"?\"}] ${w.message}`).join(\"\\n\")\r\n parts.push(`\\n⚠️ 경고:\\n${warnText}`)\r\n }\r\n\r\n parts.push(`\\n\\n${result.markdown}`)\r\n\r\n return {\r\n content: [{ type: \"text\", text: parts.join(\"\") }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: detect_format ─────────────────────────────\r\n\r\nserver.tool(\r\n \"detect_format\",\r\n \"파일의 포맷을 매직 바이트로 감지합니다 (hwpx, hwp, pdf, unknown).\",\r\n {\r\n file_path: z.string().min(1).describe(\"감지할 
파일의 절대 경로\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const resolved = safePath(file_path)\r\n const format = detectFormatFromHeader(resolved)\r\n return {\r\n content: [{ type: \"text\", text: `${file_path}: ${format}` }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_metadata ────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_metadata\",\r\n \"문서의 메타데이터(제목, 작성자, 날짜 등)만 빠르게 추출합니다. 전체 파싱 없이 헤더/매니페스트만 읽습니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"메타데이터를 추출할 문서 파일의 절대 경로\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const resolved = safePath(file_path)\r\n const format = detectFormatFromHeader(resolved)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n // metadata 전용 크기 제한 (50MB)\r\n const { buffer } = readValidatedFile(file_path, MAX_METADATA_FILE_SIZE)\r\n\r\n let metadata\r\n // ZIP 기반 포맷(hwpx)은 내부 구조로 세분화 (XLSX/DOCX 구분)\r\n let effectiveFormat = format\r\n if (format === \"hwpx\") {\r\n const { detectZipFormat } = await import(\"./detect.js\")\r\n const zipFormat = await detectZipFormat(buffer)\r\n if (zipFormat === \"xlsx\" || zipFormat === \"docx\") effectiveFormat = zipFormat as any\r\n }\r\n switch (effectiveFormat) {\r\n case \"hwp\":\r\n metadata = extractHwp5MetadataOnly(Buffer.from(buffer))\r\n break\r\n case \"hwpx\":\r\n metadata = await extractHwpxMetadataOnly(buffer)\r\n break\r\n case \"pdf\":\r\n try {\r\n const { extractPdfMetadataOnly } = await import(\"./pdf/parser.js\")\r\n metadata = await extractPdfMetadataOnly(buffer)\r\n } catch {\r\n metadata = undefined // pdfjs-dist 미설치 시 metadata 생략\r\n }\r\n break\r\n case \"xlsx\":\r\n case \"docx\": {\r\n // XLSX/DOCX는 전용 metadata 추출기가 없으므로 전체 파싱 후 metadata 반환\r\n const result = await parse(buffer)\r\n metadata = 
result.success ? result.metadata : undefined\r\n break\r\n }\r\n }\r\n\r\n return {\r\n content: [{ type: \"text\", text: JSON.stringify({ format, ...metadata }, null, 2) }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_pages ──────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_pages\",\r\n \"문서의 특정 페이지/섹션 범위만 파싱합니다. PDF는 정확한 페이지, HWP/HWPX는 섹션 단위 근사치입니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\r\n pages: z.string().min(1).describe(\"페이지 범위 (예: '1-3', '1,3,5-7')\"),\r\n },\r\n async ({ file_path, pages }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const format = detectFormat(buffer)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const result = await parse(buffer, { pages })\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const meta = [\r\n `포맷: ${result.fileType.toUpperCase()}`,\r\n `범위: ${pages}`,\r\n result.pageCount ? `페이지: ${result.pageCount}` : null,\r\n ].filter(Boolean).join(\" | \")\r\n\r\n return {\r\n content: [{ type: \"text\", text: `[${meta}]\\n\\n${result.markdown}` }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_table ──────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_table\",\r\n \"문서에서 N번째 테이블만 추출합니다 (0-based index). 
테이블이 없거나 인덱스 범위를 초과하면 오류를 반환합니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\r\n table_index: z.number().int().min(0).describe(\"추출할 테이블 인덱스 (0부터 시작)\"),\r\n },\r\n async ({ file_path, table_index }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const format = detectFormat(buffer)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const result = await parse(buffer)\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const tableBlocks = result.blocks.filter(b => b.type === \"table\" && b.table)\r\n if (tableBlocks.length === 0) {\r\n return {\r\n content: [{ type: \"text\", text: `문서에 테이블이 없습니다.` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n if (table_index >= tableBlocks.length) {\r\n return {\r\n content: [{ type: \"text\", text: `테이블 인덱스 초과: ${table_index} (총 ${tableBlocks.length}개 테이블)` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const tableBlock = tableBlocks[table_index]\r\n const tableMarkdown = blocksToMarkdown([tableBlock])\r\n\r\n return {\r\n content: [{ type: \"text\", text: `[테이블 #${table_index} / 총 ${tableBlocks.length}개]\\n\\n${tableMarkdown}` }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: compare_documents ─────────────────────────\r\n\r\nserver.tool(\r\n \"compare_documents\",\r\n \"두 한국 문서 파일을 비교하여 추가/삭제/변경된 블록을 표시합니다. 신구대조표 생성에 활용됩니다. 
크로스 포맷(HWP↔HWPX) 비교 가능.\",\r\n {\r\n file_path_a: z.string().min(1).describe(\"비교 원본 문서의 절대 경로\"),\r\n file_path_b: z.string().min(1).describe(\"비교 대상 문서의 절대 경로\"),\r\n },\r\n async ({ file_path_a, file_path_b }) => {\r\n try {\r\n const { buffer: bufA } = readValidatedFile(file_path_a)\r\n const { buffer: bufB } = readValidatedFile(file_path_b)\r\n\r\n const result = await compare(bufA, bufB)\r\n const { stats, diffs } = result\r\n\r\n const lines: string[] = [\r\n `## 문서 비교 결과`,\r\n `추가: ${stats.added} | 삭제: ${stats.removed} | 변경: ${stats.modified} | 동일: ${stats.unchanged}`,\r\n \"\",\r\n ]\r\n\r\n for (const d of diffs) {\r\n const prefix = d.type === \"added\" ? \"+\" : d.type === \"removed\" ? \"-\" : d.type === \"modified\" ? \"~\" : \" \"\r\n const text = d.after?.text || d.before?.text || (d.after?.table ? \"[테이블]\" : d.before?.table ? \"[테이블]\" : \"\")\r\n const sim = d.similarity !== undefined ? ` (${(d.similarity * 100).toFixed(0)}%)` : \"\"\r\n lines.push(`${prefix} ${text.substring(0, 200)}${sim}`)\r\n }\r\n\r\n return {\r\n content: [{ type: \"text\", text: lines.join(\"\\n\") }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_form ───────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_form\",\r\n \"한국 서식 문서에서 레이블-값 쌍을 구조화된 JSON으로 추출합니다. 
양식/서식 문서에 최적화.\",\r\n {\r\n file_path: z.string().min(1).describe(\"서식 문서 파일의 절대 경로\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const result = await parse(buffer)\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const form = extractFormFields(result.blocks)\r\n return {\r\n content: [{ type: \"text\", text: JSON.stringify(form, null, 2) }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: fill_form ───────────────────────────────\r\n\r\nserver.tool(\r\n \"fill_form\",\r\n \"한국 서식 문서의 빈칸을 채워서 새 문서로 출력합니다. hwpx-preserve를 사용하면 원본 서식(테두리, 폰트, 병합 등)을 100% 유지합니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"서식 템플릿 문서의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\r\n fields: z.record(z.string(), z.string()).describe(\"채울 필드 맵 (라벨 → 값). 예: {\\\"성명\\\": \\\"홍길동\\\", \\\"전화번호\\\": \\\"010-1234-5678\\\"}\"),\r\n output_format: z.enum([\"markdown\", \"hwpx\", \"hwpx-preserve\"]).default(\"hwpx-preserve\").describe(\"출력 포맷: hwpx-preserve (원본 스타일 보존, HWPX 전용), hwpx (새 HWPX 생성), markdown\"),\r\n output_path: z.string().optional().describe(\"출력 파일 저장 경로 (선택). 지정 시 파일로 저장, 미지정 시 텍스트로 반환\"),\r\n },\r\n async ({ file_path, fields, output_format, output_path }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n\r\n // ─── hwpx-preserve: 원본 ZIP 직접 수정 (스타일 보존) ───\r\n if (output_format === \"hwpx-preserve\") {\r\n const format = detectFormat(buffer)\r\n let isHwpx = format === \"hwpx\"\r\n if (isHwpx) {\r\n const zipFormat = await detectZipFormat(buffer)\r\n isHwpx = zipFormat === \"hwpx\"\r\n }\r\n if (!isHwpx) {\r\n return {\r\n content: [{ type: \"text\", text: `hwpx-preserve는 HWPX 파일만 지원합니다 (감지된 포맷: ${format}). 
hwpx 또는 markdown을 사용하세요.` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const hwpxResult = await fillHwpx(buffer, fields)\r\n const summary = [\r\n `채워진 필드: ${hwpxResult.filled.length}개 (원본 스타일 보존)`,\r\n hwpxResult.unmatched.length > 0 ? `매칭 실패: ${hwpxResult.unmatched.join(\", \")}` : null,\r\n ].filter(Boolean).join(\" | \")\r\n\r\n const filledList = hwpxResult.filled.map(f => ` - ${f.label}: ${f.value}`).join(\"\\n\")\r\n\r\n if (output_path) {\r\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\r\n writeFileSync(resolve(output_path), Buffer.from(hwpxResult.buffer))\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\n채워진 필드:\\n${filledList}\\n\\nHWPX 파일 저장 (원본 서식 유지): ${resolve(output_path)}` }],\r\n }\r\n }\r\n\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\n채워진 필드:\\n${filledList}\\n\\n⚠️ output_path를 지정하면 원본 서식이 유지된 HWPX 파일로 저장됩니다.` }],\r\n }\r\n }\r\n\r\n // ─── 일반 경로: parse → fill → output ───\r\n const result = await parse(buffer)\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const formInfo = extractFormFields(result.blocks)\r\n const fillResult = fillFormFields(result.blocks, fields)\r\n\r\n if (fillResult.filled.length === 0 && formInfo.fields.length === 0) {\r\n return {\r\n content: [{ type: \"text\", text: `서식 필드를 찾을 수 없습니다. 일반 문서이거나 서식 패턴이 감지되지 않았습니다.` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const markdown = blocksToMarkdown(fillResult.blocks)\r\n const summary = [\r\n `채워진 필드: ${fillResult.filled.length}개`,\r\n fillResult.unmatched.length > 0 ? `매칭 실패: ${fillResult.unmatched.join(\", \")}` : null,\r\n formInfo.fields.length > 0 ? 
`서식 필드: ${formInfo.fields.length}개 (확신도 ${(formInfo.confidence * 100).toFixed(0)}%)` : null,\r\n ].filter(Boolean).join(\" | \")\r\n\r\n if (output_format === \"hwpx\") {\r\n const hwpxBuffer = await markdownToHwpx(markdown)\r\n if (output_path) {\r\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\r\n writeFileSync(resolve(output_path), Buffer.from(hwpxBuffer))\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\nHWPX 파일 저장: ${resolve(output_path)}` }],\r\n }\r\n }\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\n⚠️ output_path를 지정하면 HWPX 파일로 저장됩니다. 미리보기:\\n\\n${markdown}` }],\r\n }\r\n }\r\n\r\n // markdown\r\n if (output_path) {\r\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\r\n writeFileSync(resolve(output_path), markdown, \"utf-8\")\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\n마크다운 파일 저장: ${resolve(output_path)}\\n\\n${markdown}` }],\r\n }\r\n }\r\n return {\r\n content: [{ type: \"text\", text: `[${summary}]\\n\\n${markdown}` }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 서버 시작 ───────────────────────────────────────\r\n\r\nasync function main() {\r\n const transport = new StdioServerTransport()\r\n await server.connect(transport)\r\n}\r\n\r\nmain().catch((err) => { console.error(err); process.exit(1) 
})\r\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;AAEA,SAAS,iBAAiB;AAC1B,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAClB,SAAS,cAAc,eAAe,cAAc,UAAU,UAAU,WAAW,UAAU,iBAAiB;AAC9G,SAAS,SAAS,YAAY,SAAS,eAAe;AAStD,IAAM,qBAAqB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAE9E,IAAM,gBAAgB,MAAM,OAAO;AAGnC,SAAS,SAAS,UAA0B;AAC1C,MAAI,CAAC,SAAU,OAAM,IAAI,YAAY,sEAAe;AACpD,QAAM,WAAW,QAAQ,QAAQ;AACjC,MAAI;AACJ,MAAI;AACF,WAAO,aAAa,QAAQ;AAAA,EAC9B,SAAS,KAAU;AACjB,QAAI,KAAK,SAAS,SAAU,OAAM,IAAI,YAAY,oEAAkB,QAAQ,EAAE;AAC9E,QAAI,KAAK,SAAS,YAAY,KAAK,SAAS,QAAS,OAAM,IAAI,YAAY,0EAAmB,QAAQ,EAAE;AACxG,UAAM,IAAI,YAAY,2CAAa,KAAK,QAAQ,SAAS,GAAG;AAAA,EAC9D;AACA,MAAI,CAAC,WAAW,IAAI,EAAG,OAAM,IAAI,YAAY,gEAAc;AAC3D,QAAM,MAAM,QAAQ,IAAI,EAAE,YAAY;AACtC,MAAI,CAAC,mBAAmB,IAAI,GAAG,EAAG,OAAM,IAAI,YAAY,+EAAmB,GAAG,mBAAS,CAAC,GAAG,kBAAkB,EAAE,KAAK,IAAI,CAAC,GAAG;AAC5H,SAAO;AACT;AAGA,IAAM,yBAAyB,KAAK,OAAO;AAG3C,SAAS,kBAAkB,UAAkB,UAAU,eAA0D;AAC/G,QAAM,WAAW,SAAS,QAAQ;AAClC,MAAI;AACJ,MAAI;AACF,eAAW,SAAS,QAAQ,EAAE;AAAA,EAChC,SAAS,KAAU;AACjB,UAAM,IAAI,YAAY,wDAAgB,KAAK,QAAQ,SAAS,MAAM,QAAQ,EAAE;AAAA,EAC9E;AACA,MAAI,WAAW,SAAS;AACtB,UAAM,IAAI,YAAY,wDAAgB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC,oBAAU,UAAU,OAAO,IAAI,KAAK;AAAA,EAC9G;AACA,MAAI;AACJ,MAAI;AACF,UAAM,aAAa,QAAQ;AAAA,EAC7B,SAAS,KAAU;AACjB,UAAM,IAAI,YAAY,2CAAa,KAAK,QAAQ,SAAS,MAAM,QAAQ,EAAE;AAAA,EAC3E;AACA,SAAO,EAAE,QAAQ,cAAc,GAAG,GAAG,SAAS;AAChD;AAGA,SAAS,uBAAuB,UAAmD;AACjF,QAAM,KAAK,SAAS,UAAU,GAAG;AACjC,MAAI;AACF,UAAM,YAAY,OAAO,MAAM,EAAE;AACjC,aAAS,IAAI,WAAW,GAAG,IAAI,CAAC;AAChC,WAAO,aAAa,cAAc,SAAS,CAAC;AAAA,EAC9C,UAAE;AACA,cAAU,EAAE;AAAA,EACd;AACF;AAEA,IAAM,SAAS,IAAI,UAAU;AAAA,EAC3B,MAAM;AAAA,EACN,SAAS;AACX,CAAC;AAID,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,EACvF;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI
,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,QAChD,OAAO,UAAU,QAAQ,iBAAO,OAAO,SAAS,KAAK,KAAK;AAAA,QAC1D,OAAO,UAAU,SAAS,uBAAQ,OAAO,SAAS,MAAM,KAAK;AAAA,QAC7D,OAAO,eAAe,uFAA2B;AAAA,MACnD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAG5B,YAAM,QAAkB,CAAC,IAAI,IAAI,GAAG;AAEpC,UAAI,OAAO,WAAW,OAAO,QAAQ,SAAS,GAAG;AAC/C,cAAM,cAAc,OAAO,QAAQ,IAAI,OAAK,GAAG,KAAK,OAAO,EAAE,QAAQ,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,KAAK,IAAI;AAC/F,cAAM,KAAK;AAAA;AAAA,EAAgB,WAAW,EAAE;AAAA,MAC1C;AAEA,UAAI,OAAO,YAAY,OAAO,SAAS,SAAS,GAAG;AACjD,cAAM,WAAW,OAAO,SAAS,IAAI,OAAK,OAAO,EAAE,QAAQ,GAAG,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,IAAI;AACzF,cAAM,KAAK;AAAA;AAAA,EAAa,QAAQ,EAAE;AAAA,MACpC;AAEA,YAAM,KAAK;AAAA;AAAA,EAAO,OAAO,QAAQ,EAAE;AAEnC,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,EAAE,EAAE,CAAC;AAAA,MAClD;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,iEAAe;AAAA,EACvD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAC9C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,MAAM,GAAG,CAAC;AAAA,MAC7D;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mHAAyB;AAAA,EACjE;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAE9C,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAGA,YAAM,EAAE,OAAO,IAAI,kBAAkB,WAAW,sBAAsB;AAEtE,UAAI;AAEJ,UAAI,kBAAkB;AACtB,UAAI,WAAW,QAAQ;AACrB,cAAM,EAAE,
iBAAAA,iBAAgB,IAAI,MAAM,OAAO,sBAAa;AACtD,cAAM,YAAY,MAAMA,iBAAgB,MAAM;AAC9C,YAAI,cAAc,UAAU,cAAc,OAAQ,mBAAkB;AAAA,MACtE;AACA,cAAQ,iBAAiB;AAAA,QACvB,KAAK;AACH,qBAAW,wBAAwB,OAAO,KAAK,MAAM,CAAC;AACtD;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,wBAAwB,MAAM;AAC/C;AAAA,QACF,KAAK;AACH,cAAI;AACF,kBAAM,EAAE,uBAAuB,IAAI,MAAM,OAAO,sBAAiB;AACjE,uBAAW,MAAM,uBAAuB,MAAM;AAAA,UAChD,QAAQ;AACN,uBAAW;AAAA,UACb;AACA;AAAA,QACF,KAAK;AAAA,QACL,KAAK,QAAQ;AAEX,gBAAM,SAAS,MAAM,MAAM,MAAM;AACjC,qBAAW,OAAO,UAAU,OAAO,WAAW;AAC9C;AAAA,QACF;AAAA,MACF;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,EAAE,QAAQ,GAAG,SAAS,GAAG,MAAM,CAAC,EAAE,CAAC;AAAA,MACpF;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4DAA8B;AAAA,EAClE;AAAA,EACA,OAAO,EAAE,WAAW,MAAM,MAAM;AAC9B,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,QAAQ,EAAE,MAAM,CAAC;AAE5C,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,iBAAO,KAAK;AAAA,QACZ,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,MAClD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,IAAI;AAAA;AAAA,EAAQ,OAAO,QAAQ,GAAG,CAAC;AAAA,MACrE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,SAAS,uFAAsB;AAAA,EACtE;AAAA,EACA,OAAO,EAAE,WAAW,YAAY
,MAAM;AACpC,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,cAAc,OAAO,OAAO,OAAO,OAAK,EAAE,SAAS,WAAW,EAAE,KAAK;AAC3E,UAAI,YAAY,WAAW,GAAG;AAC5B,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wEAAiB,CAAC;AAAA,UAClD,SAAS;AAAA,QACX;AAAA,MACF;AAEA,UAAI,eAAe,YAAY,QAAQ;AACrC,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,uDAAe,WAAW,YAAO,YAAY,MAAM,6BAAS,CAAC;AAAA,UAC7F,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,aAAa,YAAY,WAAW;AAC1C,YAAM,gBAAgB,iBAAiB,CAAC,UAAU,CAAC;AAEnD,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wBAAS,WAAW,aAAQ,YAAY,MAAM;AAAA;AAAA,EAAS,aAAa,GAAG,CAAC;AAAA,MAC1G;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,IACzD,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EAC3D;AAAA,EACA,OAAO,EAAE,aAAa,YAAY,MAAM;AACtC,QAAI;AACF,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AACtD,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AAEtD,YAAM,SAAS,MAAM,QAAQ,MAAM,IAAI;AACvC,YAAM,EAAE,OAAO,MAAM,IAAI;AAEzB,YAAM,QAAkB;AAAA,QACtB;AAAA,QACA,iBAAO,MAAM,KAAK,oBAAU,MAAM,OAAO,oBAAU,MAAM,QAAQ,oBAAU,MAAM,SAAS;AAAA,QAC1F;AAAA,MACF;AAEA,iBAAW,KAAK,OAAO;AACrB,cAAM,SAAS,EAAE,SAAS,UAAU,MAAM,EAAE,SAAS,YAAY,MAAM,EAAE,SAAS,aAAa,MAAM;AACrG,cAAM,OAAO,EAAE,OAAO,QAAQ,EAAE,QAAQ,SAAS,EAAE,OAAO,QAAQ,yBAAU,EAAE,QAAQ,QAAQ,yBAAU;AACxG,cAAM,MAAM,EAAE,eAAe,SAAY,MAAM,EAAE,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AACpF,cAAM,KAAK,GAAG,MAAM,IAAI,KAAK,UAAU,GAAG,GAAG,CAAC,GAAG,GAAG,EAAE;AAAA,MACxD;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,IAAI,EAAE,CAAC;AAAA,MACpD;AAAA,IACF,SAAS,KAAK;AACZ,a
AAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EACzD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO,kBAAkB,OAAO,MAAM;AAC5C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,MAAM,MAAM,CAAC,EAAE,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,IACrF,QAAQ,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAAE,SAAS,4JAAqE;AAAA,IACvH,eAAe,EAAE,KAAK,CAAC,YAAY,QAAQ,eAAe,CAAC,EAAE,QAAQ,eAAe,EAAE,SAAS,uJAAuE;AAAA,IACtK,aAAa,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0LAA8C;AAAA,EAC5F;AAAA,EACA,OAAO,EAAE,WAAW,QAAQ,eAAe,YAAY,MAAM;AAC3D,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAG9C,UAAI,kBAAkB,iBAAiB;AACrC,cAAM,SAAS,aAAa,MAAM;AAClC,YAAI,SAAS,WAAW;AACxB,YAAI,QAAQ;AACV,gBAAM,YAAY,MAAM,gBAAgB,MAAM;AAC9C,mBAAS,cAAc;AAAA,QACzB;AACA,YAAI,CAAC,QAAQ;AACX,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,gHAA0C,MAAM,sEAA8B,CAAC;AAAA,YAC/G,SAAS;AAAA,UACX;AAAA,QACF;AAEA,cAAM,aAAa,MAAM,SAAS,QAAQ,MAAM;AAChD,cAAMC,WAAU;AAAA,UACd,oCAAW,WAAW,OAAO,MAAM;AAAA,UACnC,WAAW,UAAU,SAAS,IAAI,8BAAU,WAAW,UAAU,KAAK,IAAI,CAAC,KAAK;AAAA,QAClF,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,cAAM,aAAa,WAAW,OAAO,IAAI,OAAK,OAAO,EAAE,KAAK,KAAK,EAAE,KAAK,EAAE,EAAE,KAAK,IAAI;AAErF,YAAI,aAAa;AACf,oBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,wBAAc,QAAQ,WAAW,GAAG,OAAO,KAAK,WAAW,MAAM,CAAC;AAClE,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAIA,QAAO;AAAA;AAAA;AAAA,EAAiB,UAAU;AAAA;AAAA,2EAA8B,QAAQ,WAAW,CAAC,GAAG,CAAC;AAAA,UAC9H;AAAA
,QACF;AAEA,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAIA,QAAO;AAAA;AAAA;AAAA,EAAiB,UAAU;AAAA;AAAA,oKAAsD,CAAC;AAAA,QAC/H;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,MAAM,MAAM;AACjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,WAAW,kBAAkB,OAAO,MAAM;AAChD,YAAM,aAAa,eAAe,OAAO,QAAQ,MAAM;AAEvD,UAAI,WAAW,OAAO,WAAW,KAAK,SAAS,OAAO,WAAW,GAAG;AAClE,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,qNAAgD,CAAC;AAAA,UACjF,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,WAAW,iBAAiB,WAAW,MAAM;AACnD,YAAM,UAAU;AAAA,QACd,oCAAW,WAAW,OAAO,MAAM;AAAA,QACnC,WAAW,UAAU,SAAS,IAAI,8BAAU,WAAW,UAAU,KAAK,IAAI,CAAC,KAAK;AAAA,QAChF,SAAS,OAAO,SAAS,IAAI,8BAAU,SAAS,OAAO,MAAM,+BAAW,SAAS,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AAAA,MACtH,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,UAAI,kBAAkB,QAAQ;AAC5B,cAAM,aAAa,MAAM,eAAe,QAAQ;AAChD,YAAI,aAAa;AACf,oBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,wBAAc,QAAQ,WAAW,GAAG,OAAO,KAAK,UAAU,CAAC;AAC3D,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,kCAAoB,QAAQ,WAAW,CAAC,GAAG,CAAC;AAAA,UACzF;AAAA,QACF;AACA,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA;AAAA;AAAA,EAAsD,QAAQ,GAAG,CAAC;AAAA,QAC/G;AAAA,MACF;AAGA,UAAI,aAAa;AACf,kBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,sBAAc,QAAQ,WAAW,GAAG,UAAU,OAAO;AACrD,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,sDAAoB,QAAQ,WAAW,CAAC;AAAA;AAAA,EAAO,QAAQ,GAAG,CAAC;AAAA,QACxG;AAAA,MACF;AACA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,EAAQ,QAAQ,GAAG,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,eAAe,OAAO;AACpB,QAAM,YAAY,IAAI,qBAAqB;AAC3C,QAAM,OAAO,QAAQ,SAAS;AAChC;AAEA,KAAK,EAAE,MAAM,CAAC,QAAQ;AAAE,UAAQ,MAAM,GAAG;AAAG,UAAQ,KAAK,CAAC;AAAE,CAAC;","names":["detectZipFormat","summary"]}
|
|
1
|
+
{"version":3,"sources":["../src/mcp.ts"],"sourcesContent":["/** kordoc MCP 서버 — Claude/Cursor에서 문서 파싱 도구로 사용 */\n\nimport { McpServer } from \"@modelcontextprotocol/sdk/server/mcp.js\"\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\"\nimport { z } from \"zod\"\nimport { readFileSync, writeFileSync, realpathSync, openSync, readSync, closeSync, statSync, mkdirSync } from \"fs\"\nimport { resolve, isAbsolute, extname, dirname } from \"path\"\nimport { parse, detectFormat, detectZipFormat, blocksToMarkdown, compare, extractFormFields, fillFormFields, markdownToHwpx, fillHwpx } from \"./index.js\"\nimport { VERSION, toArrayBuffer, sanitizeError, KordocError } from \"./utils.js\"\nimport { extractHwp5MetadataOnly } from \"./hwp5/parser.js\"\nimport { extractHwpxMetadataOnly } from \"./hwpx/parser.js\"\n// pdfjs-dist는 optional — dynamic import로 지연 로드\n// import { extractPdfMetadataOnly } from \"./pdf/parser.js\"\n\n/** 허용 파일 확장자 */\nconst ALLOWED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\n/** 최대 파일 크기 (500MB) */\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\n\n/** 경로 정규화 및 보안 검증 */\nfunction safePath(filePath: string): string {\n if (!filePath) throw new KordocError(\"파일 경로가 비어있습니다\")\n const resolved = resolve(filePath)\n let real: string\n try {\n real = realpathSync(resolved)\n } catch (err: any) {\n if (err?.code === \"ENOENT\") throw new KordocError(`파일을 찾을 수 없습니다: ${resolved}`)\n if (err?.code === \"EACCES\" || err?.code === \"EPERM\") throw new KordocError(`파일 접근 권한이 없습니다: ${resolved}`)\n throw new KordocError(`경로 처리 오류 [${err?.code ?? 
\"UNKNOWN\"}]`)\n }\n if (!isAbsolute(real)) throw new KordocError(\"절대 경로만 허용됩니다\")\n const ext = extname(real).toLowerCase()\n if (!ALLOWED_EXTENSIONS.has(ext)) throw new KordocError(`지원하지 않는 확장자입니다: ${ext} (허용: ${[...ALLOWED_EXTENSIONS].join(\", \")})`)\n return real\n}\n\n/** 최대 파일 크기 — metadata 전용 (50MB, 전체 파싱보다 보수적) */\nconst MAX_METADATA_FILE_SIZE = 50 * 1024 * 1024\n\n/** 파일 읽기 + 크기 검증 공통 로직 */\nfunction readValidatedFile(filePath: string, maxSize = MAX_FILE_SIZE): { buffer: ArrayBuffer; resolved: string } {\n const resolved = safePath(filePath)\n let fileSize: number\n try {\n fileSize = statSync(resolved).size\n } catch (err: any) {\n throw new KordocError(`파일 상태 읽기 실패 [${err?.code ?? \"UNKNOWN\"}]: ${resolved}`)\n }\n if (fileSize > maxSize) {\n throw new KordocError(`파일이 너무 큽니다: ${(fileSize / 1024 / 1024).toFixed(1)}MB (최대 ${maxSize / 1024 / 1024}MB)`)\n }\n let raw: Buffer\n try {\n raw = readFileSync(resolved)\n } catch (err: any) {\n throw new KordocError(`파일 읽기 실패 [${err?.code ?? \"UNKNOWN\"}]: ${resolved}`)\n }\n return { buffer: toArrayBuffer(raw), resolved }\n}\n\n/** 파일 헤더(16바이트)만 읽어 포맷 감지 — 전체 파일 로드 불필요 */\nfunction detectFormatFromHeader(resolved: string): ReturnType<typeof detectFormat> {\n const fd = openSync(resolved, \"r\")\n try {\n const headerBuf = Buffer.alloc(16)\n readSync(fd, headerBuf, 0, 16, 0)\n return detectFormat(toArrayBuffer(headerBuf))\n } finally {\n closeSync(fd)\n }\n}\n\nconst server = new McpServer({\n name: \"kordoc\",\n version: VERSION,\n})\n\n// ─── 도구: parse_document ────────────────────────────\n\nserver.tool(\n \"parse_document\",\n \"한국 문서 파일(HWP, HWPX, PDF, XLSX, DOCX)을 마크다운으로 변환합니다. 
파일 경로를 입력하면 포맷을 자동 감지하여 텍스트를 추출합니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\n },\n async ({ file_path }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const meta = [\n `포맷: ${result.fileType.toUpperCase()}`,\n result.pageCount ? `페이지: ${result.pageCount}` : null,\n result.metadata?.title ? `제목: ${result.metadata.title}` : null,\n result.metadata?.author ? `작성자: ${result.metadata.author}` : null,\n result.isImageBased ? \"이미지 기반 PDF (텍스트 추출 불가)\" : null,\n ].filter(Boolean).join(\" | \")\n\n // outline/warnings 부가 정보 추가\n const parts: string[] = [`[${meta}]`]\n\n if (result.outline && result.outline.length > 0) {\n const outlineText = result.outline.map(o => `${\" \".repeat(o.level - 1)}- ${o.text}`).join(\"\\n\")\n parts.push(`\\n📑 문서 구조:\\n${outlineText}`)\n }\n\n if (result.warnings && result.warnings.length > 0) {\n const warnText = result.warnings.map(w => `- [p${w.page || \"?\"}] ${w.message}`).join(\"\\n\")\n parts.push(`\\n⚠️ 경고:\\n${warnText}`)\n }\n\n parts.push(`\\n\\n${result.markdown}`)\n\n return {\n content: [{ type: \"text\", text: parts.join(\"\") }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: detect_format ─────────────────────────────\n\nserver.tool(\n \"detect_format\",\n \"파일의 포맷을 매직 바이트로 감지합니다 (hwpx, hwp, pdf, unknown).\",\n {\n file_path: z.string().min(1).describe(\"감지할 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const resolved = safePath(file_path)\n const format = 
detectFormatFromHeader(resolved)\n return {\n content: [{ type: \"text\", text: `${file_path}: ${format}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_metadata ────────────────────────────\n\nserver.tool(\n \"parse_metadata\",\n \"문서의 메타데이터(제목, 작성자, 날짜 등)만 빠르게 추출합니다. 전체 파싱 없이 헤더/매니페스트만 읽습니다.\",\n {\n file_path: z.string().min(1).describe(\"메타데이터를 추출할 문서 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const resolved = safePath(file_path)\n const format = detectFormatFromHeader(resolved)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n // metadata 전용 크기 제한 (50MB)\n const { buffer } = readValidatedFile(file_path, MAX_METADATA_FILE_SIZE)\n\n let metadata\n // ZIP 기반 포맷(hwpx)은 내부 구조로 세분화 (XLSX/DOCX 구분)\n let effectiveFormat = format\n if (format === \"hwpx\") {\n const { detectZipFormat } = await import(\"./detect.js\")\n const zipFormat = await detectZipFormat(buffer)\n if (zipFormat === \"xlsx\" || zipFormat === \"docx\") effectiveFormat = zipFormat as any\n }\n switch (effectiveFormat) {\n case \"hwp\":\n metadata = extractHwp5MetadataOnly(Buffer.from(buffer))\n break\n case \"hwpx\":\n metadata = await extractHwpxMetadataOnly(buffer)\n break\n case \"pdf\":\n try {\n const { extractPdfMetadataOnly } = await import(\"./pdf/parser.js\")\n metadata = await extractPdfMetadataOnly(buffer)\n } catch {\n metadata = undefined // pdfjs-dist 미설치 시 metadata 생략\n }\n break\n case \"xlsx\":\n case \"docx\": {\n // XLSX/DOCX는 전용 metadata 추출기가 없으므로 전체 파싱 후 metadata 반환\n const result = await parse(buffer)\n metadata = result.success ? 
result.metadata : undefined\n break\n }\n }\n\n return {\n content: [{ type: \"text\", text: JSON.stringify({ format, ...metadata }, null, 2) }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_pages ──────────────────────────────\n\nserver.tool(\n \"parse_pages\",\n \"문서의 특정 페이지/섹션 범위만 파싱합니다. PDF는 정확한 페이지, HWP/HWPX는 섹션 단위 근사치입니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\n pages: z.string().min(1).describe(\"페이지 범위 (예: '1-3', '1,3,5-7')\"),\n },\n async ({ file_path, pages }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer, { pages })\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const meta = [\n `포맷: ${result.fileType.toUpperCase()}`,\n `범위: ${pages}`,\n result.pageCount ? `페이지: ${result.pageCount}` : null,\n ].filter(Boolean).join(\" | \")\n\n return {\n content: [{ type: \"text\", text: `[${meta}]\\n\\n${result.markdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_table ──────────────────────────────\n\nserver.tool(\n \"parse_table\",\n \"문서에서 N번째 테이블만 추출합니다 (0-based index). 
테이블이 없거나 인덱스 범위를 초과하면 오류를 반환합니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\n table_index: z.number().int().min(0).describe(\"추출할 테이블 인덱스 (0부터 시작)\"),\n },\n async ({ file_path, table_index }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const tableBlocks = result.blocks.filter(b => b.type === \"table\" && b.table)\n if (tableBlocks.length === 0) {\n return {\n content: [{ type: \"text\", text: `문서에 테이블이 없습니다.` }],\n isError: true,\n }\n }\n\n if (table_index >= tableBlocks.length) {\n return {\n content: [{ type: \"text\", text: `테이블 인덱스 초과: ${table_index} (총 ${tableBlocks.length}개 테이블)` }],\n isError: true,\n }\n }\n\n const tableBlock = tableBlocks[table_index]\n const tableMarkdown = blocksToMarkdown([tableBlock])\n\n return {\n content: [{ type: \"text\", text: `[테이블 #${table_index} / 총 ${tableBlocks.length}개]\\n\\n${tableMarkdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: compare_documents ─────────────────────────\n\nserver.tool(\n \"compare_documents\",\n \"두 한국 문서 파일을 비교하여 추가/삭제/변경된 블록을 표시합니다. 신구대조표 생성에 활용됩니다. 
크로스 포맷(HWP↔HWPX) 비교 가능.\",\n {\n file_path_a: z.string().min(1).describe(\"비교 원본 문서의 절대 경로\"),\n file_path_b: z.string().min(1).describe(\"비교 대상 문서의 절대 경로\"),\n },\n async ({ file_path_a, file_path_b }) => {\n try {\n const { buffer: bufA } = readValidatedFile(file_path_a)\n const { buffer: bufB } = readValidatedFile(file_path_b)\n\n const result = await compare(bufA, bufB)\n const { stats, diffs } = result\n\n const lines: string[] = [\n `## 문서 비교 결과`,\n `추가: ${stats.added} | 삭제: ${stats.removed} | 변경: ${stats.modified} | 동일: ${stats.unchanged}`,\n \"\",\n ]\n\n for (const d of diffs) {\n const prefix = d.type === \"added\" ? \"+\" : d.type === \"removed\" ? \"-\" : d.type === \"modified\" ? \"~\" : \" \"\n const text = d.after?.text || d.before?.text || (d.after?.table ? \"[테이블]\" : d.before?.table ? \"[테이블]\" : \"\")\n const sim = d.similarity !== undefined ? ` (${(d.similarity * 100).toFixed(0)}%)` : \"\"\n lines.push(`${prefix} ${text.substring(0, 200)}${sim}`)\n }\n\n return {\n content: [{ type: \"text\", text: lines.join(\"\\n\") }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_form ───────────────────────────────\n\nserver.tool(\n \"parse_form\",\n \"한국 서식 문서에서 레이블-값 쌍을 구조화된 JSON으로 추출합니다. 
양식/서식 문서에 최적화.\",\n {\n file_path: z.string().min(1).describe(\"서식 문서 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\n isError: true,\n }\n }\n\n const form = extractFormFields(result.blocks)\n return {\n content: [{ type: \"text\", text: JSON.stringify(form, null, 2) }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: fill_form ───────────────────────────────\n\nserver.tool(\n \"fill_form\",\n \"한국 서식 문서의 빈칸을 채워서 새 문서로 출력합니다. hwpx-preserve를 사용하면 원본 서식(테두리, 폰트, 병합 등)을 100% 유지합니다.\",\n {\n file_path: z.string().min(1).describe(\"서식 템플릿 문서의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\n fields: z.record(z.string(), z.string()).describe(\"채울 필드 맵 (라벨 → 값). 예: {\\\"성명\\\": \\\"홍길동\\\", \\\"전화번호\\\": \\\"010-1234-5678\\\"}\"),\n output_format: z.enum([\"markdown\", \"hwpx\", \"hwpx-preserve\"]).default(\"hwpx-preserve\").describe(\"출력 포맷: hwpx-preserve (원본 스타일 보존, HWPX 전용), hwpx (새 HWPX 생성), markdown\"),\n output_path: z.string().optional().describe(\"출력 파일 저장 경로 (선택). 지정 시 파일로 저장, 미지정 시 텍스트로 반환\"),\n },\n async ({ file_path, fields, output_format, output_path }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n\n // ─── hwpx-preserve: 원본 ZIP 직접 수정 (스타일 보존) ───\n if (output_format === \"hwpx-preserve\") {\n const format = detectFormat(buffer)\n let isHwpx = format === \"hwpx\"\n if (isHwpx) {\n const zipFormat = await detectZipFormat(buffer)\n isHwpx = zipFormat === \"hwpx\"\n }\n if (!isHwpx) {\n return {\n content: [{ type: \"text\", text: `hwpx-preserve는 HWPX 파일만 지원합니다 (감지된 포맷: ${format}). 
hwpx 또는 markdown을 사용하세요.` }],\n isError: true,\n }\n }\n\n const hwpxResult = await fillHwpx(buffer, fields)\n const summary = [\n `채워진 필드: ${hwpxResult.filled.length}개 (원본 스타일 보존)`,\n hwpxResult.unmatched.length > 0 ? `매칭 실패: ${hwpxResult.unmatched.join(\", \")}` : null,\n ].filter(Boolean).join(\" | \")\n\n const filledList = hwpxResult.filled.map(f => ` - ${f.label}: ${f.value}`).join(\"\\n\")\n\n if (output_path) {\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\n writeFileSync(resolve(output_path), Buffer.from(hwpxResult.buffer))\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\n채워진 필드:\\n${filledList}\\n\\nHWPX 파일 저장 (원본 서식 유지): ${resolve(output_path)}` }],\n }\n }\n\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\n채워진 필드:\\n${filledList}\\n\\n⚠️ output_path를 지정하면 원본 서식이 유지된 HWPX 파일로 저장됩니다.` }],\n }\n }\n\n // ─── 일반 경로: parse → fill → output ───\n const result = await parse(buffer)\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\n isError: true,\n }\n }\n\n const formInfo = extractFormFields(result.blocks)\n const fillResult = fillFormFields(result.blocks, fields)\n\n if (fillResult.filled.length === 0 && formInfo.fields.length === 0) {\n return {\n content: [{ type: \"text\", text: `서식 필드를 찾을 수 없습니다. 일반 문서이거나 서식 패턴이 감지되지 않았습니다.` }],\n isError: true,\n }\n }\n\n const markdown = blocksToMarkdown(fillResult.blocks)\n const summary = [\n `채워진 필드: ${fillResult.filled.length}개`,\n fillResult.unmatched.length > 0 ? `매칭 실패: ${fillResult.unmatched.join(\", \")}` : null,\n formInfo.fields.length > 0 ? 
`서식 필드: ${formInfo.fields.length}개 (확신도 ${(formInfo.confidence * 100).toFixed(0)}%)` : null,\n ].filter(Boolean).join(\" | \")\n\n if (output_format === \"hwpx\") {\n const hwpxBuffer = await markdownToHwpx(markdown)\n if (output_path) {\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\n writeFileSync(resolve(output_path), Buffer.from(hwpxBuffer))\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\nHWPX 파일 저장: ${resolve(output_path)}` }],\n }\n }\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\n⚠️ output_path를 지정하면 HWPX 파일로 저장됩니다. 미리보기:\\n\\n${markdown}` }],\n }\n }\n\n // markdown\n if (output_path) {\n mkdirSync(dirname(resolve(output_path)), { recursive: true })\n writeFileSync(resolve(output_path), markdown, \"utf-8\")\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\n마크다운 파일 저장: ${resolve(output_path)}\\n\\n${markdown}` }],\n }\n }\n return {\n content: [{ type: \"text\", text: `[${summary}]\\n\\n${markdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 서버 시작 ───────────────────────────────────────\n\nasync function main() {\n const transport = new StdioServerTransport()\n await server.connect(transport)\n}\n\nmain().catch((err) => { console.error(err); process.exit(1) 
})\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;AAEA,SAAS,iBAAiB;AAC1B,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAClB,SAAS,cAAc,eAAe,cAAc,UAAU,UAAU,WAAW,UAAU,iBAAiB;AAC9G,SAAS,SAAS,YAAY,SAAS,eAAe;AAStD,IAAM,qBAAqB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAE9E,IAAM,gBAAgB,MAAM,OAAO;AAGnC,SAAS,SAAS,UAA0B;AAC1C,MAAI,CAAC,SAAU,OAAM,IAAI,YAAY,sEAAe;AACpD,QAAM,WAAW,QAAQ,QAAQ;AACjC,MAAI;AACJ,MAAI;AACF,WAAO,aAAa,QAAQ;AAAA,EAC9B,SAAS,KAAU;AACjB,QAAI,KAAK,SAAS,SAAU,OAAM,IAAI,YAAY,oEAAkB,QAAQ,EAAE;AAC9E,QAAI,KAAK,SAAS,YAAY,KAAK,SAAS,QAAS,OAAM,IAAI,YAAY,0EAAmB,QAAQ,EAAE;AACxG,UAAM,IAAI,YAAY,2CAAa,KAAK,QAAQ,SAAS,GAAG;AAAA,EAC9D;AACA,MAAI,CAAC,WAAW,IAAI,EAAG,OAAM,IAAI,YAAY,gEAAc;AAC3D,QAAM,MAAM,QAAQ,IAAI,EAAE,YAAY;AACtC,MAAI,CAAC,mBAAmB,IAAI,GAAG,EAAG,OAAM,IAAI,YAAY,+EAAmB,GAAG,mBAAS,CAAC,GAAG,kBAAkB,EAAE,KAAK,IAAI,CAAC,GAAG;AAC5H,SAAO;AACT;AAGA,IAAM,yBAAyB,KAAK,OAAO;AAG3C,SAAS,kBAAkB,UAAkB,UAAU,eAA0D;AAC/G,QAAM,WAAW,SAAS,QAAQ;AAClC,MAAI;AACJ,MAAI;AACF,eAAW,SAAS,QAAQ,EAAE;AAAA,EAChC,SAAS,KAAU;AACjB,UAAM,IAAI,YAAY,wDAAgB,KAAK,QAAQ,SAAS,MAAM,QAAQ,EAAE;AAAA,EAC9E;AACA,MAAI,WAAW,SAAS;AACtB,UAAM,IAAI,YAAY,wDAAgB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC,oBAAU,UAAU,OAAO,IAAI,KAAK;AAAA,EAC9G;AACA,MAAI;AACJ,MAAI;AACF,UAAM,aAAa,QAAQ;AAAA,EAC7B,SAAS,KAAU;AACjB,UAAM,IAAI,YAAY,2CAAa,KAAK,QAAQ,SAAS,MAAM,QAAQ,EAAE;AAAA,EAC3E;AACA,SAAO,EAAE,QAAQ,cAAc,GAAG,GAAG,SAAS;AAChD;AAGA,SAAS,uBAAuB,UAAmD;AACjF,QAAM,KAAK,SAAS,UAAU,GAAG;AACjC,MAAI;AACF,UAAM,YAAY,OAAO,MAAM,EAAE;AACjC,aAAS,IAAI,WAAW,GAAG,IAAI,CAAC;AAChC,WAAO,aAAa,cAAc,SAAS,CAAC;AAAA,EAC9C,UAAE;AACA,cAAU,EAAE;AAAA,EACd;AACF;AAEA,IAAM,SAAS,IAAI,UAAU;AAAA,EAC3B,MAAM;AAAA,EACN,SAAS;AACX,CAAC;AAID,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,EACvF;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,C
AAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,QAChD,OAAO,UAAU,QAAQ,iBAAO,OAAO,SAAS,KAAK,KAAK;AAAA,QAC1D,OAAO,UAAU,SAAS,uBAAQ,OAAO,SAAS,MAAM,KAAK;AAAA,QAC7D,OAAO,eAAe,uFAA2B;AAAA,MACnD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAG5B,YAAM,QAAkB,CAAC,IAAI,IAAI,GAAG;AAEpC,UAAI,OAAO,WAAW,OAAO,QAAQ,SAAS,GAAG;AAC/C,cAAM,cAAc,OAAO,QAAQ,IAAI,OAAK,GAAG,KAAK,OAAO,EAAE,QAAQ,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,KAAK,IAAI;AAC/F,cAAM,KAAK;AAAA;AAAA,EAAgB,WAAW,EAAE;AAAA,MAC1C;AAEA,UAAI,OAAO,YAAY,OAAO,SAAS,SAAS,GAAG;AACjD,cAAM,WAAW,OAAO,SAAS,IAAI,OAAK,OAAO,EAAE,QAAQ,GAAG,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,IAAI;AACzF,cAAM,KAAK;AAAA;AAAA,EAAa,QAAQ,EAAE;AAAA,MACpC;AAEA,YAAM,KAAK;AAAA;AAAA,EAAO,OAAO,QAAQ,EAAE;AAEnC,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,EAAE,EAAE,CAAC;AAAA,MAClD;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,iEAAe;AAAA,EACvD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAC9C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,MAAM,GAAG,CAAC;AAAA,MAC7D;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mHAAyB;AAAA,EACjE;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAE9C,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAGA,YAAM,EAAE,OAAO,IAAI,kBAAkB,WAAW,sBAAsB;AAEtE,UAAI;AAEJ,UAAI,kBAAkB;AACtB,UAAI,WAAW,QAAQ;AACrB,cAAM,EAAE,iB
AAAA,iBAAgB,IAAI,MAAM,OAAO,sBAAa;AACtD,cAAM,YAAY,MAAMA,iBAAgB,MAAM;AAC9C,YAAI,cAAc,UAAU,cAAc,OAAQ,mBAAkB;AAAA,MACtE;AACA,cAAQ,iBAAiB;AAAA,QACvB,KAAK;AACH,qBAAW,wBAAwB,OAAO,KAAK,MAAM,CAAC;AACtD;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,wBAAwB,MAAM;AAC/C;AAAA,QACF,KAAK;AACH,cAAI;AACF,kBAAM,EAAE,uBAAuB,IAAI,MAAM,OAAO,sBAAiB;AACjE,uBAAW,MAAM,uBAAuB,MAAM;AAAA,UAChD,QAAQ;AACN,uBAAW;AAAA,UACb;AACA;AAAA,QACF,KAAK;AAAA,QACL,KAAK,QAAQ;AAEX,gBAAM,SAAS,MAAM,MAAM,MAAM;AACjC,qBAAW,OAAO,UAAU,OAAO,WAAW;AAC9C;AAAA,QACF;AAAA,MACF;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,EAAE,QAAQ,GAAG,SAAS,GAAG,MAAM,CAAC,EAAE,CAAC;AAAA,MACpF;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4DAA8B;AAAA,EAClE;AAAA,EACA,OAAO,EAAE,WAAW,MAAM,MAAM;AAC9B,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,QAAQ,EAAE,MAAM,CAAC;AAE5C,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,iBAAO,KAAK;AAAA,QACZ,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,MAClD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,IAAI;AAAA;AAAA,EAAQ,OAAO,QAAQ,GAAG,CAAC;AAAA,MACrE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,SAAS,uFAAsB;AAAA,EACtE;AAAA,EACA,OAAO,EAAE,WAAW,YAAY,M
AAM;AACpC,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,cAAc,OAAO,OAAO,OAAO,OAAK,EAAE,SAAS,WAAW,EAAE,KAAK;AAC3E,UAAI,YAAY,WAAW,GAAG;AAC5B,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wEAAiB,CAAC;AAAA,UAClD,SAAS;AAAA,QACX;AAAA,MACF;AAEA,UAAI,eAAe,YAAY,QAAQ;AACrC,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,uDAAe,WAAW,YAAO,YAAY,MAAM,6BAAS,CAAC;AAAA,UAC7F,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,aAAa,YAAY,WAAW;AAC1C,YAAM,gBAAgB,iBAAiB,CAAC,UAAU,CAAC;AAEnD,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wBAAS,WAAW,aAAQ,YAAY,MAAM;AAAA;AAAA,EAAS,aAAa,GAAG,CAAC;AAAA,MAC1G;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,IACzD,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EAC3D;AAAA,EACA,OAAO,EAAE,aAAa,YAAY,MAAM;AACtC,QAAI;AACF,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AACtD,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AAEtD,YAAM,SAAS,MAAM,QAAQ,MAAM,IAAI;AACvC,YAAM,EAAE,OAAO,MAAM,IAAI;AAEzB,YAAM,QAAkB;AAAA,QACtB;AAAA,QACA,iBAAO,MAAM,KAAK,oBAAU,MAAM,OAAO,oBAAU,MAAM,QAAQ,oBAAU,MAAM,SAAS;AAAA,QAC1F;AAAA,MACF;AAEA,iBAAW,KAAK,OAAO;AACrB,cAAM,SAAS,EAAE,SAAS,UAAU,MAAM,EAAE,SAAS,YAAY,MAAM,EAAE,SAAS,aAAa,MAAM;AACrG,cAAM,OAAO,EAAE,OAAO,QAAQ,EAAE,QAAQ,SAAS,EAAE,OAAO,QAAQ,yBAAU,EAAE,QAAQ,QAAQ,yBAAU;AACxG,cAAM,MAAM,EAAE,eAAe,SAAY,MAAM,EAAE,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AACpF,cAAM,KAAK,GAAG,MAAM,IAAI,KAAK,UAAU,GAAG,GAAG,CAAC,GAAG,GAAG,EAAE;AAAA,MACxD;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,IAAI,EAAE,CAAC;AAAA,MACpD;AAAA,IACF,SAAS,KAAK;AACZ,aAA
O;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EACzD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO,kBAAkB,OAAO,MAAM;AAC5C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,MAAM,MAAM,CAAC,EAAE,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,IACrF,QAAQ,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC,EAAE,SAAS,4JAAqE;AAAA,IACvH,eAAe,EAAE,KAAK,CAAC,YAAY,QAAQ,eAAe,CAAC,EAAE,QAAQ,eAAe,EAAE,SAAS,uJAAuE;AAAA,IACtK,aAAa,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0LAA8C;AAAA,EAC5F;AAAA,EACA,OAAO,EAAE,WAAW,QAAQ,eAAe,YAAY,MAAM;AAC3D,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAG9C,UAAI,kBAAkB,iBAAiB;AACrC,cAAM,SAAS,aAAa,MAAM;AAClC,YAAI,SAAS,WAAW;AACxB,YAAI,QAAQ;AACV,gBAAM,YAAY,MAAM,gBAAgB,MAAM;AAC9C,mBAAS,cAAc;AAAA,QACzB;AACA,YAAI,CAAC,QAAQ;AACX,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,gHAA0C,MAAM,sEAA8B,CAAC;AAAA,YAC/G,SAAS;AAAA,UACX;AAAA,QACF;AAEA,cAAM,aAAa,MAAM,SAAS,QAAQ,MAAM;AAChD,cAAMC,WAAU;AAAA,UACd,oCAAW,WAAW,OAAO,MAAM;AAAA,UACnC,WAAW,UAAU,SAAS,IAAI,8BAAU,WAAW,UAAU,KAAK,IAAI,CAAC,KAAK;AAAA,QAClF,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,cAAM,aAAa,WAAW,OAAO,IAAI,OAAK,OAAO,EAAE,KAAK,KAAK,EAAE,KAAK,EAAE,EAAE,KAAK,IAAI;AAErF,YAAI,aAAa;AACf,oBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,wBAAc,QAAQ,WAAW,GAAG,OAAO,KAAK,WAAW,MAAM,CAAC;AAClE,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAIA,QAAO;AAAA;AAAA;AAAA,EAAiB,UAAU;AAAA;AAAA,2EAA8B,QAAQ,WAAW,CAAC,GAAG,CAAC;AAAA,UAC9H;AAAA,Q
ACF;AAEA,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAIA,QAAO;AAAA;AAAA;AAAA,EAAiB,UAAU;AAAA;AAAA,oKAAsD,CAAC;AAAA,QAC/H;AAAA,MACF;AAGA,YAAM,SAAS,MAAM,MAAM,MAAM;AACjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,WAAW,kBAAkB,OAAO,MAAM;AAChD,YAAM,aAAa,eAAe,OAAO,QAAQ,MAAM;AAEvD,UAAI,WAAW,OAAO,WAAW,KAAK,SAAS,OAAO,WAAW,GAAG;AAClE,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,qNAAgD,CAAC;AAAA,UACjF,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,WAAW,iBAAiB,WAAW,MAAM;AACnD,YAAM,UAAU;AAAA,QACd,oCAAW,WAAW,OAAO,MAAM;AAAA,QACnC,WAAW,UAAU,SAAS,IAAI,8BAAU,WAAW,UAAU,KAAK,IAAI,CAAC,KAAK;AAAA,QAChF,SAAS,OAAO,SAAS,IAAI,8BAAU,SAAS,OAAO,MAAM,+BAAW,SAAS,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AAAA,MACtH,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,UAAI,kBAAkB,QAAQ;AAC5B,cAAM,aAAa,MAAM,eAAe,QAAQ;AAChD,YAAI,aAAa;AACf,oBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,wBAAc,QAAQ,WAAW,GAAG,OAAO,KAAK,UAAU,CAAC;AAC3D,iBAAO;AAAA,YACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,kCAAoB,QAAQ,WAAW,CAAC,GAAG,CAAC;AAAA,UACzF;AAAA,QACF;AACA,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA;AAAA;AAAA,EAAsD,QAAQ,GAAG,CAAC;AAAA,QAC/G;AAAA,MACF;AAGA,UAAI,aAAa;AACf,kBAAU,QAAQ,QAAQ,WAAW,CAAC,GAAG,EAAE,WAAW,KAAK,CAAC;AAC5D,sBAAc,QAAQ,WAAW,GAAG,UAAU,OAAO;AACrD,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,sDAAoB,QAAQ,WAAW,CAAC;AAAA;AAAA,EAAO,QAAQ,GAAG,CAAC;AAAA,QACxG;AAAA,MACF;AACA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,OAAO;AAAA;AAAA,EAAQ,QAAQ,GAAG,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,eAAe,OAAO;AACpB,QAAM,YAAY,IAAI,qBAAqB;AAC3C,QAAM,OAAO,QAAQ,SAAS;AAChC;AAEA,KAAK,EAAE,MAAM,CAAC,QAAQ;AAAE,UAAQ,MAAM,GAAG;AAAG,UAAQ,KAAK,CAAC;AAAE,CAAC;","names":["detectZipFormat","summary"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["
|
|
1
|
+
{"version":3,"sources":["/Users/mong-e/workspace/kordoc/dist/page-range-3C7UGGEK.cjs"],"names":[],"mappings":"AAAA;AACE;AACF,wDAA6B;AAC7B;AACE;AACF,0DAAC","file":"/Users/mong-e/workspace/kordoc/dist/page-range-3C7UGGEK.cjs"}
|
|
File without changes
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
var
|
|
9
|
+
var _chunkQB7CS534cjs = require('./chunk-QB7CS534.cjs');
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
var _chunkMUOQXDZ4cjs = require('./chunk-MUOQXDZ4.cjs');
|
|
@@ -1142,6 +1142,120 @@ function buildClusterTable(rows, columns, pageNum) {
|
|
|
1142
1142
|
};
|
|
1143
1143
|
}
|
|
1144
1144
|
|
|
1145
|
+
// src/pdf/quality.ts
|
|
1146
|
+
function computePageQuality(page, text) {
|
|
1147
|
+
let total = 0;
|
|
1148
|
+
let hangul = 0;
|
|
1149
|
+
let control = 0;
|
|
1150
|
+
let replacement = 0;
|
|
1151
|
+
let pua = 0;
|
|
1152
|
+
for (let i = 0; i < text.length; i++) {
|
|
1153
|
+
const code = text.charCodeAt(i);
|
|
1154
|
+
if (code === 32 || code === 9 || code === 10 || code === 13) continue;
|
|
1155
|
+
total++;
|
|
1156
|
+
if (code < 32 || code === 127 || code >= 128 && code <= 159) {
|
|
1157
|
+
control++;
|
|
1158
|
+
continue;
|
|
1159
|
+
}
|
|
1160
|
+
if (code === 65533) {
|
|
1161
|
+
replacement++;
|
|
1162
|
+
continue;
|
|
1163
|
+
}
|
|
1164
|
+
if (code >= 44032 && code <= 55203) {
|
|
1165
|
+
hangul++;
|
|
1166
|
+
continue;
|
|
1167
|
+
}
|
|
1168
|
+
if (code >= 57344 && code <= 63743 || code >= 56192 && code <= 56319) {
|
|
1169
|
+
pua++;
|
|
1170
|
+
continue;
|
|
1171
|
+
}
|
|
1172
|
+
}
|
|
1173
|
+
const denom = total || 1;
|
|
1174
|
+
const puaRatio = pua / denom;
|
|
1175
|
+
const controlCharRatio = control / denom;
|
|
1176
|
+
const replacementCharRatio = replacement / denom;
|
|
1177
|
+
let needsOcr = false;
|
|
1178
|
+
let ocrReason;
|
|
1179
|
+
if (total < LOW_TEXT_THRESHOLD) {
|
|
1180
|
+
needsOcr = true;
|
|
1181
|
+
ocrReason = "low_text";
|
|
1182
|
+
} else if (puaRatio >= HIGH_PUA_THRESHOLD) {
|
|
1183
|
+
needsOcr = true;
|
|
1184
|
+
ocrReason = "high_pua";
|
|
1185
|
+
} else if (controlCharRatio >= HIGH_CONTROL_THRESHOLD) {
|
|
1186
|
+
needsOcr = true;
|
|
1187
|
+
ocrReason = "high_control";
|
|
1188
|
+
} else if (replacementCharRatio >= HIGH_REPLACEMENT_THRESHOLD) {
|
|
1189
|
+
needsOcr = true;
|
|
1190
|
+
ocrReason = "high_replacement";
|
|
1191
|
+
}
|
|
1192
|
+
return {
|
|
1193
|
+
page,
|
|
1194
|
+
textChars: total,
|
|
1195
|
+
hangulRatio: hangul / denom,
|
|
1196
|
+
controlCharRatio,
|
|
1197
|
+
replacementCharRatio,
|
|
1198
|
+
puaRatio,
|
|
1199
|
+
needsOcr,
|
|
1200
|
+
ocrReason
|
|
1201
|
+
};
|
|
1202
|
+
}
|
|
1203
|
+
var LOW_TEXT_THRESHOLD = 20;
|
|
1204
|
+
var HIGH_PUA_THRESHOLD = 0.2;
|
|
1205
|
+
var HIGH_CONTROL_THRESHOLD = 0.05;
|
|
1206
|
+
var HIGH_REPLACEMENT_THRESHOLD = 0.05;
|
|
1207
|
+
var DOC_NEEDS_OCR_PAGE_RATIO = 0.3;
|
|
1208
|
+
function stripControlChars(text) {
|
|
1209
|
+
return text.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\x80-\x9F]/g, "");
|
|
1210
|
+
}
|
|
1211
|
+
function summarizeDocumentQuality(pages) {
|
|
1212
|
+
if (pages.length === 0) {
|
|
1213
|
+
return {
|
|
1214
|
+
totalPages: 0,
|
|
1215
|
+
totalTextChars: 0,
|
|
1216
|
+
avgHangulRatio: 0,
|
|
1217
|
+
avgControlCharRatio: 0,
|
|
1218
|
+
avgReplacementCharRatio: 0,
|
|
1219
|
+
avgPuaRatio: 0,
|
|
1220
|
+
lowTextPageCount: 0,
|
|
1221
|
+
highPuaPageCount: 0,
|
|
1222
|
+
needsOcr: false,
|
|
1223
|
+
ocrCandidatePages: []
|
|
1224
|
+
};
|
|
1225
|
+
}
|
|
1226
|
+
let textChars = 0;
|
|
1227
|
+
let hangul = 0;
|
|
1228
|
+
let control = 0;
|
|
1229
|
+
let replacement = 0;
|
|
1230
|
+
let pua = 0;
|
|
1231
|
+
let lowText = 0;
|
|
1232
|
+
let highPua = 0;
|
|
1233
|
+
const ocrCandidatePages = [];
|
|
1234
|
+
for (const p of pages) {
|
|
1235
|
+
textChars += p.textChars;
|
|
1236
|
+
hangul += p.hangulRatio;
|
|
1237
|
+
control += p.controlCharRatio;
|
|
1238
|
+
replacement += p.replacementCharRatio;
|
|
1239
|
+
pua += p.puaRatio;
|
|
1240
|
+
if (p.textChars < LOW_TEXT_THRESHOLD) lowText++;
|
|
1241
|
+
if (p.puaRatio >= HIGH_PUA_THRESHOLD) highPua++;
|
|
1242
|
+
if (p.needsOcr) ocrCandidatePages.push(p.page);
|
|
1243
|
+
}
|
|
1244
|
+
const n = pages.length;
|
|
1245
|
+
return {
|
|
1246
|
+
totalPages: n,
|
|
1247
|
+
totalTextChars: textChars,
|
|
1248
|
+
avgHangulRatio: hangul / n,
|
|
1249
|
+
avgControlCharRatio: control / n,
|
|
1250
|
+
avgReplacementCharRatio: replacement / n,
|
|
1251
|
+
avgPuaRatio: pua / n,
|
|
1252
|
+
lowTextPageCount: lowText,
|
|
1253
|
+
highPuaPageCount: highPua,
|
|
1254
|
+
needsOcr: ocrCandidatePages.length / n >= DOC_NEEDS_OCR_PAGE_RATIO,
|
|
1255
|
+
ocrCandidatePages
|
|
1256
|
+
};
|
|
1257
|
+
}
|
|
1258
|
+
|
|
1145
1259
|
// src/pdf/polyfill.ts
|
|
1146
1260
|
var _pdfworkermjs = require('pdfjs-dist/legacy/build/pdf.worker.mjs'); var pdfjsWorker = _interopRequireWildcard(_pdfworkermjs);
|
|
1147
1261
|
var g = globalThis;
|
|
@@ -1179,7 +1293,7 @@ async function loadPdfWithTimeout(buffer) {
|
|
|
1179
1293
|
new Promise((_, reject) => {
|
|
1180
1294
|
timer = setTimeout(() => {
|
|
1181
1295
|
loadingTask.destroy();
|
|
1182
|
-
reject(new (0,
|
|
1296
|
+
reject(new (0, _chunkQB7CS534cjs.KordocError)("PDF \uB85C\uB529 \uD0C0\uC784\uC544\uC6C3 (30\uCD08 \uCD08\uACFC)"));
|
|
1183
1297
|
}, PDF_LOAD_TIMEOUT_MS);
|
|
1184
1298
|
})
|
|
1185
1299
|
]);
|
|
@@ -1192,11 +1306,12 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1192
1306
|
const doc = await loadPdfWithTimeout(buffer);
|
|
1193
1307
|
try {
|
|
1194
1308
|
const pageCount = doc.numPages;
|
|
1195
|
-
if (pageCount === 0) throw new (0,
|
|
1309
|
+
if (pageCount === 0) throw new (0, _chunkQB7CS534cjs.KordocError)("PDF\uC5D0 \uD398\uC774\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
1196
1310
|
const metadata = { pageCount };
|
|
1197
1311
|
await extractPdfMetadata(doc, metadata);
|
|
1198
1312
|
const blocks = [];
|
|
1199
1313
|
const warnings = [];
|
|
1314
|
+
const pageQuality = [];
|
|
1200
1315
|
let totalChars = 0;
|
|
1201
1316
|
let totalTextBytes = 0;
|
|
1202
1317
|
const effectivePageCount = Math.min(pageCount, MAX_PAGES);
|
|
@@ -1224,16 +1339,19 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1224
1339
|
const opList = await page.getOperatorList();
|
|
1225
1340
|
const pageBlocks = extractPageBlocksWithLines(visible, i, opList, viewport.width, viewport.height);
|
|
1226
1341
|
for (const b of pageBlocks) blocks.push(b);
|
|
1342
|
+
let pageText = "";
|
|
1227
1343
|
for (const b of pageBlocks) {
|
|
1228
1344
|
const t = b.text || "";
|
|
1229
1345
|
totalChars += t.replace(/\s/g, "").length;
|
|
1230
1346
|
totalTextBytes += t.length * 2;
|
|
1347
|
+
pageText += pageText ? "\n" + t : t;
|
|
1231
1348
|
}
|
|
1232
|
-
|
|
1349
|
+
pageQuality.push(computePageQuality(i, pageText));
|
|
1350
|
+
if (totalTextBytes > MAX_TOTAL_TEXT) throw new (0, _chunkQB7CS534cjs.KordocError)("\uD14D\uC2A4\uD2B8 \uCD94\uCD9C \uD06C\uAE30 \uCD08\uACFC");
|
|
1233
1351
|
parsedPages++;
|
|
1234
1352
|
_optionalChain([options, 'optionalAccess', _12 => _12.onProgress, 'optionalCall', _13 => _13(parsedPages, totalTarget)]);
|
|
1235
1353
|
} catch (pageErr) {
|
|
1236
|
-
if (pageErr instanceof
|
|
1354
|
+
if (pageErr instanceof _chunkQB7CS534cjs.KordocError) throw pageErr;
|
|
1237
1355
|
warnings.push({ page: i, message: `\uD398\uC774\uC9C0 ${i} \uD30C\uC2F1 \uC2E4\uD328: ${pageErr instanceof Error ? pageErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
1238
1356
|
}
|
|
1239
1357
|
}
|
|
@@ -1241,16 +1359,16 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1241
1359
|
if (totalChars / Math.max(parsedPageCount, 1) < 10) {
|
|
1242
1360
|
if (_optionalChain([options, 'optionalAccess', _14 => _14.ocr])) {
|
|
1243
1361
|
try {
|
|
1244
|
-
const { ocrPages } = await Promise.resolve().then(() => _interopRequireWildcard(require("./provider-
|
|
1362
|
+
const { ocrPages } = await Promise.resolve().then(() => _interopRequireWildcard(require("./provider-SNONEZNW.cjs")));
|
|
1245
1363
|
const ocrBlocks = await ocrPages(doc, options.ocr, pageFilter, effectivePageCount);
|
|
1246
1364
|
if (ocrBlocks.length > 0) {
|
|
1247
1365
|
const ocrMarkdown = ocrBlocks.map((b) => b.text || "").filter(Boolean).join("\n\n");
|
|
1248
|
-
return { markdown: ocrMarkdown, blocks: ocrBlocks, metadata, warnings, isImageBased: true };
|
|
1366
|
+
return { markdown: ocrMarkdown, blocks: ocrBlocks, metadata, warnings, isImageBased: true, pageQuality, qualitySummary: summarizeDocumentQuality(pageQuality) };
|
|
1249
1367
|
}
|
|
1250
1368
|
} catch (e2) {
|
|
1251
1369
|
}
|
|
1252
1370
|
}
|
|
1253
|
-
throw Object.assign(new (0,
|
|
1371
|
+
throw Object.assign(new (0, _chunkQB7CS534cjs.KordocError)(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`), { isImageBased: true });
|
|
1254
1372
|
}
|
|
1255
1373
|
if (_optionalChain([options, 'optionalAccess', _15 => _15.removeHeaderFooter]) !== false && parsedPageCount >= 3) {
|
|
1256
1374
|
const removed = removeHeaderFooterBlocks(blocks, pageHeights, warnings);
|
|
@@ -1274,8 +1392,17 @@ async function parsePdfDocument(buffer, options) {
|
|
|
1274
1392
|
}
|
|
1275
1393
|
detectMarkerHeadings(blocks);
|
|
1276
1394
|
const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
1277
|
-
|
|
1278
|
-
|
|
1395
|
+
sanitizeBlockControlChars(blocks);
|
|
1396
|
+
let markdown = cleanPdfText(_chunkQB7CS534cjs.blocksToMarkdown.call(void 0, blocks));
|
|
1397
|
+
return {
|
|
1398
|
+
markdown,
|
|
1399
|
+
blocks,
|
|
1400
|
+
metadata,
|
|
1401
|
+
outline: outline.length > 0 ? outline : void 0,
|
|
1402
|
+
warnings: warnings.length > 0 ? warnings : void 0,
|
|
1403
|
+
pageQuality,
|
|
1404
|
+
qualitySummary: summarizeDocumentQuality(pageQuality)
|
|
1405
|
+
};
|
|
1279
1406
|
} finally {
|
|
1280
1407
|
await doc.destroy().catch(() => {
|
|
1281
1408
|
});
|
|
@@ -1353,9 +1480,9 @@ function detectHeadings(blocks, medianFontSize) {
|
|
|
1353
1480
|
if (/^\d+$/.test(text)) continue;
|
|
1354
1481
|
const ratio = block.style.fontSize / medianFontSize;
|
|
1355
1482
|
let level = 0;
|
|
1356
|
-
if (ratio >=
|
|
1357
|
-
else if (ratio >=
|
|
1358
|
-
else if (ratio >=
|
|
1483
|
+
if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H1) level = 1;
|
|
1484
|
+
else if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H2) level = 2;
|
|
1485
|
+
else if (ratio >= _chunkQB7CS534cjs.HEADING_RATIO_H3) level = 3;
|
|
1359
1486
|
if (level > 0) {
|
|
1360
1487
|
block.type = "heading";
|
|
1361
1488
|
block.level = level;
|
|
@@ -1603,7 +1730,7 @@ function extractBlocksWithGrids(items, pageNum, grids, horizontals, verticals) {
|
|
|
1603
1730
|
}
|
|
1604
1731
|
if (remaining.length > 0) {
|
|
1605
1732
|
const allY = remaining.map((i) => i.y);
|
|
1606
|
-
const pageH =
|
|
1733
|
+
const pageH = _chunkQB7CS534cjs.safeMax.call(void 0, allY) - _chunkQB7CS534cjs.safeMin.call(void 0, allY);
|
|
1607
1734
|
const groups = xyCutOrder(remaining, Math.max(15, pageH * 0.03));
|
|
1608
1735
|
const textBlocks = [];
|
|
1609
1736
|
for (const group of groups) {
|
|
@@ -1691,7 +1818,7 @@ function extractPageBlocksFallback(items, pageNum) {
|
|
|
1691
1818
|
blocks.push({ type: "paragraph", text: tableText, pageNumber: pageNum, bbox, style: dominantStyle(items) });
|
|
1692
1819
|
} else {
|
|
1693
1820
|
const allY = items.map((i) => i.y);
|
|
1694
|
-
const pageHeight =
|
|
1821
|
+
const pageHeight = _chunkQB7CS534cjs.safeMax.call(void 0, allY) - _chunkQB7CS534cjs.safeMin.call(void 0, allY);
|
|
1695
1822
|
const gapThreshold = Math.max(15, pageHeight * 0.03);
|
|
1696
1823
|
const orderedGroups = xyCutOrder(items, gapThreshold);
|
|
1697
1824
|
for (const group of orderedGroups) {
|
|
@@ -1838,14 +1965,14 @@ function isProseSpread(items) {
|
|
|
1838
1965
|
for (let i = 1; i < sorted.length; i++) {
|
|
1839
1966
|
gaps.push(sorted[i].x - (sorted[i - 1].x + sorted[i - 1].w));
|
|
1840
1967
|
}
|
|
1841
|
-
const maxGap =
|
|
1968
|
+
const maxGap = _chunkQB7CS534cjs.safeMax.call(void 0, gaps);
|
|
1842
1969
|
const avgLen = items.reduce((s, i) => s + i.text.length, 0) / items.length;
|
|
1843
1970
|
return maxGap < 40 && avgLen < 5;
|
|
1844
1971
|
}
|
|
1845
1972
|
function detectColumns(yLines) {
|
|
1846
1973
|
const allItems = yLines.flat();
|
|
1847
1974
|
if (allItems.length === 0) return null;
|
|
1848
|
-
const pageWidth =
|
|
1975
|
+
const pageWidth = _chunkQB7CS534cjs.safeMax.call(void 0, allItems.map((i) => i.x + i.w)) - _chunkQB7CS534cjs.safeMin.call(void 0, allItems.map((i) => i.x));
|
|
1849
1976
|
if (pageWidth < 100) return null;
|
|
1850
1977
|
let bigoLineIdx = -1;
|
|
1851
1978
|
for (let i = 0; i < yLines.length; i++) {
|
|
@@ -2079,9 +2206,22 @@ function mergeLineSimple(items) {
|
|
|
2079
2206
|
}
|
|
2080
2207
|
return result;
|
|
2081
2208
|
}
|
|
2209
|
+
function sanitizeBlockControlChars(blocks) {
|
|
2210
|
+
for (const b of blocks) {
|
|
2211
|
+
if (b.text) b.text = stripControlChars(b.text);
|
|
2212
|
+
if (b.table) {
|
|
2213
|
+
for (const row of b.table.cells) {
|
|
2214
|
+
for (const cell of row) {
|
|
2215
|
+
if (cell.text) cell.text = stripControlChars(cell.text);
|
|
2216
|
+
}
|
|
2217
|
+
}
|
|
2218
|
+
}
|
|
2219
|
+
if (b.children) sanitizeBlockControlChars(b.children);
|
|
2220
|
+
}
|
|
2221
|
+
}
|
|
2082
2222
|
function cleanPdfText(text) {
|
|
2083
2223
|
return mergeKoreanLines(
|
|
2084
|
-
text.replace(/^\d{1,4}\n/, "").replace(/^[\s]*[-–—]\s*[-–—]?\d+[-–—]?[\s]*[-–—]?[\s]*$/gm, "").replace(/^\s*\d+\s*\/\s*\d+\s*$/gm, "").replace(/\n\d{1,4}\n/g, "\n").replace(/\n\d{1,4}$/, "").replace(/^#{1,6}\s*\d{1,4}\s*$/gm, "")
|
|
2224
|
+
stripControlChars(text).replace(/^\d{1,4}\n/, "").replace(/^[\s]*[-–—]\s*[-–—]?\d+[-–—]?[\s]*[-–—]?[\s]*$/gm, "").replace(/^\s*\d+\s*\/\s*\d+\s*$/gm, "").replace(/\n\d{1,4}\n/g, "\n").replace(/\n\d{1,4}$/, "").replace(/^#{1,6}\s*\d{1,4}\s*$/gm, "")
|
|
2085
2225
|
).replace(/^(?!\| ---).*$/gm, (line) => {
|
|
2086
2226
|
if (/^\s*\${1,2}.+\${1,2}\s*$/.test(line)) return line;
|
|
2087
2227
|
return collapseEvenSpacing(line);
|
|
@@ -2411,4 +2551,4 @@ function formatMb(bytes) {
|
|
|
2411
2551
|
|
|
2412
2552
|
|
|
2413
2553
|
exports.cleanPdfText = cleanPdfText; exports.extractPdfMetadataOnly = extractPdfMetadataOnly; exports.parsePdfDocument = parsePdfDocument;
|
|
2414
|
-
//# sourceMappingURL=parser-
|
|
2554
|
+
//# sourceMappingURL=parser-EL5YETUA.cjs.map
|