@clazic/kordoc 2.5.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -1
- package/dist/batch-provider-XRF6F26E.js +234 -0
- package/dist/batch-provider-XRF6F26E.js.map +1 -0
- package/dist/chunk-S7BHLD2V.js +200 -0
- package/dist/{chunk-Y4WFKJ5P.js.map → chunk-S7BHLD2V.js.map} +1 -1
- package/dist/{chunk-IJGNPAK2.js → chunk-TND4YFBV.js} +2 -2
- package/dist/{chunk-QG6BYZMR.js → chunk-TS3F57LY.js} +160 -8
- package/dist/chunk-TS3F57LY.js.map +1 -0
- package/dist/cli.js +53 -6
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +420 -145
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +71 -2
- package/dist/index.d.ts +71 -2
- package/dist/index.js +407 -135
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +44 -3
- package/dist/mcp.js.map +1 -1
- package/dist/{resolve-XWYJYKKH.js → resolve-ZSUEJK3E.js} +4 -4
- package/dist/{utils-RBXHHCLI.js → utils-F66K7PXH.js} +2 -2
- package/dist/{watch-5CCMTZ7F.js → watch-2S5ULHAM.js} +4 -4
- package/package.json +1 -1
- package/dist/batch-provider-5BFJRKAZ.js +0 -190
- package/dist/batch-provider-5BFJRKAZ.js.map +0 -1
- package/dist/chunk-QG6BYZMR.js.map +0 -1
- package/dist/chunk-Y4WFKJ5P.js +0 -167
- /package/dist/{chunk-IJGNPAK2.js.map → chunk-TND4YFBV.js.map} +0 -0
- /package/dist/{resolve-XWYJYKKH.js.map → resolve-ZSUEJK3E.js.map} +0 -0
- /package/dist/{utils-RBXHHCLI.js.map → utils-F66K7PXH.js.map} +0 -0
- /package/dist/{watch-5CCMTZ7F.js.map → watch-2S5ULHAM.js.map} +0 -0
package/dist/mcp.js
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import {
|
|
3
3
|
blocksToMarkdown,
|
|
4
4
|
compare,
|
|
5
|
+
convertToPdf,
|
|
5
6
|
detectFormat,
|
|
6
7
|
extractFormFields,
|
|
7
8
|
extractHwp5MetadataOnly,
|
|
@@ -10,15 +11,15 @@ import {
|
|
|
10
11
|
markdownToHwpx,
|
|
11
12
|
markdownToXlsx,
|
|
12
13
|
parse
|
|
13
|
-
} from "./chunk-
|
|
14
|
+
} from "./chunk-TS3F57LY.js";
|
|
14
15
|
import {
|
|
15
16
|
KordocError,
|
|
16
17
|
VERSION,
|
|
17
18
|
sanitizeError,
|
|
18
19
|
toArrayBuffer
|
|
19
|
-
} from "./chunk-
|
|
20
|
+
} from "./chunk-TND4YFBV.js";
|
|
20
21
|
import "./chunk-MOL7MDBG.js";
|
|
21
|
-
import "./chunk-
|
|
22
|
+
import "./chunk-S7BHLD2V.js";
|
|
22
23
|
import "./chunk-YW5G6BCJ.js";
|
|
23
24
|
import {
|
|
24
25
|
createLoggerFromEnv,
|
|
@@ -220,6 +221,46 @@ ${warnings.map((w) => ` ${w}`).join("\n")}`);
|
|
|
220
221
|
}
|
|
221
222
|
}
|
|
222
223
|
);
|
|
224
|
+
server.tool(
|
|
225
|
+
"convert_to_pdf",
|
|
226
|
+
"HWP \uB610\uB294 HWPX \uD30C\uC77C\uC744 PDF\uB85C \uBCC0\uD658\uD558\uC5EC \uC800\uC7A5\uD569\uB2C8\uB2E4.",
|
|
227
|
+
{
|
|
228
|
+
input_path: z.string().min(1).describe("\uBCC0\uD658\uD560 HWP/HWPX \uD30C\uC77C\uC758 \uC808\uB300 \uACBD\uB85C"),
|
|
229
|
+
output_path: z.string().min(1).describe("\uC800\uC7A5\uD560 PDF \uD30C\uC77C\uC758 \uC808\uB300 \uACBD\uB85C"),
|
|
230
|
+
pages: z.string().optional().describe("\uD398\uC774\uC9C0 \uBC94\uC704 (\uC608: '1-3,5')"),
|
|
231
|
+
timeout: z.number().optional().describe("\uBCC0\uD658 \uD0C0\uC784\uC544\uC6C3 (ms, \uAE30\uBCF8 60000)")
|
|
232
|
+
},
|
|
233
|
+
async ({ input_path, output_path, pages, timeout }) => {
|
|
234
|
+
try {
|
|
235
|
+
const resolvedInput = resolve(input_path);
|
|
236
|
+
const resolvedOutput = resolve(output_path);
|
|
237
|
+
const result = await convertToPdf(resolvedInput, {
|
|
238
|
+
pages,
|
|
239
|
+
timeoutMs: timeout
|
|
240
|
+
});
|
|
241
|
+
if (!result.success) {
|
|
242
|
+
return {
|
|
243
|
+
content: [{ type: "text", text: `\uBCC0\uD658 \uC2E4\uD328: ${result.error}` }],
|
|
244
|
+
isError: true
|
|
245
|
+
};
|
|
246
|
+
}
|
|
247
|
+
writeFileSync(resolvedOutput, result.pdf);
|
|
248
|
+
return {
|
|
249
|
+
content: [{
|
|
250
|
+
type: "text",
|
|
251
|
+
text: `\u2705 \uBCC0\uD658 \uC644\uB8CC: ${resolvedOutput}
|
|
252
|
+
\uC6D0\uBCF8: ${result.sourceFormat}
|
|
253
|
+
\uD06C\uAE30: ${(result.pdf.byteLength / 1024).toFixed(1)}KB`
|
|
254
|
+
}]
|
|
255
|
+
};
|
|
256
|
+
} catch (err) {
|
|
257
|
+
return {
|
|
258
|
+
content: [{ type: "text", text: `\uC624\uB958: ${sanitizeError(err)}` }],
|
|
259
|
+
isError: true
|
|
260
|
+
};
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
);
|
|
223
264
|
server.tool(
|
|
224
265
|
"detect_format",
|
|
225
266
|
"\uD30C\uC77C\uC758 \uD3EC\uB9F7\uC744 \uB9E4\uC9C1 \uBC14\uC774\uD2B8\uB85C \uAC10\uC9C0\uD569\uB2C8\uB2E4 (hwpx, hwp, pdf, unknown).",
|
package/dist/mcp.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/mcp.ts"],"sourcesContent":["/** kordoc MCP 서버 — Claude/Cursor에서 문서 파싱 도구로 사용 */\n\nimport { McpServer } from \"@modelcontextprotocol/sdk/server/mcp.js\"\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\"\nimport { z } from \"zod\"\nimport { readFileSync, realpathSync, openSync, readSync, closeSync, statSync, mkdirSync, writeFileSync, readdirSync, existsSync } from \"fs\"\nimport { resolve, isAbsolute, extname, basename } from \"path\"\nimport { parse, detectFormat, blocksToMarkdown, compare, extractFormFields, markdownToHwpx, markdownToXlsx } from \"./index.js\"\nimport type { ExtractedImage } from \"./types.js\"\nimport { VERSION, toArrayBuffer, sanitizeError, KordocError } from \"./utils.js\"\nimport { createLoggerFromEnv, generateRunId } from \"./logging/logger.js\"\nimport { extractHwp5MetadataOnly } from \"./hwp5/parser.js\"\nimport { extractHwpxMetadataOnly } from \"./hwpx/parser.js\"\nimport { extractPdfMetadataOnly } from \"./pdf/parser.js\"\n\n/** 허용 파일 확장자 */\nconst ALLOWED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\n/** 최대 파일 크기 (500MB) */\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\n\n/** 경로 정규화 및 보안 검증 */\nfunction safePath(filePath: string): string {\n if (!filePath) throw new KordocError(\"파일 경로가 비어있습니다\")\n const resolved = resolve(filePath)\n const real = realpathSync(resolved)\n if (!isAbsolute(real)) throw new KordocError(\"절대 경로만 허용됩니다\")\n const ext = extname(real).toLowerCase()\n if (!ALLOWED_EXTENSIONS.has(ext)) throw new KordocError(`지원하지 않는 확장자입니다: ${ext} (허용: ${[...ALLOWED_EXTENSIONS].join(\", \")})`)\n return real\n}\n\n/** 최대 파일 크기 — metadata 전용 (50MB, 전체 파싱보다 보수적) */\nconst MAX_METADATA_FILE_SIZE = 50 * 1024 * 1024\n\n/** 파일 읽기 + 크기 검증 공통 로직 */\nfunction readValidatedFile(filePath: string, maxSize = MAX_FILE_SIZE): { buffer: ArrayBuffer; resolved: string } {\n const resolved = safePath(filePath)\n const fileSize = statSync(resolved).size\n if (fileSize > maxSize) {\n throw new KordocError(`파일이 너무 큽니다: ${(fileSize / 1024 / 1024).toFixed(1)}MB (최대 ${maxSize / 1024 / 1024}MB)`)\n }\n const raw = readFileSync(resolved)\n return { buffer: toArrayBuffer(raw), resolved }\n}\n\n/** 파일 헤더(16바이트)만 읽어 포맷 감지 — 전체 파일 로드 불필요 */\nfunction detectFormatFromHeader(resolved: string): ReturnType<typeof detectFormat> {\n const fd = openSync(resolved, \"r\")\n try {\n const headerBuf = Buffer.alloc(16)\n readSync(fd, headerBuf, 0, 16, 0)\n return detectFormat(toArrayBuffer(headerBuf))\n } finally {\n closeSync(fd)\n }\n}\n\nconst server = new McpServer({\n name: \"kordoc\",\n version: VERSION,\n})\n\nconst mcpLogger = createLoggerFromEnv().withRun(generateRunId(\"mcp\")).child({ component: \"mcp.ts\" })\n\n// ─── 도구: parse_document ────────────────────────────\n\nserver.tool(\n \"parse_document\",\n \"한국 문서 파일(HWP, HWPX, PDF, XLSX, DOCX)을 마크다운으로 변환합니다. 파일 경로를 입력하면 포맷을 자동 감지하여 텍스트를 추출합니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\n image_dir: z.string().optional().describe(\"이미지 저장 폴더 경로 (기본: 파일명과 같은 이름의 폴더)\"),\n ocr: z.enum([\"auto\", \"gemini\", \"claude\", \"codex\", \"ollama\", \"off\"]).optional().describe(\"OCR 모드 (이미지 기반 PDF용): auto, gemini, claude, codex, ollama, off\"),\n },\n async ({ file_path, image_dir, ocr }) => {\n mcpLogger.log({ level: \"info\", stage: \"detect\", event: \"start\", message: \"MCP parse_document 시작\", meta: { file_path } })\n try {\n const { buffer, resolved: resolvedFilePath } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer, { ocrMode: ocr as import(\"./types.js\").OcrMode | undefined })\n\n if (!result.success) {\n mcpLogger.log({ level: \"error\", stage: \"finalize\", event: \"error\", message: \"MCP parse_document 실패\", meta: { file_path, error: result.error, code: result.code } })\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const meta = [\n `포맷: ${result.fileType.toUpperCase()}`,\n result.pageCount ? `페이지: ${result.pageCount}` : null,\n result.metadata?.title ? `제목: ${result.metadata.title}` : null,\n result.metadata?.author ? `작성자: ${result.metadata.author}` : null,\n result.isImageBased ? \"이미지 기반 PDF (텍스트 추출 불가)\" : null,\n ].filter(Boolean).join(\" | \")\n\n // outline/warnings 부가 정보 추가\n const parts: string[] = [`[${meta}]`]\n\n if (result.outline && result.outline.length > 0) {\n const outlineText = result.outline.map(o => `${\" \".repeat(o.level - 1)}- ${o.text}`).join(\"\\n\")\n parts.push(`\\n📑 문서 구조:\\n${outlineText}`)\n }\n\n if (result.warnings && result.warnings.length > 0) {\n const warnText = result.warnings.map(w => `- [p${w.page || \"?\"}] ${w.message}`).join(\"\\n\")\n parts.push(`\\n⚠️ 경고:\\n${warnText}`)\n }\n\n // 이미지 저장\n const savedImages: string[] = []\n if (result.images?.length) {\n const defaultDir = resolve(resolvedFilePath, \"..\", basename(resolvedFilePath).replace(/\\.[^.]+$/, \"\"))\n const imgDir = image_dir ? resolve(image_dir) : defaultDir\n mkdirSync(imgDir, { recursive: true })\n for (const img of result.images) {\n const imgPath = resolve(imgDir, img.filename)\n writeFileSync(imgPath, img.data)\n savedImages.push(imgPath)\n }\n parts.push(`\\n💾 저장된 이미지 (${savedImages.length}개):\\n${savedImages.map(p => ` ${p}`).join(\"\\n\")}`)\n }\n\n parts.push(`\\n\\n${result.markdown}`)\n mcpLogger.log({ level: \"info\", stage: \"finalize\", event: \"done\", message: \"MCP parse_document 완료\", meta: { file_path, fileType: result.fileType } })\n\n return {\n content: [{ type: \"text\", text: parts.join(\"\") }],\n }\n } catch (err) {\n mcpLogger.log({ level: \"error\", stage: \"finalize\", event: \"error\", message: \"MCP parse_document 예외\", error: { message: sanitizeError(err), name: err instanceof Error ? err.name : \"Error\", stack: err instanceof Error ? err.stack : undefined } })\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: convert_document ──────────────────────────\n\nserver.tool(\n \"convert_document\",\n \"마크다운 텍스트를 HWPX 또는 XLSX 파일로 변환하여 저장합니다.\",\n {\n markdown: z.string().min(1).describe(\"변환할 마크다운 텍스트\"),\n output_path: z.string().min(1).describe(\"저장할 파일의 절대 경로 (.hwpx 또는 .xlsx)\"),\n format: z.enum([\"hwpx\", \"xlsx\"]).optional().default(\"hwpx\").describe(\"출력 포맷 (기본: hwpx)\"),\n image_dir: z.string().optional().describe(\"이미지 폴더 경로 (기본: output_path 파일명 폴더)\"),\n template_path: z.string().optional().describe(\"HWPX 템플릿 파일 경로 (hwpx 전용)\"),\n },\n async ({ markdown, output_path, format, image_dir, template_path }) => {\n try {\n const resolvedOutput = resolve(output_path)\n const outputExt = extname(resolvedOutput).toLowerCase()\n if (outputExt !== \".hwpx\" && outputExt !== \".xlsx\") {\n return {\n content: [{ type: \"text\", text: `오류: 출력 파일 확장자는 .hwpx 또는 .xlsx만 허용됩니다: ${outputExt || \"(없음)\"}` }],\n isError: true,\n }\n }\n const warnings: string[] = []\n\n // 이미지 폴더에서 이미지 로드\n const stem = basename(resolvedOutput).replace(/\\.[^.]+$/, \"\")\n const defaultImgDir = resolve(resolvedOutput, \"..\", stem)\n const imgDir = image_dir ? resolve(image_dir) : defaultImgDir\n const images: ExtractedImage[] = []\n\n if (existsSync(imgDir)) {\n const mimeMap: Record<string, string> = {\n png: \"image/png\", jpg: \"image/jpeg\", jpeg: \"image/jpeg\",\n gif: \"image/gif\", bmp: \"image/bmp\",\n }\n for (const entry of readdirSync(imgDir, { withFileTypes: true })) {\n if (!entry.isFile()) continue\n const fname = entry.name\n const ext = extname(fname).slice(1).toLowerCase()\n if (!mimeMap[ext]) continue\n const data = readFileSync(resolve(imgDir, fname))\n images.push({ filename: fname, data: new Uint8Array(data), mimeType: mimeMap[ext] })\n }\n }\n\n let buf: ArrayBuffer\n if (format === \"xlsx\") {\n if (template_path) warnings.push(\"[warn] --template은 hwpx 전용입니다. 무시됩니다.\")\n buf = await markdownToXlsx(markdown, { warnings, images: images.length ? images : undefined })\n } else {\n let templateArrayBuffer: ArrayBuffer | undefined\n if (template_path) {\n const tmpl = readFileSync(safePath(template_path))\n templateArrayBuffer = tmpl.buffer.slice(tmpl.byteOffset, tmpl.byteOffset + tmpl.byteLength)\n }\n buf = await markdownToHwpx(markdown, {\n warnings,\n images: images.length ? images : undefined,\n templateArrayBuffer,\n })\n }\n\n writeFileSync(resolvedOutput, Buffer.from(buf))\n\n const parts = [\n `✅ 변환 완료: ${resolvedOutput}`,\n `포맷: ${format.toUpperCase()}, 크기: ${(buf.byteLength / 1024).toFixed(1)}KB`,\n ]\n if (images.length) parts.push(`포함된 이미지: ${images.length}개 (${imgDir})`)\n if (warnings.length) parts.push(`경고:\\n${warnings.map(w => ` ${w}`).join(\"\\n\")}`)\n\n return { content: [{ type: \"text\", text: parts.join(\"\\n\") }] }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: detect_format ─────────────────────────────\n\nserver.tool(\n \"detect_format\",\n \"파일의 포맷을 매직 바이트로 감지합니다 (hwpx, hwp, pdf, unknown).\",\n {\n file_path: z.string().min(1).describe(\"감지할 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const resolved = safePath(file_path)\n const format = detectFormatFromHeader(resolved)\n return {\n content: [{ type: \"text\", text: `${file_path}: ${format}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_metadata ────────────────────────────\n\nserver.tool(\n \"parse_metadata\",\n \"문서의 메타데이터(제목, 작성자, 날짜 등)만 빠르게 추출합니다. 전체 파싱 없이 헤더/매니페스트만 읽습니다.\",\n {\n file_path: z.string().min(1).describe(\"메타데이터를 추출할 문서 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const resolved = safePath(file_path)\n const format = detectFormatFromHeader(resolved)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n // metadata 전용 크기 제한 (50MB)\n const { buffer } = readValidatedFile(file_path, MAX_METADATA_FILE_SIZE)\n\n let metadata\n switch (format) {\n case \"hwp\":\n metadata = extractHwp5MetadataOnly(Buffer.from(buffer))\n break\n case \"hwpx\":\n metadata = await extractHwpxMetadataOnly(buffer)\n break\n case \"pdf\":\n metadata = await extractPdfMetadataOnly(buffer)\n break\n }\n\n return {\n content: [{ type: \"text\", text: JSON.stringify({ format, ...metadata }, null, 2) }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_pages ──────────────────────────────\n\nserver.tool(\n \"parse_pages\",\n \"문서의 특정 페이지/섹션 범위만 파싱합니다. PDF는 정확한 페이지, HWP/HWPX는 섹션 단위 근사치입니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\n pages: z.string().min(1).describe(\"페이지 범위 (예: '1-3', '1,3,5-7')\"),\n },\n async ({ file_path, pages }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer, { pages })\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const meta = [\n `포맷: ${result.fileType.toUpperCase()}`,\n `범위: ${pages}`,\n result.pageCount ? `페이지: ${result.pageCount}` : null,\n ].filter(Boolean).join(\" | \")\n\n return {\n content: [{ type: \"text\", text: `[${meta}]\\n\\n${result.markdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_table ──────────────────────────────\n\nserver.tool(\n \"parse_table\",\n \"문서에서 N번째 테이블만 추출합니다 (0-based index). 테이블이 없거나 인덱스 범위를 초과하면 오류를 반환합니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\n table_index: z.number().int().min(0).describe(\"추출할 테이블 인덱스 (0부터 시작)\"),\n },\n async ({ file_path, table_index }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const tableBlocks = result.blocks.filter(b => b.type === \"table\" && b.table)\n if (tableBlocks.length === 0) {\n return {\n content: [{ type: \"text\", text: `문서에 테이블이 없습니다.` }],\n isError: true,\n }\n }\n\n if (table_index >= tableBlocks.length) {\n return {\n content: [{ type: \"text\", text: `테이블 인덱스 초과: ${table_index} (총 ${tableBlocks.length}개 테이블)` }],\n isError: true,\n }\n }\n\n const tableBlock = tableBlocks[table_index]\n const tableMarkdown = blocksToMarkdown([tableBlock])\n\n return {\n content: [{ type: \"text\", text: `[테이블 #${table_index} / 총 ${tableBlocks.length}개]\\n\\n${tableMarkdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: compare_documents ─────────────────────────\n\nserver.tool(\n \"compare_documents\",\n \"두 한국 문서 파일을 비교하여 추가/삭제/변경된 블록을 표시합니다. 신구대조표 생성에 활용됩니다. 크로스 포맷(HWP↔HWPX) 비교 가능.\",\n {\n file_path_a: z.string().min(1).describe(\"비교 원본 문서의 절대 경로\"),\n file_path_b: z.string().min(1).describe(\"비교 대상 문서의 절대 경로\"),\n },\n async ({ file_path_a, file_path_b }) => {\n try {\n const { buffer: bufA } = readValidatedFile(file_path_a)\n const { buffer: bufB } = readValidatedFile(file_path_b)\n\n const result = await compare(bufA, bufB)\n const { stats, diffs } = result\n\n const lines: string[] = [\n `## 문서 비교 결과`,\n `추가: ${stats.added} | 삭제: ${stats.removed} | 변경: ${stats.modified} | 동일: ${stats.unchanged}`,\n \"\",\n ]\n\n for (const d of diffs) {\n const prefix = d.type === \"added\" ? \"+\" : d.type === \"removed\" ? \"-\" : d.type === \"modified\" ? \"~\" : \" \"\n const text = d.after?.text || d.before?.text || (d.after?.table ? \"[테이블]\" : d.before?.table ? \"[테이블]\" : \"\")\n const sim = d.similarity !== undefined ? ` (${(d.similarity * 100).toFixed(0)}%)` : \"\"\n lines.push(`${prefix} ${text.substring(0, 200)}${sim}`)\n }\n\n return {\n content: [{ type: \"text\", text: lines.join(\"\\n\") }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_form ───────────────────────────────\n\nserver.tool(\n \"parse_form\",\n \"한국 서식 문서에서 레이블-값 쌍을 구조화된 JSON으로 추출합니다. 양식/서식 문서에 최적화.\",\n {\n file_path: z.string().min(1).describe(\"서식 문서 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\n isError: true,\n }\n }\n\n const form = extractFormFields(result.blocks)\n return {\n content: [{ type: \"text\", text: JSON.stringify(form, null, 2) }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 서버 시작 ───────────────────────────────────────\n\nasync function main() {\n const transport = new StdioServerTransport()\n await server.connect(transport)\n}\n\nmain().catch((err) => { console.error(err); process.exit(1) })\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEA,SAAS,iBAAiB;AAC1B,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAClB,SAAS,cAAc,cAAc,UAAU,UAAU,WAAW,UAAU,WAAW,eAAe,aAAa,kBAAkB;AACvI,SAAS,SAAS,YAAY,SAAS,gBAAgB;AAUvD,IAAM,qBAAqB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAE9E,IAAM,gBAAgB,MAAM,OAAO;AAGnC,SAAS,SAAS,UAA0B;AAC1C,MAAI,CAAC,SAAU,OAAM,IAAI,YAAY,sEAAe;AACpD,QAAM,WAAW,QAAQ,QAAQ;AACjC,QAAM,OAAO,aAAa,QAAQ;AAClC,MAAI,CAAC,WAAW,IAAI,EAAG,OAAM,IAAI,YAAY,gEAAc;AAC3D,QAAM,MAAM,QAAQ,IAAI,EAAE,YAAY;AACtC,MAAI,CAAC,mBAAmB,IAAI,GAAG,EAAG,OAAM,IAAI,YAAY,+EAAmB,GAAG,mBAAS,CAAC,GAAG,kBAAkB,EAAE,KAAK,IAAI,CAAC,GAAG;AAC5H,SAAO;AACT;AAGA,IAAM,yBAAyB,KAAK,OAAO;AAG3C,SAAS,kBAAkB,UAAkB,UAAU,eAA0D;AAC/G,QAAM,WAAW,SAAS,QAAQ;AAClC,QAAM,WAAW,SAAS,QAAQ,EAAE;AACpC,MAAI,WAAW,SAAS;AACtB,UAAM,IAAI,YAAY,wDAAgB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC,oBAAU,UAAU,OAAO,IAAI,KAAK;AAAA,EAC9G;AACA,QAAM,MAAM,aAAa,QAAQ;AACjC,SAAO,EAAE,QAAQ,cAAc,GAAG,GAAG,SAAS;AAChD;AAGA,SAAS,uBAAuB,UAAmD;AACjF,QAAM,KAAK,SAAS,UAAU,GAAG;AACjC,MAAI;AACF,UAAM,YAAY,OAAO,MAAM,EAAE;AACjC,aAAS,IAAI,WAAW,GAAG,IAAI,CAAC;AAChC,WAAO,aAAa,cAAc,SAAS,CAAC;AAAA,EAC9C,UAAE;AACA,cAAU,EAAE;AAAA,EACd;AACF;AAEA,IAAM,SAAS,IAAI,UAAU;AAAA,EAC3B,MAAM;AAAA,EACN,SAAS;AACX,CAAC;AAED,IAAM,YAAY,oBAAoB,EAAE,QAAQ,cAAc,KAAK,CAAC,EAAE,MAAM,EAAE,WAAW,SAAS,CAAC;AAInG,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,IACrF,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,iJAAmC;AAAA,IAC7E,KAAK,EAAE,KAAK,CAAC,QAAQ,UAAU,UAAU,SAAS,UAAU,KAAK,CAAC,EAAE,SAAS,EAAE,SAAS,wGAAgE;AAAA,EAC1J;AAAA,EACA,OAAO,EAAE,WAAW,WAAW,IAAI,MAAM;AACvC,cAAU,IAAI,EAAE,OAAO,QAAQ,OAAO,UAAU,OAAO,SAAS,SAAS,mCAAyB,MAAM,EAAE,UAAU,EAAE,CAAC;AACvH,QAAI;AACF,YAAM,EAAE,QAAQ,UAAU,iBAAiB,IAAI,kBAAkB,SAAS;AAC1E,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,QAAQ,EAAE,SAAS,IAAgD,CAAC;AAE/F,UAAI,CAAC,OAAO,SAAS;AACnB,kBAAU,IAAI,EAAE,OAAO,SAAS,OAAO,YAAY,OAAO,SAAS,SAAS,mCAAyB,MAAM,EAAE,WAAW,OAAO,OAAO,OAAO,MAAM,OAAO,KAAK,EAAE,CAAC;AAClK,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,QAChD,OAAO,UAAU,QAAQ,iBAAO,OAAO,SAAS,KAAK,KAAK;AAAA,QAC1D,OAAO,UAAU,SAAS,uBAAQ,OAAO,SAAS,MAAM,KAAK;AAAA,QAC7D,OAAO,eAAe,uFAA2B;AAAA,MACnD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAG5B,YAAM,QAAkB,CAAC,IAAI,IAAI,GAAG;AAEpC,UAAI,OAAO,WAAW,OAAO,QAAQ,SAAS,GAAG;AAC/C,cAAM,cAAc,OAAO,QAAQ,IAAI,OAAK,GAAG,KAAK,OAAO,EAAE,QAAQ,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,KAAK,IAAI;AAC/F,cAAM,KAAK;AAAA;AAAA,EAAgB,WAAW,EAAE;AAAA,MAC1C;AAEA,UAAI,OAAO,YAAY,OAAO,SAAS,SAAS,GAAG;AACjD,cAAM,WAAW,OAAO,SAAS,IAAI,OAAK,OAAO,EAAE,QAAQ,GAAG,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,IAAI;AACzF,cAAM,KAAK;AAAA;AAAA,EAAa,QAAQ,EAAE;AAAA,MACpC;AAGA,YAAM,cAAwB,CAAC;AAC/B,UAAI,OAAO,QAAQ,QAAQ;AACzB,cAAM,aAAa,QAAQ,kBAAkB,MAAM,SAAS,gBAAgB,EAAE,QAAQ,YAAY,EAAE,CAAC;AACrG,cAAM,SAAS,YAAY,QAAQ,SAAS,IAAI;AAChD,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,gBAAM,UAAU,QAAQ,QAAQ,IAAI,QAAQ;AAC5C,wBAAc,SAAS,IAAI,IAAI;AAC/B,sBAAY,KAAK,OAAO;AAAA,QAC1B;AACA,cAAM,KAAK;AAAA,mDAAiB,YAAY,MAAM;AAAA,EAAQ,YAAY,IAAI,OAAK,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC,EAAE;AAAA,MACnG;AAEA,YAAM,KAAK;AAAA;AAAA,EAAO,OAAO,QAAQ,EAAE;AACnC,gBAAU,IAAI,EAAE,OAAO,QAAQ,OAAO,YAAY,OAAO,QAAQ,SAAS,mCAAyB,MAAM,EAAE,WAAW,UAAU,OAAO,SAAS,EAAE,CAAC;AAEnJ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,EAAE,EAAE,CAAC;AAAA,MAClD;AAAA,IACF,SAAS,KAAK;AACZ,gBAAU,IAAI,EAAE,OAAO,SAAS,OAAO,YAAY,OAAO,SAAS,SAAS,mCAAyB,OAAO,EAAE,SAAS,cAAc,GAAG,GAAG,MAAM,eAAe,QAAQ,IAAI,OAAO,SAAS,OAAO,eAAe,QAAQ,IAAI,QAAQ,OAAU,EAAE,CAAC;AACnP,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,UAAe,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,gEAAc;AAAA,IACxD,aAAe,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4FAAgC;AAAA,IAC1E,QAAe,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC,EAAE,SAAS,EAAE,QAAQ,MAAM,EAAE,SAAS,gDAAkB;AAAA,IAC9F,WAAe,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0GAAoC;AAAA,IAClF,eAAe,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,uEAA0B;AAAA,EAC1E;AAAA,EACA,OAAO,EAAE,UAAU,aAAa,QAAQ,WAAW,cAAc,MAAM;AACrE,QAAI;AACF,YAAM,iBAAiB,QAAQ,WAAW;AAC1C,YAAM,YAAY,QAAQ,cAAc,EAAE,YAAY;AACtD,UAAI,cAAc,WAAW,cAAc,SAAS;AAClD,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,mIAAyC,aAAa,gBAAM,GAAG,CAAC;AAAA,UAChG,SAAS;AAAA,QACX;AAAA,MACF;AACA,YAAM,WAAqB,CAAC;AAG5B,YAAM,OAAO,SAAS,cAAc,EAAE,QAAQ,YAAY,EAAE;AAC5D,YAAM,gBAAgB,QAAQ,gBAAgB,MAAM,IAAI;AACxD,YAAM,SAAS,YAAY,QAAQ,SAAS,IAAI;AAChD,YAAM,SAA2B,CAAC;AAElC,UAAI,WAAW,MAAM,GAAG;AACtB,cAAM,UAAkC;AAAA,UACtC,KAAK;AAAA,UAAa,KAAK;AAAA,UAAc,MAAM;AAAA,UAC3C,KAAK;AAAA,UAAa,KAAK;AAAA,QACzB;AACA,mBAAW,SAAS,YAAY,QAAQ,EAAE,eAAe,KAAK,CAAC,GAAG;AAChE,cAAI,CAAC,MAAM,OAAO,EAAG;AACrB,gBAAM,QAAQ,MAAM;AACpB,gBAAM,MAAM,QAAQ,KAAK,EAAE,MAAM,CAAC,EAAE,YAAY;AAChD,cAAI,CAAC,QAAQ,GAAG,EAAG;AACnB,gBAAM,OAAO,aAAa,QAAQ,QAAQ,KAAK,CAAC;AAChD,iBAAO,KAAK,EAAE,UAAU,OAAO,MAAM,IAAI,WAAW,IAAI,GAAG,UAAU,QAAQ,GAAG,EAAE,CAAC;AAAA,QACrF;AAAA,MACF;AAEA,UAAI;AACJ,UAAI,WAAW,QAAQ;AACrB,YAAI,cAAe,UAAS,KAAK,8FAAuC;AACxE,cAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,OAAU,CAAC;AAAA,MAC/F,OAAO;AACL,YAAI;AACJ,YAAI,eAAe;AACjB,gBAAM,OAAO,aAAa,SAAS,aAAa,CAAC;AACjD,gCAAsB,KAAK,OAAO,MAAM,KAAK,YAAY,KAAK,aAAa,KAAK,UAAU;AAAA,QAC5F;AACA,cAAM,MAAM,eAAe,UAAU;AAAA,UACnC;AAAA,UACA,QAAQ,OAAO,SAAS,SAAS;AAAA,UACjC;AAAA,QACF,CAAC;AAAA,MACH;AAEA,oBAAc,gBAAgB,OAAO,KAAK,GAAG,CAAC;AAE9C,YAAM,QAAQ;AAAA,QACZ,qCAAY,cAAc;AAAA,QAC1B,iBAAO,OAAO,YAAY,CAAC,oBAAU,IAAI,aAAa,MAAM,QAAQ,CAAC,CAAC;AAAA,MACxE;AACA,UAAI,OAAO,OAAQ,OAAM,KAAK,0CAAY,OAAO,MAAM,WAAM,MAAM,GAAG;AACtE,UAAI,SAAS,OAAQ,OAAM,KAAK;AAAA,EAAQ,SAAS,IAAI,OAAK,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC,EAAE;AAEhF,aAAO,EAAE,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,IAAI,EAAE,CAAC,EAAE;AAAA,IAC/D,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,iEAAe;AAAA,EACvD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAC9C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,MAAM,GAAG,CAAC;AAAA,MAC7D;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mHAAyB;AAAA,EACjE;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAE9C,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAGA,YAAM,EAAE,OAAO,IAAI,kBAAkB,WAAW,sBAAsB;AAEtE,UAAI;AACJ,cAAQ,QAAQ;AAAA,QACd,KAAK;AACH,qBAAW,wBAAwB,OAAO,KAAK,MAAM,CAAC;AACtD;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,wBAAwB,MAAM;AAC/C;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,uBAAuB,MAAM;AAC9C;AAAA,MACJ;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,EAAE,QAAQ,GAAG,SAAS,GAAG,MAAM,CAAC,EAAE,CAAC;AAAA,MACpF;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4DAA8B;AAAA,EAClE;AAAA,EACA,OAAO,EAAE,WAAW,MAAM,MAAM;AAC9B,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,QAAQ,EAAE,MAAM,CAAC;AAE5C,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,iBAAO,KAAK;AAAA,QACZ,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,MAClD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,IAAI;AAAA;AAAA,EAAQ,OAAO,QAAQ,GAAG,CAAC;AAAA,MACrE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,SAAS,uFAAsB;AAAA,EACtE;AAAA,EACA,OAAO,EAAE,WAAW,YAAY,MAAM;AACpC,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,cAAc,OAAO,OAAO,OAAO,OAAK,EAAE,SAAS,WAAW,EAAE,KAAK;AAC3E,UAAI,YAAY,WAAW,GAAG;AAC5B,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wEAAiB,CAAC;AAAA,UAClD,SAAS;AAAA,QACX;AAAA,MACF;AAEA,UAAI,eAAe,YAAY,QAAQ;AACrC,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,uDAAe,WAAW,YAAO,YAAY,MAAM,6BAAS,CAAC;AAAA,UAC7F,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,aAAa,YAAY,WAAW;AAC1C,YAAM,gBAAgB,iBAAiB,CAAC,UAAU,CAAC;AAEnD,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wBAAS,WAAW,aAAQ,YAAY,MAAM;AAAA;AAAA,EAAS,aAAa,GAAG,CAAC;AAAA,MAC1G;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,IACzD,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EAC3D;AAAA,EACA,OAAO,EAAE,aAAa,YAAY,MAAM;AACtC,QAAI;AACF,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AACtD,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AAEtD,YAAM,SAAS,MAAM,QAAQ,MAAM,IAAI;AACvC,YAAM,EAAE,OAAO,MAAM,IAAI;AAEzB,YAAM,QAAkB;AAAA,QACtB;AAAA,QACA,iBAAO,MAAM,KAAK,oBAAU,MAAM,OAAO,oBAAU,MAAM,QAAQ,oBAAU,MAAM,SAAS;AAAA,QAC1F;AAAA,MACF;AAEA,iBAAW,KAAK,OAAO;AACrB,cAAM,SAAS,EAAE,SAAS,UAAU,MAAM,EAAE,SAAS,YAAY,MAAM,EAAE,SAAS,aAAa,MAAM;AACrG,cAAM,OAAO,EAAE,OAAO,QAAQ,EAAE,QAAQ,SAAS,EAAE,OAAO,QAAQ,yBAAU,EAAE,QAAQ,QAAQ,yBAAU;AACxG,cAAM,MAAM,EAAE,eAAe,SAAY,MAAM,EAAE,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AACpF,cAAM,KAAK,GAAG,MAAM,IAAI,KAAK,UAAU,GAAG,GAAG,CAAC,GAAG,GAAG,EAAE;AAAA,MACxD;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,IAAI,EAAE,CAAC;AAAA,MACpD;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EACzD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO,kBAAkB,OAAO,MAAM;AAC5C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,MAAM,MAAM,CAAC,EAAE,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,eAAe,OAAO;AACpB,QAAM,YAAY,IAAI,qBAAqB;AAC3C,QAAM,OAAO,QAAQ,SAAS;AAChC;AAEA,KAAK,EAAE,MAAM,CAAC,QAAQ;AAAE,UAAQ,MAAM,GAAG;AAAG,UAAQ,KAAK,CAAC;AAAE,CAAC;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/mcp.ts"],"sourcesContent":["/** kordoc MCP 서버 — Claude/Cursor에서 문서 파싱 도구로 사용 */\n\nimport { McpServer } from \"@modelcontextprotocol/sdk/server/mcp.js\"\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\"\nimport { z } from \"zod\"\nimport { readFileSync, realpathSync, openSync, readSync, closeSync, statSync, mkdirSync, writeFileSync, readdirSync, existsSync } from \"fs\"\nimport { resolve, isAbsolute, extname, basename } from \"path\"\nimport { parse, detectFormat, blocksToMarkdown, compare, extractFormFields, markdownToHwpx, markdownToXlsx, convertToPdf } from \"./index.js\"\nimport type { ExtractedImage } from \"./types.js\"\nimport { VERSION, toArrayBuffer, sanitizeError, KordocError } from \"./utils.js\"\nimport { createLoggerFromEnv, generateRunId } from \"./logging/logger.js\"\nimport { extractHwp5MetadataOnly } from \"./hwp5/parser.js\"\nimport { extractHwpxMetadataOnly } from \"./hwpx/parser.js\"\nimport { extractPdfMetadataOnly } from \"./pdf/parser.js\"\n\n/** 허용 파일 확장자 */\nconst ALLOWED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\n/** 최대 파일 크기 (500MB) */\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\n\n/** 경로 정규화 및 보안 검증 */\nfunction safePath(filePath: string): string {\n if (!filePath) throw new KordocError(\"파일 경로가 비어있습니다\")\n const resolved = resolve(filePath)\n const real = realpathSync(resolved)\n if (!isAbsolute(real)) throw new KordocError(\"절대 경로만 허용됩니다\")\n const ext = extname(real).toLowerCase()\n if (!ALLOWED_EXTENSIONS.has(ext)) throw new KordocError(`지원하지 않는 확장자입니다: ${ext} (허용: ${[...ALLOWED_EXTENSIONS].join(\", \")})`)\n return real\n}\n\n/** 최대 파일 크기 — metadata 전용 (50MB, 전체 파싱보다 보수적) */\nconst MAX_METADATA_FILE_SIZE = 50 * 1024 * 1024\n\n/** 파일 읽기 + 크기 검증 공통 로직 */\nfunction readValidatedFile(filePath: string, maxSize = MAX_FILE_SIZE): { buffer: ArrayBuffer; resolved: string } {\n const resolved = safePath(filePath)\n const fileSize = statSync(resolved).size\n if (fileSize > maxSize) {\n throw new KordocError(`파일이 너무 큽니다: ${(fileSize / 1024 / 1024).toFixed(1)}MB (최대 ${maxSize / 1024 / 1024}MB)`)\n }\n const raw = readFileSync(resolved)\n return { buffer: toArrayBuffer(raw), resolved }\n}\n\n/** 파일 헤더(16바이트)만 읽어 포맷 감지 — 전체 파일 로드 불필요 */\nfunction detectFormatFromHeader(resolved: string): ReturnType<typeof detectFormat> {\n const fd = openSync(resolved, \"r\")\n try {\n const headerBuf = Buffer.alloc(16)\n readSync(fd, headerBuf, 0, 16, 0)\n return detectFormat(toArrayBuffer(headerBuf))\n } finally {\n closeSync(fd)\n }\n}\n\nconst server = new McpServer({\n name: \"kordoc\",\n version: VERSION,\n})\n\nconst mcpLogger = createLoggerFromEnv().withRun(generateRunId(\"mcp\")).child({ component: \"mcp.ts\" })\n\n// ─── 도구: parse_document ────────────────────────────\n\nserver.tool(\n \"parse_document\",\n \"한국 문서 파일(HWP, HWPX, PDF, XLSX, DOCX)을 마크다운으로 변환합니다. 파일 경로를 입력하면 포맷을 자동 감지하여 텍스트를 추출합니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\n image_dir: z.string().optional().describe(\"이미지 저장 폴더 경로 (기본: 파일명과 같은 이름의 폴더)\"),\n ocr: z.enum([\"auto\", \"gemini\", \"claude\", \"codex\", \"ollama\", \"off\"]).optional().describe(\"OCR 모드 (이미지 기반 PDF용): auto, gemini, claude, codex, ollama, off\"),\n },\n async ({ file_path, image_dir, ocr }) => {\n mcpLogger.log({ level: \"info\", stage: \"detect\", event: \"start\", message: \"MCP parse_document 시작\", meta: { file_path } })\n try {\n const { buffer, resolved: resolvedFilePath } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer, { ocrMode: ocr as import(\"./types.js\").OcrMode | undefined })\n\n if (!result.success) {\n mcpLogger.log({ level: \"error\", stage: \"finalize\", event: \"error\", message: \"MCP parse_document 실패\", meta: { file_path, error: result.error, code: result.code } })\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const meta = [\n `포맷: ${result.fileType.toUpperCase()}`,\n result.pageCount ? `페이지: ${result.pageCount}` : null,\n result.metadata?.title ? `제목: ${result.metadata.title}` : null,\n result.metadata?.author ? `작성자: ${result.metadata.author}` : null,\n result.isImageBased ? \"이미지 기반 PDF (텍스트 추출 불가)\" : null,\n ].filter(Boolean).join(\" | \")\n\n // outline/warnings 부가 정보 추가\n const parts: string[] = [`[${meta}]`]\n\n if (result.outline && result.outline.length > 0) {\n const outlineText = result.outline.map(o => `${\" \".repeat(o.level - 1)}- ${o.text}`).join(\"\\n\")\n parts.push(`\\n📑 문서 구조:\\n${outlineText}`)\n }\n\n if (result.warnings && result.warnings.length > 0) {\n const warnText = result.warnings.map(w => `- [p${w.page || \"?\"}] ${w.message}`).join(\"\\n\")\n parts.push(`\\n⚠️ 경고:\\n${warnText}`)\n }\n\n // 이미지 저장\n const savedImages: string[] = []\n if (result.images?.length) {\n const defaultDir = resolve(resolvedFilePath, \"..\", basename(resolvedFilePath).replace(/\\.[^.]+$/, \"\"))\n const imgDir = image_dir ? resolve(image_dir) : defaultDir\n mkdirSync(imgDir, { recursive: true })\n for (const img of result.images) {\n const imgPath = resolve(imgDir, img.filename)\n writeFileSync(imgPath, img.data)\n savedImages.push(imgPath)\n }\n parts.push(`\\n💾 저장된 이미지 (${savedImages.length}개):\\n${savedImages.map(p => ` ${p}`).join(\"\\n\")}`)\n }\n\n parts.push(`\\n\\n${result.markdown}`)\n mcpLogger.log({ level: \"info\", stage: \"finalize\", event: \"done\", message: \"MCP parse_document 완료\", meta: { file_path, fileType: result.fileType } })\n\n return {\n content: [{ type: \"text\", text: parts.join(\"\") }],\n }\n } catch (err) {\n mcpLogger.log({ level: \"error\", stage: \"finalize\", event: \"error\", message: \"MCP parse_document 예외\", error: { message: sanitizeError(err), name: err instanceof Error ? err.name : \"Error\", stack: err instanceof Error ? err.stack : undefined } })\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: convert_document ──────────────────────────\n\nserver.tool(\n \"convert_document\",\n \"마크다운 텍스트를 HWPX 또는 XLSX 파일로 변환하여 저장합니다.\",\n {\n markdown: z.string().min(1).describe(\"변환할 마크다운 텍스트\"),\n output_path: z.string().min(1).describe(\"저장할 파일의 절대 경로 (.hwpx 또는 .xlsx)\"),\n format: z.enum([\"hwpx\", \"xlsx\"]).optional().default(\"hwpx\").describe(\"출력 포맷 (기본: hwpx)\"),\n image_dir: z.string().optional().describe(\"이미지 폴더 경로 (기본: output_path 파일명 폴더)\"),\n template_path: z.string().optional().describe(\"HWPX 템플릿 파일 경로 (hwpx 전용)\"),\n },\n async ({ markdown, output_path, format, image_dir, template_path }) => {\n try {\n const resolvedOutput = resolve(output_path)\n const outputExt = extname(resolvedOutput).toLowerCase()\n if (outputExt !== \".hwpx\" && outputExt !== \".xlsx\") {\n return {\n content: [{ type: \"text\", text: `오류: 출력 파일 확장자는 .hwpx 또는 .xlsx만 허용됩니다: ${outputExt || \"(없음)\"}` }],\n isError: true,\n }\n }\n const warnings: string[] = []\n\n // 이미지 폴더에서 이미지 로드\n const stem = basename(resolvedOutput).replace(/\\.[^.]+$/, \"\")\n const defaultImgDir = resolve(resolvedOutput, \"..\", stem)\n const imgDir = image_dir ? resolve(image_dir) : defaultImgDir\n const images: ExtractedImage[] = []\n\n if (existsSync(imgDir)) {\n const mimeMap: Record<string, string> = {\n png: \"image/png\", jpg: \"image/jpeg\", jpeg: \"image/jpeg\",\n gif: \"image/gif\", bmp: \"image/bmp\",\n }\n for (const entry of readdirSync(imgDir, { withFileTypes: true })) {\n if (!entry.isFile()) continue\n const fname = entry.name\n const ext = extname(fname).slice(1).toLowerCase()\n if (!mimeMap[ext]) continue\n const data = readFileSync(resolve(imgDir, fname))\n images.push({ filename: fname, data: new Uint8Array(data), mimeType: mimeMap[ext] })\n }\n }\n\n let buf: ArrayBuffer\n if (format === \"xlsx\") {\n if (template_path) warnings.push(\"[warn] --template은 hwpx 전용입니다. 무시됩니다.\")\n buf = await markdownToXlsx(markdown, { warnings, images: images.length ? images : undefined })\n } else {\n let templateArrayBuffer: ArrayBuffer | undefined\n if (template_path) {\n const tmpl = readFileSync(safePath(template_path))\n templateArrayBuffer = tmpl.buffer.slice(tmpl.byteOffset, tmpl.byteOffset + tmpl.byteLength)\n }\n buf = await markdownToHwpx(markdown, {\n warnings,\n images: images.length ? images : undefined,\n templateArrayBuffer,\n })\n }\n\n writeFileSync(resolvedOutput, Buffer.from(buf))\n\n const parts = [\n `✅ 변환 완료: ${resolvedOutput}`,\n `포맷: ${format.toUpperCase()}, 크기: ${(buf.byteLength / 1024).toFixed(1)}KB`,\n ]\n if (images.length) parts.push(`포함된 이미지: ${images.length}개 (${imgDir})`)\n if (warnings.length) parts.push(`경고:\\n${warnings.map(w => ` ${w}`).join(\"\\n\")}`)\n\n return { content: [{ type: \"text\", text: parts.join(\"\\n\") }] }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: convert_to_pdf ────────────────────────────\n\nserver.tool(\n \"convert_to_pdf\",\n \"HWP 또는 HWPX 파일을 PDF로 변환하여 저장합니다.\",\n {\n input_path: z.string().min(1).describe(\"변환할 HWP/HWPX 파일의 절대 경로\"),\n output_path: z.string().min(1).describe(\"저장할 PDF 파일의 절대 경로\"),\n pages: z.string().optional().describe(\"페이지 범위 (예: '1-3,5')\"),\n timeout: z.number().optional().describe(\"변환 타임아웃 (ms, 기본 60000)\"),\n },\n async ({ input_path, output_path, pages, timeout }) => {\n try {\n const resolvedInput = resolve(input_path)\n const resolvedOutput = resolve(output_path)\n\n const result = await convertToPdf(resolvedInput, {\n pages,\n timeoutMs: timeout,\n })\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `변환 실패: ${result.error}` }],\n isError: true,\n }\n }\n\n writeFileSync(resolvedOutput, result.pdf)\n\n return {\n content: [{\n type: \"text\",\n text: `✅ 변환 완료: ${resolvedOutput}\\n원본: ${result.sourceFormat}\\n크기: ${(result.pdf.byteLength / 1024).toFixed(1)}KB`,\n }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: detect_format ─────────────────────────────\n\nserver.tool(\n \"detect_format\",\n \"파일의 포맷을 매직 바이트로 감지합니다 (hwpx, hwp, pdf, unknown).\",\n {\n file_path: z.string().min(1).describe(\"감지할 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const resolved = safePath(file_path)\n const format = detectFormatFromHeader(resolved)\n return {\n content: [{ type: \"text\", text: `${file_path}: ${format}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_metadata ────────────────────────────\n\nserver.tool(\n \"parse_metadata\",\n \"문서의 메타데이터(제목, 작성자, 날짜 등)만 빠르게 추출합니다. 전체 파싱 없이 헤더/매니페스트만 읽습니다.\",\n {\n file_path: z.string().min(1).describe(\"메타데이터를 추출할 문서 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const resolved = safePath(file_path)\n const format = detectFormatFromHeader(resolved)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n // metadata 전용 크기 제한 (50MB)\n const { buffer } = readValidatedFile(file_path, MAX_METADATA_FILE_SIZE)\n\n let metadata\n switch (format) {\n case \"hwp\":\n metadata = extractHwp5MetadataOnly(Buffer.from(buffer))\n break\n case \"hwpx\":\n metadata = await extractHwpxMetadataOnly(buffer)\n break\n case \"pdf\":\n metadata = await extractPdfMetadataOnly(buffer)\n break\n }\n\n return {\n content: [{ type: \"text\", text: JSON.stringify({ format, ...metadata }, null, 2) }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_pages ──────────────────────────────\n\nserver.tool(\n \"parse_pages\",\n \"문서의 특정 페이지/섹션 범위만 파싱합니다. PDF는 정확한 페이지, HWP/HWPX는 섹션 단위 근사치입니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\n pages: z.string().min(1).describe(\"페이지 범위 (예: '1-3', '1,3,5-7')\"),\n },\n async ({ file_path, pages }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer, { pages })\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const meta = [\n `포맷: ${result.fileType.toUpperCase()}`,\n `범위: ${pages}`,\n result.pageCount ? `페이지: ${result.pageCount}` : null,\n ].filter(Boolean).join(\" | \")\n\n return {\n content: [{ type: \"text\", text: `[${meta}]\\n\\n${result.markdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_table ──────────────────────────────\n\nserver.tool(\n \"parse_table\",\n \"문서에서 N번째 테이블만 추출합니다 (0-based index). 테이블이 없거나 인덱스 범위를 초과하면 오류를 반환합니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\n table_index: z.number().int().min(0).describe(\"추출할 테이블 인덱스 (0부터 시작)\"),\n },\n async ({ file_path, table_index }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const tableBlocks = result.blocks.filter(b => b.type === \"table\" && b.table)\n if (tableBlocks.length === 0) {\n return {\n content: [{ type: \"text\", text: `문서에 테이블이 없습니다.` }],\n isError: true,\n }\n }\n\n if (table_index >= tableBlocks.length) {\n return {\n content: [{ type: \"text\", text: `테이블 인덱스 초과: ${table_index} (총 ${tableBlocks.length}개 테이블)` }],\n isError: true,\n }\n }\n\n const tableBlock = tableBlocks[table_index]\n const tableMarkdown = blocksToMarkdown([tableBlock])\n\n return {\n content: [{ type: \"text\", text: `[테이블 #${table_index} / 총 ${tableBlocks.length}개]\\n\\n${tableMarkdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: compare_documents ─────────────────────────\n\nserver.tool(\n \"compare_documents\",\n \"두 한국 문서 파일을 비교하여 추가/삭제/변경된 블록을 표시합니다. 신구대조표 생성에 활용됩니다. 크로스 포맷(HWP↔HWPX) 비교 가능.\",\n {\n file_path_a: z.string().min(1).describe(\"비교 원본 문서의 절대 경로\"),\n file_path_b: z.string().min(1).describe(\"비교 대상 문서의 절대 경로\"),\n },\n async ({ file_path_a, file_path_b }) => {\n try {\n const { buffer: bufA } = readValidatedFile(file_path_a)\n const { buffer: bufB } = readValidatedFile(file_path_b)\n\n const result = await compare(bufA, bufB)\n const { stats, diffs } = result\n\n const lines: string[] = [\n `## 문서 비교 결과`,\n `추가: ${stats.added} | 삭제: ${stats.removed} | 변경: ${stats.modified} | 동일: ${stats.unchanged}`,\n \"\",\n ]\n\n for (const d of diffs) {\n const prefix = d.type === \"added\" ? \"+\" : d.type === \"removed\" ? \"-\" : d.type === \"modified\" ? \"~\" : \" \"\n const text = d.after?.text || d.before?.text || (d.after?.table ? \"[테이블]\" : d.before?.table ? \"[테이블]\" : \"\")\n const sim = d.similarity !== undefined ? ` (${(d.similarity * 100).toFixed(0)}%)` : \"\"\n lines.push(`${prefix} ${text.substring(0, 200)}${sim}`)\n }\n\n return {\n content: [{ type: \"text\", text: lines.join(\"\\n\") }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_form ───────────────────────────────\n\nserver.tool(\n \"parse_form\",\n \"한국 서식 문서에서 레이블-값 쌍을 구조화된 JSON으로 추출합니다. 양식/서식 문서에 최적화.\",\n {\n file_path: z.string().min(1).describe(\"서식 문서 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\n isError: true,\n }\n }\n\n const form = extractFormFields(result.blocks)\n return {\n content: [{ type: \"text\", text: JSON.stringify(form, null, 2) }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 서버 시작 ───────────────────────────────────────\n\nasync function main() {\n const transport = new StdioServerTransport()\n await server.connect(transport)\n}\n\nmain().catch((err) => { console.error(err); process.exit(1) })\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEA,SAAS,iBAAiB;AAC1B,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAClB,SAAS,cAAc,cAAc,UAAU,UAAU,WAAW,UAAU,WAAW,eAAe,aAAa,kBAAkB;AACvI,SAAS,SAAS,YAAY,SAAS,gBAAgB;AAUvD,IAAM,qBAAqB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAE9E,IAAM,gBAAgB,MAAM,OAAO;AAGnC,SAAS,SAAS,UAA0B;AAC1C,MAAI,CAAC,SAAU,OAAM,IAAI,YAAY,sEAAe;AACpD,QAAM,WAAW,QAAQ,QAAQ;AACjC,QAAM,OAAO,aAAa,QAAQ;AAClC,MAAI,CAAC,WAAW,IAAI,EAAG,OAAM,IAAI,YAAY,gEAAc;AAC3D,QAAM,MAAM,QAAQ,IAAI,EAAE,YAAY;AACtC,MAAI,CAAC,mBAAmB,IAAI,GAAG,EAAG,OAAM,IAAI,YAAY,+EAAmB,GAAG,mBAAS,CAAC,GAAG,kBAAkB,EAAE,KAAK,IAAI,CAAC,GAAG;AAC5H,SAAO;AACT;AAGA,IAAM,yBAAyB,KAAK,OAAO;AAG3C,SAAS,kBAAkB,UAAkB,UAAU,eAA0D;AAC/G,QAAM,WAAW,SAAS,QAAQ;AAClC,QAAM,WAAW,SAAS,QAAQ,EAAE;AACpC,MAAI,WAAW,SAAS;AACtB,UAAM,IAAI,YAAY,wDAAgB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC,oBAAU,UAAU,OAAO,IAAI,KAAK;AAAA,EAC9G;AACA,QAAM,MAAM,aAAa,QAAQ;AACjC,SAAO,EAAE,QAAQ,cAAc,GAAG,GAAG,SAAS;AAChD;AAGA,SAAS,uBAAuB,UAAmD;AACjF,QAAM,KAAK,SAAS,UAAU,GAAG;AACjC,MAAI;AACF,UAAM,YAAY,OAAO,MAAM,EAAE;AACjC,aAAS,IAAI,WAAW,GAAG,IAAI,CAAC;AAChC,WAAO,aAAa,cAAc,SAAS,CAAC;AAAA,EAC9C,UAAE;AACA,cAAU,EAAE;AAAA,EACd;AACF;AAEA,IAAM,SAAS,IAAI,UAAU;AAAA,EAC3B,MAAM;AAAA,EACN,SAAS;AACX,CAAC;AAED,IAAM,YAAY,oBAAoB,EAAE,QAAQ,cAAc,KAAK,CAAC,EAAE,MAAM,EAAE,WAAW,SAAS,CAAC;AAInG,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,IACrF,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,iJAAmC;AAAA,IAC7E,KAAK,EAAE,KAAK,CAAC,QAAQ,UAAU,UAAU,SAAS,UAAU,KAAK,CAAC,EAAE,SAAS,EAAE,SAAS,wGAAgE;AAAA,EAC1J;AAAA,EACA,OAAO,EAAE,WAAW,WAAW,IAAI,MAAM;AACvC,cAAU,IAAI,EAAE,OAAO,QAAQ,OAAO,UAAU,OAAO,SAAS,SAAS,mCAAyB,MAAM,EAAE,UAAU,EAAE,CAAC;AACvH,QAAI;AACF,YAAM,EAAE,QAAQ,UAAU,iBAAiB,IAAI,kBAAkB,SAAS;AAC1E,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,QAAQ,EAAE,SAAS,IAAgD,CAAC;AAE/F,UAAI,CAAC,OAAO,SAAS;AACnB,kBAAU,IAAI,EAAE,OAAO,SAAS,OAAO,YAAY,OAAO,SAAS,SAAS,mCAAyB,MAAM,EAAE,WAAW,OAAO,OAAO,OAAO,MAAM,OAAO,KAAK,EAAE,CAAC;AAClK,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,QAChD,OAAO,UAAU,QAAQ,iBAAO,OAAO,SAAS,KAAK,KAAK;AAAA,QAC1D,OAAO,UAAU,SAAS,uBAAQ,OAAO,SAAS,MAAM,KAAK;AAAA,QAC7D,OAAO,eAAe,uFAA2B;AAAA,MACnD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAG5B,YAAM,QAAkB,CAAC,IAAI,IAAI,GAAG;AAEpC,UAAI,OAAO,WAAW,OAAO,QAAQ,SAAS,GAAG;AAC/C,cAAM,cAAc,OAAO,QAAQ,IAAI,OAAK,GAAG,KAAK,OAAO,EAAE,QAAQ,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,KAAK,IAAI;AAC/F,cAAM,KAAK;AAAA;AAAA,EAAgB,WAAW,EAAE;AAAA,MAC1C;AAEA,UAAI,OAAO,YAAY,OAAO,SAAS,SAAS,GAAG;AACjD,cAAM,WAAW,OAAO,SAAS,IAAI,OAAK,OAAO,EAAE,QAAQ,GAAG,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,IAAI;AACzF,cAAM,KAAK;AAAA;AAAA,EAAa,QAAQ,EAAE;AAAA,MACpC;AAGA,YAAM,cAAwB,CAAC;AAC/B,UAAI,OAAO,QAAQ,QAAQ;AACzB,cAAM,aAAa,QAAQ,kBAAkB,MAAM,SAAS,gBAAgB,EAAE,QAAQ,YAAY,EAAE,CAAC;AACrG,cAAM,SAAS,YAAY,QAAQ,SAAS,IAAI;AAChD,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,gBAAM,UAAU,QAAQ,QAAQ,IAAI,QAAQ;AAC5C,wBAAc,SAAS,IAAI,IAAI;AAC/B,sBAAY,KAAK,OAAO;AAAA,QAC1B;AACA,cAAM,KAAK;AAAA,mDAAiB,YAAY,MAAM;AAAA,EAAQ,YAAY,IAAI,OAAK,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC,EAAE;AAAA,MACnG;AAEA,YAAM,KAAK;AAAA;AAAA,EAAO,OAAO,QAAQ,EAAE;AACnC,gBAAU,IAAI,EAAE,OAAO,QAAQ,OAAO,YAAY,OAAO,QAAQ,SAAS,mCAAyB,MAAM,EAAE,WAAW,UAAU,OAAO,SAAS,EAAE,CAAC;AAEnJ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,EAAE,EAAE,CAAC;AAAA,MAClD;AAAA,IACF,SAAS,KAAK;AACZ,gBAAU,IAAI,EAAE,OAAO,SAAS,OAAO,YAAY,OAAO,SAAS,SAAS,mCAAyB,OAAO,EAAE,SAAS,cAAc,GAAG,GAAG,MAAM,eAAe,QAAQ,IAAI,OAAO,SAAS,OAAO,eAAe,QAAQ,IAAI,QAAQ,OAAU,EAAE,CAAC;AACnP,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,UAAe,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,gEAAc;AAAA,IACxD,aAAe,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4FAAgC;AAAA,IAC1E,QAAe,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC,EAAE,SAAS,EAAE,QAAQ,MAAM,EAAE,SAAS,gDAAkB;AAAA,IAC9F,WAAe,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,0GAAoC;AAAA,IAClF,eAAe,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,uEAA0B;AAAA,EAC1E;AAAA,EACA,OAAO,EAAE,UAAU,aAAa,QAAQ,WAAW,cAAc,MAAM;AACrE,QAAI;AACF,YAAM,iBAAiB,QAAQ,WAAW;AAC1C,YAAM,YAAY,QAAQ,cAAc,EAAE,YAAY;AACtD,UAAI,cAAc,WAAW,cAAc,SAAS;AAClD,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,mIAAyC,aAAa,gBAAM,GAAG,CAAC;AAAA,UAChG,SAAS;AAAA,QACX;AAAA,MACF;AACA,YAAM,WAAqB,CAAC;AAG5B,YAAM,OAAO,SAAS,cAAc,EAAE,QAAQ,YAAY,EAAE;AAC5D,YAAM,gBAAgB,QAAQ,gBAAgB,MAAM,IAAI;AACxD,YAAM,SAAS,YAAY,QAAQ,SAAS,IAAI;AAChD,YAAM,SAA2B,CAAC;AAElC,UAAI,WAAW,MAAM,GAAG;AACtB,cAAM,UAAkC;AAAA,UACtC,KAAK;AAAA,UAAa,KAAK;AAAA,UAAc,MAAM;AAAA,UAC3C,KAAK;AAAA,UAAa,KAAK;AAAA,QACzB;AACA,mBAAW,SAAS,YAAY,QAAQ,EAAE,eAAe,KAAK,CAAC,GAAG;AAChE,cAAI,CAAC,MAAM,OAAO,EAAG;AACrB,gBAAM,QAAQ,MAAM;AACpB,gBAAM,MAAM,QAAQ,KAAK,EAAE,MAAM,CAAC,EAAE,YAAY;AAChD,cAAI,CAAC,QAAQ,GAAG,EAAG;AACnB,gBAAM,OAAO,aAAa,QAAQ,QAAQ,KAAK,CAAC;AAChD,iBAAO,KAAK,EAAE,UAAU,OAAO,MAAM,IAAI,WAAW,IAAI,GAAG,UAAU,QAAQ,GAAG,EAAE,CAAC;AAAA,QACrF;AAAA,MACF;AAEA,UAAI;AACJ,UAAI,WAAW,QAAQ;AACrB,YAAI,cAAe,UAAS,KAAK,8FAAuC;AACxE,cAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,OAAU,CAAC;AAAA,MAC/F,OAAO;AACL,YAAI;AACJ,YAAI,eAAe;AACjB,gBAAM,OAAO,aAAa,SAAS,aAAa,CAAC;AACjD,gCAAsB,KAAK,OAAO,MAAM,KAAK,YAAY,KAAK,aAAa,KAAK,UAAU;AAAA,QAC5F;AACA,cAAM,MAAM,eAAe,UAAU;AAAA,UACnC;AAAA,UACA,QAAQ,OAAO,SAAS,SAAS;AAAA,UACjC;AAAA,QACF,CAAC;AAAA,MACH;AAEA,oBAAc,gBAAgB,OAAO,KAAK,GAAG,CAAC;AAE9C,YAAM,QAAQ;AAAA,QACZ,qCAAY,cAAc;AAAA,QAC1B,iBAAO,OAAO,YAAY,CAAC,oBAAU,IAAI,aAAa,MAAM,QAAQ,CAAC,CAAC;AAAA,MACxE;AACA,UAAI,OAAO,OAAQ,OAAM,KAAK,0CAAY,OAAO,MAAM,WAAM,MAAM,GAAG;AACtE,UAAI,SAAS,OAAQ,OAAM,KAAK;AAAA,EAAQ,SAAS,IAAI,OAAK,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC,EAAE;AAEhF,aAAO,EAAE,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,IAAI,EAAE,CAAC,EAAE;AAAA,IAC/D,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,YAAY,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,0EAAwB;AAAA,IAC/D,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,qEAAmB;AAAA,IAC3D,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,mDAAqB;AAAA,IAC3D,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,SAAS,gEAAwB;AAAA,EAClE;AAAA,EACA,OAAO,EAAE,YAAY,aAAa,OAAO,QAAQ,MAAM;AACrD,QAAI;AACF,YAAM,gBAAgB,QAAQ,UAAU;AACxC,YAAM,iBAAiB,QAAQ,WAAW;AAE1C,YAAM,SAAS,MAAM,aAAa,eAAe;AAAA,QAC/C;AAAA,QACA,WAAW;AAAA,MACb,CAAC;AAED,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,oBAAc,gBAAgB,OAAO,GAAG;AAExC,aAAO;AAAA,QACL,SAAS,CAAC;AAAA,UACR,MAAM;AAAA,UACN,MAAM,qCAAY,cAAc;AAAA,gBAAS,OAAO,YAAY;AAAA,iBAAU,OAAO,IAAI,aAAa,MAAM,QAAQ,CAAC,CAAC;AAAA,QAChH,CAAC;AAAA,MACH;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,iEAAe;AAAA,EACvD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAC9C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,MAAM,GAAG,CAAC;AAAA,MAC7D;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mHAAyB;AAAA,EACjE;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAE9C,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAGA,YAAM,EAAE,OAAO,IAAI,kBAAkB,WAAW,sBAAsB;AAEtE,UAAI;AACJ,cAAQ,QAAQ;AAAA,QACd,KAAK;AACH,qBAAW,wBAAwB,OAAO,KAAK,MAAM,CAAC;AACtD;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,wBAAwB,MAAM;AAC/C;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,uBAAuB,MAAM;AAC9C;AAAA,MACJ;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,EAAE,QAAQ,GAAG,SAAS,GAAG,MAAM,CAAC,EAAE,CAAC;AAAA,MACpF;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4DAA8B;AAAA,EAClE;AAAA,EACA,OAAO,EAAE,WAAW,MAAM,MAAM;AAC9B,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,QAAQ,EAAE,MAAM,CAAC;AAE5C,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,iBAAO,KAAK;AAAA,QACZ,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,MAClD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,IAAI;AAAA;AAAA,EAAQ,OAAO,QAAQ,GAAG,CAAC;AAAA,MACrE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,SAAS,uFAAsB;AAAA,EACtE;AAAA,EACA,OAAO,EAAE,WAAW,YAAY,MAAM;AACpC,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,cAAc,OAAO,OAAO,OAAO,OAAK,EAAE,SAAS,WAAW,EAAE,KAAK;AAC3E,UAAI,YAAY,WAAW,GAAG;AAC5B,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wEAAiB,CAAC;AAAA,UAClD,SAAS;AAAA,QACX;AAAA,MACF;AAEA,UAAI,eAAe,YAAY,QAAQ;AACrC,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,uDAAe,WAAW,YAAO,YAAY,MAAM,6BAAS,CAAC;AAAA,UAC7F,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,aAAa,YAAY,WAAW;AAC1C,YAAM,gBAAgB,iBAAiB,CAAC,UAAU,CAAC;AAEnD,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wBAAS,WAAW,aAAQ,YAAY,MAAM;AAAA;AAAA,EAAS,aAAa,GAAG,CAAC;AAAA,MAC1G;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,IACzD,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EAC3D;AAAA,EACA,OAAO,EAAE,aAAa,YAAY,MAAM;AACtC,QAAI;AACF,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AACtD,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AAEtD,YAAM,SAAS,MAAM,QAAQ,MAAM,IAAI;AACvC,YAAM,EAAE,OAAO,MAAM,IAAI;AAEzB,YAAM,QAAkB;AAAA,QACtB;AAAA,QACA,iBAAO,MAAM,KAAK,oBAAU,MAAM,OAAO,oBAAU,MAAM,QAAQ,oBAAU,MAAM,SAAS;AAAA,QAC1F;AAAA,MACF;AAEA,iBAAW,KAAK,OAAO;AACrB,cAAM,SAAS,EAAE,SAAS,UAAU,MAAM,EAAE,SAAS,YAAY,MAAM,EAAE,SAAS,aAAa,MAAM;AACrG,cAAM,OAAO,EAAE,OAAO,QAAQ,EAAE,QAAQ,SAAS,EAAE,OAAO,QAAQ,yBAAU,EAAE,QAAQ,QAAQ,yBAAU;AACxG,cAAM,MAAM,EAAE,eAAe,SAAY,MAAM,EAAE,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AACpF,cAAM,KAAK,GAAG,MAAM,IAAI,KAAK,UAAU,GAAG,GAAG,CAAC,GAAG,GAAG,EAAE;AAAA,MACxD;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,IAAI,EAAE,CAAC;AAAA,MACpD;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EACzD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO,kBAAkB,OAAO,MAAM;AAC5C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,MAAM,MAAM,CAAC,EAAE,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,eAAe,OAAO;AACpB,QAAM,YAAY,IAAI,qBAAqB;AAC3C,QAAM,OAAO,QAAQ,SAAS;AAChC;AAEA,KAAK,EAAE,MAAM,CAAC,QAAQ;AAAE,UAAQ,MAAM,GAAG;AAAG,UAAQ,KAAK,CAAC;AAAE,CAAC;","names":[]}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
createCliOcrProvider
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-S7BHLD2V.js";
|
|
5
5
|
import {
|
|
6
6
|
detectAvailableOcr,
|
|
7
7
|
validateOcrMode
|
|
@@ -22,7 +22,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
22
22
|
if (mode !== "auto") {
|
|
23
23
|
validateOcrMode(mode);
|
|
24
24
|
if (mode === "gemini" || mode === "claude" || mode === "codex") {
|
|
25
|
-
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-
|
|
25
|
+
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-XRF6F26E.js");
|
|
26
26
|
const effectiveBatch = batchSize ?? DEFAULT_BATCH_SIZES[mode];
|
|
27
27
|
if (effectiveBatch > 1) {
|
|
28
28
|
logger.log({ level: "info", event: "done", message: "Batch CLI provider \uC120\uD0DD", meta: { mode, batchSize: effectiveBatch } });
|
|
@@ -48,7 +48,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
48
48
|
});
|
|
49
49
|
}
|
|
50
50
|
if (detected === "gemini" || detected === "codex" || detected === "claude") {
|
|
51
|
-
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-
|
|
51
|
+
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-XRF6F26E.js");
|
|
52
52
|
const effectiveBatch = batchSize ?? DEFAULT_BATCH_SIZES[detected];
|
|
53
53
|
if (effectiveBatch > 1) {
|
|
54
54
|
logger.log({ level: "info", event: "done", message: "AUTO: Batch CLI provider \uC120\uD0DD", meta: { mode: detected, batchSize: effectiveBatch } });
|
|
@@ -63,4 +63,4 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
63
63
|
export {
|
|
64
64
|
resolveOcrProvider
|
|
65
65
|
};
|
|
66
|
-
//# sourceMappingURL=resolve-
|
|
66
|
+
//# sourceMappingURL=resolve-ZSUEJK3E.js.map
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
sanitizeError,
|
|
10
10
|
sanitizeHref,
|
|
11
11
|
toArrayBuffer
|
|
12
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-TND4YFBV.js";
|
|
13
13
|
import "./chunk-ZWE3DS7E.js";
|
|
14
14
|
export {
|
|
15
15
|
KordocError,
|
|
@@ -22,4 +22,4 @@ export {
|
|
|
22
22
|
sanitizeHref,
|
|
23
23
|
toArrayBuffer
|
|
24
24
|
};
|
|
25
|
-
//# sourceMappingURL=utils-
|
|
25
|
+
//# sourceMappingURL=utils-F66K7PXH.js.map
|
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
import {
|
|
3
3
|
detectFormat,
|
|
4
4
|
parse
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-TS3F57LY.js";
|
|
6
6
|
import {
|
|
7
7
|
toArrayBuffer
|
|
8
|
-
} from "./chunk-
|
|
8
|
+
} from "./chunk-TND4YFBV.js";
|
|
9
9
|
import "./chunk-MOL7MDBG.js";
|
|
10
|
-
import "./chunk-
|
|
10
|
+
import "./chunk-S7BHLD2V.js";
|
|
11
11
|
import "./chunk-YW5G6BCJ.js";
|
|
12
12
|
import "./chunk-I6YC6ZGK.js";
|
|
13
13
|
import "./chunk-ZWE3DS7E.js";
|
|
@@ -136,4 +136,4 @@ async function sendWebhook(url, payload) {
|
|
|
136
136
|
export {
|
|
137
137
|
watchDirectory
|
|
138
138
|
};
|
|
139
|
-
//# sourceMappingURL=watch-
|
|
139
|
+
//# sourceMappingURL=watch-2S5ULHAM.js.map
|
package/package.json
CHANGED
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import "./chunk-ZWE3DS7E.js";
|
|
3
|
-
|
|
4
|
-
// src/ocr/batch-provider.ts
|
|
5
|
-
import { spawn, execSync } from "child_process";
|
|
6
|
-
import { writeFileSync, readFileSync, unlinkSync, mkdirSync } from "fs";
|
|
7
|
-
import { join } from "path";
|
|
8
|
-
import { tmpdir } from "os";
|
|
9
|
-
var BATCH_OCR_PROMPT = "\uB2E4\uC74C \uBB38\uC11C \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0\uB4E4\uC744 OCR\uD558\uC5EC \uC21C\uC218 Markdown\uC73C\uB85C \uBCC0\uD658\uD558\uC138\uC694.\n\n\uADDC\uCE59:\n- \uAC01 \uD398\uC774\uC9C0 \uACB0\uACFC \uC0AC\uC774\uC5D0 \uBC18\uB4DC\uC2DC \uC774 \uAD6C\uBD84\uC790\uB97C \uC0BD\uC785: <!-- PAGE_BREAK -->\n- \uD14C\uC774\uBE14\uC740 Markdown \uD14C\uC774\uBE14 \uBB38\uBC95 \uC0AC\uC6A9 (| \uAD6C\uBD84, |---|---| \uD5E4\uB354 \uAD6C\uBD84\uC120 \uD3EC\uD568)\n- \uBCD1\uD569\uB41C \uC140\uC740 \uD574\uB2F9 \uC704\uCE58\uC5D0 \uB0B4\uC6A9 \uAE30\uC7AC\n- \uD5E4\uB529\uC740 \uAE00\uC790 \uD06C\uAE30\uC5D0 \uB530\uB77C ## ~ ###### \uC0AC\uC6A9\n- \uB9AC\uC2A4\uD2B8\uB294 - \uB610\uB294 1. \uC0AC\uC6A9\n- \uC774\uBBF8\uC9C0, \uB3C4\uD615 \uB4F1 \uBE44\uD14D\uC2A4\uD2B8 \uC694\uC18C\uB294 \uBB34\uC2DC\n- \uC6D0\uBB38\uC758 \uC77D\uAE30 \uC21C\uC11C\uC640 \uAD6C\uC870\uB97C \uC720\uC9C0\n- ```\uB85C \uAC10\uC2F8\uC9C0 \uB9D0\uACE0 \uC21C\uC218 Markdown\uB9CC \uCD9C\uB825";
|
|
10
|
-
var DEFAULT_BATCH_SIZES = {
|
|
11
|
-
gemini: 5,
|
|
12
|
-
claude: 5,
|
|
13
|
-
codex: 10
|
|
14
|
-
};
|
|
15
|
-
var _batchTempDir = null;
|
|
16
|
-
function getBatchTempDir() {
|
|
17
|
-
if (!_batchTempDir) {
|
|
18
|
-
_batchTempDir = join(process.cwd(), ".kordoc_ocr_tmp");
|
|
19
|
-
mkdirSync(_batchTempDir, { recursive: true });
|
|
20
|
-
if (process.platform === "win32") {
|
|
21
|
-
try {
|
|
22
|
-
execSync(`attrib +h "${_batchTempDir}"`, { stdio: "ignore" });
|
|
23
|
-
} catch {
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
return _batchTempDir;
|
|
28
|
-
}
|
|
29
|
-
function createBatchCliProvider(mode, batchSize) {
|
|
30
|
-
return {
|
|
31
|
-
__batch: true,
|
|
32
|
-
batchSize,
|
|
33
|
-
async processBatch(pages) {
|
|
34
|
-
const results = /* @__PURE__ */ new Map();
|
|
35
|
-
const tempDir = getBatchTempDir();
|
|
36
|
-
const tempFiles = [];
|
|
37
|
-
try {
|
|
38
|
-
for (const { image, pageNum } of pages) {
|
|
39
|
-
const path = join(tempDir, `batch-p${pageNum}.png`);
|
|
40
|
-
writeFileSync(path, image);
|
|
41
|
-
tempFiles.push(path);
|
|
42
|
-
}
|
|
43
|
-
let output;
|
|
44
|
-
if (mode === "codex") {
|
|
45
|
-
output = await callBatchCodexCli(tempFiles);
|
|
46
|
-
} else {
|
|
47
|
-
output = await callBatchCli(mode, tempFiles);
|
|
48
|
-
}
|
|
49
|
-
const cleaned = stripCodeFence(output.trim());
|
|
50
|
-
const parts = cleaned.split(/<!--\s*PAGE_BREAK\s*-->/).map((p) => p.trim()).filter((p) => p.length > 0);
|
|
51
|
-
for (let i = 0; i < pages.length; i++) {
|
|
52
|
-
const pageNum = pages[i].pageNum;
|
|
53
|
-
if (i < parts.length) {
|
|
54
|
-
results.set(pageNum, { markdown: parts[i] });
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
} finally {
|
|
58
|
-
for (const f of tempFiles) {
|
|
59
|
-
try {
|
|
60
|
-
unlinkSync(f);
|
|
61
|
-
} catch {
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
return results;
|
|
66
|
-
}
|
|
67
|
-
};
|
|
68
|
-
}
|
|
69
|
-
function spawnAsync(cmd, args, opts) {
|
|
70
|
-
return new Promise((resolve, reject) => {
|
|
71
|
-
const child = spawn(cmd, args, {
|
|
72
|
-
cwd: opts.cwd,
|
|
73
|
-
env: process.env,
|
|
74
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
75
|
-
shell: process.platform === "win32"
|
|
76
|
-
});
|
|
77
|
-
let stdout = "";
|
|
78
|
-
let stderr = "";
|
|
79
|
-
let killed = false;
|
|
80
|
-
child.stdout.setEncoding("utf-8");
|
|
81
|
-
child.stderr.setEncoding("utf-8");
|
|
82
|
-
child.stdout.on("data", (d) => {
|
|
83
|
-
stdout += d;
|
|
84
|
-
});
|
|
85
|
-
child.stderr.on("data", (d) => {
|
|
86
|
-
stderr += d;
|
|
87
|
-
});
|
|
88
|
-
const timer = setTimeout(() => {
|
|
89
|
-
killed = true;
|
|
90
|
-
if (process.platform === "win32") {
|
|
91
|
-
child.kill();
|
|
92
|
-
} else {
|
|
93
|
-
child.kill("SIGTERM");
|
|
94
|
-
}
|
|
95
|
-
}, opts.timeoutMs);
|
|
96
|
-
if (opts.stdin !== void 0) {
|
|
97
|
-
child.stdin.end(opts.stdin);
|
|
98
|
-
} else {
|
|
99
|
-
child.stdin.end();
|
|
100
|
-
}
|
|
101
|
-
child.on("close", (code) => {
|
|
102
|
-
clearTimeout(timer);
|
|
103
|
-
if (killed) {
|
|
104
|
-
reject(new Error(`\uD0C0\uC784\uC544\uC6C3 (${Math.round(opts.timeoutMs / 1e3)}\uCD08)`));
|
|
105
|
-
} else {
|
|
106
|
-
resolve({ stdout, stderr, exitCode: code ?? 1 });
|
|
107
|
-
}
|
|
108
|
-
});
|
|
109
|
-
child.on("error", (err) => {
|
|
110
|
-
clearTimeout(timer);
|
|
111
|
-
reject(err);
|
|
112
|
-
});
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
async function callBatchCli(mode, imagePaths) {
|
|
116
|
-
const fileRefs = imagePaths.map((p) => `@${p.replace(/\\/g, "/")}`).join("\n");
|
|
117
|
-
const prompt = `${BATCH_OCR_PROMPT}
|
|
118
|
-
|
|
119
|
-
${fileRefs}`;
|
|
120
|
-
let args;
|
|
121
|
-
if (mode === "gemini") {
|
|
122
|
-
const model = process.env.KORDOC_GEMINI_MODEL ?? "gemini-2.5-flash";
|
|
123
|
-
args = ["--prompt", prompt, "--yolo", "--model", model];
|
|
124
|
-
} else {
|
|
125
|
-
args = ["--print", prompt];
|
|
126
|
-
const model = process.env.KORDOC_CLAUDE_MODEL;
|
|
127
|
-
if (model) args.push("--model", model);
|
|
128
|
-
}
|
|
129
|
-
const timeoutMs = 6e4 + imagePaths.length * 2e4;
|
|
130
|
-
const result = await spawnAsync(mode, args, {
|
|
131
|
-
timeoutMs,
|
|
132
|
-
...mode === "claude" ? { cwd: tmpdir() } : {}
|
|
133
|
-
});
|
|
134
|
-
if (result.exitCode !== 0) {
|
|
135
|
-
const errMsg = result.stderr?.trim() || `exit code ${result.exitCode}`;
|
|
136
|
-
throw new Error(`${mode} \uBC30\uCE58 OCR \uC2E4\uD328: ${errMsg}`);
|
|
137
|
-
}
|
|
138
|
-
const output = result.stdout || "";
|
|
139
|
-
checkForLimitError(output, mode);
|
|
140
|
-
return output;
|
|
141
|
-
}
|
|
142
|
-
async function callBatchCodexCli(imagePaths) {
|
|
143
|
-
const outPath = join(tmpdir(), `kordoc-codex-batch-${Date.now()}-${Math.random().toString(36).slice(2)}.txt`);
|
|
144
|
-
try {
|
|
145
|
-
const args = ["exec", BATCH_OCR_PROMPT];
|
|
146
|
-
for (const p of imagePaths) {
|
|
147
|
-
args.push("--image", p);
|
|
148
|
-
}
|
|
149
|
-
args.push("--output-last-message", outPath);
|
|
150
|
-
const model = process.env.KORDOC_CODEX_MODEL;
|
|
151
|
-
if (model) args.push("--model", model);
|
|
152
|
-
const timeoutMs = 6e4 + imagePaths.length * 2e4;
|
|
153
|
-
const result = await spawnAsync("codex", args, {
|
|
154
|
-
timeoutMs,
|
|
155
|
-
stdin: ""
|
|
156
|
-
});
|
|
157
|
-
if (result.exitCode !== 0) {
|
|
158
|
-
const errMsg = result.stderr?.trim() || `exit code ${result.exitCode}`;
|
|
159
|
-
throw new Error(`codex \uBC30\uCE58 OCR \uC2E4\uD328: ${errMsg}`);
|
|
160
|
-
}
|
|
161
|
-
let text;
|
|
162
|
-
try {
|
|
163
|
-
text = readFileSync(outPath, "utf-8");
|
|
164
|
-
} catch {
|
|
165
|
-
text = result.stdout || "";
|
|
166
|
-
}
|
|
167
|
-
checkForLimitError(text, "codex");
|
|
168
|
-
return text;
|
|
169
|
-
} finally {
|
|
170
|
-
try {
|
|
171
|
-
unlinkSync(outPath);
|
|
172
|
-
} catch {
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
function checkForLimitError(output, mode) {
|
|
177
|
-
const lower = output.toLowerCase();
|
|
178
|
-
if (lower.includes("usage limit") || lower.includes("rate limit")) {
|
|
179
|
-
throw new Error(`${mode} \uC0AC\uC6A9\uB7C9/\uC18D\uB3C4 \uC81C\uD55C: ${output.trim().slice(0, 200)}`);
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
function stripCodeFence(text) {
|
|
183
|
-
const match = text.match(/^```(?:markdown|md)?\s*\n([\s\S]*?)\n```\s*$/m);
|
|
184
|
-
return match ? match[1].trim() : text;
|
|
185
|
-
}
|
|
186
|
-
export {
|
|
187
|
-
DEFAULT_BATCH_SIZES,
|
|
188
|
-
createBatchCliProvider
|
|
189
|
-
};
|
|
190
|
-
//# sourceMappingURL=batch-provider-5BFJRKAZ.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/ocr/batch-provider.ts"],"sourcesContent":["/**\n * CLI 배치 OCR 프로바이더\n *\n * 여러 페이지 이미지를 단일 CLI 호출로 처리하여 API 호출 수를 대폭 감소.\n * gemini/claude: @file 멀티 참조, codex: --image 멀티 플래그\n *\n * 299페이지 기준:\n * - 기존: CLI 299회 호출 (~30분)\n * - 배치: CLI 3~6회 호출 (~3분)\n */\n\nimport { spawn, execSync } from \"child_process\"\nimport { writeFileSync, readFileSync, unlinkSync, mkdirSync } from \"fs\"\nimport { join } from \"path\"\nimport { tmpdir } from \"os\"\nimport type { StructuredOcrResult, BatchOcrProvider } from \"../types.js\"\n\n/** 배치 OCR 프롬프트 */\nconst BATCH_OCR_PROMPT =\n \"다음 문서 페이지 이미지들을 OCR하여 순수 Markdown으로 변환하세요.\\n\\n\" +\n \"규칙:\\n\" +\n \"- 각 페이지 결과 사이에 반드시 이 구분자를 삽입: <!-- PAGE_BREAK -->\\n\" +\n \"- 테이블은 Markdown 테이블 문법 사용 (| 구분, |---|---| 헤더 구분선 포함)\\n\" +\n \"- 병합된 셀은 해당 위치에 내용 기재\\n\" +\n \"- 헤딩은 글자 크기에 따라 ## ~ ###### 사용\\n\" +\n \"- 리스트는 - 또는 1. 사용\\n\" +\n \"- 이미지, 도형 등 비텍스트 요소는 무시\\n\" +\n \"- 원문의 읽기 순서와 구조를 유지\\n\" +\n \"- ```로 감싸지 말고 순수 Markdown만 출력\"\n\n/** 모드별 기본 배치 크기 (CLI 내부 타임아웃 + 실측 기반)\n *\n * gemini CLI: 10장 이상에서 AbortError 발생 (내부 타임아웃).\n * 5장 배치가 안정적으로 동작 확인 (35초/배치).\n * 299페이지 = 60배치 = 기존 299회 대비 80% 감소.\n */\nexport const DEFAULT_BATCH_SIZES: Record<string, number> = {\n gemini: 5,\n claude: 5,\n codex: 10,\n}\n\n/**\n * 임시 디렉토리 — gemini CLI는 cwd 하위 + gitignore 밖만 @참조 가능\n *\n * 숨김 처리:\n * - macOS/Linux: '.' 접두사로 기본 숨김 (ls -a 로만 표시)\n * - Windows: '.' 접두사 + attrib +h 로 숨김 속성 부여\n */\nlet _batchTempDir: string | null = null\nfunction getBatchTempDir(): string {\n if (!_batchTempDir) {\n _batchTempDir = join(process.cwd(), \".kordoc_ocr_tmp\")\n mkdirSync(_batchTempDir, { recursive: true })\n // Windows: dot-prefix만으로 숨김 처리 불충분 → attrib +h 추가\n if (process.platform === \"win32\") {\n try { execSync(`attrib +h \"${_batchTempDir}\"`, { stdio: \"ignore\" }) } catch { /* ignore */ }\n }\n }\n return _batchTempDir\n}\n\n/**\n * 배치 CLI 프로바이더 생성\n */\nexport function createBatchCliProvider(\n mode: \"gemini\" | \"claude\" | \"codex\",\n batchSize: number\n): BatchOcrProvider {\n return {\n __batch: true as const,\n batchSize,\n async processBatch(pages) {\n const results = new Map<number, StructuredOcrResult>()\n const tempDir = getBatchTempDir()\n const tempFiles: string[] = []\n\n try {\n // 1. Write all page images to temp files\n for (const { image, pageNum } of pages) {\n const path = join(tempDir, `batch-p${pageNum}.png`)\n writeFileSync(path, image)\n tempFiles.push(path)\n }\n\n // 2. Call CLI with all file references (비동기 — 병렬 배치 실행 가능)\n let output: string\n if (mode === \"codex\") {\n output = await callBatchCodexCli(tempFiles)\n } else {\n output = await callBatchCli(mode, tempFiles)\n }\n\n // 3. Parse response by PAGE_BREAK separator\n const cleaned = stripCodeFence(output.trim())\n const parts = cleaned.split(/<!--\\s*PAGE_BREAK\\s*-->/)\n .map(p => p.trim())\n .filter(p => p.length > 0)\n\n // 4. Map results to page numbers (best-effort if count mismatch)\n for (let i = 0; i < pages.length; i++) {\n const pageNum = pages[i].pageNum\n if (i < parts.length) {\n results.set(pageNum, { markdown: parts[i] })\n }\n // If fewer parts than pages, remaining pages get no result\n }\n } finally {\n // 5. Clean up temp files\n for (const f of tempFiles) {\n try { unlinkSync(f) } catch { /* ignore */ }\n }\n }\n\n return results\n },\n }\n}\n\n/**\n * 비동기 CLI 실행 헬퍼 — spawn + Promise 래핑.\n * spawnSync는 이벤트 루프를 차단하여 병렬 배치 실행 불가.\n */\nfunction spawnAsync(\n cmd: string,\n args: string[],\n opts: { timeoutMs: number; cwd?: string; stdin?: string }\n): Promise<{ stdout: string; stderr: string; exitCode: number }> {\n return new Promise((resolve, reject) => {\n const child = spawn(cmd, args, {\n cwd: opts.cwd,\n env: process.env,\n stdio: [\"pipe\", \"pipe\", \"pipe\"],\n shell: process.platform === \"win32\",\n })\n\n let stdout = \"\"\n let stderr = \"\"\n let killed = false\n\n child.stdout.setEncoding(\"utf-8\")\n child.stderr.setEncoding(\"utf-8\")\n child.stdout.on(\"data\", (d: string) => { stdout += d })\n child.stderr.on(\"data\", (d: string) => { stderr += d })\n\n const timer = setTimeout(() => {\n killed = true\n if (process.platform === \"win32\") {\n child.kill()\n } else {\n child.kill(\"SIGTERM\")\n }\n }, opts.timeoutMs)\n\n if (opts.stdin !== undefined) {\n child.stdin.end(opts.stdin)\n } else {\n child.stdin.end()\n }\n\n child.on(\"close\", (code) => {\n clearTimeout(timer)\n if (killed) {\n reject(new Error(`타임아웃 (${Math.round(opts.timeoutMs / 1000)}초)`))\n } else {\n resolve({ stdout, stderr, exitCode: code ?? 1 })\n }\n })\n child.on(\"error\", (err) => {\n clearTimeout(timer)\n reject(err)\n })\n })\n}\n\n/** gemini/claude 배치 호출 (비동기) */\nasync function callBatchCli(mode: \"gemini\" | \"claude\", imagePaths: string[]): Promise<string> {\n const fileRefs = imagePaths.map(p => `@${p.replace(/\\\\/g, \"/\")}`).join(\"\\n\")\n const prompt = `${BATCH_OCR_PROMPT}\\n\\n${fileRefs}`\n\n let args: string[]\n if (mode === \"gemini\") {\n const model = process.env.KORDOC_GEMINI_MODEL ?? \"gemini-2.5-flash\"\n args = [\"--prompt\", prompt, \"--yolo\", \"--model\", model]\n } else {\n args = [\"--print\", prompt]\n const model = process.env.KORDOC_CLAUDE_MODEL\n if (model) args.push(\"--model\", model)\n }\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = await spawnAsync(mode, args, {\n timeoutMs,\n ...(mode === \"claude\" ? { cwd: tmpdir() } : {}),\n })\n\n if (result.exitCode !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.exitCode}`\n throw new Error(`${mode} 배치 OCR 실패: ${errMsg}`)\n }\n\n const output = result.stdout || \"\"\n checkForLimitError(output, mode)\n return output\n}\n\n/** codex 배치 호출 (비동기) — --image를 여러 번 지정 */\nasync function callBatchCodexCli(imagePaths: string[]): Promise<string> {\n const outPath = join(tmpdir(), `kordoc-codex-batch-${Date.now()}-${Math.random().toString(36).slice(2)}.txt`)\n try {\n const args = [\"exec\", BATCH_OCR_PROMPT]\n for (const p of imagePaths) {\n args.push(\"--image\", p)\n }\n args.push(\"--output-last-message\", outPath)\n const model = process.env.KORDOC_CODEX_MODEL\n if (model) args.push(\"--model\", model)\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = await spawnAsync(\"codex\", args, {\n timeoutMs,\n stdin: \"\",\n })\n\n if (result.exitCode !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.exitCode}`\n throw new Error(`codex 배치 OCR 실패: ${errMsg}`)\n }\n\n let text: string\n try {\n text = readFileSync(outPath, \"utf-8\")\n } catch {\n text = result.stdout || \"\"\n }\n checkForLimitError(text, \"codex\")\n return text\n } finally {\n try { unlinkSync(outPath) } catch { /* ignore */ }\n }\n}\n\n/**\n * 출력 텍스트에서 사용량·속도 제한 에러 감지.\n * 해당 메시지가 포함된 경우 throw하여 다음 엔진으로 fallback 트리거.\n */\nfunction checkForLimitError(output: string, mode: string): void {\n const lower = output.toLowerCase()\n if (lower.includes(\"usage limit\") || lower.includes(\"rate limit\")) {\n throw new Error(`${mode} 사용량/속도 제한: ${output.trim().slice(0, 200)}`)\n }\n}\n\n/** LLM 출력에서 코드 펜스 제거 (cli-provider.ts와 동일 로직) */\nfunction stripCodeFence(text: string): string {\n const match = text.match(/^```(?:markdown|md)?\\s*\\n([\\s\\S]*?)\\n```\\s*$/m)\n return match ? match[1].trim() : text\n}\n"],"mappings":";;;;AAWA,SAAS,OAAO,gBAAgB;AAChC,SAAS,eAAe,cAAc,YAAY,iBAAiB;AACnE,SAAS,YAAY;AACrB,SAAS,cAAc;AAIvB,IAAM,mBACJ;AAiBK,IAAM,sBAA8C;AAAA,EACzD,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,OAAO;AACT;AASA,IAAI,gBAA+B;AACnC,SAAS,kBAA0B;AACjC,MAAI,CAAC,eAAe;AAClB,oBAAgB,KAAK,QAAQ,IAAI,GAAG,iBAAiB;AACrD,cAAU,eAAe,EAAE,WAAW,KAAK,CAAC;AAE5C,QAAI,QAAQ,aAAa,SAAS;AAChC,UAAI;AAAE,iBAAS,cAAc,aAAa,KAAK,EAAE,OAAO,SAAS,CAAC;AAAA,MAAE,QAAQ;AAAA,MAAe;AAAA,IAC7F;AAAA,EACF;AACA,SAAO;AACT;AAKO,SAAS,uBACd,MACA,WACkB;AAClB,SAAO;AAAA,IACL,SAAS;AAAA,IACT;AAAA,IACA,MAAM,aAAa,OAAO;AACxB,YAAM,UAAU,oBAAI,IAAiC;AACrD,YAAM,UAAU,gBAAgB;AAChC,YAAM,YAAsB,CAAC;AAE7B,UAAI;AAEF,mBAAW,EAAE,OAAO,QAAQ,KAAK,OAAO;AACtC,gBAAM,OAAO,KAAK,SAAS,UAAU,OAAO,MAAM;AAClD,wBAAc,MAAM,KAAK;AACzB,oBAAU,KAAK,IAAI;AAAA,QACrB;AAGA,YAAI;AACJ,YAAI,SAAS,SAAS;AACpB,mBAAS,MAAM,kBAAkB,SAAS;AAAA,QAC5C,OAAO;AACL,mBAAS,MAAM,aAAa,MAAM,SAAS;AAAA,QAC7C;AAGA,cAAM,UAAU,eAAe,OAAO,KAAK,CAAC;AAC5C,cAAM,QAAQ,QAAQ,MAAM,yBAAyB,EAClD,IAAI,OAAK,EAAE,KAAK,CAAC,EACjB,OAAO,OAAK,EAAE,SAAS,CAAC;AAG3B,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,gBAAM,UAAU,MAAM,CAAC,EAAE;AACzB,cAAI,IAAI,MAAM,QAAQ;AACpB,oBAAQ,IAAI,SAAS,EAAE,UAAU,MAAM,CAAC,EAAE,CAAC;AAAA,UAC7C;AAAA,QAEF;AAAA,MACF,UAAE;AAEA,mBAAW,KAAK,WAAW;AACzB,cAAI;AAAE,uBAAW,CAAC;AAAA,UAAE,QAAQ;AAAA,UAAe;AAAA,QAC7C;AAAA,MACF;AAEA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAMA,SAAS,WACP,KACA,MACA,MAC+D;AAC/D,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,QAAQ,MAAM,KAAK,MAAM;AAAA,MAC7B,KAAK,KAAK;AAAA,MACV,KAAK,QAAQ;AAAA,MACb,OAAO,CAAC,QAAQ,QAAQ,MAAM;AAAA,MAC9B,OAAO,QAAQ,aAAa;AAAA,IAC9B,CAAC;AAED,QAAI,SAAS;AACb,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,UAAM,OAAO,YAAY,OAAO;AAChC,UAAM,OAAO,YAAY,OAAO;AAChC,UAAM,OAAO,GAAG,QAAQ,CAAC,MAAc;AAAE,gBAAU;AAAA,IAAE,CAAC;AACtD,UAAM,OAAO,GAAG,QAAQ,CAAC,MAAc;AAAE,gBAAU;AAAA,IAAE,CAAC;AAEtD,UAAM,QAAQ,WAAW,MAAM;AAC7B,eAAS;AACT,UAAI,QAAQ,aAAa,SAAS;AAChC,cAAM,KAAK;AAAA,MACb,OAAO;AACL,cAAM,KAAK,SAAS;AAAA,MACtB;AAAA,IACF,GAAG,KAAK,SAAS;AAEjB,QAAI,KAAK,UAAU,QAAW;AAC5B,YAAM,MAAM,IAAI,KAAK,KAAK;AAAA,IAC5B,OAAO;AACL,YAAM,MAAM,IAAI;AAAA,IAClB;AAEA,UAAM,GAAG,SAAS,CAAC,SAAS;AAC1B,mBAAa,KAAK;AAClB,UAAI,QAAQ;AACV,eAAO,IAAI,MAAM,6BAAS,KAAK,MAAM,KAAK,YAAY,GAAI,CAAC,SAAI,CAAC;AAAA,MAClE,OAAO;AACL,gBAAQ,EAAE,QAAQ,QAAQ,UAAU,QAAQ,EAAE,CAAC;AAAA,MACjD;AAAA,IACF,CAAC;AACD,UAAM,GAAG,SAAS,CAAC,QAAQ;AACzB,mBAAa,KAAK;AAClB,aAAO,GAAG;AAAA,IACZ,CAAC;AAAA,EACH,CAAC;AACH;AAGA,eAAe,aAAa,MAA2B,YAAuC;AAC5F,QAAM,WAAW,WAAW,IAAI,OAAK,IAAI,EAAE,QAAQ,OAAO,GAAG,CAAC,EAAE,EAAE,KAAK,IAAI;AAC3E,QAAM,SAAS,GAAG,gBAAgB;AAAA;AAAA,EAAO,QAAQ;AAEjD,MAAI;AACJ,MAAI,SAAS,UAAU;AACrB,UAAM,QAAQ,QAAQ,IAAI,uBAAuB;AACjD,WAAO,CAAC,YAAY,QAAQ,UAAU,WAAW,KAAK;AAAA,EACxD,OAAO;AACL,WAAO,CAAC,WAAW,MAAM;AACzB,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAAA,EACvC;AAEA,QAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,QAAM,SAAS,MAAM,WAAW,MAAM,MAAM;AAAA,IAC1C;AAAA,IACA,GAAI,SAAS,WAAW,EAAE,KAAK,OAAO,EAAE,IAAI,CAAC;AAAA,EAC/C,CAAC;AAED,MAAI,OAAO,aAAa,GAAG;AACzB,UAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,QAAQ;AACpE,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,MAAM,EAAE;AAAA,EAChD;AAEA,QAAM,SAAS,OAAO,UAAU;AAChC,qBAAmB,QAAQ,IAAI;AAC/B,SAAO;AACT;AAGA,eAAe,kBAAkB,YAAuC;AACtE,QAAM,UAAU,KAAK,OAAO,GAAG,sBAAsB,KAAK,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,CAAC,CAAC,MAAM;AAC5G,MAAI;AACF,UAAM,OAAO,CAAC,QAAQ,gBAAgB;AACtC,eAAW,KAAK,YAAY;AAC1B,WAAK,KAAK,WAAW,CAAC;AAAA,IACxB;AACA,SAAK,KAAK,yBAAyB,OAAO;AAC1C,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAErC,UAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,UAAM,SAAS,MAAM,WAAW,SAAS,MAAM;AAAA,MAC7C;AAAA,MACA,OAAO;AAAA,IACT,CAAC;AAED,QAAI,OAAO,aAAa,GAAG;AACzB,YAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,QAAQ;AACpE,YAAM,IAAI,MAAM,wCAAoB,MAAM,EAAE;AAAA,IAC9C;AAEA,QAAI;AACJ,QAAI;AACF,aAAO,aAAa,SAAS,OAAO;AAAA,IACtC,QAAQ;AACN,aAAO,OAAO,UAAU;AAAA,IAC1B;AACA,uBAAmB,MAAM,OAAO;AAChC,WAAO;AAAA,EACT,UAAE;AACA,QAAI;AAAE,iBAAW,OAAO;AAAA,IAAE,QAAQ;AAAA,IAAe;AAAA,EACnD;AACF;AAMA,SAAS,mBAAmB,QAAgB,MAAoB;AAC9D,QAAM,QAAQ,OAAO,YAAY;AACjC,MAAI,MAAM,SAAS,aAAa,KAAK,MAAM,SAAS,YAAY,GAAG;AACjE,UAAM,IAAI,MAAM,GAAG,IAAI,kDAAe,OAAO,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,EACrE;AACF;AAGA,SAAS,eAAe,MAAsB;AAC5C,QAAM,QAAQ,KAAK,MAAM,+CAA+C;AACxE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;","names":[]}
|