kordoc 2.0.3 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +302 -291
- package/dist/chunk-5Y2Q3BRW.js +52 -0
- package/dist/chunk-5Y2Q3BRW.js.map +1 -0
- package/dist/{chunk-4UH6ABAY.js → chunk-LYFG7AUT.js} +971 -223
- package/dist/chunk-LYFG7AUT.js.map +1 -0
- package/dist/{chunk-3TBUDJDE.js → chunk-MOL7MDBG.js} +1 -1
- package/dist/chunk-MOL7MDBG.js.map +1 -0
- package/dist/cli.js +13 -9
- package/dist/cli.js.map +1 -1
- package/dist/detect-GYK3HKD5.js +18 -0
- package/dist/index.cjs +996 -189
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +996 -189
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +41 -12
- package/dist/mcp.js.map +1 -1
- package/dist/page-range-737B4EZW.js +8 -0
- package/dist/{provider-EU3CG724.js → provider-7H4CPZYS.js} +2 -1
- package/dist/provider-7H4CPZYS.js.map +1 -0
- package/dist/{watch-QD3PDNXQ.js → watch-Q5OXA73S.js} +38 -18
- package/dist/watch-Q5OXA73S.js.map +1 -0
- package/package.json +1 -1
- package/dist/chunk-25TXW6EP.js +0 -93
- package/dist/chunk-25TXW6EP.js.map +0 -1
- package/dist/chunk-3TBUDJDE.js.map +0 -1
- package/dist/chunk-4UH6ABAY.js.map +0 -1
- package/dist/page-range-OF5I4PQY.js +0 -8
- package/dist/provider-EU3CG724.js.map +0 -1
- package/dist/utils-BTZ4WSYX.js +0 -22
- package/dist/watch-QD3PDNXQ.js.map +0 -1
- /package/dist/{page-range-OF5I4PQY.js.map → detect-GYK3HKD5.js.map} +0 -0
- /package/dist/{utils-BTZ4WSYX.js.map → page-range-737B4EZW.js.map} +0 -0
package/dist/mcp.js
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
|
+
KordocError,
|
|
4
|
+
VERSION,
|
|
3
5
|
blocksToMarkdown,
|
|
4
6
|
compare,
|
|
5
|
-
detectFormat,
|
|
6
7
|
extractFormFields,
|
|
7
8
|
extractHwp5MetadataOnly,
|
|
8
9
|
extractHwpxMetadataOnly,
|
|
9
10
|
extractPdfMetadataOnly,
|
|
10
|
-
parse
|
|
11
|
-
} from "./chunk-4UH6ABAY.js";
|
|
12
|
-
import {
|
|
13
|
-
KordocError,
|
|
14
|
-
VERSION,
|
|
11
|
+
parse,
|
|
15
12
|
sanitizeError,
|
|
16
13
|
toArrayBuffer
|
|
17
|
-
} from "./chunk-
|
|
18
|
-
import
|
|
14
|
+
} from "./chunk-LYFG7AUT.js";
|
|
15
|
+
import {
|
|
16
|
+
detectFormat
|
|
17
|
+
} from "./chunk-5Y2Q3BRW.js";
|
|
18
|
+
import "./chunk-MOL7MDBG.js";
|
|
19
19
|
|
|
20
20
|
// src/mcp.ts
|
|
21
21
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
@@ -28,7 +28,14 @@ var MAX_FILE_SIZE = 500 * 1024 * 1024;
|
|
|
28
28
|
function safePath(filePath) {
|
|
29
29
|
if (!filePath) throw new KordocError("\uD30C\uC77C \uACBD\uB85C\uAC00 \uBE44\uC5B4\uC788\uC2B5\uB2C8\uB2E4");
|
|
30
30
|
const resolved = resolve(filePath);
|
|
31
|
-
|
|
31
|
+
let real;
|
|
32
|
+
try {
|
|
33
|
+
real = realpathSync(resolved);
|
|
34
|
+
} catch (err) {
|
|
35
|
+
if (err?.code === "ENOENT") throw new KordocError(`\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${resolved}`);
|
|
36
|
+
if (err?.code === "EACCES" || err?.code === "EPERM") throw new KordocError(`\uD30C\uC77C \uC811\uADFC \uAD8C\uD55C\uC774 \uC5C6\uC2B5\uB2C8\uB2E4: ${resolved}`);
|
|
37
|
+
throw new KordocError(`\uACBD\uB85C \uCC98\uB9AC \uC624\uB958 [${err?.code ?? "UNKNOWN"}]`);
|
|
38
|
+
}
|
|
32
39
|
if (!isAbsolute(real)) throw new KordocError("\uC808\uB300 \uACBD\uB85C\uB9CC \uD5C8\uC6A9\uB429\uB2C8\uB2E4");
|
|
33
40
|
const ext = extname(real).toLowerCase();
|
|
34
41
|
if (!ALLOWED_EXTENSIONS.has(ext)) throw new KordocError(`\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD655\uC7A5\uC790\uC785\uB2C8\uB2E4: ${ext} (\uD5C8\uC6A9: ${[...ALLOWED_EXTENSIONS].join(", ")})`);
|
|
@@ -37,11 +44,21 @@ function safePath(filePath) {
|
|
|
37
44
|
var MAX_METADATA_FILE_SIZE = 50 * 1024 * 1024;
|
|
38
45
|
function readValidatedFile(filePath, maxSize = MAX_FILE_SIZE) {
|
|
39
46
|
const resolved = safePath(filePath);
|
|
40
|
-
|
|
47
|
+
let fileSize;
|
|
48
|
+
try {
|
|
49
|
+
fileSize = statSync(resolved).size;
|
|
50
|
+
} catch (err) {
|
|
51
|
+
throw new KordocError(`\uD30C\uC77C \uC0C1\uD0DC \uC77D\uAE30 \uC2E4\uD328 [${err?.code ?? "UNKNOWN"}]: ${resolved}`);
|
|
52
|
+
}
|
|
41
53
|
if (fileSize > maxSize) {
|
|
42
54
|
throw new KordocError(`\uD30C\uC77C\uC774 \uB108\uBB34 \uD07D\uB2C8\uB2E4: ${(fileSize / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxSize / 1024 / 1024}MB)`);
|
|
43
55
|
}
|
|
44
|
-
|
|
56
|
+
let raw;
|
|
57
|
+
try {
|
|
58
|
+
raw = readFileSync(resolved);
|
|
59
|
+
} catch (err) {
|
|
60
|
+
throw new KordocError(`\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328 [${err?.code ?? "UNKNOWN"}]: ${resolved}`);
|
|
61
|
+
}
|
|
45
62
|
return { buffer: toArrayBuffer(raw), resolved };
|
|
46
63
|
}
|
|
47
64
|
function detectFormatFromHeader(resolved) {
|
|
@@ -154,7 +171,13 @@ server.tool(
|
|
|
154
171
|
}
|
|
155
172
|
const { buffer } = readValidatedFile(file_path, MAX_METADATA_FILE_SIZE);
|
|
156
173
|
let metadata;
|
|
157
|
-
|
|
174
|
+
let effectiveFormat = format;
|
|
175
|
+
if (format === "hwpx") {
|
|
176
|
+
const { detectZipFormat } = await import("./detect-GYK3HKD5.js");
|
|
177
|
+
const zipFormat = await detectZipFormat(buffer);
|
|
178
|
+
if (zipFormat === "xlsx" || zipFormat === "docx") effectiveFormat = zipFormat;
|
|
179
|
+
}
|
|
180
|
+
switch (effectiveFormat) {
|
|
158
181
|
case "hwp":
|
|
159
182
|
metadata = extractHwp5MetadataOnly(Buffer.from(buffer));
|
|
160
183
|
break;
|
|
@@ -164,6 +187,12 @@ server.tool(
|
|
|
164
187
|
case "pdf":
|
|
165
188
|
metadata = await extractPdfMetadataOnly(buffer);
|
|
166
189
|
break;
|
|
190
|
+
case "xlsx":
|
|
191
|
+
case "docx": {
|
|
192
|
+
const result = await parse(buffer);
|
|
193
|
+
metadata = result.success ? result.metadata : void 0;
|
|
194
|
+
break;
|
|
195
|
+
}
|
|
167
196
|
}
|
|
168
197
|
return {
|
|
169
198
|
content: [{ type: "text", text: JSON.stringify({ format, ...metadata }, null, 2) }]
|
package/dist/mcp.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/mcp.ts"],"sourcesContent":["/** kordoc MCP 서버 — Claude/Cursor에서 문서 파싱 도구로 사용 */\r\n\r\nimport { McpServer } from \"@modelcontextprotocol/sdk/server/mcp.js\"\r\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\"\r\nimport { z } from \"zod\"\r\nimport { readFileSync, realpathSync, openSync, readSync, closeSync, statSync } from \"fs\"\r\nimport { resolve, isAbsolute, extname } from \"path\"\r\nimport { parse, detectFormat, blocksToMarkdown, compare, extractFormFields } from \"./index.js\"\r\nimport { VERSION, toArrayBuffer, sanitizeError, KordocError } from \"./utils.js\"\r\nimport { extractHwp5MetadataOnly } from \"./hwp5/parser.js\"\r\nimport { extractHwpxMetadataOnly } from \"./hwpx/parser.js\"\r\nimport { extractPdfMetadataOnly } from \"./pdf/parser.js\"\r\n\r\n/** 허용 파일 확장자 */\r\nconst ALLOWED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\r\n/** 최대 파일 크기 (500MB) */\r\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\r\n\r\n/** 경로 정규화 및 보안 검증 */\r\nfunction safePath(filePath: string): string {\r\n if (!filePath) throw new KordocError(\"파일 경로가 비어있습니다\")\r\n const resolved = resolve(filePath)\r\n const real = realpathSync(resolved)\r\n if (!isAbsolute(real)) throw new KordocError(\"절대 경로만 허용됩니다\")\r\n const ext = extname(real).toLowerCase()\r\n if (!ALLOWED_EXTENSIONS.has(ext)) throw new KordocError(`지원하지 않는 확장자입니다: ${ext} (허용: ${[...ALLOWED_EXTENSIONS].join(\", \")})`)\r\n return real\r\n}\r\n\r\n/** 최대 파일 크기 — metadata 전용 (50MB, 전체 파싱보다 보수적) */\r\nconst MAX_METADATA_FILE_SIZE = 50 * 1024 * 1024\r\n\r\n/** 파일 읽기 + 크기 검증 공통 로직 */\r\nfunction readValidatedFile(filePath: string, maxSize = MAX_FILE_SIZE): { buffer: ArrayBuffer; resolved: string } {\r\n const resolved = safePath(filePath)\r\n const fileSize = statSync(resolved).size\r\n if (fileSize > maxSize) {\r\n throw new KordocError(`파일이 너무 큽니다: ${(fileSize / 1024 / 1024).toFixed(1)}MB (최대 ${maxSize / 1024 / 1024}MB)`)\r\n }\r\n const raw = readFileSync(resolved)\r\n return { buffer: toArrayBuffer(raw), resolved }\r\n}\r\n\r\n/** 파일 헤더(16바이트)만 읽어 포맷 감지 — 전체 파일 로드 불필요 */\r\nfunction detectFormatFromHeader(resolved: string): ReturnType<typeof detectFormat> {\r\n const fd = openSync(resolved, \"r\")\r\n try {\r\n const headerBuf = Buffer.alloc(16)\r\n readSync(fd, headerBuf, 0, 16, 0)\r\n return detectFormat(toArrayBuffer(headerBuf))\r\n } finally {\r\n closeSync(fd)\r\n }\r\n}\r\n\r\nconst server = new McpServer({\r\n name: \"kordoc\",\r\n version: VERSION,\r\n})\r\n\r\n// ─── 도구: parse_document ────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_document\",\r\n \"한국 문서 파일(HWP, HWPX, PDF, XLSX, DOCX)을 마크다운으로 변환합니다. 파일 경로를 입력하면 포맷을 자동 감지하여 텍스트를 추출합니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const format = detectFormat(buffer)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const result = await parse(buffer)\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const meta = [\r\n `포맷: ${result.fileType.toUpperCase()}`,\r\n result.pageCount ? `페이지: ${result.pageCount}` : null,\r\n result.metadata?.title ? `제목: ${result.metadata.title}` : null,\r\n result.metadata?.author ? `작성자: ${result.metadata.author}` : null,\r\n result.isImageBased ? \"이미지 기반 PDF (텍스트 추출 불가)\" : null,\r\n ].filter(Boolean).join(\" | \")\r\n\r\n // outline/warnings 부가 정보 추가\r\n const parts: string[] = [`[${meta}]`]\r\n\r\n if (result.outline && result.outline.length > 0) {\r\n const outlineText = result.outline.map(o => `${\" \".repeat(o.level - 1)}- ${o.text}`).join(\"\\n\")\r\n parts.push(`\\n📑 문서 구조:\\n${outlineText}`)\r\n }\r\n\r\n if (result.warnings && result.warnings.length > 0) {\r\n const warnText = result.warnings.map(w => `- [p${w.page || \"?\"}] ${w.message}`).join(\"\\n\")\r\n parts.push(`\\n⚠️ 경고:\\n${warnText}`)\r\n }\r\n\r\n parts.push(`\\n\\n${result.markdown}`)\r\n\r\n return {\r\n content: [{ type: \"text\", text: parts.join(\"\") }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: detect_format ─────────────────────────────\r\n\r\nserver.tool(\r\n \"detect_format\",\r\n \"파일의 포맷을 매직 바이트로 감지합니다 (hwpx, hwp, pdf, unknown).\",\r\n {\r\n file_path: z.string().min(1).describe(\"감지할 파일의 절대 경로\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const resolved = safePath(file_path)\r\n const format = detectFormatFromHeader(resolved)\r\n return {\r\n content: [{ type: \"text\", text: `${file_path}: ${format}` }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_metadata ────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_metadata\",\r\n \"문서의 메타데이터(제목, 작성자, 날짜 등)만 빠르게 추출합니다. 전체 파싱 없이 헤더/매니페스트만 읽습니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"메타데이터를 추출할 문서 파일의 절대 경로\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const resolved = safePath(file_path)\r\n const format = detectFormatFromHeader(resolved)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n // metadata 전용 크기 제한 (50MB)\r\n const { buffer } = readValidatedFile(file_path, MAX_METADATA_FILE_SIZE)\r\n\r\n let metadata\r\n switch (format) {\r\n case \"hwp\":\r\n metadata = extractHwp5MetadataOnly(Buffer.from(buffer))\r\n break\r\n case \"hwpx\":\r\n metadata = await extractHwpxMetadataOnly(buffer)\r\n break\r\n case \"pdf\":\r\n metadata = await extractPdfMetadataOnly(buffer)\r\n break\r\n }\r\n\r\n return {\r\n content: [{ type: \"text\", text: JSON.stringify({ format, ...metadata }, null, 2) }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_pages ──────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_pages\",\r\n \"문서의 특정 페이지/섹션 범위만 파싱합니다. PDF는 정확한 페이지, HWP/HWPX는 섹션 단위 근사치입니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\r\n pages: z.string().min(1).describe(\"페이지 범위 (예: '1-3', '1,3,5-7')\"),\r\n },\r\n async ({ file_path, pages }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const format = detectFormat(buffer)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const result = await parse(buffer, { pages })\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const meta = [\r\n `포맷: ${result.fileType.toUpperCase()}`,\r\n `범위: ${pages}`,\r\n result.pageCount ? `페이지: ${result.pageCount}` : null,\r\n ].filter(Boolean).join(\" | \")\r\n\r\n return {\r\n content: [{ type: \"text\", text: `[${meta}]\\n\\n${result.markdown}` }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_table ──────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_table\",\r\n \"문서에서 N번째 테이블만 추출합니다 (0-based index). 테이블이 없거나 인덱스 범위를 초과하면 오류를 반환합니다.\",\r\n {\r\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\r\n table_index: z.number().int().min(0).describe(\"추출할 테이블 인덱스 (0부터 시작)\"),\r\n },\r\n async ({ file_path, table_index }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const format = detectFormat(buffer)\r\n\r\n if (format === \"unknown\") {\r\n return {\r\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const result = await parse(buffer)\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const tableBlocks = result.blocks.filter(b => b.type === \"table\" && b.table)\r\n if (tableBlocks.length === 0) {\r\n return {\r\n content: [{ type: \"text\", text: `문서에 테이블이 없습니다.` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n if (table_index >= tableBlocks.length) {\r\n return {\r\n content: [{ type: \"text\", text: `테이블 인덱스 초과: ${table_index} (총 ${tableBlocks.length}개 테이블)` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const tableBlock = tableBlocks[table_index]\r\n const tableMarkdown = blocksToMarkdown([tableBlock])\r\n\r\n return {\r\n content: [{ type: \"text\", text: `[테이블 #${table_index} / 총 ${tableBlocks.length}개]\\n\\n${tableMarkdown}` }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: compare_documents ─────────────────────────\r\n\r\nserver.tool(\r\n \"compare_documents\",\r\n \"두 한국 문서 파일을 비교하여 추가/삭제/변경된 블록을 표시합니다. 신구대조표 생성에 활용됩니다. 크로스 포맷(HWP↔HWPX) 비교 가능.\",\r\n {\r\n file_path_a: z.string().min(1).describe(\"비교 원본 문서의 절대 경로\"),\r\n file_path_b: z.string().min(1).describe(\"비교 대상 문서의 절대 경로\"),\r\n },\r\n async ({ file_path_a, file_path_b }) => {\r\n try {\r\n const { buffer: bufA } = readValidatedFile(file_path_a)\r\n const { buffer: bufB } = readValidatedFile(file_path_b)\r\n\r\n const result = await compare(bufA, bufB)\r\n const { stats, diffs } = result\r\n\r\n const lines: string[] = [\r\n `## 문서 비교 결과`,\r\n `추가: ${stats.added} | 삭제: ${stats.removed} | 변경: ${stats.modified} | 동일: ${stats.unchanged}`,\r\n \"\",\r\n ]\r\n\r\n for (const d of diffs) {\r\n const prefix = d.type === \"added\" ? \"+\" : d.type === \"removed\" ? \"-\" : d.type === \"modified\" ? \"~\" : \" \"\r\n const text = d.after?.text || d.before?.text || (d.after?.table ? \"[테이블]\" : d.before?.table ? \"[테이블]\" : \"\")\r\n const sim = d.similarity !== undefined ? ` (${(d.similarity * 100).toFixed(0)}%)` : \"\"\r\n lines.push(`${prefix} ${text.substring(0, 200)}${sim}`)\r\n }\r\n\r\n return {\r\n content: [{ type: \"text\", text: lines.join(\"\\n\") }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 도구: parse_form ───────────────────────────────\r\n\r\nserver.tool(\r\n \"parse_form\",\r\n \"한국 서식 문서에서 레이블-값 쌍을 구조화된 JSON으로 추출합니다. 양식/서식 문서에 최적화.\",\r\n {\r\n file_path: z.string().min(1).describe(\"서식 문서 파일의 절대 경로\"),\r\n },\r\n async ({ file_path }) => {\r\n try {\r\n const { buffer } = readValidatedFile(file_path)\r\n const result = await parse(buffer)\r\n\r\n if (!result.success) {\r\n return {\r\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\r\n isError: true,\r\n }\r\n }\r\n\r\n const form = extractFormFields(result.blocks)\r\n return {\r\n content: [{ type: \"text\", text: JSON.stringify(form, null, 2) }],\r\n }\r\n } catch (err) {\r\n return {\r\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\r\n isError: true,\r\n }\r\n }\r\n }\r\n)\r\n\r\n// ─── 서버 시작 ───────────────────────────────────────\r\n\r\nasync function main() {\r\n const transport = new StdioServerTransport()\r\n await server.connect(transport)\r\n}\r\n\r\nmain().catch((err) => { console.error(err); process.exit(1) })\r\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAEA,SAAS,iBAAiB;AAC1B,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAClB,SAAS,cAAc,cAAc,UAAU,UAAU,WAAW,gBAAgB;AACpF,SAAS,SAAS,YAAY,eAAe;AAQ7C,IAAM,qBAAqB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAE9E,IAAM,gBAAgB,MAAM,OAAO;AAGnC,SAAS,SAAS,UAA0B;AAC1C,MAAI,CAAC,SAAU,OAAM,IAAI,YAAY,sEAAe;AACpD,QAAM,WAAW,QAAQ,QAAQ;AACjC,QAAM,OAAO,aAAa,QAAQ;AAClC,MAAI,CAAC,WAAW,IAAI,EAAG,OAAM,IAAI,YAAY,gEAAc;AAC3D,QAAM,MAAM,QAAQ,IAAI,EAAE,YAAY;AACtC,MAAI,CAAC,mBAAmB,IAAI,GAAG,EAAG,OAAM,IAAI,YAAY,+EAAmB,GAAG,mBAAS,CAAC,GAAG,kBAAkB,EAAE,KAAK,IAAI,CAAC,GAAG;AAC5H,SAAO;AACT;AAGA,IAAM,yBAAyB,KAAK,OAAO;AAG3C,SAAS,kBAAkB,UAAkB,UAAU,eAA0D;AAC/G,QAAM,WAAW,SAAS,QAAQ;AAClC,QAAM,WAAW,SAAS,QAAQ,EAAE;AACpC,MAAI,WAAW,SAAS;AACtB,UAAM,IAAI,YAAY,wDAAgB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC,oBAAU,UAAU,OAAO,IAAI,KAAK;AAAA,EAC9G;AACA,QAAM,MAAM,aAAa,QAAQ;AACjC,SAAO,EAAE,QAAQ,cAAc,GAAG,GAAG,SAAS;AAChD;AAGA,SAAS,uBAAuB,UAAmD;AACjF,QAAM,KAAK,SAAS,UAAU,GAAG;AACjC,MAAI;AACF,UAAM,YAAY,OAAO,MAAM,EAAE;AACjC,aAAS,IAAI,WAAW,GAAG,IAAI,CAAC;AAChC,WAAO,aAAa,cAAc,SAAS,CAAC;AAAA,EAC9C,UAAE;AACA,cAAU,EAAE;AAAA,EACd;AACF;AAEA,IAAM,SAAS,IAAI,UAAU;AAAA,EAC3B,MAAM;AAAA,EACN,SAAS;AACX,CAAC;AAID,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,EACvF;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,QAChD,OAAO,UAAU,QAAQ,iBAAO,OAAO,SAAS,KAAK,KAAK;AAAA,QAC1D,OAAO,UAAU,SAAS,uBAAQ,OAAO,SAAS,MAAM,KAAK;AAAA,QAC7D,OAAO,eAAe,uFAA2B;AAAA,MACnD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAG5B,YAAM,QAAkB,CAAC,IAAI,IAAI,GAAG;AAEpC,UAAI,OAAO,WAAW,OAAO,QAAQ,SAAS,GAAG;AAC/C,cAAM,cAAc,OAAO,QAAQ,IAAI,OAAK,GAAG,KAAK,OAAO,EAAE,QAAQ,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,KAAK,IAAI;AAC/F,cAAM,KAAK;AAAA;AAAA,EAAgB,WAAW,EAAE;AAAA,MAC1C;AAEA,UAAI,OAAO,YAAY,OAAO,SAAS,SAAS,GAAG;AACjD,cAAM,WAAW,OAAO,SAAS,IAAI,OAAK,OAAO,EAAE,QAAQ,GAAG,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,IAAI;AACzF,cAAM,KAAK;AAAA;AAAA,EAAa,QAAQ,EAAE;AAAA,MACpC;AAEA,YAAM,KAAK;AAAA;AAAA,EAAO,OAAO,QAAQ,EAAE;AAEnC,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,EAAE,EAAE,CAAC;AAAA,MAClD;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,iEAAe;AAAA,EACvD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAC9C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,MAAM,GAAG,CAAC;AAAA,MAC7D;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mHAAyB;AAAA,EACjE;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAE9C,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAGA,YAAM,EAAE,OAAO,IAAI,kBAAkB,WAAW,sBAAsB;AAEtE,UAAI;AACJ,cAAQ,QAAQ;AAAA,QACd,KAAK;AACH,qBAAW,wBAAwB,OAAO,KAAK,MAAM,CAAC;AACtD;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,wBAAwB,MAAM;AAC/C;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,uBAAuB,MAAM;AAC9C;AAAA,MACJ;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,EAAE,QAAQ,GAAG,SAAS,GAAG,MAAM,CAAC,EAAE,CAAC;AAAA,MACpF;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4DAA8B;AAAA,EAClE;AAAA,EACA,OAAO,EAAE,WAAW,MAAM,MAAM;AAC9B,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,QAAQ,EAAE,MAAM,CAAC;AAE5C,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,iBAAO,KAAK;AAAA,QACZ,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,MAClD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,IAAI;AAAA;AAAA,EAAQ,OAAO,QAAQ,GAAG,CAAC;AAAA,MACrE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,SAAS,uFAAsB;AAAA,EACtE;AAAA,EACA,OAAO,EAAE,WAAW,YAAY,MAAM;AACpC,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,cAAc,OAAO,OAAO,OAAO,OAAK,EAAE,SAAS,WAAW,EAAE,KAAK;AAC3E,UAAI,YAAY,WAAW,GAAG;AAC5B,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wEAAiB,CAAC;AAAA,UAClD,SAAS;AAAA,QACX;AAAA,MACF;AAEA,UAAI,eAAe,YAAY,QAAQ;AACrC,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,uDAAe,WAAW,YAAO,YAAY,MAAM,6BAAS,CAAC;AAAA,UAC7F,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,aAAa,YAAY,WAAW;AAC1C,YAAM,gBAAgB,iBAAiB,CAAC,UAAU,CAAC;AAEnD,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wBAAS,WAAW,aAAQ,YAAY,MAAM;AAAA;AAAA,EAAS,aAAa,GAAG,CAAC;AAAA,MAC1G;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,IACzD,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EAC3D;AAAA,EACA,OAAO,EAAE,aAAa,YAAY,MAAM;AACtC,QAAI;AACF,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AACtD,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AAEtD,YAAM,SAAS,MAAM,QAAQ,MAAM,IAAI;AACvC,YAAM,EAAE,OAAO,MAAM,IAAI;AAEzB,YAAM,QAAkB;AAAA,QACtB;AAAA,QACA,iBAAO,MAAM,KAAK,oBAAU,MAAM,OAAO,oBAAU,MAAM,QAAQ,oBAAU,MAAM,SAAS;AAAA,QAC1F;AAAA,MACF;AAEA,iBAAW,KAAK,OAAO;AACrB,cAAM,SAAS,EAAE,SAAS,UAAU,MAAM,EAAE,SAAS,YAAY,MAAM,EAAE,SAAS,aAAa,MAAM;AACrG,cAAM,OAAO,EAAE,OAAO,QAAQ,EAAE,QAAQ,SAAS,EAAE,OAAO,QAAQ,yBAAU,EAAE,QAAQ,QAAQ,yBAAU;AACxG,cAAM,MAAM,EAAE,eAAe,SAAY,MAAM,EAAE,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AACpF,cAAM,KAAK,GAAG,MAAM,IAAI,KAAK,UAAU,GAAG,GAAG,CAAC,GAAG,GAAG,EAAE;AAAA,MACxD;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,IAAI,EAAE,CAAC;AAAA,MACpD;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EACzD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO,kBAAkB,OAAO,MAAM;AAC5C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,MAAM,MAAM,CAAC,EAAE,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,eAAe,OAAO;AACpB,QAAM,YAAY,IAAI,qBAAqB;AAC3C,QAAM,OAAO,QAAQ,SAAS;AAChC;AAEA,KAAK,EAAE,MAAM,CAAC,QAAQ;AAAE,UAAQ,MAAM,GAAG;AAAG,UAAQ,KAAK,CAAC;AAAE,CAAC;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/mcp.ts"],"sourcesContent":["/** kordoc MCP 서버 — Claude/Cursor에서 문서 파싱 도구로 사용 */\n\nimport { McpServer } from \"@modelcontextprotocol/sdk/server/mcp.js\"\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\"\nimport { z } from \"zod\"\nimport { readFileSync, realpathSync, openSync, readSync, closeSync, statSync } from \"fs\"\nimport { resolve, isAbsolute, extname } from \"path\"\nimport { parse, detectFormat, blocksToMarkdown, compare, extractFormFields } from \"./index.js\"\nimport { VERSION, toArrayBuffer, sanitizeError, KordocError } from \"./utils.js\"\nimport { extractHwp5MetadataOnly } from \"./hwp5/parser.js\"\nimport { extractHwpxMetadataOnly } from \"./hwpx/parser.js\"\nimport { extractPdfMetadataOnly } from \"./pdf/parser.js\"\n\n/** 허용 파일 확장자 */\nconst ALLOWED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\n/** 최대 파일 크기 (500MB) */\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\n\n/** 경로 정규화 및 보안 검증 */\nfunction safePath(filePath: string): string {\n if (!filePath) throw new KordocError(\"파일 경로가 비어있습니다\")\n const resolved = resolve(filePath)\n let real: string\n try {\n real = realpathSync(resolved)\n } catch (err: any) {\n if (err?.code === \"ENOENT\") throw new KordocError(`파일을 찾을 수 없습니다: ${resolved}`)\n if (err?.code === \"EACCES\" || err?.code === \"EPERM\") throw new KordocError(`파일 접근 권한이 없습니다: ${resolved}`)\n throw new KordocError(`경로 처리 오류 [${err?.code ?? \"UNKNOWN\"}]`)\n }\n if (!isAbsolute(real)) throw new KordocError(\"절대 경로만 허용됩니다\")\n const ext = extname(real).toLowerCase()\n if (!ALLOWED_EXTENSIONS.has(ext)) throw new KordocError(`지원하지 않는 확장자입니다: ${ext} (허용: ${[...ALLOWED_EXTENSIONS].join(\", \")})`)\n return real\n}\n\n/** 최대 파일 크기 — metadata 전용 (50MB, 전체 파싱보다 보수적) */\nconst MAX_METADATA_FILE_SIZE = 50 * 1024 * 1024\n\n/** 파일 읽기 + 크기 검증 공통 로직 */\nfunction readValidatedFile(filePath: string, maxSize = MAX_FILE_SIZE): { buffer: ArrayBuffer; resolved: string } {\n const resolved = safePath(filePath)\n let fileSize: number\n try {\n fileSize = statSync(resolved).size\n } catch (err: any) {\n throw new KordocError(`파일 상태 읽기 실패 [${err?.code ?? \"UNKNOWN\"}]: ${resolved}`)\n }\n if (fileSize > maxSize) {\n throw new KordocError(`파일이 너무 큽니다: ${(fileSize / 1024 / 1024).toFixed(1)}MB (최대 ${maxSize / 1024 / 1024}MB)`)\n }\n let raw: Buffer\n try {\n raw = readFileSync(resolved)\n } catch (err: any) {\n throw new KordocError(`파일 읽기 실패 [${err?.code ?? \"UNKNOWN\"}]: ${resolved}`)\n }\n return { buffer: toArrayBuffer(raw), resolved }\n}\n\n/** 파일 헤더(16바이트)만 읽어 포맷 감지 — 전체 파일 로드 불필요 */\nfunction detectFormatFromHeader(resolved: string): ReturnType<typeof detectFormat> {\n const fd = openSync(resolved, \"r\")\n try {\n const headerBuf = Buffer.alloc(16)\n readSync(fd, headerBuf, 0, 16, 0)\n return detectFormat(toArrayBuffer(headerBuf))\n } finally {\n closeSync(fd)\n }\n}\n\nconst server = new McpServer({\n name: \"kordoc\",\n version: VERSION,\n})\n\n// ─── 도구: parse_document ────────────────────────────\n\nserver.tool(\n \"parse_document\",\n \"한국 문서 파일(HWP, HWPX, PDF, XLSX, DOCX)을 마크다운으로 변환합니다. 파일 경로를 입력하면 포맷을 자동 감지하여 텍스트를 추출합니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로 (HWP, HWPX, PDF, XLSX, DOCX)\"),\n },\n async ({ file_path }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const meta = [\n `포맷: ${result.fileType.toUpperCase()}`,\n result.pageCount ? `페이지: ${result.pageCount}` : null,\n result.metadata?.title ? `제목: ${result.metadata.title}` : null,\n result.metadata?.author ? `작성자: ${result.metadata.author}` : null,\n result.isImageBased ? \"이미지 기반 PDF (텍스트 추출 불가)\" : null,\n ].filter(Boolean).join(\" | \")\n\n // outline/warnings 부가 정보 추가\n const parts: string[] = [`[${meta}]`]\n\n if (result.outline && result.outline.length > 0) {\n const outlineText = result.outline.map(o => `${\" \".repeat(o.level - 1)}- ${o.text}`).join(\"\\n\")\n parts.push(`\\n📑 문서 구조:\\n${outlineText}`)\n }\n\n if (result.warnings && result.warnings.length > 0) {\n const warnText = result.warnings.map(w => `- [p${w.page || \"?\"}] ${w.message}`).join(\"\\n\")\n parts.push(`\\n⚠️ 경고:\\n${warnText}`)\n }\n\n parts.push(`\\n\\n${result.markdown}`)\n\n return {\n content: [{ type: \"text\", text: parts.join(\"\") }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: detect_format ─────────────────────────────\n\nserver.tool(\n \"detect_format\",\n \"파일의 포맷을 매직 바이트로 감지합니다 (hwpx, hwp, pdf, unknown).\",\n {\n file_path: z.string().min(1).describe(\"감지할 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const resolved = safePath(file_path)\n const format = detectFormatFromHeader(resolved)\n return {\n content: [{ type: \"text\", text: `${file_path}: ${format}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_metadata ────────────────────────────\n\nserver.tool(\n \"parse_metadata\",\n \"문서의 메타데이터(제목, 작성자, 날짜 등)만 빠르게 추출합니다. 전체 파싱 없이 헤더/매니페스트만 읽습니다.\",\n {\n file_path: z.string().min(1).describe(\"메타데이터를 추출할 문서 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const resolved = safePath(file_path)\n const format = detectFormatFromHeader(resolved)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n // metadata 전용 크기 제한 (50MB)\n const { buffer } = readValidatedFile(file_path, MAX_METADATA_FILE_SIZE)\n\n let metadata\n // ZIP 기반 포맷(hwpx)은 내부 구조로 세분화 (XLSX/DOCX 구분)\n let effectiveFormat = format\n if (format === \"hwpx\") {\n const { detectZipFormat } = await import(\"./detect.js\")\n const zipFormat = await detectZipFormat(buffer)\n if (zipFormat === \"xlsx\" || zipFormat === \"docx\") effectiveFormat = zipFormat as any\n }\n switch (effectiveFormat) {\n case \"hwp\":\n metadata = extractHwp5MetadataOnly(Buffer.from(buffer))\n break\n case \"hwpx\":\n metadata = await extractHwpxMetadataOnly(buffer)\n break\n case \"pdf\":\n metadata = await extractPdfMetadataOnly(buffer)\n break\n case \"xlsx\":\n case \"docx\": {\n // XLSX/DOCX는 전용 metadata 추출기가 없으므로 전체 파싱 후 metadata 반환\n const result = await parse(buffer)\n metadata = result.success ? result.metadata : undefined\n break\n }\n }\n\n return {\n content: [{ type: \"text\", text: JSON.stringify({ format, ...metadata }, null, 2) }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_pages ──────────────────────────────\n\nserver.tool(\n \"parse_pages\",\n \"문서의 특정 페이지/섹션 범위만 파싱합니다. PDF는 정확한 페이지, HWP/HWPX는 섹션 단위 근사치입니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\n pages: z.string().min(1).describe(\"페이지 범위 (예: '1-3', '1,3,5-7')\"),\n },\n async ({ file_path, pages }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer, { pages })\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const meta = [\n `포맷: ${result.fileType.toUpperCase()}`,\n `범위: ${pages}`,\n result.pageCount ? `페이지: ${result.pageCount}` : null,\n ].filter(Boolean).join(\" | \")\n\n return {\n content: [{ type: \"text\", text: `[${meta}]\\n\\n${result.markdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_table ──────────────────────────────\n\nserver.tool(\n \"parse_table\",\n \"문서에서 N번째 테이블만 추출합니다 (0-based index). 테이블이 없거나 인덱스 범위를 초과하면 오류를 반환합니다.\",\n {\n file_path: z.string().min(1).describe(\"파싱할 문서 파일의 절대 경로\"),\n table_index: z.number().int().min(0).describe(\"추출할 테이블 인덱스 (0부터 시작)\"),\n },\n async ({ file_path, table_index }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const format = detectFormat(buffer)\n\n if (format === \"unknown\") {\n return {\n content: [{ type: \"text\", text: `지원하지 않는 파일 형식입니다: ${file_path}` }],\n isError: true,\n }\n }\n\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패 (${result.fileType}): ${result.error}` }],\n isError: true,\n }\n }\n\n const tableBlocks = result.blocks.filter(b => b.type === \"table\" && b.table)\n if (tableBlocks.length === 0) {\n return {\n content: [{ type: \"text\", text: `문서에 테이블이 없습니다.` }],\n isError: true,\n }\n }\n\n if (table_index >= tableBlocks.length) {\n return {\n content: [{ type: \"text\", text: `테이블 인덱스 초과: ${table_index} (총 ${tableBlocks.length}개 테이블)` }],\n isError: true,\n }\n }\n\n const tableBlock = tableBlocks[table_index]\n const tableMarkdown = blocksToMarkdown([tableBlock])\n\n return {\n content: [{ type: \"text\", text: `[테이블 #${table_index} / 총 ${tableBlocks.length}개]\\n\\n${tableMarkdown}` }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: compare_documents ─────────────────────────\n\nserver.tool(\n \"compare_documents\",\n \"두 한국 문서 파일을 비교하여 추가/삭제/변경된 블록을 표시합니다. 신구대조표 생성에 활용됩니다. 크로스 포맷(HWP↔HWPX) 비교 가능.\",\n {\n file_path_a: z.string().min(1).describe(\"비교 원본 문서의 절대 경로\"),\n file_path_b: z.string().min(1).describe(\"비교 대상 문서의 절대 경로\"),\n },\n async ({ file_path_a, file_path_b }) => {\n try {\n const { buffer: bufA } = readValidatedFile(file_path_a)\n const { buffer: bufB } = readValidatedFile(file_path_b)\n\n const result = await compare(bufA, bufB)\n const { stats, diffs } = result\n\n const lines: string[] = [\n `## 문서 비교 결과`,\n `추가: ${stats.added} | 삭제: ${stats.removed} | 변경: ${stats.modified} | 동일: ${stats.unchanged}`,\n \"\",\n ]\n\n for (const d of diffs) {\n const prefix = d.type === \"added\" ? \"+\" : d.type === \"removed\" ? \"-\" : d.type === \"modified\" ? \"~\" : \" \"\n const text = d.after?.text || d.before?.text || (d.after?.table ? \"[테이블]\" : d.before?.table ? \"[테이블]\" : \"\")\n const sim = d.similarity !== undefined ? ` (${(d.similarity * 100).toFixed(0)}%)` : \"\"\n lines.push(`${prefix} ${text.substring(0, 200)}${sim}`)\n }\n\n return {\n content: [{ type: \"text\", text: lines.join(\"\\n\") }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 도구: parse_form ───────────────────────────────\n\nserver.tool(\n \"parse_form\",\n \"한국 서식 문서에서 레이블-값 쌍을 구조화된 JSON으로 추출합니다. 양식/서식 문서에 최적화.\",\n {\n file_path: z.string().min(1).describe(\"서식 문서 파일의 절대 경로\"),\n },\n async ({ file_path }) => {\n try {\n const { buffer } = readValidatedFile(file_path)\n const result = await parse(buffer)\n\n if (!result.success) {\n return {\n content: [{ type: \"text\", text: `파싱 실패: ${result.error}` }],\n isError: true,\n }\n }\n\n const form = extractFormFields(result.blocks)\n return {\n content: [{ type: \"text\", text: JSON.stringify(form, null, 2) }],\n }\n } catch (err) {\n return {\n content: [{ type: \"text\", text: `오류: ${sanitizeError(err)}` }],\n isError: true,\n }\n }\n }\n)\n\n// ─── 서버 시작 ───────────────────────────────────────\n\nasync function main() {\n const transport = new StdioServerTransport()\n await server.connect(transport)\n}\n\nmain().catch((err) => { console.error(err); process.exit(1) })\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAEA,SAAS,iBAAiB;AAC1B,SAAS,4BAA4B;AACrC,SAAS,SAAS;AAClB,SAAS,cAAc,cAAc,UAAU,UAAU,WAAW,gBAAgB;AACpF,SAAS,SAAS,YAAY,eAAe;AAQ7C,IAAM,qBAAqB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAE9E,IAAM,gBAAgB,MAAM,OAAO;AAGnC,SAAS,SAAS,UAA0B;AAC1C,MAAI,CAAC,SAAU,OAAM,IAAI,YAAY,sEAAe;AACpD,QAAM,WAAW,QAAQ,QAAQ;AACjC,MAAI;AACJ,MAAI;AACF,WAAO,aAAa,QAAQ;AAAA,EAC9B,SAAS,KAAU;AACjB,QAAI,KAAK,SAAS,SAAU,OAAM,IAAI,YAAY,oEAAkB,QAAQ,EAAE;AAC9E,QAAI,KAAK,SAAS,YAAY,KAAK,SAAS,QAAS,OAAM,IAAI,YAAY,0EAAmB,QAAQ,EAAE;AACxG,UAAM,IAAI,YAAY,2CAAa,KAAK,QAAQ,SAAS,GAAG;AAAA,EAC9D;AACA,MAAI,CAAC,WAAW,IAAI,EAAG,OAAM,IAAI,YAAY,gEAAc;AAC3D,QAAM,MAAM,QAAQ,IAAI,EAAE,YAAY;AACtC,MAAI,CAAC,mBAAmB,IAAI,GAAG,EAAG,OAAM,IAAI,YAAY,+EAAmB,GAAG,mBAAS,CAAC,GAAG,kBAAkB,EAAE,KAAK,IAAI,CAAC,GAAG;AAC5H,SAAO;AACT;AAGA,IAAM,yBAAyB,KAAK,OAAO;AAG3C,SAAS,kBAAkB,UAAkB,UAAU,eAA0D;AAC/G,QAAM,WAAW,SAAS,QAAQ;AAClC,MAAI;AACJ,MAAI;AACF,eAAW,SAAS,QAAQ,EAAE;AAAA,EAChC,SAAS,KAAU;AACjB,UAAM,IAAI,YAAY,wDAAgB,KAAK,QAAQ,SAAS,MAAM,QAAQ,EAAE;AAAA,EAC9E;AACA,MAAI,WAAW,SAAS;AACtB,UAAM,IAAI,YAAY,wDAAgB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC,oBAAU,UAAU,OAAO,IAAI,KAAK;AAAA,EAC9G;AACA,MAAI;AACJ,MAAI;AACF,UAAM,aAAa,QAAQ;AAAA,EAC7B,SAAS,KAAU;AACjB,UAAM,IAAI,YAAY,2CAAa,KAAK,QAAQ,SAAS,MAAM,QAAQ,EAAE;AAAA,EAC3E;AACA,SAAO,EAAE,QAAQ,cAAc,GAAG,GAAG,SAAS;AAChD;AAGA,SAAS,uBAAuB,UAAmD;AACjF,QAAM,KAAK,SAAS,UAAU,GAAG;AACjC,MAAI;AACF,UAAM,YAAY,OAAO,MAAM,EAAE;AACjC,aAAS,IAAI,WAAW,GAAG,IAAI,CAAC;AAChC,WAAO,aAAa,cAAc,SAAS,CAAC;AAAA,EAC9C,UAAE;AACA,cAAU,EAAE;AAAA,EACd;AACF;AAEA,IAAM,SAAS,IAAI,UAAU;AAAA,EAC3B,MAAM;AAAA,EACN,SAAS;AACX,CAAC;AAID,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,2GAA+C;AAAA,EACvF;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,QAChD,OAAO,UAAU,QAAQ,iBAAO,OAAO,SAAS,KAAK,KAAK;AAAA,QAC1D,OAAO,UAAU,SAAS,uBAAQ,OAAO,SAAS,MAAM,KAAK;AAAA,QAC7D,OAAO,eAAe,uFAA2B;AAAA,MACnD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAG5B,YAAM,QAAkB,CAAC,IAAI,IAAI,GAAG;AAEpC,UAAI,OAAO,WAAW,OAAO,QAAQ,SAAS,GAAG;AAC/C,cAAM,cAAc,OAAO,QAAQ,IAAI,OAAK,GAAG,KAAK,OAAO,EAAE,QAAQ,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,KAAK,IAAI;AAC/F,cAAM,KAAK;AAAA;AAAA,EAAgB,WAAW,EAAE;AAAA,MAC1C;AAEA,UAAI,OAAO,YAAY,OAAO,SAAS,SAAS,GAAG;AACjD,cAAM,WAAW,OAAO,SAAS,IAAI,OAAK,OAAO,EAAE,QAAQ,GAAG,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,IAAI;AACzF,cAAM,KAAK;AAAA;AAAA,EAAa,QAAQ,EAAE;AAAA,MACpC;AAEA,YAAM,KAAK;AAAA;AAAA,EAAO,OAAO,QAAQ,EAAE;AAEnC,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,EAAE,EAAE,CAAC;AAAA,MAClD;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,iEAAe;AAAA,EACvD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAC9C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,GAAG,SAAS,KAAK,MAAM,GAAG,CAAC;AAAA,MAC7D;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,mHAAyB;AAAA,EACjE;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,SAAS,uBAAuB,QAAQ;AAE9C,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAGA,YAAM,EAAE,OAAO,IAAI,kBAAkB,WAAW,sBAAsB;AAEtE,UAAI;AAEJ,UAAI,kBAAkB;AACtB,UAAI,WAAW,QAAQ;AACrB,cAAM,EAAE,gBAAgB,IAAI,MAAM,OAAO,sBAAa;AACtD,cAAM,YAAY,MAAM,gBAAgB,MAAM;AAC9C,YAAI,cAAc,UAAU,cAAc,OAAQ,mBAAkB;AAAA,MACtE;AACA,cAAQ,iBAAiB;AAAA,QACvB,KAAK;AACH,qBAAW,wBAAwB,OAAO,KAAK,MAAM,CAAC;AACtD;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,wBAAwB,MAAM;AAC/C;AAAA,QACF,KAAK;AACH,qBAAW,MAAM,uBAAuB,MAAM;AAC9C;AAAA,QACF,KAAK;AAAA,QACL,KAAK,QAAQ;AAEX,gBAAM,SAAS,MAAM,MAAM,MAAM;AACjC,qBAAW,OAAO,UAAU,OAAO,WAAW;AAC9C;AAAA,QACF;AAAA,MACF;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,EAAE,QAAQ,GAAG,SAAS,GAAG,MAAM,CAAC,EAAE,CAAC;AAAA,MACpF;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,4DAA8B;AAAA,EAClE;AAAA,EACA,OAAO,EAAE,WAAW,MAAM,MAAM;AAC9B,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,QAAQ,EAAE,MAAM,CAAC;AAE5C,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO;AAAA,QACX,iBAAO,OAAO,SAAS,YAAY,CAAC;AAAA,QACpC,iBAAO,KAAK;AAAA,QACZ,OAAO,YAAY,uBAAQ,OAAO,SAAS,KAAK;AAAA,MAClD,EAAE,OAAO,OAAO,EAAE,KAAK,KAAK;AAE5B,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,IAAI,IAAI;AAAA;AAAA,EAAQ,OAAO,QAAQ,GAAG,CAAC;AAAA,MACrE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,8EAAkB;AAAA,IACxD,aAAa,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,EAAE,SAAS,uFAAsB;AAAA,EACtE;AAAA,EACA,OAAO,EAAE,WAAW,YAAY,MAAM;AACpC,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,aAAa,MAAM;AAElC,UAAI,WAAW,WAAW;AACxB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,sFAAqB,SAAS,GAAG,CAAC;AAAA,UAClE,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,QAAQ,MAAM,OAAO,KAAK,GAAG,CAAC;AAAA,UAC/E,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,cAAc,OAAO,OAAO,OAAO,OAAK,EAAE,SAAS,WAAW,EAAE,KAAK;AAC3E,UAAI,YAAY,WAAW,GAAG;AAC5B,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wEAAiB,CAAC;AAAA,UAClD,SAAS;AAAA,QACX;AAAA,MACF;AAEA,UAAI,eAAe,YAAY,QAAQ;AACrC,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,uDAAe,WAAW,YAAO,YAAY,MAAM,6BAAS,CAAC;AAAA,UAC7F,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,aAAa,YAAY,WAAW;AAC1C,YAAM,gBAAgB,iBAAiB,CAAC,UAAU,CAAC;AAEnD,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,wBAAS,WAAW,aAAQ,YAAY,MAAM;AAAA;AAAA,EAAS,aAAa,GAAG,CAAC;AAAA,MAC1G;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,IACzD,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EAC3D;AAAA,EACA,OAAO,EAAE,aAAa,YAAY,MAAM;AACtC,QAAI;AACF,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AACtD,YAAM,EAAE,QAAQ,KAAK,IAAI,kBAAkB,WAAW;AAEtD,YAAM,SAAS,MAAM,QAAQ,MAAM,IAAI;AACvC,YAAM,EAAE,OAAO,MAAM,IAAI;AAEzB,YAAM,QAAkB;AAAA,QACtB;AAAA,QACA,iBAAO,MAAM,KAAK,oBAAU,MAAM,OAAO,oBAAU,MAAM,QAAQ,oBAAU,MAAM,SAAS;AAAA,QAC1F;AAAA,MACF;AAEA,iBAAW,KAAK,OAAO;AACrB,cAAM,SAAS,EAAE,SAAS,UAAU,MAAM,EAAE,SAAS,YAAY,MAAM,EAAE,SAAS,aAAa,MAAM;AACrG,cAAM,OAAO,EAAE,OAAO,QAAQ,EAAE,QAAQ,SAAS,EAAE,OAAO,QAAQ,yBAAU,EAAE,QAAQ,QAAQ,yBAAU;AACxG,cAAM,MAAM,EAAE,eAAe,SAAY,MAAM,EAAE,aAAa,KAAK,QAAQ,CAAC,CAAC,OAAO;AACpF,cAAM,KAAK,GAAG,MAAM,IAAI,KAAK,UAAU,GAAG,GAAG,CAAC,GAAG,GAAG,EAAE;AAAA,MACxD;AAEA,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,MAAM,KAAK,IAAI,EAAE,CAAC;AAAA,MACpD;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,OAAO;AAAA,EACL;AAAA,EACA;AAAA,EACA;AAAA,IACE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS,wEAAiB;AAAA,EACzD;AAAA,EACA,OAAO,EAAE,UAAU,MAAM;AACvB,QAAI;AACF,YAAM,EAAE,OAAO,IAAI,kBAAkB,SAAS;AAC9C,YAAM,SAAS,MAAM,MAAM,MAAM;AAEjC,UAAI,CAAC,OAAO,SAAS;AACnB,eAAO;AAAA,UACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,8BAAU,OAAO,KAAK,GAAG,CAAC;AAAA,UAC1D,SAAS;AAAA,QACX;AAAA,MACF;AAEA,YAAM,OAAO,kBAAkB,OAAO,MAAM;AAC5C,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,KAAK,UAAU,MAAM,MAAM,CAAC,EAAE,CAAC;AAAA,MACjE;AAAA,IACF,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,SAAS,CAAC,EAAE,MAAM,QAAQ,MAAM,iBAAO,cAAc,GAAG,CAAC,GAAG,CAAC;AAAA,QAC7D,SAAS;AAAA,MACX;AAAA,IACF;AAAA,EACF;AACF;AAIA,eAAe,OAAO;AACpB,QAAM,YAAY,IAAI,qBAAqB;AAC3C,QAAM,OAAO,QAAQ,SAAS;AAChC;AAEA,KAAK,EAAE,MAAM,CAAC,QAAQ;AAAE,UAAQ,MAAM,GAAG;AAAG,UAAQ,KAAK,CAAC;AAAE,CAAC;","names":[]}
|
|
@@ -13,6 +13,7 @@ async function ocrPages(doc, provider, pageFilter, effectivePageCount) {
|
|
|
13
13
|
blocks.push({ type: "paragraph", text: text.trim(), pageNumber: i });
|
|
14
14
|
}
|
|
15
15
|
} catch {
|
|
16
|
+
blocks.push({ type: "paragraph", text: `[OCR \uC2E4\uD328: \uD398\uC774\uC9C0 ${i}]` });
|
|
16
17
|
}
|
|
17
18
|
}
|
|
18
19
|
return blocks;
|
|
@@ -35,4 +36,4 @@ async function renderPageToPng(page) {
|
|
|
35
36
|
export {
|
|
36
37
|
ocrPages
|
|
37
38
|
};
|
|
38
|
-
//# sourceMappingURL=provider-
|
|
39
|
+
//# sourceMappingURL=provider-7H4CPZYS.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/ocr/provider.ts"],"sourcesContent":["/**\r\n * OCR 프로바이더 브릿지 — PDF 페이지를 이미지로 렌더링하여 OCR 호출\r\n *\r\n * kordoc은 OCR 라이브러리를 번들하지 않음.\r\n * 사용자가 OcrProvider 함수를 제공하면 이미지 기반 PDF도 텍스트 추출 가능.\r\n *\r\n * @example\r\n * ```ts\r\n * import { parse } from \"kordoc\"\r\n *\r\n * const result = await parse(buffer, {\r\n * ocr: async (pageImage, pageNumber, mimeType) => {\r\n * // Tesseract, Claude Vision, Google Vision 등 사용\r\n * return await myOcrService.recognize(pageImage)\r\n * }\r\n * })\r\n * ```\r\n */\r\n\r\nimport type { OcrProvider, IRBlock } from \"../types.js\"\r\n\r\n/**\r\n * 이미지 기반 PDF 페이지에 OCR을 적용하여 IRBlock[] 반환.\r\n *\r\n * pdfjs page 객체에서 viewport + render를 통해 PNG 생성 후\r\n * 사용자 제공 OcrProvider 호출.\r\n *\r\n * canvas 미설치 시 pdfjs render 불가하므로 에러 반환.\r\n */\r\nexport async function ocrPages(\r\n doc: { numPages: number; getPage(n: number): Promise<PdfPageProxy> },\r\n provider: OcrProvider,\r\n pageFilter: Set<number> | null,\r\n effectivePageCount: number\r\n): Promise<IRBlock[]> {\r\n const blocks: IRBlock[] = []\r\n\r\n for (let i = 1; i <= effectivePageCount; i++) {\r\n if (pageFilter && !pageFilter.has(i)) continue\r\n const page = await doc.getPage(i)\r\n try {\r\n const imageData = await renderPageToPng(page)\r\n const text = await provider(imageData, i, \"image/png\")\r\n if (text.trim()) {\r\n blocks.push({ type: \"paragraph\", text: text.trim(), pageNumber: i })\r\n }\r\n } catch {\r\n blocks.push({ type: \"paragraph\" as const, text: `[OCR 실패: 페이지 ${i}]` })\r\n }\r\n }\r\n\r\n return blocks\r\n}\r\n\r\ninterface PdfPageProxy {\r\n getViewport(params: { scale: number }): { width: number; height: number }\r\n render(params: { canvasContext: unknown; viewport: unknown }): { promise: Promise<void> }\r\n}\r\n\r\n/**\r\n * PDF 페이지를 PNG로 렌더링.\r\n * node-canvas가 설치되어 있어야 동작.\r\n * 미설치 시 에러 throw → 호출측에서 catch.\r\n */\r\nasync function renderPageToPng(page: PdfPageProxy): Promise<Uint8Array> {\r\n // node-canvas 동적 로드 (선택적 의존성)\r\n let createCanvas: (w: number, h: number) => { getContext(t: string): unknown; toBuffer(t: string): Buffer }\r\n try {\r\n const canvasModule = await import(\"canvas\")\r\n createCanvas = canvasModule.createCanvas\r\n } catch {\r\n throw new Error(\"OCR을 사용하려면 'canvas' 패키지를 설치하세요: npm install canvas\")\r\n }\r\n\r\n const scale = 2.0 // 300 DPI 근사\r\n const viewport = page.getViewport({ scale })\r\n const canvas = createCanvas(Math.floor(viewport.width), Math.floor(viewport.height))\r\n const ctx = canvas.getContext(\"2d\")\r\n\r\n await page.render({ canvasContext: ctx, viewport }).promise\r\n return new Uint8Array(canvas.toBuffer(\"image/png\"))\r\n}\r\n"],"mappings":";;;AA6BA,eAAsB,SACpB,KACA,UACA,YACA,oBACoB;AACpB,QAAM,SAAoB,CAAC;AAE3B,WAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,QAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,UAAM,OAAO,MAAM,IAAI,QAAQ,CAAC;AAChC,QAAI;AACF,YAAM,YAAY,MAAM,gBAAgB,IAAI;AAC5C,YAAM,OAAO,MAAM,SAAS,WAAW,GAAG,WAAW;AACrD,UAAI,KAAK,KAAK,GAAG;AACf,eAAO,KAAK,EAAE,MAAM,aAAa,MAAM,KAAK,KAAK,GAAG,YAAY,EAAE,CAAC;AAAA,MACrE;AAAA,IACF,QAAQ;AACN,aAAO,KAAK,EAAE,MAAM,aAAsB,MAAM,yCAAgB,CAAC,IAAI,CAAC;AAAA,IACxE;AAAA,EACF;AAEA,SAAO;AACT;AAYA,eAAe,gBAAgB,MAAyC;AAEtE,MAAI;AACJ,MAAI;AACF,UAAM,eAAe,MAAM,OAAO,QAAQ;AAC1C,mBAAe,aAAa;AAAA,EAC9B,QAAQ;AACN,UAAM,IAAI,MAAM,+HAAoD;AAAA,EACtE;AAEA,QAAM,QAAQ;AACd,QAAM,WAAW,KAAK,YAAY,EAAE,MAAM,CAAC;AAC3C,QAAM,SAAS,aAAa,KAAK,MAAM,SAAS,KAAK,GAAG,KAAK,MAAM,SAAS,MAAM,CAAC;AACnF,QAAM,MAAM,OAAO,WAAW,IAAI;AAElC,QAAM,KAAK,OAAO,EAAE,eAAe,KAAK,SAAS,CAAC,EAAE;AACpD,SAAO,IAAI,WAAW,OAAO,SAAS,WAAW,CAAC;AACpD;","names":[]}
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
|
-
|
|
4
|
-
parse
|
|
5
|
-
} from "./chunk-4UH6ABAY.js";
|
|
6
|
-
import {
|
|
3
|
+
parse,
|
|
7
4
|
toArrayBuffer
|
|
8
|
-
} from "./chunk-
|
|
9
|
-
import
|
|
5
|
+
} from "./chunk-LYFG7AUT.js";
|
|
6
|
+
import {
|
|
7
|
+
detectFormat
|
|
8
|
+
} from "./chunk-5Y2Q3BRW.js";
|
|
9
|
+
import "./chunk-MOL7MDBG.js";
|
|
10
10
|
|
|
11
11
|
// src/watch.ts
|
|
12
|
-
import { watch, readFileSync, writeFileSync, mkdirSync, statSync, existsSync } from "fs";
|
|
13
|
-
import { basename, resolve, extname } from "path";
|
|
12
|
+
import { watch, readFileSync, writeFileSync, mkdirSync, statSync, existsSync, realpathSync } from "fs";
|
|
13
|
+
import { basename, resolve, extname, sep } from "path";
|
|
14
14
|
var SUPPORTED_EXTENSIONS = /* @__PURE__ */ new Set([".hwp", ".hwpx", ".pdf", ".xlsx", ".docx"]);
|
|
15
15
|
var DEBOUNCE_MS = 1e3;
|
|
16
16
|
var STABLE_CHECK_MS = 300;
|
|
@@ -26,6 +26,9 @@ async function watchDirectory(options) {
|
|
|
26
26
|
if (outDir) log(`[kordoc watch] \uCD9C\uB825: ${resolve(outDir)}`);
|
|
27
27
|
if (webhook) log(`[kordoc watch] \uC6F9\uD6C5: ${webhook}`);
|
|
28
28
|
const pending = /* @__PURE__ */ new Map();
|
|
29
|
+
const MAX_CONCURRENT = 3;
|
|
30
|
+
let activeCount = 0;
|
|
31
|
+
const inProgress = /* @__PURE__ */ new Set();
|
|
29
32
|
const waitForStableSize = async (absPath) => {
|
|
30
33
|
let prevSize = statSync(absPath).size;
|
|
31
34
|
await new Promise((r) => setTimeout(r, STABLE_CHECK_MS));
|
|
@@ -41,12 +44,21 @@ async function watchDirectory(options) {
|
|
|
41
44
|
const processFile = async (filePath) => {
|
|
42
45
|
const ext = extname(filePath).toLowerCase();
|
|
43
46
|
if (!SUPPORTED_EXTENSIONS.has(ext)) return;
|
|
47
|
+
if (inProgress.has(filePath) || activeCount >= MAX_CONCURRENT) return;
|
|
48
|
+
inProgress.add(filePath);
|
|
49
|
+
activeCount++;
|
|
44
50
|
const fileName = basename(filePath);
|
|
45
51
|
try {
|
|
46
|
-
const
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
52
|
+
const rawPath = resolve(dir, filePath);
|
|
53
|
+
if (!existsSync(rawPath)) return;
|
|
54
|
+
let absPath;
|
|
55
|
+
try {
|
|
56
|
+
absPath = realpathSync(rawPath);
|
|
57
|
+
} catch {
|
|
58
|
+
return;
|
|
59
|
+
}
|
|
60
|
+
const realDir = realpathSync(resolve(dir));
|
|
61
|
+
if (!absPath.startsWith(realDir + sep) && absPath !== realDir) return;
|
|
50
62
|
const fileSize = await waitForStableSize(absPath);
|
|
51
63
|
if (fileSize > MAX_FILE_SIZE || fileSize === 0) return;
|
|
52
64
|
log(`[kordoc watch] \uBCC0\uD658 \uC911: ${fileName}`);
|
|
@@ -76,6 +88,9 @@ async function watchDirectory(options) {
|
|
|
76
88
|
});
|
|
77
89
|
} catch (err) {
|
|
78
90
|
log(`[kordoc watch] \uC5D0\uB7EC: ${fileName} \u2014 ${err instanceof Error ? err.message : err}`);
|
|
91
|
+
} finally {
|
|
92
|
+
inProgress.delete(filePath);
|
|
93
|
+
activeCount--;
|
|
79
94
|
}
|
|
80
95
|
};
|
|
81
96
|
watch(dir, { recursive: true }, (event, filename) => {
|
|
@@ -85,7 +100,9 @@ async function watchDirectory(options) {
|
|
|
85
100
|
if (existing) clearTimeout(existing);
|
|
86
101
|
pending.set(filePath, setTimeout(() => {
|
|
87
102
|
pending.delete(filePath);
|
|
88
|
-
processFile(filePath).catch(() => {
|
|
103
|
+
processFile(filePath).catch((err) => {
|
|
104
|
+
process.stderr.write(`[kordoc watch] \uCC98\uB9AC \uC2E4\uD328: ${filePath} \u2014 ${err instanceof Error ? err.message : String(err)}
|
|
105
|
+
`);
|
|
89
106
|
});
|
|
90
107
|
}, DEBOUNCE_MS));
|
|
91
108
|
});
|
|
@@ -105,8 +122,8 @@ function validateWebhookUrl(url) {
|
|
|
105
122
|
const hostname = parsed.hostname.toLowerCase();
|
|
106
123
|
if (hostname === "localhost" || hostname === "[::1]" || hostname.startsWith("127.") || hostname.startsWith("10.") || hostname.startsWith("192.168.") || /^172\.(1[6-9]|2\d|3[01])\./.test(hostname) || hostname === "0.0.0.0" || hostname.startsWith("169.254.") || hostname.endsWith(".local") || // IPv6 사설 대역
|
|
107
124
|
hostname.startsWith("[fc") || hostname.startsWith("[fd") || hostname.startsWith("[fe80:") || hostname === "[::0]" || hostname === "[::]" || // 클라우드 메타데이터 엔드포인트
|
|
108
|
-
hostname === "metadata.google.internal" || hostname === "metadata.google" || // 16진수/8진수 IP 인코딩 우회 방지
|
|
109
|
-
/^0x[0-9a-f]+$/i.test(hostname) || /^0[0-7]+$/.test(hostname)) {
|
|
125
|
+
hostname === "metadata.google.internal" || hostname === "metadata.google" || // 16진수/8진수/10진수 정수 IP 인코딩 우회 방지
|
|
126
|
+
/^0x[0-9a-f]+$/i.test(hostname) || /^0[0-7]+$/.test(hostname) || /^\d+$/.test(hostname)) {
|
|
110
127
|
throw new Error(`\uB0B4\uBD80 \uB124\uD2B8\uC6CC\uD06C \uB300\uC0C1 webhook\uC740 \uD5C8\uC6A9\uB418\uC9C0 \uC54A\uC2B5\uB2C8\uB2E4: ${hostname}`);
|
|
111
128
|
}
|
|
112
129
|
}
|
|
@@ -117,12 +134,15 @@ async function sendWebhook(url, payload) {
|
|
|
117
134
|
await fetch(url, {
|
|
118
135
|
method: "POST",
|
|
119
136
|
headers: { "Content-Type": "application/json" },
|
|
120
|
-
body: JSON.stringify({ ...payload, timestamp: (/* @__PURE__ */ new Date()).toISOString() })
|
|
137
|
+
body: JSON.stringify({ ...payload, timestamp: (/* @__PURE__ */ new Date()).toISOString() }),
|
|
138
|
+
redirect: "error"
|
|
121
139
|
});
|
|
122
|
-
} catch {
|
|
140
|
+
} catch (err) {
|
|
141
|
+
process.stderr.write(`[kordoc watch] webhook \uC804\uC1A1 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}
|
|
142
|
+
`);
|
|
123
143
|
}
|
|
124
144
|
}
|
|
125
145
|
export {
|
|
126
146
|
watchDirectory
|
|
127
147
|
};
|
|
128
|
-
//# sourceMappingURL=watch-
|
|
148
|
+
//# sourceMappingURL=watch-Q5OXA73S.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/watch.ts"],"sourcesContent":["/** 디렉토리 감시 모드 — 새 문서 자동 변환 + Webhook 알림 */\r\n\r\nimport { watch, readFileSync, writeFileSync, mkdirSync, statSync, existsSync, realpathSync } from \"fs\"\r\nimport { basename, resolve, extname, sep } from \"path\"\r\nimport { parse, detectFormat } from \"./index.js\"\r\nimport { toArrayBuffer } from \"./utils.js\"\r\nimport type { WatchOptions } from \"./types.js\"\r\n\r\nconst SUPPORTED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\r\nconst DEBOUNCE_MS = 1000\r\n/** 파일 쓰기 완료 판정: 연속 2회 동일 크기 확인 간격 */\r\nconst STABLE_CHECK_MS = 300\r\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\r\n\r\n/**\r\n * 디렉토리를 감시하여 새 문서 파일을 자동 변환.\r\n *\r\n * @example\r\n * ```bash\r\n * kordoc watch ./incoming -d ./output --webhook https://api.example.com/docs\r\n * ```\r\n */\r\nexport async function watchDirectory(options: WatchOptions): Promise<void> {\r\n const { dir, outDir, webhook, format = \"markdown\", pages, silent } = options\r\n\r\n if (!existsSync(dir)) throw new Error(`디렉토리를 찾을 수 없습니다: ${dir}`)\r\n if (webhook) validateWebhookUrl(webhook)\r\n if (outDir) mkdirSync(outDir, { recursive: true })\r\n\r\n const log = silent ? () => {} : (msg: string) => process.stderr.write(msg + \"\\n\")\r\n log(`[kordoc watch] 감시 시작: ${resolve(dir)}`)\r\n if (outDir) log(`[kordoc watch] 출력: ${resolve(outDir)}`)\r\n if (webhook) log(`[kordoc watch] 웹훅: ${webhook}`)\r\n\r\n // 디바운스 맵\r\n const pending = new Map<string, ReturnType<typeof setTimeout>>()\r\n // 동시 처리 제한 — 메모리 폭주 방지\r\n const MAX_CONCURRENT = 3\r\n let activeCount = 0\r\n const inProgress = new Set<string>()\r\n\r\n /** 파일 크기가 안정화될 때까지 대기 (쓰기 완료 감지) */\r\n const waitForStableSize = async (absPath: string): Promise<number> => {\r\n let prevSize = statSync(absPath).size\r\n await new Promise(r => setTimeout(r, STABLE_CHECK_MS))\r\n if (!existsSync(absPath)) return 0\r\n const currSize = statSync(absPath).size\r\n if (currSize !== prevSize) {\r\n // 크기가 변했으면 한 번 더 대기\r\n await new Promise(r => setTimeout(r, STABLE_CHECK_MS))\r\n if (!existsSync(absPath)) return 0\r\n return statSync(absPath).size\r\n }\r\n return currSize\r\n }\r\n\r\n const processFile = async (filePath: string) => {\r\n const ext = extname(filePath).toLowerCase()\r\n if (!SUPPORTED_EXTENSIONS.has(ext)) return\r\n // 동일 파일 동시 처리 방지 + 동시 처리 수 제한\r\n if (inProgress.has(filePath) || activeCount >= MAX_CONCURRENT) return\r\n inProgress.add(filePath)\r\n activeCount++\r\n\r\n const fileName = basename(filePath)\r\n try {\r\n const rawPath = resolve(dir, filePath)\r\n if (!existsSync(rawPath)) return\r\n // 심볼릭 링크 해석 후 감시 디렉토리 외부 파일 차단\r\n let absPath: string\r\n try { absPath = realpathSync(rawPath) } catch { return }\r\n const realDir = realpathSync(resolve(dir))\r\n if (!absPath.startsWith(realDir + sep) && absPath !== realDir) return\r\n\r\n const fileSize = await waitForStableSize(absPath)\r\n if (fileSize > MAX_FILE_SIZE || fileSize === 0) return\r\n\r\n log(`[kordoc watch] 변환 중: ${fileName}`)\r\n\r\n const buffer = readFileSync(absPath)\r\n const arrayBuffer = toArrayBuffer(buffer)\r\n const parseOptions = pages ? { pages } : undefined\r\n const result = await parse(arrayBuffer, parseOptions)\r\n\r\n if (!result.success) {\r\n log(`[kordoc watch] 실패: ${fileName} — ${result.error}`)\r\n await sendWebhook(webhook, { file: fileName, format: detectFormat(arrayBuffer), success: false, error: result.error })\r\n return\r\n }\r\n\r\n const output = format === \"json\" ? JSON.stringify(result, null, 2) : result.markdown\r\n\r\n if (outDir) {\r\n const outExt = format === \"json\" ? \".json\" : \".md\"\r\n const outPath = resolve(outDir, fileName.replace(/\\.[^.]+$/, outExt))\r\n writeFileSync(outPath, output, \"utf-8\")\r\n log(`[kordoc watch] 완료: ${fileName} → ${basename(outPath)}`)\r\n } else {\r\n process.stdout.write(output + \"\\n\")\r\n }\r\n\r\n await sendWebhook(webhook, {\r\n file: fileName,\r\n format: result.fileType,\r\n success: true,\r\n markdown: format === \"markdown\" ? output.substring(0, 1000) : undefined,\r\n })\r\n } catch (err) {\r\n log(`[kordoc watch] 에러: ${fileName} — ${err instanceof Error ? err.message : err}`)\r\n } finally {\r\n inProgress.delete(filePath)\r\n activeCount--\r\n }\r\n }\r\n\r\n // fs.watch recursive (Node 18+ Windows/macOS, Node 19+ Linux)\r\n watch(dir, { recursive: true }, (event, filename) => {\r\n if (!filename) return\r\n const filePath = filename.toString()\r\n\r\n // 디바운스\r\n const existing = pending.get(filePath)\r\n if (existing) clearTimeout(existing)\r\n pending.set(filePath, setTimeout(() => {\r\n pending.delete(filePath)\r\n processFile(filePath).catch((err) => {\r\n process.stderr.write(`[kordoc watch] 처리 실패: ${filePath} — ${err instanceof Error ? err.message : String(err)}\\n`)\r\n })\r\n }, DEBOUNCE_MS))\r\n })\r\n\r\n // 프로세스 종료 방지 (Ctrl+C로 종료)\r\n return new Promise(() => {})\r\n}\r\n\r\n/** Webhook URL 검증 — SSRF 방지: http/https만 허용, localhost/private IP 차단 */\r\nfunction validateWebhookUrl(url: string): void {\r\n let parsed: URL\r\n try {\r\n parsed = new URL(url)\r\n } catch {\r\n throw new Error(`유효하지 않은 webhook URL: ${url}`)\r\n }\r\n if (parsed.protocol !== \"http:\" && parsed.protocol !== \"https:\") {\r\n throw new Error(`허용되지 않는 webhook 프로토콜: ${parsed.protocol}`)\r\n }\r\n const hostname = parsed.hostname.toLowerCase()\r\n if (\r\n hostname === \"localhost\" ||\r\n hostname === \"[::1]\" ||\r\n hostname.startsWith(\"127.\") ||\r\n hostname.startsWith(\"10.\") ||\r\n hostname.startsWith(\"192.168.\") ||\r\n /^172\\.(1[6-9]|2\\d|3[01])\\./.test(hostname) ||\r\n hostname === \"0.0.0.0\" ||\r\n hostname.startsWith(\"169.254.\") ||\r\n hostname.endsWith(\".local\") ||\r\n // IPv6 사설 대역\r\n hostname.startsWith(\"[fc\") ||\r\n hostname.startsWith(\"[fd\") ||\r\n hostname.startsWith(\"[fe80:\") ||\r\n hostname === \"[::0]\" ||\r\n hostname === \"[::]\" ||\r\n // 클라우드 메타데이터 엔드포인트\r\n hostname === \"metadata.google.internal\" ||\r\n hostname === \"metadata.google\" ||\r\n // 16진수/8진수/10진수 정수 IP 인코딩 우회 방지\r\n /^0x[0-9a-f]+$/i.test(hostname) ||\r\n /^0[0-7]+$/.test(hostname) ||\r\n /^\\d+$/.test(hostname)\r\n ) {\r\n throw new Error(`내부 네트워크 대상 webhook은 허용되지 않습니다: ${hostname}`)\r\n }\r\n}\r\n\r\nasync function sendWebhook(url: string | undefined, payload: Record<string, unknown>): Promise<void> {\r\n if (!url) return\r\n try {\r\n validateWebhookUrl(url)\r\n await fetch(url, {\r\n method: \"POST\",\r\n headers: { \"Content-Type\": \"application/json\" },\r\n body: JSON.stringify({ ...payload, timestamp: new Date().toISOString() }),\r\n redirect: \"error\",\r\n })\r\n } catch (err) {\r\n process.stderr.write(`[kordoc watch] webhook 전송 실패: ${err instanceof Error ? err.message : String(err)}\\n`)\r\n }\r\n}\r\n"],"mappings":";;;;;;;;;;;AAEA,SAAS,OAAO,cAAc,eAAe,WAAW,UAAU,YAAY,oBAAoB;AAClG,SAAS,UAAU,SAAS,SAAS,WAAW;AAKhD,IAAM,uBAAuB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAChF,IAAM,cAAc;AAEpB,IAAM,kBAAkB;AACxB,IAAM,gBAAgB,MAAM,OAAO;AAUnC,eAAsB,eAAe,SAAsC;AACzE,QAAM,EAAE,KAAK,QAAQ,SAAS,SAAS,YAAY,OAAO,OAAO,IAAI;AAErE,MAAI,CAAC,WAAW,GAAG,EAAG,OAAM,IAAI,MAAM,gFAAoB,GAAG,EAAE;AAC/D,MAAI,QAAS,oBAAmB,OAAO;AACvC,MAAI,OAAQ,WAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AAEjD,QAAM,MAAM,SAAS,MAAM;AAAA,EAAC,IAAI,CAAC,QAAgB,QAAQ,OAAO,MAAM,MAAM,IAAI;AAChF,MAAI,6CAAyB,QAAQ,GAAG,CAAC,EAAE;AAC3C,MAAI,OAAQ,KAAI,gCAAsB,QAAQ,MAAM,CAAC,EAAE;AACvD,MAAI,QAAS,KAAI,gCAAsB,OAAO,EAAE;AAGhD,QAAM,UAAU,oBAAI,IAA2C;AAE/D,QAAM,iBAAiB;AACvB,MAAI,cAAc;AAClB,QAAM,aAAa,oBAAI,IAAY;AAGnC,QAAM,oBAAoB,OAAO,YAAqC;AACpE,QAAI,WAAW,SAAS,OAAO,EAAE;AACjC,UAAM,IAAI,QAAQ,OAAK,WAAW,GAAG,eAAe,CAAC;AACrD,QAAI,CAAC,WAAW,OAAO,EAAG,QAAO;AACjC,UAAM,WAAW,SAAS,OAAO,EAAE;AACnC,QAAI,aAAa,UAAU;AAEzB,YAAM,IAAI,QAAQ,OAAK,WAAW,GAAG,eAAe,CAAC;AACrD,UAAI,CAAC,WAAW,OAAO,EAAG,QAAO;AACjC,aAAO,SAAS,OAAO,EAAE;AAAA,IAC3B;AACA,WAAO;AAAA,EACT;AAEA,QAAM,cAAc,OAAO,aAAqB;AAC9C,UAAM,MAAM,QAAQ,QAAQ,EAAE,YAAY;AAC1C,QAAI,CAAC,qBAAqB,IAAI,GAAG,EAAG;AAEpC,QAAI,WAAW,IAAI,QAAQ,KAAK,eAAe,eAAgB;AAC/D,eAAW,IAAI,QAAQ;AACvB;AAEA,UAAM,WAAW,SAAS,QAAQ;AAClC,QAAI;AACF,YAAM,UAAU,QAAQ,KAAK,QAAQ;AACrC,UAAI,CAAC,WAAW,OAAO,EAAG;AAE1B,UAAI;AACJ,UAAI;AAAE,kBAAU,aAAa,OAAO;AAAA,MAAE,QAAQ;AAAE;AAAA,MAAO;AACvD,YAAM,UAAU,aAAa,QAAQ,GAAG,CAAC;AACzC,UAAI,CAAC,QAAQ,WAAW,UAAU,GAAG,KAAK,YAAY,QAAS;AAE/D,YAAM,WAAW,MAAM,kBAAkB,OAAO;AAChD,UAAI,WAAW,iBAAiB,aAAa,EAAG;AAEhD,UAAI,uCAAwB,QAAQ,EAAE;AAEtC,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,eAAe,QAAQ,EAAE,MAAM,IAAI;AACzC,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,YAAI,gCAAsB,QAAQ,WAAM,OAAO,KAAK,EAAE;AACtD,cAAM,YAAY,SAAS,EAAE,MAAM,UAAU,QAAQ,aAAa,WAAW,GAAG,SAAS,OAAO,OAAO,OAAO,MAAM,CAAC;AACrH;AAAA,MACF;AAEA,YAAM,SAAS,WAAW,SAAS,KAAK,UAAU,QAAQ,MAAM,CAAC,IAAI,OAAO;AAE5E,UAAI,QAAQ;AACV,cAAM,SAAS,WAAW,SAAS,UAAU;AAC7C,cAAM,UAAU,QAAQ,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACpE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,gCAAsB,QAAQ,WAAM,SAAS,OAAO,CAAC,EAAE;AAAA,MAC7D,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAEA,YAAM,YAAY,SAAS;AAAA,QACzB,MAAM;AAAA,QACN,QAAQ,OAAO;AAAA,QACf,SAAS;AAAA,QACT,UAAU,WAAW,aAAa,OAAO,UAAU,GAAG,GAAI,IAAI;AAAA,MAChE,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,UAAI,gCAAsB,QAAQ,WAAM,eAAe,QAAQ,IAAI,UAAU,GAAG,EAAE;AAAA,IACpF,UAAE;AACA,iBAAW,OAAO,QAAQ;AAC1B;AAAA,IACF;AAAA,EACF;AAGA,QAAM,KAAK,EAAE,WAAW,KAAK,GAAG,CAAC,OAAO,aAAa;AACnD,QAAI,CAAC,SAAU;AACf,UAAM,WAAW,SAAS,SAAS;AAGnC,UAAM,WAAW,QAAQ,IAAI,QAAQ;AACrC,QAAI,SAAU,cAAa,QAAQ;AACnC,YAAQ,IAAI,UAAU,WAAW,MAAM;AACrC,cAAQ,OAAO,QAAQ;AACvB,kBAAY,QAAQ,EAAE,MAAM,CAAC,QAAQ;AACnC,gBAAQ,OAAO,MAAM,6CAAyB,QAAQ,WAAM,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,CAAI;AAAA,MAClH,CAAC;AAAA,IACH,GAAG,WAAW,CAAC;AAAA,EACjB,CAAC;AAGD,SAAO,IAAI,QAAQ,MAAM;AAAA,EAAC,CAAC;AAC7B;AAGA,SAAS,mBAAmB,KAAmB;AAC7C,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;AAAA,EACtB,QAAQ;AACN,UAAM,IAAI,MAAM,sDAAwB,GAAG,EAAE;AAAA,EAC/C;AACA,MAAI,OAAO,aAAa,WAAW,OAAO,aAAa,UAAU;AAC/D,UAAM,IAAI,MAAM,2EAAyB,OAAO,QAAQ,EAAE;AAAA,EAC5D;AACA,QAAM,WAAW,OAAO,SAAS,YAAY;AAC7C,MACE,aAAa,eACb,aAAa,WACb,SAAS,WAAW,MAAM,KAC1B,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,UAAU,KAC9B,6BAA6B,KAAK,QAAQ,KAC1C,aAAa,aACb,SAAS,WAAW,UAAU,KAC9B,SAAS,SAAS,QAAQ;AAAA,EAE1B,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,QAAQ,KAC5B,aAAa,WACb,aAAa;AAAA,EAEb,aAAa,8BACb,aAAa;AAAA,EAEb,iBAAiB,KAAK,QAAQ,KAC9B,YAAY,KAAK,QAAQ,KACzB,QAAQ,KAAK,QAAQ,GACrB;AACA,UAAM,IAAI,MAAM,uHAAkC,QAAQ,EAAE;AAAA,EAC9D;AACF;AAEA,eAAe,YAAY,KAAyB,SAAiD;AACnG,MAAI,CAAC,IAAK;AACV,MAAI;AACF,uBAAmB,GAAG;AACtB,UAAM,MAAM,KAAK;AAAA,MACf,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,EAAE,GAAG,SAAS,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE,CAAC;AAAA,MACxE,UAAU;AAAA,IACZ,CAAC;AAAA,EACH,SAAS,KAAK;AACZ,YAAQ,OAAO,MAAM,qDAAiC,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,CAAI;AAAA,EAC5G;AACF;","names":[]}
|
package/package.json
CHANGED
package/dist/chunk-25TXW6EP.js
DELETED
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
// src/utils.ts
|
|
4
|
-
var VERSION = true ? "2.0.3" : "0.0.0-dev";
|
|
5
|
-
function toArrayBuffer(buf) {
|
|
6
|
-
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
7
|
-
return buf.buffer;
|
|
8
|
-
}
|
|
9
|
-
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
|
|
10
|
-
}
|
|
11
|
-
var KordocError = class extends Error {
|
|
12
|
-
constructor(message) {
|
|
13
|
-
super(message);
|
|
14
|
-
this.name = "KordocError";
|
|
15
|
-
}
|
|
16
|
-
};
|
|
17
|
-
function sanitizeError(err) {
|
|
18
|
-
if (err instanceof KordocError) return err.message;
|
|
19
|
-
return "\uBB38\uC11C \uCC98\uB9AC \uC911 \uC624\uB958\uAC00 \uBC1C\uC0DD\uD588\uC2B5\uB2C8\uB2E4";
|
|
20
|
-
}
|
|
21
|
-
function isPathTraversal(name) {
|
|
22
|
-
if (name.includes("\0")) return true;
|
|
23
|
-
const normalized = name.replace(/\\/g, "/");
|
|
24
|
-
return normalized.includes("..") || normalized.startsWith("/") || /^[A-Za-z]:/.test(normalized);
|
|
25
|
-
}
|
|
26
|
-
function precheckZipSize(buffer, maxUncompressedSize = 100 * 1024 * 1024, maxEntries = 500) {
|
|
27
|
-
try {
|
|
28
|
-
const data = new DataView(buffer);
|
|
29
|
-
const len = buffer.byteLength;
|
|
30
|
-
let eocdOffset = -1;
|
|
31
|
-
for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {
|
|
32
|
-
if (data.getUint32(i, true) === 101010256) {
|
|
33
|
-
eocdOffset = i;
|
|
34
|
-
break;
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 };
|
|
38
|
-
const entryCount = data.getUint16(eocdOffset + 10, true);
|
|
39
|
-
if (entryCount > maxEntries) {
|
|
40
|
-
throw new KordocError(`ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC: ${entryCount} (\uCD5C\uB300 ${maxEntries})`);
|
|
41
|
-
}
|
|
42
|
-
const cdSize = data.getUint32(eocdOffset + 12, true);
|
|
43
|
-
const cdOffset = data.getUint32(eocdOffset + 16, true);
|
|
44
|
-
if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount };
|
|
45
|
-
let totalUncompressed = 0;
|
|
46
|
-
let pos = cdOffset;
|
|
47
|
-
for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {
|
|
48
|
-
if (data.getUint32(pos, true) !== 33639248) break;
|
|
49
|
-
totalUncompressed += data.getUint32(pos + 24, true);
|
|
50
|
-
const nameLen = data.getUint16(pos + 28, true);
|
|
51
|
-
const extraLen = data.getUint16(pos + 30, true);
|
|
52
|
-
const commentLen = data.getUint16(pos + 32, true);
|
|
53
|
-
pos += 46 + nameLen + extraLen + commentLen;
|
|
54
|
-
}
|
|
55
|
-
if (totalUncompressed > maxUncompressedSize) {
|
|
56
|
-
throw new KordocError(`ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 ${maxUncompressedSize / 1024 / 1024}MB)`);
|
|
57
|
-
}
|
|
58
|
-
return { totalUncompressed, entryCount };
|
|
59
|
-
} catch (err) {
|
|
60
|
-
if (err instanceof KordocError) throw err;
|
|
61
|
-
return { totalUncompressed: 0, entryCount: 0 };
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
var SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i;
|
|
65
|
-
function sanitizeHref(href) {
|
|
66
|
-
const trimmed = href.trim();
|
|
67
|
-
if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null;
|
|
68
|
-
return trimmed;
|
|
69
|
-
}
|
|
70
|
-
function classifyError(err) {
|
|
71
|
-
if (!(err instanceof Error)) return "PARSE_ERROR";
|
|
72
|
-
const msg = err.message;
|
|
73
|
-
if (msg.includes("\uC554\uD638\uD654")) return "ENCRYPTED";
|
|
74
|
-
if (msg.includes("DRM")) return "DRM_PROTECTED";
|
|
75
|
-
if (msg.includes("ZIP bomb") || msg.includes("ZIP \uBE44\uC555\uCD95 \uD06C\uAE30 \uCD08\uACFC") || msg.includes("ZIP \uC5D4\uD2B8\uB9AC \uC218 \uCD08\uACFC")) return "ZIP_BOMB";
|
|
76
|
-
if (msg.includes("bomb") || msg.includes("\uD06C\uAE30 \uCD08\uACFC") || msg.includes("\uC555\uCD95 \uD574\uC81C")) return "DECOMPRESSION_BOMB";
|
|
77
|
-
if (msg.includes("\uC774\uBBF8\uC9C0 \uAE30\uBC18")) return "IMAGE_BASED_PDF";
|
|
78
|
-
if (msg.includes("\uC139\uC158") && (msg.includes("\uCC3E\uC744 \uC218 \uC5C6") || msg.includes("\uC5C6\uC74C"))) return "NO_SECTIONS";
|
|
79
|
-
if (msg.includes("\uC2DC\uADF8\uB2C8\uCC98") || msg.includes("\uBCF5\uAD6C\uD560 \uC218 \uC5C6")) return "CORRUPTED";
|
|
80
|
-
return "PARSE_ERROR";
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
export {
|
|
84
|
-
VERSION,
|
|
85
|
-
toArrayBuffer,
|
|
86
|
-
KordocError,
|
|
87
|
-
sanitizeError,
|
|
88
|
-
isPathTraversal,
|
|
89
|
-
precheckZipSize,
|
|
90
|
-
sanitizeHref,
|
|
91
|
-
classifyError
|
|
92
|
-
};
|
|
93
|
-
//# sourceMappingURL=chunk-25TXW6EP.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/utils.ts"],"sourcesContent":["/** kordoc 공용 유틸리티 */\r\n\r\n/** 빌드 타임에 tsup define으로 주입되는 버전 */\r\ndeclare const __KORDOC_VERSION__: string\r\nexport const VERSION: string = typeof __KORDOC_VERSION__ !== \"undefined\" ? __KORDOC_VERSION__ : \"0.0.0-dev\"\r\n\r\n/**\r\n * Node.js Buffer → ArrayBuffer 변환\r\n * pool Buffer의 공유 ArrayBuffer 문제를 안전하게 처리.\r\n * offset=0이고 전체 ArrayBuffer를 차지하면 복사 없이 직접 반환.\r\n */\r\nexport function toArrayBuffer(buf: Buffer): ArrayBuffer {\r\n if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {\r\n return buf.buffer as ArrayBuffer\r\n }\r\n return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength) as ArrayBuffer\r\n}\r\n\r\n/**\r\n * kordoc 내부 에러 클래스 — 사용자에게 노출해도 안전한 메시지만 포함.\r\n * MCP 에러 정제에서 instanceof로 판별하여 allowlist 패턴 매칭 없이 안전하게 통과.\r\n */\r\nexport class KordocError extends Error {\r\n constructor(message: string) {\r\n super(message)\r\n this.name = \"KordocError\"\r\n }\r\n}\r\n\r\n/**\r\n * 에러 메시지 정제 — KordocError는 그대로, 나머지는 일반 메시지로 대체.\r\n * 파일시스템 경로, 스택 트레이스 등 내부 정보 노출 방지.\r\n */\r\nexport function sanitizeError(err: unknown): string {\r\n if (err instanceof KordocError) return err.message\r\n return \"문서 처리 중 오류가 발생했습니다\"\r\n}\r\n\r\n/**\r\n * ZIP 엔트리 경로의 경로 순회 여부 판별.\r\n * 백슬래시 정규화, .., 절대경로, Windows 드라이브 문자 모두 차단.\r\n */\r\nexport function isPathTraversal(name: string): boolean {\r\n if (name.includes(\"\\x00\")) return true\r\n const normalized = name.replace(/\\\\/g, \"/\")\r\n return normalized.includes(\"..\") || normalized.startsWith(\"/\") || /^[A-Za-z]:/.test(normalized)\r\n}\r\n\r\n// ─── ZIP 안전 로딩 (ZIP bomb 방지) ────────────────────\r\n\r\n/**\r\n * ZIP bomb 사전 검사 — Central Directory에서 비압축 합계와 엔트리 수 확인.\r\n * HWPX/XLSX/DOCX 등 모든 ZIP 기반 포맷에서 공통 사용.\r\n */\r\nexport function precheckZipSize(\r\n buffer: ArrayBuffer,\r\n maxUncompressedSize = 100 * 1024 * 1024,\r\n maxEntries = 500,\r\n): { totalUncompressed: number; entryCount: number } {\r\n try {\r\n const data = new DataView(buffer)\r\n const len = buffer.byteLength\r\n // EOCD 시그니처 역방향 스캔\r\n let eocdOffset = -1\r\n for (let i = len - 22; i >= Math.max(0, len - 65557); i--) {\r\n if (data.getUint32(i, true) === 0x06054b50) { eocdOffset = i; break }\r\n }\r\n if (eocdOffset < 0) return { totalUncompressed: 0, entryCount: 0 }\r\n\r\n const entryCount = data.getUint16(eocdOffset + 10, true)\r\n if (entryCount > maxEntries) {\r\n throw new KordocError(`ZIP 엔트리 수 초과: ${entryCount} (최대 ${maxEntries})`)\r\n }\r\n\r\n const cdSize = data.getUint32(eocdOffset + 12, true)\r\n const cdOffset = data.getUint32(eocdOffset + 16, true)\r\n if (cdOffset + cdSize > len) return { totalUncompressed: 0, entryCount }\r\n\r\n let totalUncompressed = 0\r\n let pos = cdOffset\r\n for (let i = 0; i < entryCount && pos + 46 <= cdOffset + cdSize; i++) {\r\n if (data.getUint32(pos, true) !== 0x02014b50) break\r\n totalUncompressed += data.getUint32(pos + 24, true)\r\n const nameLen = data.getUint16(pos + 28, true)\r\n const extraLen = data.getUint16(pos + 30, true)\r\n const commentLen = data.getUint16(pos + 32, true)\r\n pos += 46 + nameLen + extraLen + commentLen\r\n }\r\n\r\n if (totalUncompressed > maxUncompressedSize) {\r\n throw new KordocError(`ZIP 비압축 크기 초과: ${(totalUncompressed / 1024 / 1024).toFixed(1)}MB (최대 ${maxUncompressedSize / 1024 / 1024}MB)`)\r\n }\r\n\r\n return { totalUncompressed, entryCount }\r\n } catch (err) {\r\n if (err instanceof KordocError) throw err\r\n return { totalUncompressed: 0, entryCount: 0 }\r\n }\r\n}\r\n\r\n/** 하이퍼링크 URL 살균 — javascript: 등 XSS 위험 스킴 차단 */\r\nconst SAFE_HREF_RE = /^(?:https?:|mailto:|tel:|#)/i\r\nexport function sanitizeHref(href: string): string | null {\r\n const trimmed = href.trim()\r\n if (!trimmed || !SAFE_HREF_RE.test(trimmed)) return null\r\n return trimmed\r\n}\r\n\r\n// ─── 에러 분류 ──────────────────────────────────────\r\n\r\nimport type { ErrorCode } from \"./types.js\"\r\n\r\n/** 에러를 구조화된 ErrorCode로 분류 — KordocError 메시지 패턴 매칭 */\r\nexport function classifyError(err: unknown): ErrorCode {\r\n if (!(err instanceof Error)) return \"PARSE_ERROR\"\r\n const msg = err.message\r\n if (msg.includes(\"암호화\")) return \"ENCRYPTED\"\r\n if (msg.includes(\"DRM\")) return \"DRM_PROTECTED\"\r\n if (msg.includes(\"ZIP bomb\") || msg.includes(\"ZIP 비압축 크기 초과\") || msg.includes(\"ZIP 엔트리 수 초과\")) return \"ZIP_BOMB\"\r\n if (msg.includes(\"bomb\") || msg.includes(\"크기 초과\") || msg.includes(\"압축 해제\")) return \"DECOMPRESSION_BOMB\"\r\n if (msg.includes(\"이미지 기반\")) return \"IMAGE_BASED_PDF\"\r\n if (msg.includes(\"섹션\") && (msg.includes(\"찾을 수 없\") || msg.includes(\"없음\"))) return \"NO_SECTIONS\"\r\n if (msg.includes(\"시그니처\") || msg.includes(\"복구할 수 없\")) return \"CORRUPTED\"\r\n return \"PARSE_ERROR\"\r\n}\r\n"],"mappings":";;;AAIO,IAAM,UAAkB,OAA4C,UAAqB;AAOzF,SAAS,cAAc,KAA0B;AACtD,MAAI,IAAI,eAAe,KAAK,IAAI,eAAe,IAAI,OAAO,YAAY;AACpE,WAAO,IAAI;AAAA,EACb;AACA,SAAO,IAAI,OAAO,MAAM,IAAI,YAAY,IAAI,aAAa,IAAI,UAAU;AACzE;AAMO,IAAM,cAAN,cAA0B,MAAM;AAAA,EACrC,YAAY,SAAiB;AAC3B,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAMO,SAAS,cAAc,KAAsB;AAClD,MAAI,eAAe,YAAa,QAAO,IAAI;AAC3C,SAAO;AACT;AAMO,SAAS,gBAAgB,MAAuB;AACrD,MAAI,KAAK,SAAS,IAAM,EAAG,QAAO;AAClC,QAAM,aAAa,KAAK,QAAQ,OAAO,GAAG;AAC1C,SAAO,WAAW,SAAS,IAAI,KAAK,WAAW,WAAW,GAAG,KAAK,aAAa,KAAK,UAAU;AAChG;AAQO,SAAS,gBACd,QACA,sBAAsB,MAAM,OAAO,MACnC,aAAa,KACsC;AACnD,MAAI;AACF,UAAM,OAAO,IAAI,SAAS,MAAM;AAChC,UAAM,MAAM,OAAO;AAEnB,QAAI,aAAa;AACjB,aAAS,IAAI,MAAM,IAAI,KAAK,KAAK,IAAI,GAAG,MAAM,KAAK,GAAG,KAAK;AACzD,UAAI,KAAK,UAAU,GAAG,IAAI,MAAM,WAAY;AAAE,qBAAa;AAAG;AAAA,MAAM;AAAA,IACtE;AACA,QAAI,aAAa,EAAG,QAAO,EAAE,mBAAmB,GAAG,YAAY,EAAE;AAEjE,UAAM,aAAa,KAAK,UAAU,aAAa,IAAI,IAAI;AACvD,QAAI,aAAa,YAAY;AAC3B,YAAM,IAAI,YAAY,+CAAiB,UAAU,kBAAQ,UAAU,GAAG;AAAA,IACxE;AAEA,UAAM,SAAS,KAAK,UAAU,aAAa,IAAI,IAAI;AACnD,UAAM,WAAW,KAAK,UAAU,aAAa,IAAI,IAAI;AACrD,QAAI,WAAW,SAAS,IAAK,QAAO,EAAE,mBAAmB,GAAG,WAAW;AAEvE,QAAI,oBAAoB;AACxB,QAAI,MAAM;AACV,aAAS,IAAI,GAAG,IAAI,cAAc,MAAM,MAAM,WAAW,QAAQ,KAAK;AACpE,UAAI,KAAK,UAAU,KAAK,IAAI,MAAM,SAAY;AAC9C,2BAAqB,KAAK,UAAU,MAAM,IAAI,IAAI;AAClD,YAAM,UAAU,KAAK,UAAU,MAAM,IAAI,IAAI;AAC7C,YAAM,WAAW,KAAK,UAAU,MAAM,IAAI,IAAI;AAC9C,YAAM,aAAa,KAAK,UAAU,MAAM,IAAI,IAAI;AAChD,aAAO,KAAK,UAAU,WAAW;AAAA,IACnC;AAEA,QAAI,oBAAoB,qBAAqB;AAC3C,YAAM,IAAI,YAAY,sDAAmB,oBAAoB,OAAO,MAAM,QAAQ,CAAC,CAAC,oBAAU,sBAAsB,OAAO,IAAI,KAAK;AAAA,IACtI;AAEA,WAAO,EAAE,mBAAmB,WAAW;AAAA,EACzC,SAAS,KAAK;AACZ,QAAI,eAAe,YAAa,OAAM;AACtC,WAAO,EAAE,mBAAmB,GAAG,YAAY,EAAE;AAAA,EAC/C;AACF;AAGA,IAAM,eAAe;AACd,SAAS,aAAa,MAA6B;AACxD,QAAM,UAAU,KAAK,KAAK;AAC1B,MAAI,CAAC,WAAW,CAAC,aAAa,KAAK,OAAO,EAAG,QAAO;AACpD,SAAO;AACT;AAOO,SAAS,cAAc,KAAyB;AACrD,MAAI,EAAE,eAAe,OAAQ,QAAO;AACpC,QAAM,MAAM,IAAI;AAChB,MAAI,IAAI,SAAS,oBAAK,EAAG,QAAO;AAChC,MAAI,IAAI,SAAS,KAAK,EAAG,QAAO;AAChC,MAAI,IAAI,SAAS,UAAU,KAAK,IAAI,SAAS,kDAAe,KAAK,IAAI,SAAS,4CAAc,EAAG,QAAO;AACtG,MAAI,IAAI,SAAS,MAAM,KAAK,IAAI,SAAS,2BAAO,KAAK,IAAI,SAAS,2BAAO,EAAG,QAAO;AACnF,MAAI,IAAI,SAAS,iCAAQ,EAAG,QAAO;AACnC,MAAI,IAAI,SAAS,cAAI,MAAM,IAAI,SAAS,4BAAQ,KAAK,IAAI,SAAS,cAAI,GAAI,QAAO;AACjF,MAAI,IAAI,SAAS,0BAAM,KAAK,IAAI,SAAS,kCAAS,EAAG,QAAO;AAC5D,SAAO;AACT;","names":[]}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/page-range.ts"],"sourcesContent":["/** 페이지/섹션 범위 파싱 유틸리티 */\r\n\r\n/**\r\n * 페이지 범위 지정을 1-based Set<number>로 변환.\r\n *\r\n * @param spec - [1,2,3] 또는 \"1-3\" 또는 \"1,3,5-7\"\r\n * @param maxPages - 최대 페이지 수 (클램핑 상한)\r\n * @returns 1-based 페이지 번호 Set\r\n */\r\nexport function parsePageRange(spec: number[] | string, maxPages: number): Set<number> {\r\n const result = new Set<number>()\r\n if (maxPages <= 0) return result\r\n\r\n if (Array.isArray(spec)) {\r\n for (const n of spec) {\r\n const page = Math.round(n)\r\n if (page >= 1 && page <= maxPages) result.add(page)\r\n }\r\n return result\r\n }\r\n\r\n if (typeof spec !== \"string\" || spec.trim() === \"\") return result\r\n\r\n const parts = spec.split(\",\")\r\n for (const part of parts) {\r\n const trimmed = part.trim()\r\n if (!trimmed) continue\r\n\r\n const rangeMatch = trimmed.match(/^(\\d+)\\s*-\\s*(\\d+)$/)\r\n if (rangeMatch) {\r\n const start = Math.max(1, parseInt(rangeMatch[1], 10))\r\n const end = Math.min(maxPages, parseInt(rangeMatch[2], 10))\r\n for (let i = start; i <= end; i++) result.add(i)\r\n } else {\r\n const page = parseInt(trimmed, 10)\r\n if (!isNaN(page) && page >= 1 && page <= maxPages) result.add(page)\r\n }\r\n }\r\n\r\n return result\r\n}\r\n"],"mappings":";;;AASO,SAAS,eAAe,MAAyB,UAA+B;AACrF,QAAM,SAAS,oBAAI,IAAY;AAC/B,MAAI,YAAY,EAAG,QAAO;AAE1B,MAAI,MAAM,QAAQ,IAAI,GAAG;AACvB,eAAW,KAAK,MAAM;AACpB,YAAM,OAAO,KAAK,MAAM,CAAC;AACzB,UAAI,QAAQ,KAAK,QAAQ,SAAU,QAAO,IAAI,IAAI;AAAA,IACpD;AACA,WAAO;AAAA,EACT;AAEA,MAAI,OAAO,SAAS,YAAY,KAAK,KAAK,MAAM,GAAI,QAAO;AAE3D,QAAM,QAAQ,KAAK,MAAM,GAAG;AAC5B,aAAW,QAAQ,OAAO;AACxB,UAAM,UAAU,KAAK,KAAK;AAC1B,QAAI,CAAC,QAAS;AAEd,UAAM,aAAa,QAAQ,MAAM,qBAAqB;AACtD,QAAI,YAAY;AACd,YAAM,QAAQ,KAAK,IAAI,GAAG,SAAS,WAAW,CAAC,GAAG,EAAE,CAAC;AACrD,YAAM,MAAM,KAAK,IAAI,UAAU,SAAS,WAAW,CAAC,GAAG,EAAE,CAAC;AAC1D,eAAS,IAAI,OAAO,KAAK,KAAK,IAAK,QAAO,IAAI,CAAC;AAAA,IACjD,OAAO;AACL,YAAM,OAAO,SAAS,SAAS,EAAE;AACjC,UAAI,CAAC,MAAM,IAAI,KAAK,QAAQ,KAAK,QAAQ,SAAU,QAAO,IAAI,IAAI;AAAA,IACpE;AAAA,EACF;AAEA,SAAO;AACT;","names":[]}
|