@clazic/kordoc 2.4.18 → 2.4.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{auto-detect-2YGFYQCN.js → auto-detect-CBYICI6B.js} +4 -4
- package/dist/{chunk-T7EBS5XP.js → chunk-463YQ2WL.js} +8 -18
- package/dist/chunk-463YQ2WL.js.map +1 -0
- package/dist/{chunk-7NOZFYH6.js → chunk-CLK4PNZ7.js} +7 -8
- package/dist/chunk-CLK4PNZ7.js.map +1 -0
- package/dist/{chunk-W2KDIKDF.js → chunk-MZN7PLTZ.js} +2 -2
- package/dist/{chunk-34WIGIQC.js → chunk-Y4WFKJ5P.js} +1 -1
- package/dist/chunk-Y4WFKJ5P.js.map +1 -0
- package/dist/cli.js +9 -13
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +16 -105
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +5 -6
- package/dist/index.d.ts +5 -6
- package/dist/index.js +16 -104
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +5 -6
- package/dist/mcp.js.map +1 -1
- package/dist/{resolve-673XFZQ6.js → resolve-XWYJYKKH.js} +15 -36
- package/dist/resolve-XWYJYKKH.js.map +1 -0
- package/dist/{utils-DHOODYKU.js → utils-YUAT7LFD.js} +2 -2
- package/dist/{watch-YGIU7RN7.js → watch-WEOFVVDO.js} +5 -6
- package/dist/{watch-YGIU7RN7.js.map → watch-WEOFVVDO.js.map} +1 -1
- package/package.json +1 -2
- package/dist/chunk-34WIGIQC.js.map +0 -1
- package/dist/chunk-7FMKAV4P.js +0 -56
- package/dist/chunk-7FMKAV4P.js.map +0 -1
- package/dist/chunk-7NOZFYH6.js.map +0 -1
- package/dist/chunk-T7EBS5XP.js.map +0 -1
- package/dist/resolve-673XFZQ6.js.map +0 -1
- package/dist/tesseract-provider-MNMZPSGF.js +0 -11
- package/dist/utils-DHOODYKU.js.map +0 -1
- /package/dist/{auto-detect-2YGFYQCN.js.map → auto-detect-CBYICI6B.js.map} +0 -0
- /package/dist/{chunk-W2KDIKDF.js.map → chunk-MZN7PLTZ.js.map} +0 -0
- /package/dist/{tesseract-provider-MNMZPSGF.js.map → utils-YUAT7LFD.js.map} +0 -0
|
@@ -7,7 +7,7 @@ function detectAvailableOcr() {
|
|
|
7
7
|
for (const cli of CLI_PRIORITY) {
|
|
8
8
|
if (isCliInstalled(cli)) return cli;
|
|
9
9
|
}
|
|
10
|
-
return
|
|
10
|
+
return null;
|
|
11
11
|
}
|
|
12
12
|
function isCliInstalled(name) {
|
|
13
13
|
try {
|
|
@@ -23,11 +23,10 @@ function getAutoFallbackChain() {
|
|
|
23
23
|
for (const cli of CLI_PRIORITY) {
|
|
24
24
|
if (isCliInstalled(cli)) chain.push(cli);
|
|
25
25
|
}
|
|
26
|
-
chain.push("tesseract");
|
|
27
26
|
return chain;
|
|
28
27
|
}
|
|
29
28
|
function validateOcrMode(mode) {
|
|
30
|
-
if (mode === "auto" || mode === "off"
|
|
29
|
+
if (mode === "auto" || mode === "off") return;
|
|
31
30
|
if (!isCliInstalled(mode)) {
|
|
32
31
|
throw new Error(`'${mode}' CLI\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4.
|
|
33
32
|
${getInstallGuide(mode)}`);
|
|
@@ -42,10 +41,10 @@ function getInstallGuide(mode) {
|
|
|
42
41
|
};
|
|
43
42
|
return guides[mode] || `'${mode}'\uC744(\uB97C) \uC124\uCE58\uD574\uC8FC\uC138\uC694.`;
|
|
44
43
|
}
|
|
45
|
-
function
|
|
44
|
+
function getNoCliMessage() {
|
|
46
45
|
return [
|
|
47
|
-
"\uC124\uCE58\uB41C AI CLI\uAC00 \uC5C6\uC5B4
|
|
48
|
-
"\
|
|
46
|
+
"\uC124\uCE58\uB41C AI CLI\uAC00 \uC5C6\uC5B4 OCR\uC744 \uC218\uD589\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.",
|
|
47
|
+
"\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF \uCC98\uB9AC\uB97C \uC704\uD574 AI CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4:",
|
|
49
48
|
"",
|
|
50
49
|
" [\uAD8C\uC7A5] Codex CLI: npm install -g @openai/codex",
|
|
51
50
|
" Gemini CLI: https://ai.google.dev/gemini-api/docs/cli",
|
|
@@ -58,6 +57,6 @@ export {
|
|
|
58
57
|
detectAvailableOcr,
|
|
59
58
|
getAutoFallbackChain,
|
|
60
59
|
validateOcrMode,
|
|
61
|
-
|
|
60
|
+
getNoCliMessage
|
|
62
61
|
};
|
|
63
|
-
//# sourceMappingURL=chunk-
|
|
62
|
+
//# sourceMappingURL=chunk-CLK4PNZ7.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/ocr/auto-detect.ts"],"sourcesContent":["/**\n * OCR CLI 자동 탐색\n *\n * 탐색 순서: codex → gemini → claude → ollama\n * CLI는 which(unix) / where(win) 명령어로 PATH 존재 확인.\n */\n\nimport { execSync } from \"child_process\"\nimport type { OcrMode } from \"../types.js\"\n\n/** CLI 탐색 우선순위 */\nconst CLI_PRIORITY = [\"codex\", \"gemini\", \"claude\", \"ollama\"] as const\n\n/**\n * 시스템에 설치된 OCR 도구를 우선순위대로 탐색.\n * @returns 사용 가능한 OcrMode, 없으면 null\n */\nexport function detectAvailableOcr(): OcrMode | null {\n for (const cli of CLI_PRIORITY) {\n if (isCliInstalled(cli)) return cli\n }\n return null\n}\n\n/**\n * 특정 CLI가 시스템 PATH에 있는지 확인.\n * which(unix) 또는 where(win32) 사용.\n */\nfunction isCliInstalled(name: string): boolean {\n try {\n const cmd = process.platform === \"win32\" ? \"where\" : \"which\"\n execSync(`${cmd} ${name}`, { stdio: \"ignore\", timeout: 3000 })\n return true\n } catch {\n return false\n }\n}\n\n/**\n * auto 모드에서 시도할 fallback 체인 반환.\n * 설치된 CLI만 포함.\n */\nexport function getAutoFallbackChain(): OcrMode[] {\n const chain: OcrMode[] = []\n for (const cli of CLI_PRIORITY) {\n if (isCliInstalled(cli)) chain.push(cli)\n }\n return chain\n}\n\n/**\n * 수동 지정된 OcrMode 유효성 검증.\n * --ocr gemini 등 강제 지정 시 호출.\n * @throws 해당 CLI가 설치되지 않은 경우 Error\n */\nexport function validateOcrMode(mode: OcrMode): void {\n if (mode === \"auto\" || mode === \"off\") return\n\n if (!isCliInstalled(mode)) {\n throw new Error(`'${mode}' CLI가 설치되지 않았습니다.\\n${getInstallGuide(mode)}`)\n }\n}\n\n/** CLI별 설치 안내 메시지 */\nfunction getInstallGuide(mode: string): string {\n const guides: Record<string, string> = {\n gemini: \"설치: https://ai.google.dev/gemini-api/docs/cli\",\n claude: \"설치: npm install -g @anthropic-ai/claude-code 또는 https://claude.ai/code\",\n codex: \"설치: npm install -g @openai/codex 또는 https://github.com/openai/codex\",\n ollama: \"설치: brew install ollama 또는 https://ollama.com/download\",\n }\n return guides[mode] || `'${mode}'을(를) 설치해주세요.`\n}\n\n/**\n * AI CLI가 없어 OCR 불가 시 표시할 안내 메시지.\n */\nexport function getNoCliMessage(): string {\n return [\n \"설치된 AI CLI가 없어 OCR을 수행할 수 없습니다.\",\n \"이미지 기반 PDF 처리를 위해 AI CLI 설치를 권장합니다:\",\n \"\",\n \" [권장] Codex CLI: npm install -g @openai/codex\",\n \" Gemini CLI: https://ai.google.dev/gemini-api/docs/cli\",\n \" Claude CLI: npm install -g @anthropic-ai/claude-code\",\n \" Ollama: brew install ollama (+ ollama pull gemma4:27b)\",\n ].join(\"\\n\")\n}\n"],"mappings":";;;AAOA,SAAS,gBAAgB;AAIzB,IAAM,eAAe,CAAC,SAAS,UAAU,UAAU,QAAQ;AAMpD,SAAS,qBAAqC;AACnD,aAAW,OAAO,cAAc;AAC9B,QAAI,eAAe,GAAG,EAAG,QAAO;AAAA,EAClC;AACA,SAAO;AACT;AAMA,SAAS,eAAe,MAAuB;AAC7C,MAAI;AACF,UAAM,MAAM,QAAQ,aAAa,UAAU,UAAU;AACrD,aAAS,GAAG,GAAG,IAAI,IAAI,IAAI,EAAE,OAAO,UAAU,SAAS,IAAK,CAAC;AAC7D,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAMO,SAAS,uBAAkC;AAChD,QAAM,QAAmB,CAAC;AAC1B,aAAW,OAAO,cAAc;AAC9B,QAAI,eAAe,GAAG,EAAG,OAAM,KAAK,GAAG;AAAA,EACzC;AACA,SAAO;AACT;AAOO,SAAS,gBAAgB,MAAqB;AACnD,MAAI,SAAS,UAAU,SAAS,MAAO;AAEvC,MAAI,CAAC,eAAe,IAAI,GAAG;AACzB,UAAM,IAAI,MAAM,IAAI,IAAI;AAAA,EAAuB,gBAAgB,IAAI,CAAC,EAAE;AAAA,EACxE;AACF;AAGA,SAAS,gBAAgB,MAAsB;AAC7C,QAAM,SAAiC;AAAA,IACrC,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,OAAQ;AAAA,IACR,QAAQ;AAAA,EACV;AACA,SAAO,OAAO,IAAI,KAAK,IAAI,IAAI;AACjC;AAKO,SAAS,kBAA0B;AACxC,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;","names":[]}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/utils.ts
|
|
4
|
-
var VERSION = true ? "2.4.
|
|
4
|
+
var VERSION = true ? "2.4.19" : "0.0.0-dev";
|
|
5
5
|
function toArrayBuffer(buf) {
|
|
6
6
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
7
7
|
return buf.buffer;
|
|
@@ -105,4 +105,4 @@ export {
|
|
|
105
105
|
classifyError,
|
|
106
106
|
normalizeKordocError
|
|
107
107
|
};
|
|
108
|
-
//# sourceMappingURL=chunk-
|
|
108
|
+
//# sourceMappingURL=chunk-MZN7PLTZ.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/ocr/cli-provider.ts"],"sourcesContent":["/**\n * CLI 기반 OCR 프로바이더\n *\n * gemini / claude / codex / ollama CLI를 subprocess로 호출하여\n * PDF 페이지 이미지를 Markdown으로 변환.\n *\n * 이미지 전달 방식:\n * - gemini: -p \"프롬프트 @이미지경로\" (@ 파일 참조)\n * - claude: -p \"프롬프트 @이미지경로\" (@ 파일 참조, --print 모드)\n * - codex: exec -i 이미지경로 \"프롬프트\" (-i/--image 플래그)\n * - ollama: REST API (localhost:11434) — CLI는 이미지 입력 미지원\n */\n\nimport { spawnSync } from \"child_process\"\nimport { writeFileSync, readFileSync, unlinkSync, mkdirSync } from \"fs\"\nimport { join } from \"path\"\nimport { tmpdir } from \"os\"\nimport type { OcrMode, StructuredOcrResult } from \"../types.js\"\n\n/** OCR 프롬프트 — 모든 CLI 공통 */\nconst OCR_PROMPT = `이 PDF 페이지 이미지에서 텍스트와 테이블을 추출하여 순수 Markdown으로 변환하세요.\n규칙:\n- 테이블은 Markdown 테이블 문법 사용 (| 구분, |---|---| 헤더 구분선 포함)\n- 병합된 셀은 해당 위치에 내용 기재\n- 헤딩은 글자 크기에 따라 ## ~ ###### 사용\n- 리스트는 - 또는 1. 사용\n- 이미지, 도형 등 비텍스트 요소는 무시\n- 원문의 읽기 순서와 구조를 유지\n- \\`\\`\\`로 감싸지 말고 순수 Markdown만 출력`\n\n/** 임시 디렉토리 (프로세스당 1회 생성)\n *\n * gemini CLI는 /tmp/ 등 시스템 임시 디렉토리를 워크스페이스 외부로 간주하여\n * @파일참조 시 접근을 거부할 수 있음. cwd 하위 폴더를 사용하면 모든 CLI에서 접근 가능.\n *\n * ⚠️ .gitignore에 포함된 경로(예: .kordoc-tmp/)는 gemini CLI가 무시하므로\n * 반드시 gitignore되지 않는 이름 사용. 파일은 try/finally로 즉시 정리.\n */\nlet _tempDir: string | null = null\nfunction getTempDir(): string {\n if (!_tempDir) {\n _tempDir = join(process.cwd(), \".kordoc_ocr_tmp\")\n mkdirSync(_tempDir, { recursive: true })\n }\n return _tempDir\n}\n\n/**\n * CLI OcrProvider 생성.\n *\n * @param mode - 사용할 CLI (gemini, claude, codex, ollama)\n * @returns OcrProvider 함수 (StructuredOcrResult 반환)\n */\nexport function createCliOcrProvider(\n mode: Exclude<OcrMode, \"auto\" | \"off\">\n): (pageImage: Uint8Array, pageNumber: number, mimeType: \"image/png\") => Promise<StructuredOcrResult> {\n return async (pageImage: Uint8Array, pageNumber: number): Promise<StructuredOcrResult> => {\n const tempPath = join(getTempDir(), `page-${pageNumber}.png`)\n\n try {\n writeFileSync(tempPath, pageImage)\n\n let output: string\n if (mode === \"ollama\") {\n output = await callOllamaApi(tempPath)\n } else {\n output = callCli(mode, tempPath)\n }\n\n return { markdown: stripCodeFence(output.trim()) }\n } finally {\n try { unlinkSync(tempPath) } catch { /* 임시 파일 정리 실패 무시 */ }\n }\n }\n}\n\n/**\n * 출력 텍스트에서 사용량·속도 제한 에러 감지.\n * 해당 메시지가 포함된 경우 throw하여 다음 엔진으로 fallback 트리거.\n */\nfunction checkForLimitError(output: string, mode: string): void {\n const lower = output.toLowerCase()\n if (lower.includes(\"usage limit\") || lower.includes(\"rate limit\")) {\n throw new Error(`${mode} 사용량/속도 제한: ${output.trim().slice(0, 200)}`)\n }\n}\n\n/**\n * CLI 실행 — gemini / claude / codex\n *\n * @throws CLI 실행 실패 또는 타임아웃(180초) 시 Error\n */\nfunction callCli(mode: string, imagePath: string): string {\n // codex는 --output-last-message로 대화 헤더 없는 깔끔한 출력 사용\n if (mode === \"codex\") {\n return callCodexCli(imagePath)\n }\n\n const args = buildCliArgs(mode, imagePath)\n\n const result = spawnSync(mode, args, {\n encoding: \"utf-8\",\n timeout: 600_000,\n maxBuffer: 10 * 1024 * 1024,\n shell: process.platform === \"win32\",\n // claude: /tmp에서 실행하여 프로젝트 CLAUDE.md의 규칙 간섭 방지\n ...(mode === \"claude\" ? { cwd: tmpdir() } : {}),\n })\n\n if (result.error) {\n throw new Error(`${mode} CLI 실행 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`${mode} OCR 실패: ${errMsg}`)\n }\n\n const output = result.stdout || \"\"\n checkForLimitError(output, mode)\n return output\n}\n\n/**\n * codex exec 실행 — --output-last-message로 대화 헤더 없는 깔끔한 출력.\n * 인자 순서: `codex exec <prompt> --image <file> --output-last-message <outfile>`\n */\nfunction callCodexCli(imagePath: string): string {\n // 출력 파일은 /tmp/ 사용 — codex sandbox는 cwd 내 쓰기를 막을 수 있음\n const outPath = join(tmpdir(), `kordoc-codex-out-${Date.now()}.txt`)\n try {\n const args = [\"exec\", OCR_PROMPT, \"--image\", imagePath, \"--output-last-message\", outPath]\n const model = process.env.KORDOC_CODEX_MODEL\n if (model) args.push(\"--model\", model)\n\n const result = spawnSync(\"codex\", args, {\n encoding: \"utf-8\",\n timeout: 180_000,\n maxBuffer: 10 * 1024 * 1024,\n input: \"\", // stdin EOF 즉시 전달 (대화형 입력 차단)\n shell: process.platform === \"win32\",\n })\n\n if (result.error) {\n throw new Error(`codex CLI 실행 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`codex OCR 실패: ${errMsg}`)\n }\n\n // --output-last-message 파일에서 읽기 (없으면 stdout 폴백)\n let text: string\n try {\n text = readFileSync(outPath, \"utf-8\")\n } catch {\n text = result.stdout || \"\"\n }\n checkForLimitError(text, \"codex\")\n return text\n } finally {\n try { unlinkSync(outPath) } catch { /* 무시 */ }\n }\n}\n\n/**\n * CLI별 인자 배열 생성.\n *\n * gemini: [\"--prompt\", \"프롬프트 @이미지경로\", \"--yolo\"]\n * - -y/--yolo: 자동 승인 (OCR은 도구 사용 없으므로 실질적 영향 없음)\n * - @ 파일 참조로 이미지를 컨텍스트에 포함\n *\n * claude: [\"--print\", \"프롬프트 @이미지경로\"]\n * - --print(-p): 비대화형 출력 모드\n * - @ 파일 참조로 이미지를 컨텍스트에 포함\n *\n * codex: callCodexCli()에서 별도 처리\n * - `codex exec <prompt> --image <file> --output-last-message <outfile>`\n * - 프롬프트가 --image보다 앞에 위치해야 함 (인자 순서 중요)\n *\n * ⚠️ CLI 버전에 따라 문법이 다를 수 있음. 업데이트 시 --help 재확인 필요.\n */\nfunction buildCliArgs(mode: string, imagePath: string): string[] {\n const normalizedPath = imagePath.replace(/\\\\/g, \"/\")\n const promptWithImage = `${OCR_PROMPT}\n\n이미지: @${normalizedPath}`\n\n switch (mode) {\n case \"gemini\": {\n const args = [\"--prompt\", promptWithImage, \"--yolo\"]\n const model = process.env.KORDOC_GEMINI_MODEL\n if (model) args.push(\"--model\", model)\n return args\n }\n\n case \"claude\": {\n const args = [\"--print\", promptWithImage]\n const model = process.env.KORDOC_CLAUDE_MODEL\n if (model) args.push(\"--model\", model)\n return args\n }\n\n default:\n throw new Error(`지원하지 않는 CLI: ${mode}`)\n }\n}\n\n/**\n * Ollama REST API 호출 — CLI는 이미지 입력을 지원하지 않으므로 API 직접 사용.\n *\n * 기본 모델: KORDOC_OLLAMA_MODEL 환경변수 또는 \"gemma4:27b\"\n * 기본 호스트: KORDOC_OLLAMA_HOST 환경변수 또는 \"http://localhost:11434\"\n *\n * @throws Ollama 서버 미실행 또는 응답 오류 시 Error\n */\nasync function callOllamaApi(imagePath: string): Promise<string> {\n const { readFileSync } = await import(\"fs\")\n const imageBase64 = readFileSync(imagePath).toString(\"base64\")\n\n const model = process.env.KORDOC_OLLAMA_MODEL || \"qwen3-vl:8b\"\n const host = process.env.KORDOC_OLLAMA_HOST || \"http://localhost:11434\"\n const timeoutMs = Number(process.env.KORDOC_OLLAMA_TIMEOUT) || 120_000\n\n const response = await fetch(`${host}/api/chat`, {\n method: \"POST\",\n headers: { \"Content-Type\": \"application/json\" },\n body: JSON.stringify({\n model,\n messages: [{\n role: \"user\",\n content: OCR_PROMPT,\n images: [imageBase64],\n }],\n stream: false,\n }),\n signal: AbortSignal.timeout(timeoutMs),\n })\n\n if (!response.ok) {\n throw new Error(`Ollama API 오류: ${response.status} ${response.statusText}`)\n }\n\n const data = await response.json() as { message?: { content?: string } }\n return data.message?.content || \"\"\n}\n\n/**\n * LLM 출력에서 코드 펜스 제거.\n * LLM이 가끔 결과를 ```markdown ... ``` 으로 감싸는 경우 처리.\n */\nfunction stripCodeFence(text: string): string {\n const match = text.match(/^```(?:markdown|md)?\\s*([\\s\\S]*?)```\\s*$/m)\n return match ? match[1].trim() : text\n}\n"],"mappings":";;;AAaA,SAAS,iBAAiB;AAC1B,SAAS,eAAe,cAAc,YAAY,iBAAiB;AACnE,SAAS,YAAY;AACrB,SAAS,cAAc;AAIvB,IAAM,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAkBnB,IAAI,WAA0B;AAC9B,SAAS,aAAqB;AAC5B,MAAI,CAAC,UAAU;AACb,eAAW,KAAK,QAAQ,IAAI,GAAG,iBAAiB;AAChD,cAAU,UAAU,EAAE,WAAW,KAAK,CAAC;AAAA,EACzC;AACA,SAAO;AACT;AAQO,SAAS,qBACd,MACoG;AACpG,SAAO,OAAO,WAAuB,eAAqD;AACxF,UAAM,WAAW,KAAK,WAAW,GAAG,QAAQ,UAAU,MAAM;AAE5D,QAAI;AACF,oBAAc,UAAU,SAAS;AAEjC,UAAI;AACJ,UAAI,SAAS,UAAU;AACrB,iBAAS,MAAM,cAAc,QAAQ;AAAA,MACvC,OAAO;AACL,iBAAS,QAAQ,MAAM,QAAQ;AAAA,MACjC;AAEA,aAAO,EAAE,UAAU,eAAe,OAAO,KAAK,CAAC,EAAE;AAAA,IACnD,UAAE;AACA,UAAI;AAAE,mBAAW,QAAQ;AAAA,MAAE,QAAQ;AAAA,MAAuB;AAAA,IAC5D;AAAA,EACF;AACF;AAMA,SAAS,mBAAmB,QAAgB,MAAoB;AAC9D,QAAM,QAAQ,OAAO,YAAY;AACjC,MAAI,MAAM,SAAS,aAAa,KAAK,MAAM,SAAS,YAAY,GAAG;AACjE,UAAM,IAAI,MAAM,GAAG,IAAI,kDAAe,OAAO,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,EACrE;AACF;AAOA,SAAS,QAAQ,MAAc,WAA2B;AAExD,MAAI,SAAS,SAAS;AACpB,WAAO,aAAa,SAAS;AAAA,EAC/B;AAEA,QAAM,OAAO,aAAa,MAAM,SAAS;AAEzC,QAAM,SAAS,UAAU,MAAM,MAAM;AAAA,IACnC,UAAU;AAAA,IACV,SAAS;AAAA,IACT,WAAW,KAAK,OAAO;AAAA,IACvB,OAAO,QAAQ,aAAa;AAAA;AAAA,IAE5B,GAAI,SAAS,WAAW,EAAE,KAAK,OAAO,EAAE,IAAI,CAAC;AAAA,EAC/C,CAAC;AAED,MAAI,OAAO,OAAO;AAChB,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,OAAO,MAAM,OAAO,EAAE;AAAA,EAC9D;AACA,MAAI,OAAO,WAAW,GAAG;AACvB,UAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,UAAM,IAAI,MAAM,GAAG,IAAI,sBAAY,MAAM,EAAE;AAAA,EAC7C;AAEA,QAAM,SAAS,OAAO,UAAU;AAChC,qBAAmB,QAAQ,IAAI;AAC/B,SAAO;AACT;AAMA,SAAS,aAAa,WAA2B;AAE/C,QAAM,UAAU,KAAK,OAAO,GAAG,oBAAoB,KAAK,IAAI,CAAC,MAAM;AACnE,MAAI;AACF,UAAM,OAAO,CAAC,QAAQ,YAAY,WAAW,WAAW,yBAAyB,OAAO;AACxF,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAErC,UAAM,SAAS,UAAU,SAAS,MAAM;AAAA,MACtC,UAAU;AAAA,MACV,SAAS;AAAA,MACT,WAAW,KAAK,OAAO;AAAA,MACvB,OAAO;AAAA;AAAA,MACP,OAAO,QAAQ,aAAa;AAAA,IAC9B,CAAC;AAED,QAAI,OAAO,OAAO;AAChB,YAAM,IAAI,MAAM,wCAAoB,OAAO,MAAM,OAAO,EAAE;AAAA,IAC5D;AACA,QAAI,OAAO,WAAW,GAAG;AACvB,YAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,YAAM,IAAI,MAAM,2BAAiB,MAAM,EAAE;AAAA,IAC3C;AAGA,QAAI;AACJ,QAAI;AACF,aAAO,aAAa,SAAS,OAAO;AAAA,IACtC,QAAQ;AACN,aAAO,OAAO,UAAU;AAAA,IAC1B;AACA,uBAAmB,MAAM,OAAO;AAChC,WAAO;AAAA,EACT,UAAE;AACA,QAAI;AAAE,iBAAW,OAAO;AAAA,IAAE,QAAQ;AAAA,IAAW;AAAA,EAC/C;AACF;AAmBA,SAAS,aAAa,MAAc,WAA6B;AAC/D,QAAM,iBAAiB,UAAU,QAAQ,OAAO,GAAG;AACnD,QAAM,kBAAkB,GAAG,UAAU;AAAA;AAAA,uBAE/B,cAAc;AAEpB,UAAQ,MAAM;AAAA,IACZ,KAAK,UAAU;AACb,YAAM,OAAO,CAAC,YAAY,iBAAiB,QAAQ;AACnD,YAAM,QAAQ,QAAQ,IAAI;AAC1B,UAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AACrC,aAAO;AAAA,IACT;AAAA,IAEA,KAAK,UAAU;AACb,YAAM,OAAO,CAAC,WAAW,eAAe;AACxC,YAAM,QAAQ,QAAQ,IAAI;AAC1B,UAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AACrC,aAAO;AAAA,IACT;AAAA,IAEA;AACE,YAAM,IAAI,MAAM,8CAAgB,IAAI,EAAE;AAAA,EAC1C;AACF;AAUA,eAAe,cAAc,WAAoC;AAC/D,QAAM,EAAE,cAAAA,cAAa,IAAI,MAAM,OAAO,IAAI;AAC1C,QAAM,cAAcA,cAAa,SAAS,EAAE,SAAS,QAAQ;AAE7D,QAAM,QAAQ,QAAQ,IAAI,uBAAuB;AACjD,QAAM,OAAO,QAAQ,IAAI,sBAAsB;AAC/C,QAAM,YAAY,OAAO,QAAQ,IAAI,qBAAqB,KAAK;AAE/D,QAAM,WAAW,MAAM,MAAM,GAAG,IAAI,aAAa;AAAA,IAC/C,QAAQ;AAAA,IACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,IAC9C,MAAM,KAAK,UAAU;AAAA,MACnB;AAAA,MACA,UAAU,CAAC;AAAA,QACT,MAAM;AAAA,QACN,SAAS;AAAA,QACT,QAAQ,CAAC,WAAW;AAAA,MACtB,CAAC;AAAA,MACD,QAAQ;AAAA,IACV,CAAC;AAAA,IACD,QAAQ,YAAY,QAAQ,SAAS;AAAA,EACvC,CAAC;AAED,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,IAAI,MAAM,4BAAkB,SAAS,MAAM,IAAI,SAAS,UAAU,EAAE;AAAA,EAC5E;AAEA,QAAM,OAAO,MAAM,SAAS,KAAK;AACjC,SAAO,KAAK,SAAS,WAAW;AAClC;AAMA,SAAS,eAAe,MAAsB;AAC5C,QAAM,QAAQ,KAAK,MAAM,2CAA2C;AACpE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;","names":["readFileSync"]}
|
package/dist/cli.js
CHANGED
|
@@ -4,15 +4,14 @@ import {
|
|
|
4
4
|
markdownToHwpx,
|
|
5
5
|
markdownToXlsx,
|
|
6
6
|
parse
|
|
7
|
-
} from "./chunk-
|
|
8
|
-
import "./chunk-YW5G6BCJ.js";
|
|
7
|
+
} from "./chunk-463YQ2WL.js";
|
|
9
8
|
import {
|
|
10
9
|
VERSION,
|
|
11
10
|
toArrayBuffer
|
|
12
|
-
} from "./chunk-
|
|
11
|
+
} from "./chunk-MZN7PLTZ.js";
|
|
13
12
|
import "./chunk-MOL7MDBG.js";
|
|
14
|
-
import "./chunk-
|
|
15
|
-
import "./chunk-
|
|
13
|
+
import "./chunk-Y4WFKJ5P.js";
|
|
14
|
+
import "./chunk-YW5G6BCJ.js";
|
|
16
15
|
import {
|
|
17
16
|
createLoggerFromEnv,
|
|
18
17
|
generateRunId
|
|
@@ -22,7 +21,6 @@ import "./chunk-ZWE3DS7E.js";
|
|
|
22
21
|
// src/cli.ts
|
|
23
22
|
import { readFileSync, writeFileSync, mkdirSync, statSync, existsSync, readdirSync } from "fs";
|
|
24
23
|
import { basename, resolve, extname } from "path";
|
|
25
|
-
import { cpus } from "os";
|
|
26
24
|
import { Command } from "commander";
|
|
27
25
|
var program = new Command();
|
|
28
26
|
function buildEnvTemplate() {
|
|
@@ -85,7 +83,7 @@ async function runParse(files, opts) {
|
|
|
85
83
|
const parseOptions = {};
|
|
86
84
|
if (opts.pages) parseOptions.pages = opts.pages;
|
|
87
85
|
if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false;
|
|
88
|
-
const validOcrModes = ["auto", "gemini", "claude", "codex", "ollama", "
|
|
86
|
+
const validOcrModes = ["auto", "gemini", "claude", "codex", "ollama", "off"];
|
|
89
87
|
if (opts.ocr) {
|
|
90
88
|
if (!validOcrModes.includes(opts.ocr)) {
|
|
91
89
|
process.stderr.write(`[kordoc] \uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 OCR \uBAA8\uB4DC: ${opts.ocr}
|
|
@@ -101,8 +99,6 @@ async function runParse(files, opts) {
|
|
|
101
99
|
if (opts.ocrJobs) {
|
|
102
100
|
const n = parseInt(opts.ocrJobs, 10);
|
|
103
101
|
if (n > 0) parseOptions.ocrConcurrency = n;
|
|
104
|
-
} else if (parseOptions.ocrMode === "tesseract") {
|
|
105
|
-
parseOptions.ocrConcurrency = cpus().length;
|
|
106
102
|
} else {
|
|
107
103
|
parseOptions.ocrConcurrency = 4;
|
|
108
104
|
}
|
|
@@ -177,7 +173,7 @@ async function runParse(files, opts) {
|
|
|
177
173
|
saveImages(absPath);
|
|
178
174
|
}
|
|
179
175
|
} catch (err) {
|
|
180
|
-
const { sanitizeError } = await import("./utils-
|
|
176
|
+
const { sanitizeError } = await import("./utils-YUAT7LFD.js");
|
|
181
177
|
process.stderr.write(`
|
|
182
178
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
183
179
|
`);
|
|
@@ -187,7 +183,7 @@ async function runParse(files, opts) {
|
|
|
187
183
|
}
|
|
188
184
|
}
|
|
189
185
|
function addParseOptions(cmd) {
|
|
190
|
-
return cmd.option("-o, --output <path>", "\uCD9C\uB825 \uD30C\uC77C \uACBD\uB85C (\uB2E8\uC77C \uD30C\uC77C \uC2DC)").option("-d, --out-dir <dir>", "\uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC (\uB2E4\uC911 \uD30C\uC77C \uC2DC)").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704 (\uC608: 1-3, 1,3,5)").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown (\uAE30\uBCF8) \uB610\uB294 json", "markdown").option("--no-header-footer", "PDF \uBA38\uB9AC\uAE00/\uBC14\uB2E5\uAE00 \uC790\uB3D9 \uC81C\uAC70").option("--image-dir <dir>", "\uC774\uBBF8\uC9C0 \uC800\uC7A5 \uD3F4\uB354 (\uAE30\uBCF8: \uC785\uB825 \uD30C\uC77C\uBA85_images \uD3F4\uB354)").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").option("--ocr <mode>", "OCR \uBAA8\uB4DC: auto(\uAE30\uBCF8), gemini, claude, codex, ollama,
|
|
186
|
+
return cmd.option("-o, --output <path>", "\uCD9C\uB825 \uD30C\uC77C \uACBD\uB85C (\uB2E8\uC77C \uD30C\uC77C \uC2DC)").option("-d, --out-dir <dir>", "\uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC (\uB2E4\uC911 \uD30C\uC77C \uC2DC)").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704 (\uC608: 1-3, 1,3,5)").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown (\uAE30\uBCF8) \uB610\uB294 json", "markdown").option("--no-header-footer", "PDF \uBA38\uB9AC\uAE00/\uBC14\uB2E5\uAE00 \uC790\uB3D9 \uC81C\uAC70").option("--image-dir <dir>", "\uC774\uBBF8\uC9C0 \uC800\uC7A5 \uD3F4\uB354 (\uAE30\uBCF8: \uC785\uB825 \uD30C\uC77C\uBA85_images \uD3F4\uB354)").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").option("--ocr <mode>", "OCR \uBAA8\uB4DC: auto(\uAE30\uBCF8), gemini, claude, codex, ollama, off").option("--ocr-jobs <n>", "OCR \uBCD1\uB82C \uCC98\uB9AC \uC218 (\uAE30\uBCF8: 4)").option("--ocr-batch-size <n>", "OCR \uBC30\uCE58 \uD06C\uAE30 \u2014 CLI\uB2F9 \uD398\uC774\uC9C0 \uC218 (\uAE30\uBCF8: gemini/claude 50, codex 100)");
|
|
191
187
|
}
|
|
192
188
|
program.enablePositionalOptions().name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\uACA0\uB2E4 \u2014 HWP, HWPX, PDF, XLSX, DOCX \u2192 Markdown").version(VERSION);
|
|
193
189
|
addParseOptions(
|
|
@@ -259,7 +255,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
259
255
|
`));
|
|
260
256
|
}
|
|
261
257
|
} catch (err) {
|
|
262
|
-
const { sanitizeError } = await import("./utils-
|
|
258
|
+
const { sanitizeError } = await import("./utils-YUAT7LFD.js");
|
|
263
259
|
process.stderr.write(` FAIL
|
|
264
260
|
`);
|
|
265
261
|
process.stderr.write(` \u2192 ${sanitizeError(err)}
|
|
@@ -291,7 +287,7 @@ program.command("init-env").description("kordoc\uC6A9 .env \uD15C\uD50C\uB9BF \u
|
|
|
291
287
|
}
|
|
292
288
|
});
|
|
293
289
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
294
|
-
const { watchDirectory } = await import("./watch-
|
|
290
|
+
const { watchDirectory } = await import("./watch-WEOFVVDO.js");
|
|
295
291
|
await watchDirectory({
|
|
296
292
|
dir,
|
|
297
293
|
outDir: opts.outDir,
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\n\nimport { readFileSync, writeFileSync, mkdirSync, statSync, existsSync, readdirSync } from \"fs\"\nimport { basename, resolve, extname } from \"path\"\nimport { cpus } from \"os\"\nimport { Command } from \"commander\"\nimport { parse, detectFormat, markdownToHwpx, markdownToXlsx } from \"./index.js\"\nimport type { ParseOptions, OcrMode } from \"./types.js\"\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\nimport { createLoggerFromEnv, generateRunId } from \"./logging/logger.js\"\n\nconst program = new Command()\n\nfunction buildEnvTemplate(): string {\n return [\n \"# kordoc 환경 변수 템플릿\",\n \"# 필요 항목만 주석 해제해서 사용하세요.\",\n \"\",\n \"# ---------- OCR ----------\",\n \"# CLI OCR 모드에서 Ollama 모델 지정\",\n \"# KORDOC_OLLAMA_MODEL=qwen3-vl:8b\",\n \"\",\n \"# Unified OCR 파이프라인 모델 기본값\",\n \"KORDOC_UNIFIED_MODEL_CANDIDATES=mistralai/mistral-medium-3-instruct,moonshotai/kimi-k2.5,qwen/qwen3.5-122b-a10b,qwen/qwen3.5-397b-a17b,moonshotai/kimi-k2-thinking,moonshotai/kimi-k2-instruct,moonshotai/kimi-k2-instruct-0905\",\n \"KORDOC_UNIFIED_MODEL_MAX_TOKENS={\\\"mistralai/mistral-medium-3-instruct\\\":8192,\\\"moonshotai/kimi-k2.5\\\":64000,\\\"qwen/qwen3.5-122b-a10b\\\":64000,\\\"qwen/qwen3.5-397b-a17b\\\":64000,\\\"moonshotai/kimi-k2-thinking\\\":64000,\\\"moonshotai/kimi-k2-instruct\\\":64000,\\\"moonshotai/kimi-k2-instruct-0905\\\":64000}\",\n \"\",\n \"# NIM API 키 (단일/다중)\",\n \"NVIDIA_API_KEY=\",\n \"# NVIDIA_API_KEYS=nvapi-key1,nvapi-key2,nvapi-key3\",\n \"\",\n \"# ---------- 로깅 ----------\",\n \"# KORDOC_LOG_LEVEL=error\",\n \"# KORDOC_LOG_FILE=./logs/kordoc.jsonl\",\n \"# KORDOC_LOG_STACK=0\",\n \"# KORDOC_LOG_PROGRESS_SAMPLE_MS=1000\",\n \"# KORDOC_LOG_BASENAME_PATHS=1\",\n \"# KORDOC_LOG_TEXT_LIMIT=400\",\n \"\",\n ].join(\"\\n\")\n}\n\n/** 공통 parse 옵션 타입 */\ninterface ParseOpts {\n output?: string\n outDir?: string\n pages?: string\n format: string\n headerFooter: boolean\n imageDir?: string\n silent?: boolean\n ocr?: string\n ocrJobs?: string\n ocrBatchSize?: string\n}\n\n/** parse 액션 공통 구현 — 루트 커맨드와 `parse` 서브커맨드가 공유 */\nasync function runParse(files: string[], opts: ParseOpts) {\n const logger = createLoggerFromEnv().withRun(generateRunId(\"cli\")).child({ component: \"cli.ts\" })\n logger.log({ level: \"info\", stage: \"detect\", event: \"start\", message: \"CLI parse 시작\", meta: { files: files.length } })\n const validFormats = [\"markdown\", \"json\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\n process.exit(1)\n }\n for (let fi = 0; fi < files.length; fi++) {\n const filePath = files[fi]\n const absPath = resolve(filePath)\n const fileName = basename(absPath)\n const filePrefix = files.length > 1 ? `[${fi + 1}/${files.length}] ` : \"\"\n\n try {\n const fileSize = statSync(absPath).size\n if (fileSize > 500 * 1024 * 1024) {\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\n process.exitCode = 1\n continue\n }\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const format = detectFormat(arrayBuffer)\n\n if (!opts.silent) {\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\n }\n logger.log({ level: \"debug\", stage: \"detect\", event: \"done\", message: \"파일 포맷 감지\", meta: { fileName, format } })\n\n const parseOptions: ParseOptions = {}\n if (opts.pages) parseOptions.pages = opts.pages as string\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\n\n // OCR 모드: CLI 기본값 \"auto\" (라이브러리 API는 undefined 유지)\n const validOcrModes = [\"auto\", \"gemini\", \"claude\", \"codex\", \"ollama\", \"tesseract\", \"off\"]\n if (opts.ocr) {\n if (!validOcrModes.includes(opts.ocr)) {\n process.stderr.write(`[kordoc] 지원하지 않는 OCR 모드: ${opts.ocr}\\n`)\n process.stderr.write(` 사용 가능: ${validOcrModes.join(\", \")}\\n`)\n process.exit(1)\n }\n parseOptions.ocrMode = opts.ocr as OcrMode\n } else {\n parseOptions.ocrMode = \"auto\"\n }\n\n // OCR 병렬 처리 수 (--ocr-jobs)\n // - tesseract: CPU 코어 수 (워커 스레드 병렬)\n // - gemini/claude/codex/auto: 4 (배치 병렬 실행)\n if (opts.ocrJobs) {\n const n = parseInt(opts.ocrJobs, 10)\n if (n > 0) parseOptions.ocrConcurrency = n\n } else if (parseOptions.ocrMode === \"tesseract\") {\n parseOptions.ocrConcurrency = cpus().length\n } else {\n parseOptions.ocrConcurrency = 4\n }\n\n // OCR 배치 크기 (--ocr-batch-size)\n if (opts.ocrBatchSize) {\n const n = parseInt(opts.ocrBatchSize, 10)\n if (n > 0) parseOptions.ocrBatchSize = n\n }\n\n if (!opts.silent) {\n parseOptions.onProgress = (current: number, total: number) => {\n const pct = total > 0 ? Math.round((current / total) * 100) : 0\n const filled = Math.round(pct / 5) // 20칸 진행바\n const bar = \"█\".repeat(filled) + \"░\".repeat(20 - filled)\n const cr = process.stderr.isTTY ? \"\\r\" : \"\\n\"\n process.stderr.write(`${cr}[kordoc] ${filePrefix}${fileName} — OCR: [${bar}] ${pct}% (${current}/${total}페이지)`)\n }\n }\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${result.error}\\n`)\n logger.log({ level: \"error\", stage: \"finalize\", event: \"error\", message: \"파일 파싱 실패\", meta: { fileName, error: result.error, code: result.code } })\n process.exitCode = 1\n continue\n }\n\n if (!opts.silent) process.stderr.write(` OK\\n`)\n logger.log({ level: \"info\", stage: \"finalize\", event: \"done\", message: \"파일 파싱 완료\", meta: { fileName, output: opts.output ?? opts.outDir ?? \"stdout\" } })\n\n // 이미지 기반 PDF OCR 결과 표시\n if (!opts.silent && result.success && result.isImageBased) {\n process.stderr.write(` → 이미지 기반 PDF — OCR 처리됨\\n`)\n }\n\n // 경고 표시\n if (!opts.silent && result.success && result.warnings?.length) {\n for (const w of result.warnings) {\n process.stderr.write(` ⚠ ${w.message}\\n`)\n }\n }\n\n let markdown = result.markdown\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\n if (opts.outDir && result.images?.length) {\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"\n }\n const output = opts.format === \"json\"\n ? JSON.stringify(result, null, 2)\n : markdown\n\n // 이미지 저장: 출력 MD 파일 기준 폴더 사용 (convert와 일치)\n const saveImages = (outFilePath: string) => {\n if (!result.images?.length) return\n const stem = basename(outFilePath).replace(/\\.[^.]+$/, \"\")\n const defaultDir = resolve(outFilePath, \"..\", stem + \"_images\")\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : defaultDir\n mkdirSync(imgDir, { recursive: true })\n for (const img of result.images) {\n writeFileSync(resolve(imgDir, img.filename), img.data)\n }\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\n }\n\n if (opts.output && files.length === 1) {\n writeFileSync(opts.output, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\n saveImages(resolve(opts.output))\n } else if (opts.outDir) {\n mkdirSync(opts.outDir, { recursive: true })\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\n saveImages(outPath)\n } else {\n process.stdout.write(output + \"\\n\")\n saveImages(absPath) // stdout 출력 시 입력 파일 기준\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\n logger.log({ level: \"error\", stage: \"finalize\", event: \"error\", message: \"CLI 처리 중 예외\", error: { message: sanitizeError(err), name: err instanceof Error ? err.name : \"Error\", stack: err instanceof Error ? err.stack : undefined } })\n process.exitCode = 1\n }\n }\n}\n\n/** 공통 parse 옵션 등록 헬퍼 */\nfunction addParseOptions(cmd: Command): Command {\n return cmd\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\n .option(\"--image-dir <dir>\", \"이미지 저장 폴더 (기본: 입력 파일명_images 폴더)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .option(\"--ocr <mode>\", \"OCR 모드: auto(기본), gemini, claude, codex, ollama, tesseract, off\")\n .option(\"--ocr-jobs <n>\", \"OCR 병렬 처리 수 (기본: CPU 코어 수, tesseract 전용)\")\n .option(\"--ocr-batch-size <n>\", \"OCR 배치 크기 — CLI당 페이지 수 (기본: gemini/claude 50, codex 100)\")\n}\n\nprogram\n .enablePositionalOptions()\n .name(\"kordoc\")\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\n .version(VERSION)\n\n// `kordoc parse <files>` 서브커맨드 (권장)\naddParseOptions(\n program\n .command(\"parse\")\n .description(\"파일을 마크다운으로 파싱 (HWP, HWPX, PDF, XLSX, DOCX)\")\n .argument(\"<files...>\", \"변환할 파일 경로\")\n).action(runParse)\n\nprogram\n .command(\"convert <input>\")\n .description(\"마크다운 파일을 HWPX 또는 XLSX로 변환\")\n .option(\"-f, --format <type>\", \"출력 포맷: hwpx | xlsx\", \"hwpx\")\n .option(\"-o, --output <path>\", \"출력 파일 경로 (기본: 입력명 + 포맷 확장자)\")\n .option(\"--image-dir <dir>\", \"이미지 폴더 경로 (기본: 입력 MD 파일명_images 폴더)\")\n .option(\"--images\", \"이미지 포함 (기본: 생략, 레이아웃 문제 방지)\")\n .option(\"--template <path>\", \"HWPX 템플릿 파일 경로 (hwpx 전용)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (input: string, opts) => {\n const validFormats = [\"hwpx\", \"xlsx\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 포맷: ${opts.format} (hwpx 또는 xlsx)\\n`)\n process.exit(1)\n }\n\n const absInput = resolve(input)\n if (!existsSync(absInput)) {\n process.stderr.write(`[kordoc] 파일을 찾을 수 없습니다: ${input}\\n`)\n process.exit(1)\n }\n\n const stem = basename(absInput).replace(/\\.[^.]+$/, \"\")\n const outPath = opts.output\n ? resolve(opts.output)\n : resolve(absInput, \"..\", `${stem}.${opts.format}`)\n\n if (!opts.silent) process.stderr.write(`[kordoc] ${basename(absInput)} → ${basename(outPath)} ...`)\n\n try {\n const markdown = readFileSync(absInput, \"utf-8\")\n\n // 이미지 폴더에서 이미지 로드 (--images 플래그 필요)\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : resolve(absInput, \"..\", stem + \"_images\")\n const images: import(\"./types.js\").ExtractedImage[] = []\n if (opts.images && existsSync(imgDir)) {\n const mimeMap: Record<string, string> = {\n png: \"image/png\", jpg: \"image/jpeg\", jpeg: \"image/jpeg\",\n gif: \"image/gif\", bmp: \"image/bmp\",\n }\n for (const entry of readdirSync(imgDir, { withFileTypes: true })) {\n if (!entry.isFile()) continue\n const fname = entry.name\n const ext = extname(fname).slice(1).toLowerCase()\n if (!mimeMap[ext]) continue\n const data = readFileSync(resolve(imgDir, fname))\n images.push({ filename: fname, data: new Uint8Array(data), mimeType: mimeMap[ext] })\n }\n if (!opts.silent) process.stderr.write(` → 이미지 ${images.length}개 로드\\n`)\n }\n\n const warnings: string[] = []\n\n let buf: ArrayBuffer\n if (opts.format === \"xlsx\") {\n if (opts.template && !opts.silent) {\n process.stderr.write(`\\n[kordoc] 경고: --template은 hwpx 전용입니다. 무시됩니다.\\n`)\n }\n buf = await markdownToXlsx(markdown, { warnings, images: images.length ? images : undefined })\n } else {\n let templateArrayBuffer: ArrayBuffer | undefined\n if (opts.template) {\n const tmplBuf = readFileSync(resolve(opts.template))\n templateArrayBuffer = tmplBuf.buffer.slice(tmplBuf.byteOffset, tmplBuf.byteOffset + tmplBuf.byteLength)\n }\n buf = await markdownToHwpx(markdown, { warnings, images: images.length ? images : undefined, templateArrayBuffer })\n }\n\n writeFileSync(outPath, Buffer.from(buf))\n\n if (!opts.silent) {\n process.stderr.write(` OK\\n`)\n process.stderr.write(` → ${outPath}\\n`)\n if (warnings.length) warnings.forEach(w => process.stderr.write(` ${w}\\n`))\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${sanitizeError(err)}\\n`)\n process.exit(1)\n }\n })\n\nprogram\n .command(\"init-env\")\n .description(\"kordoc용 .env 템플릿 생성 (명시 실행 시에만 파일 생성)\")\n .option(\"-o, --output <path>\", \"템플릿 출력 경로\", \".env.kordoc.example\")\n .option(\"--create-dotenv\", \".env 파일이 없으면 함께 생성\")\n .option(\"--force\", \"기존 파일이 있어도 덮어쓰기\")\n .action((opts: { output: string; createDotenv?: boolean; force?: boolean }) => {\n const template = buildEnvTemplate()\n const outPath = resolve(opts.output)\n\n if (existsSync(outPath) && !opts.force) {\n process.stderr.write(`[kordoc] 이미 파일이 있습니다: ${outPath} (덮어쓰려면 --force)\\n`)\n process.exit(1)\n }\n writeFileSync(outPath, template, \"utf-8\")\n process.stderr.write(`[kordoc] 생성됨: ${outPath}\\n`)\n\n if (opts.createDotenv) {\n const dotenvPath = resolve(\".env\")\n if (existsSync(dotenvPath) && !opts.force) {\n process.stderr.write(`[kordoc] .env가 이미 있어 생성하지 않았습니다: ${dotenvPath}\\n`)\n } else {\n writeFileSync(dotenvPath, template, \"utf-8\")\n process.stderr.write(`[kordoc] 생성됨: ${dotenvPath}\\n`)\n }\n }\n })\n\nprogram\n .command(\"watch <dir>\")\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (dir: string, opts) => {\n const { watchDirectory } = await import(\"./watch.js\")\n await watchDirectory({\n dir,\n outDir: opts.outDir,\n webhook: opts.webhook,\n format: opts.format,\n pages: opts.pages,\n silent: opts.silent,\n })\n })\n\n// `kordoc <files>` 루트 커맨드 (하위 호환)\n// 주의: 서브커맨드(init-env 등)보다 뒤에서 등록해야 충돌하지 않음\naddParseOptions(\n program\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\n).action(runParse)\n\nprogram.parse()\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,UAAU,YAAY,mBAAmB;AAC1F,SAAS,UAAU,SAAS,eAAe;AAC3C,SAAS,YAAY;AACrB,SAAS,eAAe;AAMxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,SAAS,mBAA2B;AAClC,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAiBA,eAAe,SAAS,OAAiB,MAAiB;AACxD,QAAM,SAAS,oBAAoB,EAAE,QAAQ,cAAc,KAAK,CAAC,EAAE,MAAM,EAAE,WAAW,SAAS,CAAC;AAChG,SAAO,IAAI,EAAE,OAAO,QAAQ,OAAO,UAAU,OAAO,SAAS,SAAS,0BAAgB,MAAM,EAAE,OAAO,MAAM,OAAO,EAAE,CAAC;AACrH,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AACA,aAAO,IAAI,EAAE,OAAO,SAAS,OAAO,UAAU,OAAO,QAAQ,SAAS,0CAAY,MAAM,EAAE,UAAU,OAAO,EAAE,CAAC;AAE9G,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AAGnE,YAAM,gBAAgB,CAAC,QAAQ,UAAU,UAAU,SAAS,UAAU,aAAa,KAAK;AACxF,UAAI,KAAK,KAAK;AACZ,YAAI,CAAC,cAAc,SAAS,KAAK,GAAG,GAAG;AACrC,kBAAQ,OAAO,MAAM,oEAA4B,KAAK,GAAG;AAAA,CAAI;AAC7D,kBAAQ,OAAO,MAAM,gCAAY,cAAc,KAAK,IAAI,CAAC;AAAA,CAAI;AAC7D,kBAAQ,KAAK,CAAC;AAAA,QAChB;AACA,qBAAa,UAAU,KAAK;AAAA,MAC9B,OAAO;AACL,qBAAa,UAAU;AAAA,MACzB;AAKA,UAAI,KAAK,SAAS;AAChB,cAAM,IAAI,SAAS,KAAK,SAAS,EAAE;AACnC,YAAI,IAAI,EAAG,cAAa,iBAAiB;AAAA,MAC3C,WAAW,aAAa,YAAY,aAAa;AAC/C,qBAAa,iBAAiB,KAAK,EAAE;AAAA,MACvC,OAAO;AACL,qBAAa,iBAAiB;AAAA,MAChC;AAGA,UAAI,KAAK,cAAc;AACrB,cAAM,IAAI,SAAS,KAAK,cAAc,EAAE;AACxC,YAAI,IAAI,EAAG,cAAa,eAAe;AAAA,MACzC;AAEA,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,gBAAM,MAAM,QAAQ,IAAI,KAAK,MAAO,UAAU,QAAS,GAAG,IAAI;AAC9D,gBAAM,SAAS,KAAK,MAAM,MAAM,CAAC;AACjC,gBAAM,MAAM,SAAI,OAAO,MAAM,IAAI,SAAI,OAAO,KAAK,MAAM;AACvD,gBAAM,KAAK,QAAQ,OAAO,QAAQ,OAAO;AACzC,kBAAQ,OAAO,MAAM,GAAG,EAAE,YAAY,UAAU,GAAG,QAAQ,iBAAY,GAAG,KAAK,GAAG,MAAM,OAAO,IAAI,KAAK,qBAAM;AAAA,QAChH;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,eAAO,IAAI,EAAE,OAAO,SAAS,OAAO,YAAY,OAAO,SAAS,SAAS,0CAAY,MAAM,EAAE,UAAU,OAAO,OAAO,OAAO,MAAM,OAAO,KAAK,EAAE,CAAC;AACjJ,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAC9C,aAAO,IAAI,EAAE,OAAO,QAAQ,OAAO,YAAY,OAAO,QAAQ,SAAS,0CAAY,MAAM,EAAE,UAAU,QAAQ,KAAK,UAAU,KAAK,UAAU,SAAS,EAAE,CAAC;AAGvJ,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,cAAc;AACzD,gBAAQ,OAAO,MAAM;AAAA,CAA4B;AAAA,MACnD;AAGA,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,UAAU,QAAQ;AAC7D,mBAAW,KAAK,OAAO,UAAU;AAC/B,kBAAQ,OAAO,MAAM,YAAO,EAAE,OAAO;AAAA,CAAI;AAAA,QAC3C;AAAA,MACF;AAEA,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,gBAAwB;AAC1C,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,OAAO,SAAS,WAAW,EAAE,QAAQ,YAAY,EAAE;AACzD,cAAM,aAAa,QAAQ,aAAa,MAAM,OAAO,SAAS;AAC9D,cAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI;AACxD,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,MAAM,CAAC;AAAA,MACjC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,OAAO;AAAA,MACpB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAClC,mBAAW,OAAO;AAAA,MACpB;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,aAAO,IAAI,EAAE,OAAO,SAAS,OAAO,YAAY,OAAO,SAAS,SAAS,wCAAe,OAAO,EAAE,SAAS,cAAc,GAAG,GAAG,MAAM,eAAe,QAAQ,IAAI,OAAO,SAAS,OAAO,eAAe,QAAQ,IAAI,QAAQ,OAAU,EAAE,CAAC;AACtO,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF;AAGA,SAAS,gBAAgB,KAAuB;AAC9C,SAAO,IACJ,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,qBAAqB,kHAAkC,EAC9D,OAAO,YAAY,oDAAY,EAC/B,OAAO,gBAAgB,qFAAiE,EACxF,OAAO,kBAAkB,sGAA0C,EACnE,OAAO,wBAAwB,sHAA0D;AAC9F;AAEA,QACG,wBAAwB,EACxB,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO;AAGlB;AAAA,EACE,QACG,QAAQ,OAAO,EACf,YAAY,mGAA4C,EACxD,SAAS,cAAc,8CAAW;AACvC,EAAE,OAAO,QAAQ;AAEjB,QACG,QAAQ,iBAAiB,EACzB,YAAY,uFAA2B,EACvC,OAAO,uBAAuB,0CAAsB,MAAM,EAC1D,OAAO,uBAAuB,6GAA6B,EAC3D,OAAO,qBAAqB,qHAAqC,EACjE,OAAO,YAAY,kHAA6B,EAChD,OAAO,qBAAqB,uEAA0B,EACtD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAe,SAAS;AACrC,QAAM,eAAe,CAAC,QAAQ,MAAM;AACpC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAmB;AAC3E,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,WAAW,QAAQ,KAAK;AAC9B,MAAI,CAAC,WAAW,QAAQ,GAAG;AACzB,YAAQ,OAAO,MAAM,6EAA2B,KAAK;AAAA,CAAI;AACzD,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,OAAO,SAAS,QAAQ,EAAE,QAAQ,YAAY,EAAE;AACtD,QAAM,UAAU,KAAK,SACjB,QAAQ,KAAK,MAAM,IACnB,QAAQ,UAAU,MAAM,GAAG,IAAI,IAAI,KAAK,MAAM,EAAE;AAEpD,MAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAY,SAAS,QAAQ,CAAC,WAAM,SAAS,OAAO,CAAC,MAAM;AAElG,MAAI;AACF,UAAM,WAAW,aAAa,UAAU,OAAO;AAG/C,UAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI,QAAQ,UAAU,MAAM,OAAO,SAAS;AAChG,UAAM,SAAgD,CAAC;AACvD,QAAI,KAAK,UAAU,WAAW,MAAM,GAAG;AACrC,YAAM,UAAkC;AAAA,QACtC,KAAK;AAAA,QAAa,KAAK;AAAA,QAAc,MAAM;AAAA,QAC3C,KAAK;AAAA,QAAa,KAAK;AAAA,MACzB;AACA,iBAAW,SAAS,YAAY,QAAQ,EAAE,eAAe,KAAK,CAAC,GAAG;AAChE,YAAI,CAAC,MAAM,OAAO,EAAG;AACrB,cAAM,QAAQ,MAAM;AACpB,cAAM,MAAM,QAAQ,KAAK,EAAE,MAAM,CAAC,EAAE,YAAY;AAChD,YAAI,CAAC,QAAQ,GAAG,EAAG;AACnB,cAAM,OAAO,aAAa,QAAQ,QAAQ,KAAK,CAAC;AAChD,eAAO,KAAK,EAAE,UAAU,OAAO,MAAM,IAAI,WAAW,IAAI,GAAG,UAAU,QAAQ,GAAG,EAAE,CAAC;AAAA,MACrF;AACA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,+BAAW,OAAO,MAAM;AAAA,CAAQ;AAAA,IACzE;AAEA,UAAM,WAAqB,CAAC;AAE5B,QAAI;AACJ,QAAI,KAAK,WAAW,QAAQ;AAC1B,UAAI,KAAK,YAAY,CAAC,KAAK,QAAQ;AACjC,gBAAQ,OAAO,MAAM;AAAA;AAAA,CAAiD;AAAA,MACxE;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,OAAU,CAAC;AAAA,IAC/F,OAAO;AACL,UAAI;AACJ,UAAI,KAAK,UAAU;AACjB,cAAM,UAAU,aAAa,QAAQ,KAAK,QAAQ,CAAC;AACnD,8BAAsB,QAAQ,OAAO,MAAM,QAAQ,YAAY,QAAQ,aAAa,QAAQ,UAAU;AAAA,MACxG;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,QAAW,oBAAoB,CAAC;AAAA,IACpH;AAEA,kBAAc,SAAS,OAAO,KAAK,GAAG,CAAC;AAEvC,QAAI,CAAC,KAAK,QAAQ;AAChB,cAAQ,OAAO,MAAM;AAAA,CAAO;AAC5B,cAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACvC,UAAI,SAAS,OAAQ,UAAS,QAAQ,OAAK,QAAQ,OAAO,MAAM,KAAK,CAAC;AAAA,CAAI,CAAC;AAAA,IAC7E;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,YAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,YAAQ,OAAO,MAAM,YAAO,cAAc,GAAG,CAAC;AAAA,CAAI;AAClD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,QACG,QAAQ,UAAU,EAClB,YAAY,4HAAuC,EACnD,OAAO,uBAAuB,gDAAa,qBAAqB,EAChE,OAAO,mBAAmB,sEAAoB,EAC9C,OAAO,WAAW,6EAAiB,EACnC,OAAO,CAAC,SAAsE;AAC7E,QAAM,WAAW,iBAAiB;AAClC,QAAM,UAAU,QAAQ,KAAK,MAAM;AAEnC,MAAI,WAAW,OAAO,KAAK,CAAC,KAAK,OAAO;AACtC,YAAQ,OAAO,MAAM,sEAAyB,OAAO;AAAA,CAAoB;AACzE,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,gBAAc,SAAS,UAAU,OAAO;AACxC,UAAQ,OAAO,MAAM,gCAAiB,OAAO;AAAA,CAAI;AAEjD,MAAI,KAAK,cAAc;AACrB,UAAM,aAAa,QAAQ,MAAM;AACjC,QAAI,WAAW,UAAU,KAAK,CAAC,KAAK,OAAO;AACzC,cAAQ,OAAO,MAAM,0GAAoC,UAAU;AAAA,CAAI;AAAA,IACzE,OAAO;AACL,oBAAc,YAAY,UAAU,OAAO;AAC3C,cAAQ,OAAO,MAAM,gCAAiB,UAAU;AAAA,CAAI;AAAA,IACtD;AAAA,EACF;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAIH;AAAA,EACE,QACG,SAAS,cAAc,2EAAwC;AACpE,EAAE,OAAO,QAAQ;AAEjB,QAAQ,MAAM;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\n\nimport { readFileSync, writeFileSync, mkdirSync, statSync, existsSync, readdirSync } from \"fs\"\nimport { basename, resolve, extname } from \"path\"\nimport { cpus } from \"os\"\nimport { Command } from \"commander\"\nimport { parse, detectFormat, markdownToHwpx, markdownToXlsx } from \"./index.js\"\nimport type { ParseOptions, OcrMode } from \"./types.js\"\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\nimport { createLoggerFromEnv, generateRunId } from \"./logging/logger.js\"\n\nconst program = new Command()\n\nfunction buildEnvTemplate(): string {\n return [\n \"# kordoc 환경 변수 템플릿\",\n \"# 필요 항목만 주석 해제해서 사용하세요.\",\n \"\",\n \"# ---------- OCR ----------\",\n \"# CLI OCR 모드에서 Ollama 모델 지정\",\n \"# KORDOC_OLLAMA_MODEL=qwen3-vl:8b\",\n \"\",\n \"# Unified OCR 파이프라인 모델 기본값\",\n \"KORDOC_UNIFIED_MODEL_CANDIDATES=mistralai/mistral-medium-3-instruct,moonshotai/kimi-k2.5,qwen/qwen3.5-122b-a10b,qwen/qwen3.5-397b-a17b,moonshotai/kimi-k2-thinking,moonshotai/kimi-k2-instruct,moonshotai/kimi-k2-instruct-0905\",\n \"KORDOC_UNIFIED_MODEL_MAX_TOKENS={\\\"mistralai/mistral-medium-3-instruct\\\":8192,\\\"moonshotai/kimi-k2.5\\\":64000,\\\"qwen/qwen3.5-122b-a10b\\\":64000,\\\"qwen/qwen3.5-397b-a17b\\\":64000,\\\"moonshotai/kimi-k2-thinking\\\":64000,\\\"moonshotai/kimi-k2-instruct\\\":64000,\\\"moonshotai/kimi-k2-instruct-0905\\\":64000}\",\n \"\",\n \"# NIM API 키 (단일/다중)\",\n \"NVIDIA_API_KEY=\",\n \"# NVIDIA_API_KEYS=nvapi-key1,nvapi-key2,nvapi-key3\",\n \"\",\n \"# ---------- 로깅 ----------\",\n \"# KORDOC_LOG_LEVEL=error\",\n \"# KORDOC_LOG_FILE=./logs/kordoc.jsonl\",\n \"# KORDOC_LOG_STACK=0\",\n \"# KORDOC_LOG_PROGRESS_SAMPLE_MS=1000\",\n \"# KORDOC_LOG_BASENAME_PATHS=1\",\n \"# KORDOC_LOG_TEXT_LIMIT=400\",\n \"\",\n ].join(\"\\n\")\n}\n\n/** 공통 parse 옵션 타입 */\ninterface ParseOpts {\n output?: string\n outDir?: string\n pages?: string\n format: string\n headerFooter: boolean\n imageDir?: string\n silent?: boolean\n ocr?: string\n ocrJobs?: string\n ocrBatchSize?: string\n}\n\n/** parse 액션 공통 구현 — 루트 커맨드와 `parse` 서브커맨드가 공유 */\nasync function runParse(files: string[], opts: ParseOpts) {\n const logger = createLoggerFromEnv().withRun(generateRunId(\"cli\")).child({ component: \"cli.ts\" })\n logger.log({ level: \"info\", stage: \"detect\", event: \"start\", message: \"CLI parse 시작\", meta: { files: files.length } })\n const validFormats = [\"markdown\", \"json\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\n process.exit(1)\n }\n for (let fi = 0; fi < files.length; fi++) {\n const filePath = files[fi]\n const absPath = resolve(filePath)\n const fileName = basename(absPath)\n const filePrefix = files.length > 1 ? `[${fi + 1}/${files.length}] ` : \"\"\n\n try {\n const fileSize = statSync(absPath).size\n if (fileSize > 500 * 1024 * 1024) {\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\n process.exitCode = 1\n continue\n }\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const format = detectFormat(arrayBuffer)\n\n if (!opts.silent) {\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\n }\n logger.log({ level: \"debug\", stage: \"detect\", event: \"done\", message: \"파일 포맷 감지\", meta: { fileName, format } })\n\n const parseOptions: ParseOptions = {}\n if (opts.pages) parseOptions.pages = opts.pages as string\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\n\n // OCR 모드: CLI 기본값 \"auto\" (라이브러리 API는 undefined 유지)\n const validOcrModes = [\"auto\", \"gemini\", \"claude\", \"codex\", \"ollama\", \"off\"]\n if (opts.ocr) {\n if (!validOcrModes.includes(opts.ocr)) {\n process.stderr.write(`[kordoc] 지원하지 않는 OCR 모드: ${opts.ocr}\\n`)\n process.stderr.write(` 사용 가능: ${validOcrModes.join(\", \")}\\n`)\n process.exit(1)\n }\n parseOptions.ocrMode = opts.ocr as OcrMode\n } else {\n parseOptions.ocrMode = \"auto\"\n }\n\n // OCR 병렬 처리 수 (--ocr-jobs)\n if (opts.ocrJobs) {\n const n = parseInt(opts.ocrJobs, 10)\n if (n > 0) parseOptions.ocrConcurrency = n\n } else {\n parseOptions.ocrConcurrency = 4\n }\n\n // OCR 배치 크기 (--ocr-batch-size)\n if (opts.ocrBatchSize) {\n const n = parseInt(opts.ocrBatchSize, 10)\n if (n > 0) parseOptions.ocrBatchSize = n\n }\n\n if (!opts.silent) {\n parseOptions.onProgress = (current: number, total: number) => {\n const pct = total > 0 ? Math.round((current / total) * 100) : 0\n const filled = Math.round(pct / 5) // 20칸 진행바\n const bar = \"█\".repeat(filled) + \"░\".repeat(20 - filled)\n const cr = process.stderr.isTTY ? \"\\r\" : \"\\n\"\n process.stderr.write(`${cr}[kordoc] ${filePrefix}${fileName} — OCR: [${bar}] ${pct}% (${current}/${total}페이지)`)\n }\n }\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${result.error}\\n`)\n logger.log({ level: \"error\", stage: \"finalize\", event: \"error\", message: \"파일 파싱 실패\", meta: { fileName, error: result.error, code: result.code } })\n process.exitCode = 1\n continue\n }\n\n if (!opts.silent) process.stderr.write(` OK\\n`)\n logger.log({ level: \"info\", stage: \"finalize\", event: \"done\", message: \"파일 파싱 완료\", meta: { fileName, output: opts.output ?? opts.outDir ?? \"stdout\" } })\n\n // 이미지 기반 PDF OCR 결과 표시\n if (!opts.silent && result.success && result.isImageBased) {\n process.stderr.write(` → 이미지 기반 PDF — OCR 처리됨\\n`)\n }\n\n // 경고 표시\n if (!opts.silent && result.success && result.warnings?.length) {\n for (const w of result.warnings) {\n process.stderr.write(` ⚠ ${w.message}\\n`)\n }\n }\n\n let markdown = result.markdown\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\n if (opts.outDir && result.images?.length) {\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"\n }\n const output = opts.format === \"json\"\n ? JSON.stringify(result, null, 2)\n : markdown\n\n // 이미지 저장: 출력 MD 파일 기준 폴더 사용 (convert와 일치)\n const saveImages = (outFilePath: string) => {\n if (!result.images?.length) return\n const stem = basename(outFilePath).replace(/\\.[^.]+$/, \"\")\n const defaultDir = resolve(outFilePath, \"..\", stem + \"_images\")\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : defaultDir\n mkdirSync(imgDir, { recursive: true })\n for (const img of result.images) {\n writeFileSync(resolve(imgDir, img.filename), img.data)\n }\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\n }\n\n if (opts.output && files.length === 1) {\n writeFileSync(opts.output, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\n saveImages(resolve(opts.output))\n } else if (opts.outDir) {\n mkdirSync(opts.outDir, { recursive: true })\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\n saveImages(outPath)\n } else {\n process.stdout.write(output + \"\\n\")\n saveImages(absPath) // stdout 출력 시 입력 파일 기준\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\n logger.log({ level: \"error\", stage: \"finalize\", event: \"error\", message: \"CLI 처리 중 예외\", error: { message: sanitizeError(err), name: err instanceof Error ? err.name : \"Error\", stack: err instanceof Error ? err.stack : undefined } })\n process.exitCode = 1\n }\n }\n}\n\n/** 공통 parse 옵션 등록 헬퍼 */\nfunction addParseOptions(cmd: Command): Command {\n return cmd\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\n .option(\"--image-dir <dir>\", \"이미지 저장 폴더 (기본: 입력 파일명_images 폴더)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .option(\"--ocr <mode>\", \"OCR 모드: auto(기본), gemini, claude, codex, ollama, off\")\n .option(\"--ocr-jobs <n>\", \"OCR 병렬 처리 수 (기본: 4)\")\n .option(\"--ocr-batch-size <n>\", \"OCR 배치 크기 — CLI당 페이지 수 (기본: gemini/claude 50, codex 100)\")\n}\n\nprogram\n .enablePositionalOptions()\n .name(\"kordoc\")\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\n .version(VERSION)\n\n// `kordoc parse <files>` 서브커맨드 (권장)\naddParseOptions(\n program\n .command(\"parse\")\n .description(\"파일을 마크다운으로 파싱 (HWP, HWPX, PDF, XLSX, DOCX)\")\n .argument(\"<files...>\", \"변환할 파일 경로\")\n).action(runParse)\n\nprogram\n .command(\"convert <input>\")\n .description(\"마크다운 파일을 HWPX 또는 XLSX로 변환\")\n .option(\"-f, --format <type>\", \"출력 포맷: hwpx | xlsx\", \"hwpx\")\n .option(\"-o, --output <path>\", \"출력 파일 경로 (기본: 입력명 + 포맷 확장자)\")\n .option(\"--image-dir <dir>\", \"이미지 폴더 경로 (기본: 입력 MD 파일명_images 폴더)\")\n .option(\"--images\", \"이미지 포함 (기본: 생략, 레이아웃 문제 방지)\")\n .option(\"--template <path>\", \"HWPX 템플릿 파일 경로 (hwpx 전용)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (input: string, opts) => {\n const validFormats = [\"hwpx\", \"xlsx\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 포맷: ${opts.format} (hwpx 또는 xlsx)\\n`)\n process.exit(1)\n }\n\n const absInput = resolve(input)\n if (!existsSync(absInput)) {\n process.stderr.write(`[kordoc] 파일을 찾을 수 없습니다: ${input}\\n`)\n process.exit(1)\n }\n\n const stem = basename(absInput).replace(/\\.[^.]+$/, \"\")\n const outPath = opts.output\n ? resolve(opts.output)\n : resolve(absInput, \"..\", `${stem}.${opts.format}`)\n\n if (!opts.silent) process.stderr.write(`[kordoc] ${basename(absInput)} → ${basename(outPath)} ...`)\n\n try {\n const markdown = readFileSync(absInput, \"utf-8\")\n\n // 이미지 폴더에서 이미지 로드 (--images 플래그 필요)\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : resolve(absInput, \"..\", stem + \"_images\")\n const images: import(\"./types.js\").ExtractedImage[] = []\n if (opts.images && existsSync(imgDir)) {\n const mimeMap: Record<string, string> = {\n png: \"image/png\", jpg: \"image/jpeg\", jpeg: \"image/jpeg\",\n gif: \"image/gif\", bmp: \"image/bmp\",\n }\n for (const entry of readdirSync(imgDir, { withFileTypes: true })) {\n if (!entry.isFile()) continue\n const fname = entry.name\n const ext = extname(fname).slice(1).toLowerCase()\n if (!mimeMap[ext]) continue\n const data = readFileSync(resolve(imgDir, fname))\n images.push({ filename: fname, data: new Uint8Array(data), mimeType: mimeMap[ext] })\n }\n if (!opts.silent) process.stderr.write(` → 이미지 ${images.length}개 로드\\n`)\n }\n\n const warnings: string[] = []\n\n let buf: ArrayBuffer\n if (opts.format === \"xlsx\") {\n if (opts.template && !opts.silent) {\n process.stderr.write(`\\n[kordoc] 경고: --template은 hwpx 전용입니다. 무시됩니다.\\n`)\n }\n buf = await markdownToXlsx(markdown, { warnings, images: images.length ? images : undefined })\n } else {\n let templateArrayBuffer: ArrayBuffer | undefined\n if (opts.template) {\n const tmplBuf = readFileSync(resolve(opts.template))\n templateArrayBuffer = tmplBuf.buffer.slice(tmplBuf.byteOffset, tmplBuf.byteOffset + tmplBuf.byteLength)\n }\n buf = await markdownToHwpx(markdown, { warnings, images: images.length ? images : undefined, templateArrayBuffer })\n }\n\n writeFileSync(outPath, Buffer.from(buf))\n\n if (!opts.silent) {\n process.stderr.write(` OK\\n`)\n process.stderr.write(` → ${outPath}\\n`)\n if (warnings.length) warnings.forEach(w => process.stderr.write(` ${w}\\n`))\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${sanitizeError(err)}\\n`)\n process.exit(1)\n }\n })\n\nprogram\n .command(\"init-env\")\n .description(\"kordoc용 .env 템플릿 생성 (명시 실행 시에만 파일 생성)\")\n .option(\"-o, --output <path>\", \"템플릿 출력 경로\", \".env.kordoc.example\")\n .option(\"--create-dotenv\", \".env 파일이 없으면 함께 생성\")\n .option(\"--force\", \"기존 파일이 있어도 덮어쓰기\")\n .action((opts: { output: string; createDotenv?: boolean; force?: boolean }) => {\n const template = buildEnvTemplate()\n const outPath = resolve(opts.output)\n\n if (existsSync(outPath) && !opts.force) {\n process.stderr.write(`[kordoc] 이미 파일이 있습니다: ${outPath} (덮어쓰려면 --force)\\n`)\n process.exit(1)\n }\n writeFileSync(outPath, template, \"utf-8\")\n process.stderr.write(`[kordoc] 생성됨: ${outPath}\\n`)\n\n if (opts.createDotenv) {\n const dotenvPath = resolve(\".env\")\n if (existsSync(dotenvPath) && !opts.force) {\n process.stderr.write(`[kordoc] .env가 이미 있어 생성하지 않았습니다: ${dotenvPath}\\n`)\n } else {\n writeFileSync(dotenvPath, template, \"utf-8\")\n process.stderr.write(`[kordoc] 생성됨: ${dotenvPath}\\n`)\n }\n }\n })\n\nprogram\n .command(\"watch <dir>\")\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (dir: string, opts) => {\n const { watchDirectory } = await import(\"./watch.js\")\n await watchDirectory({\n dir,\n outDir: opts.outDir,\n webhook: opts.webhook,\n format: opts.format,\n pages: opts.pages,\n silent: opts.silent,\n })\n })\n\n// `kordoc <files>` 루트 커맨드 (하위 호환)\n// 주의: 서브커맨드(init-env 등)보다 뒤에서 등록해야 충돌하지 않음\naddParseOptions(\n program\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\n).action(runParse)\n\nprogram.parse()\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,UAAU,YAAY,mBAAmB;AAC1F,SAAS,UAAU,SAAS,eAAe;AAE3C,SAAS,eAAe;AAMxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,SAAS,mBAA2B;AAClC,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAiBA,eAAe,SAAS,OAAiB,MAAiB;AACxD,QAAM,SAAS,oBAAoB,EAAE,QAAQ,cAAc,KAAK,CAAC,EAAE,MAAM,EAAE,WAAW,SAAS,CAAC;AAChG,SAAO,IAAI,EAAE,OAAO,QAAQ,OAAO,UAAU,OAAO,SAAS,SAAS,0BAAgB,MAAM,EAAE,OAAO,MAAM,OAAO,EAAE,CAAC;AACrH,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AACA,aAAO,IAAI,EAAE,OAAO,SAAS,OAAO,UAAU,OAAO,QAAQ,SAAS,0CAAY,MAAM,EAAE,UAAU,OAAO,EAAE,CAAC;AAE9G,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AAGnE,YAAM,gBAAgB,CAAC,QAAQ,UAAU,UAAU,SAAS,UAAU,KAAK;AAC3E,UAAI,KAAK,KAAK;AACZ,YAAI,CAAC,cAAc,SAAS,KAAK,GAAG,GAAG;AACrC,kBAAQ,OAAO,MAAM,oEAA4B,KAAK,GAAG;AAAA,CAAI;AAC7D,kBAAQ,OAAO,MAAM,gCAAY,cAAc,KAAK,IAAI,CAAC;AAAA,CAAI;AAC7D,kBAAQ,KAAK,CAAC;AAAA,QAChB;AACA,qBAAa,UAAU,KAAK;AAAA,MAC9B,OAAO;AACL,qBAAa,UAAU;AAAA,MACzB;AAGA,UAAI,KAAK,SAAS;AAChB,cAAM,IAAI,SAAS,KAAK,SAAS,EAAE;AACnC,YAAI,IAAI,EAAG,cAAa,iBAAiB;AAAA,MAC3C,OAAO;AACL,qBAAa,iBAAiB;AAAA,MAChC;AAGA,UAAI,KAAK,cAAc;AACrB,cAAM,IAAI,SAAS,KAAK,cAAc,EAAE;AACxC,YAAI,IAAI,EAAG,cAAa,eAAe;AAAA,MACzC;AAEA,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,gBAAM,MAAM,QAAQ,IAAI,KAAK,MAAO,UAAU,QAAS,GAAG,IAAI;AAC9D,gBAAM,SAAS,KAAK,MAAM,MAAM,CAAC;AACjC,gBAAM,MAAM,SAAI,OAAO,MAAM,IAAI,SAAI,OAAO,KAAK,MAAM;AACvD,gBAAM,KAAK,QAAQ,OAAO,QAAQ,OAAO;AACzC,kBAAQ,OAAO,MAAM,GAAG,EAAE,YAAY,UAAU,GAAG,QAAQ,iBAAY,GAAG,KAAK,GAAG,MAAM,OAAO,IAAI,KAAK,qBAAM;AAAA,QAChH;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,eAAO,IAAI,EAAE,OAAO,SAAS,OAAO,YAAY,OAAO,SAAS,SAAS,0CAAY,MAAM,EAAE,UAAU,OAAO,OAAO,OAAO,MAAM,OAAO,KAAK,EAAE,CAAC;AACjJ,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAC9C,aAAO,IAAI,EAAE,OAAO,QAAQ,OAAO,YAAY,OAAO,QAAQ,SAAS,0CAAY,MAAM,EAAE,UAAU,QAAQ,KAAK,UAAU,KAAK,UAAU,SAAS,EAAE,CAAC;AAGvJ,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,cAAc;AACzD,gBAAQ,OAAO,MAAM;AAAA,CAA4B;AAAA,MACnD;AAGA,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,UAAU,QAAQ;AAC7D,mBAAW,KAAK,OAAO,UAAU;AAC/B,kBAAQ,OAAO,MAAM,YAAO,EAAE,OAAO;AAAA,CAAI;AAAA,QAC3C;AAAA,MACF;AAEA,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,gBAAwB;AAC1C,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,OAAO,SAAS,WAAW,EAAE,QAAQ,YAAY,EAAE;AACzD,cAAM,aAAa,QAAQ,aAAa,MAAM,OAAO,SAAS;AAC9D,cAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI;AACxD,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,MAAM,CAAC;AAAA,MACjC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,OAAO;AAAA,MACpB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAClC,mBAAW,OAAO;AAAA,MACpB;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,aAAO,IAAI,EAAE,OAAO,SAAS,OAAO,YAAY,OAAO,SAAS,SAAS,wCAAe,OAAO,EAAE,SAAS,cAAc,GAAG,GAAG,MAAM,eAAe,QAAQ,IAAI,OAAO,SAAS,OAAO,eAAe,QAAQ,IAAI,QAAQ,OAAU,EAAE,CAAC;AACtO,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF;AAGA,SAAS,gBAAgB,KAAuB;AAC9C,SAAO,IACJ,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,qBAAqB,kHAAkC,EAC9D,OAAO,YAAY,oDAAY,EAC/B,OAAO,gBAAgB,0EAAsD,EAC7E,OAAO,kBAAkB,wDAAqB,EAC9C,OAAO,wBAAwB,sHAA0D;AAC9F;AAEA,QACG,wBAAwB,EACxB,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO;AAGlB;AAAA,EACE,QACG,QAAQ,OAAO,EACf,YAAY,mGAA4C,EACxD,SAAS,cAAc,8CAAW;AACvC,EAAE,OAAO,QAAQ;AAEjB,QACG,QAAQ,iBAAiB,EACzB,YAAY,uFAA2B,EACvC,OAAO,uBAAuB,0CAAsB,MAAM,EAC1D,OAAO,uBAAuB,6GAA6B,EAC3D,OAAO,qBAAqB,qHAAqC,EACjE,OAAO,YAAY,kHAA6B,EAChD,OAAO,qBAAqB,uEAA0B,EACtD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAe,SAAS;AACrC,QAAM,eAAe,CAAC,QAAQ,MAAM;AACpC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAmB;AAC3E,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,WAAW,QAAQ,KAAK;AAC9B,MAAI,CAAC,WAAW,QAAQ,GAAG;AACzB,YAAQ,OAAO,MAAM,6EAA2B,KAAK;AAAA,CAAI;AACzD,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,OAAO,SAAS,QAAQ,EAAE,QAAQ,YAAY,EAAE;AACtD,QAAM,UAAU,KAAK,SACjB,QAAQ,KAAK,MAAM,IACnB,QAAQ,UAAU,MAAM,GAAG,IAAI,IAAI,KAAK,MAAM,EAAE;AAEpD,MAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAY,SAAS,QAAQ,CAAC,WAAM,SAAS,OAAO,CAAC,MAAM;AAElG,MAAI;AACF,UAAM,WAAW,aAAa,UAAU,OAAO;AAG/C,UAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI,QAAQ,UAAU,MAAM,OAAO,SAAS;AAChG,UAAM,SAAgD,CAAC;AACvD,QAAI,KAAK,UAAU,WAAW,MAAM,GAAG;AACrC,YAAM,UAAkC;AAAA,QACtC,KAAK;AAAA,QAAa,KAAK;AAAA,QAAc,MAAM;AAAA,QAC3C,KAAK;AAAA,QAAa,KAAK;AAAA,MACzB;AACA,iBAAW,SAAS,YAAY,QAAQ,EAAE,eAAe,KAAK,CAAC,GAAG;AAChE,YAAI,CAAC,MAAM,OAAO,EAAG;AACrB,cAAM,QAAQ,MAAM;AACpB,cAAM,MAAM,QAAQ,KAAK,EAAE,MAAM,CAAC,EAAE,YAAY;AAChD,YAAI,CAAC,QAAQ,GAAG,EAAG;AACnB,cAAM,OAAO,aAAa,QAAQ,QAAQ,KAAK,CAAC;AAChD,eAAO,KAAK,EAAE,UAAU,OAAO,MAAM,IAAI,WAAW,IAAI,GAAG,UAAU,QAAQ,GAAG,EAAE,CAAC;AAAA,MACrF;AACA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,+BAAW,OAAO,MAAM;AAAA,CAAQ;AAAA,IACzE;AAEA,UAAM,WAAqB,CAAC;AAE5B,QAAI;AACJ,QAAI,KAAK,WAAW,QAAQ;AAC1B,UAAI,KAAK,YAAY,CAAC,KAAK,QAAQ;AACjC,gBAAQ,OAAO,MAAM;AAAA;AAAA,CAAiD;AAAA,MACxE;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,OAAU,CAAC;AAAA,IAC/F,OAAO;AACL,UAAI;AACJ,UAAI,KAAK,UAAU;AACjB,cAAM,UAAU,aAAa,QAAQ,KAAK,QAAQ,CAAC;AACnD,8BAAsB,QAAQ,OAAO,MAAM,QAAQ,YAAY,QAAQ,aAAa,QAAQ,UAAU;AAAA,MACxG;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,QAAW,oBAAoB,CAAC;AAAA,IACpH;AAEA,kBAAc,SAAS,OAAO,KAAK,GAAG,CAAC;AAEvC,QAAI,CAAC,KAAK,QAAQ;AAChB,cAAQ,OAAO,MAAM;AAAA,CAAO;AAC5B,cAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACvC,UAAI,SAAS,OAAQ,UAAS,QAAQ,OAAK,QAAQ,OAAO,MAAM,KAAK,CAAC;AAAA,CAAI,CAAC;AAAA,IAC7E;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,YAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,YAAQ,OAAO,MAAM,YAAO,cAAc,GAAG,CAAC;AAAA,CAAI;AAClD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,QACG,QAAQ,UAAU,EAClB,YAAY,4HAAuC,EACnD,OAAO,uBAAuB,gDAAa,qBAAqB,EAChE,OAAO,mBAAmB,sEAAoB,EAC9C,OAAO,WAAW,6EAAiB,EACnC,OAAO,CAAC,SAAsE;AAC7E,QAAM,WAAW,iBAAiB;AAClC,QAAM,UAAU,QAAQ,KAAK,MAAM;AAEnC,MAAI,WAAW,OAAO,KAAK,CAAC,KAAK,OAAO;AACtC,YAAQ,OAAO,MAAM,sEAAyB,OAAO;AAAA,CAAoB;AACzE,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,gBAAc,SAAS,UAAU,OAAO;AACxC,UAAQ,OAAO,MAAM,gCAAiB,OAAO;AAAA,CAAI;AAEjD,MAAI,KAAK,cAAc;AACrB,UAAM,aAAa,QAAQ,MAAM;AACjC,QAAI,WAAW,UAAU,KAAK,CAAC,KAAK,OAAO;AACzC,cAAQ,OAAO,MAAM,0GAAoC,UAAU;AAAA,CAAI;AAAA,IACzE,OAAO;AACL,oBAAc,YAAY,UAAU,OAAO;AAC3C,cAAQ,OAAO,MAAM,gCAAiB,UAAU;AAAA,CAAI;AAAA,IACtD;AAAA,EACF;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAIH;AAAA,EACE,QACG,SAAS,cAAc,2EAAwC;AACpE,EAAE,OAAO,QAAQ;AAEjB,QAAQ,MAAM;","names":[]}
|
package/dist/index.cjs
CHANGED
|
@@ -2179,14 +2179,14 @@ var auto_detect_exports = {};
|
|
|
2179
2179
|
__export(auto_detect_exports, {
|
|
2180
2180
|
detectAvailableOcr: () => detectAvailableOcr,
|
|
2181
2181
|
getAutoFallbackChain: () => getAutoFallbackChain,
|
|
2182
|
-
|
|
2182
|
+
getNoCliMessage: () => getNoCliMessage,
|
|
2183
2183
|
validateOcrMode: () => validateOcrMode
|
|
2184
2184
|
});
|
|
2185
2185
|
function detectAvailableOcr() {
|
|
2186
2186
|
for (const cli of CLI_PRIORITY) {
|
|
2187
2187
|
if (isCliInstalled(cli)) return cli;
|
|
2188
2188
|
}
|
|
2189
|
-
return
|
|
2189
|
+
return null;
|
|
2190
2190
|
}
|
|
2191
2191
|
function isCliInstalled(name) {
|
|
2192
2192
|
try {
|
|
@@ -2202,11 +2202,10 @@ function getAutoFallbackChain() {
|
|
|
2202
2202
|
for (const cli of CLI_PRIORITY) {
|
|
2203
2203
|
if (isCliInstalled(cli)) chain.push(cli);
|
|
2204
2204
|
}
|
|
2205
|
-
chain.push("tesseract");
|
|
2206
2205
|
return chain;
|
|
2207
2206
|
}
|
|
2208
2207
|
function validateOcrMode(mode) {
|
|
2209
|
-
if (mode === "auto" || mode === "off"
|
|
2208
|
+
if (mode === "auto" || mode === "off") return;
|
|
2210
2209
|
if (!isCliInstalled(mode)) {
|
|
2211
2210
|
throw new Error(`'${mode}' CLI\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4.
|
|
2212
2211
|
${getInstallGuide(mode)}`);
|
|
@@ -2221,10 +2220,10 @@ function getInstallGuide(mode) {
|
|
|
2221
2220
|
};
|
|
2222
2221
|
return guides[mode] || `'${mode}'\uC744(\uB97C) \uC124\uCE58\uD574\uC8FC\uC138\uC694.`;
|
|
2223
2222
|
}
|
|
2224
|
-
function
|
|
2223
|
+
function getNoCliMessage() {
|
|
2225
2224
|
return [
|
|
2226
|
-
"\uC124\uCE58\uB41C AI CLI\uAC00 \uC5C6\uC5B4
|
|
2227
|
-
"\
|
|
2225
|
+
"\uC124\uCE58\uB41C AI CLI\uAC00 \uC5C6\uC5B4 OCR\uC744 \uC218\uD589\uD560 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.",
|
|
2226
|
+
"\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF \uCC98\uB9AC\uB97C \uC704\uD574 AI CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4:",
|
|
2228
2227
|
"",
|
|
2229
2228
|
" [\uAD8C\uC7A5] Codex CLI: npm install -g @openai/codex",
|
|
2230
2229
|
" Gemini CLI: https://ai.google.dev/gemini-api/docs/cli",
|
|
@@ -2408,66 +2407,6 @@ var init_cli_provider = __esm({
|
|
|
2408
2407
|
}
|
|
2409
2408
|
});
|
|
2410
2409
|
|
|
2411
|
-
// src/ocr/tesseract-provider.ts
|
|
2412
|
-
var tesseract_provider_exports = {};
|
|
2413
|
-
__export(tesseract_provider_exports, {
|
|
2414
|
-
createTesseractPoolProvider: () => createTesseractPoolProvider,
|
|
2415
|
-
createTesseractProvider: () => createTesseractProvider
|
|
2416
|
-
});
|
|
2417
|
-
async function createTesseractProvider() {
|
|
2418
|
-
const worker = await (0, import_tesseract.createWorker)("kor+eng");
|
|
2419
|
-
let terminated = false;
|
|
2420
|
-
const provider = async (pageImage, _pageNumber, _mimeType) => {
|
|
2421
|
-
const { data } = await worker.recognize(pageImage);
|
|
2422
|
-
return data.text;
|
|
2423
|
-
};
|
|
2424
|
-
provider.terminate = async () => {
|
|
2425
|
-
if (!terminated) {
|
|
2426
|
-
await worker.terminate();
|
|
2427
|
-
terminated = true;
|
|
2428
|
-
}
|
|
2429
|
-
};
|
|
2430
|
-
return provider;
|
|
2431
|
-
}
|
|
2432
|
-
async function createTesseractPoolProvider(concurrency) {
|
|
2433
|
-
const workers = await Promise.all(
|
|
2434
|
-
Array.from({ length: concurrency }, () => (0, import_tesseract.createWorker)("kor+eng"))
|
|
2435
|
-
);
|
|
2436
|
-
const idle = [...workers];
|
|
2437
|
-
const waitQueue = [];
|
|
2438
|
-
function acquire() {
|
|
2439
|
-
if (idle.length > 0) return Promise.resolve(idle.pop());
|
|
2440
|
-
return new Promise((resolve4) => waitQueue.push(resolve4));
|
|
2441
|
-
}
|
|
2442
|
-
function release(w) {
|
|
2443
|
-
if (waitQueue.length > 0) {
|
|
2444
|
-
waitQueue.shift()(w);
|
|
2445
|
-
} else {
|
|
2446
|
-
idle.push(w);
|
|
2447
|
-
}
|
|
2448
|
-
}
|
|
2449
|
-
const provider = async (pageImage, _pageNumber, _mimeType) => {
|
|
2450
|
-
const w = await acquire();
|
|
2451
|
-
try {
|
|
2452
|
-
const { data } = await w.recognize(pageImage);
|
|
2453
|
-
return data.text;
|
|
2454
|
-
} finally {
|
|
2455
|
-
release(w);
|
|
2456
|
-
}
|
|
2457
|
-
};
|
|
2458
|
-
provider.terminate = async () => {
|
|
2459
|
-
await Promise.all(workers.map((w) => w.terminate()));
|
|
2460
|
-
};
|
|
2461
|
-
return provider;
|
|
2462
|
-
}
|
|
2463
|
-
var import_tesseract;
|
|
2464
|
-
var init_tesseract_provider = __esm({
|
|
2465
|
-
"src/ocr/tesseract-provider.ts"() {
|
|
2466
|
-
"use strict";
|
|
2467
|
-
import_tesseract = require("tesseract.js");
|
|
2468
|
-
}
|
|
2469
|
-
});
|
|
2470
|
-
|
|
2471
2410
|
// src/ocr/batch-provider.ts
|
|
2472
2411
|
var batch_provider_exports = {};
|
|
2473
2412
|
__export(batch_provider_exports, {
|
|
@@ -2676,15 +2615,6 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
2676
2615
|
}
|
|
2677
2616
|
if (mode !== "auto") {
|
|
2678
2617
|
validateOcrMode(mode);
|
|
2679
|
-
if (mode === "tesseract") {
|
|
2680
|
-
const { createTesseractProvider: createTesseractProvider2, createTesseractPoolProvider: createTesseractPoolProvider2 } = await Promise.resolve().then(() => (init_tesseract_provider(), tesseract_provider_exports));
|
|
2681
|
-
if (concurrency && concurrency > 1) {
|
|
2682
|
-
logger.log({ level: "info", event: "done", message: "Tesseract pool provider \uC120\uD0DD", meta: { concurrency } });
|
|
2683
|
-
return createTesseractPoolProvider2(concurrency);
|
|
2684
|
-
}
|
|
2685
|
-
logger.log({ level: "info", event: "done", message: "Tesseract single provider \uC120\uD0DD" });
|
|
2686
|
-
return createTesseractProvider2();
|
|
2687
|
-
}
|
|
2688
2618
|
if (mode === "gemini" || mode === "claude" || mode === "codex") {
|
|
2689
2619
|
const { createBatchCliProvider: createBatchCliProvider2, DEFAULT_BATCH_SIZES: DEFAULT_BATCH_SIZES2 } = await Promise.resolve().then(() => (init_batch_provider(), batch_provider_exports));
|
|
2690
2620
|
const effectiveBatch = batchSize ?? DEFAULT_BATCH_SIZES2[mode];
|
|
@@ -2700,27 +2630,16 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
2700
2630
|
}
|
|
2701
2631
|
const detected = detectAvailableOcr();
|
|
2702
2632
|
logger.log({ level: "info", event: "progress", message: "OCR auto \uAC10\uC9C0 \uACB0\uACFC", meta: { detected } });
|
|
2703
|
-
if (detected
|
|
2704
|
-
|
|
2705
|
-
|
|
2706
|
-
|
|
2707
|
-
code: "OCR_CLI_FALLBACK"
|
|
2708
|
-
});
|
|
2709
|
-
} else {
|
|
2710
|
-
warnings?.push({
|
|
2711
|
-
message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (codex CLI\uAC00 \uC5C6\uC5B4 fallback). \uB354 \uB098\uC740 \uD488\uC9C8\uC744 \uC704\uD574 codex CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.`,
|
|
2712
|
-
code: "OCR_CLI_FALLBACK"
|
|
2713
|
-
});
|
|
2714
|
-
}
|
|
2633
|
+
if (!detected) {
|
|
2634
|
+
throw new Error(
|
|
2635
|
+
"\uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR CLI\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4. \uB2E4\uC74C \uC911 \uD558\uB098\uB97C \uC124\uCE58\uD558\uC138\uC694:\n Codex CLI: npm install -g @openai/codex\n Claude CLI: npm install -g @anthropic-ai/claude-code\n Gemini CLI: https://ai.google.dev/gemini-api/docs/cli"
|
|
2636
|
+
);
|
|
2715
2637
|
}
|
|
2716
|
-
if (detected
|
|
2717
|
-
|
|
2718
|
-
|
|
2719
|
-
|
|
2720
|
-
|
|
2721
|
-
}
|
|
2722
|
-
logger.log({ level: "info", event: "done", message: "AUTO: Tesseract single provider \uC120\uD0DD" });
|
|
2723
|
-
return createTesseractProvider2();
|
|
2638
|
+
if (detected !== "codex") {
|
|
2639
|
+
warnings?.push({
|
|
2640
|
+
message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (codex CLI\uAC00 \uC5C6\uC5B4 fallback). \uB354 \uB098\uC740 \uD488\uC9C8\uC744 \uC704\uD574 codex CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.`,
|
|
2641
|
+
code: "OCR_CLI_FALLBACK"
|
|
2642
|
+
});
|
|
2724
2643
|
}
|
|
2725
2644
|
if (detected === "gemini" || detected === "codex" || detected === "claude") {
|
|
2726
2645
|
const { createBatchCliProvider: createBatchCliProvider2, DEFAULT_BATCH_SIZES: DEFAULT_BATCH_SIZES2 } = await Promise.resolve().then(() => (init_batch_provider(), batch_provider_exports));
|
|
@@ -3138,7 +3057,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
3138
3057
|
var import_xmldom = require("@xmldom/xmldom");
|
|
3139
3058
|
|
|
3140
3059
|
// src/utils.ts
|
|
3141
|
-
var VERSION = true ? "2.4.
|
|
3060
|
+
var VERSION = true ? "2.4.19" : "0.0.0-dev";
|
|
3142
3061
|
function toArrayBuffer(buf) {
|
|
3143
3062
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3144
3063
|
return buf.buffer;
|
|
@@ -8734,7 +8653,6 @@ async function parseDocxDocument(buffer, options, existingZip) {
|
|
|
8734
8653
|
|
|
8735
8654
|
// src/index.ts
|
|
8736
8655
|
init_cli_provider();
|
|
8737
|
-
init_tesseract_provider();
|
|
8738
8656
|
init_markdown_to_blocks();
|
|
8739
8657
|
init_logger();
|
|
8740
8658
|
|
|
@@ -12093,9 +12011,6 @@ async function parseImage(buffer, options) {
|
|
|
12093
12011
|
if (ocrMode === "gemini" || ocrMode === "claude" || ocrMode === "codex" || ocrMode === "ollama") {
|
|
12094
12012
|
ocrProvider = createCliOcrProvider(ocrMode);
|
|
12095
12013
|
actualOcrMode = ocrMode;
|
|
12096
|
-
} else if (ocrMode === "tesseract") {
|
|
12097
|
-
ocrProvider = await createTesseractProvider();
|
|
12098
|
-
actualOcrMode = ocrMode;
|
|
12099
12014
|
} else if (ocrMode === "auto") {
|
|
12100
12015
|
const modesToTry = ["gemini", "claude", "codex", "ollama"];
|
|
12101
12016
|
for (const mode of modesToTry) {
|
|
@@ -12107,10 +12022,6 @@ async function parseImage(buffer, options) {
|
|
|
12107
12022
|
console.warn(`[kordoc] OCR auto-detection: ${mode} CLI not available or failed. Trying next.`, e);
|
|
12108
12023
|
}
|
|
12109
12024
|
}
|
|
12110
|
-
if (!ocrProvider) {
|
|
12111
|
-
ocrProvider = await createTesseractProvider();
|
|
12112
|
-
actualOcrMode = "tesseract";
|
|
12113
|
-
}
|
|
12114
12025
|
}
|
|
12115
12026
|
if (!ocrProvider) {
|
|
12116
12027
|
return { success: false, fileType: "image", error: "\uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uD504\uB85C\uBC14\uC774\uB354\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.", code: "PARSE_ERROR" };
|