@clazic/kordoc 2.5.1 → 2.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/batch-provider-XRF6F26E.js +234 -0
- package/dist/batch-provider-XRF6F26E.js.map +1 -0
- package/dist/{chunk-QG6BYZMR.js → chunk-25ZYYLVP.js} +4 -4
- package/dist/{chunk-IJGNPAK2.js → chunk-5CILZHRW.js} +2 -2
- package/dist/chunk-S7BHLD2V.js +200 -0
- package/dist/{chunk-Y4WFKJ5P.js.map → chunk-S7BHLD2V.js.map} +1 -1
- package/dist/cli.js +6 -6
- package/dist/index.cjs +88 -11
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +88 -11
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +3 -3
- package/dist/{resolve-XWYJYKKH.js → resolve-ZSUEJK3E.js} +4 -4
- package/dist/{utils-RBXHHCLI.js → utils-H2BL5GNR.js} +2 -2
- package/dist/{watch-5CCMTZ7F.js → watch-D6ODQLPJ.js} +4 -4
- package/package.json +1 -1
- package/dist/batch-provider-5BFJRKAZ.js +0 -190
- package/dist/batch-provider-5BFJRKAZ.js.map +0 -1
- package/dist/chunk-Y4WFKJ5P.js +0 -167
- /package/dist/{chunk-QG6BYZMR.js.map → chunk-25ZYYLVP.js.map} +0 -0
- /package/dist/{chunk-IJGNPAK2.js.map → chunk-5CILZHRW.js.map} +0 -0
- /package/dist/{resolve-XWYJYKKH.js.map → resolve-ZSUEJK3E.js.map} +0 -0
- /package/dist/{utils-RBXHHCLI.js.map → utils-H2BL5GNR.js.map} +0 -0
- /package/dist/{watch-5CCMTZ7F.js.map → watch-D6ODQLPJ.js.map} +0 -0
package/dist/mcp.js
CHANGED
|
@@ -10,15 +10,15 @@ import {
|
|
|
10
10
|
markdownToHwpx,
|
|
11
11
|
markdownToXlsx,
|
|
12
12
|
parse
|
|
13
|
-
} from "./chunk-QG6BYZMR.js";
|
|
13
|
+
} from "./chunk-25ZYYLVP.js";
|
|
14
14
|
import {
|
|
15
15
|
KordocError,
|
|
16
16
|
VERSION,
|
|
17
17
|
sanitizeError,
|
|
18
18
|
toArrayBuffer
|
|
19
|
-
} from "./chunk-IJGNPAK2.js";
|
|
19
|
+
} from "./chunk-5CILZHRW.js";
|
|
20
20
|
import "./chunk-MOL7MDBG.js";
|
|
21
|
-
import "./chunk-Y4WFKJ5P.js";
|
|
21
|
+
import "./chunk-S7BHLD2V.js";
|
|
22
22
|
import "./chunk-YW5G6BCJ.js";
|
|
23
23
|
import {
|
|
24
24
|
createLoggerFromEnv,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
createCliOcrProvider
|
|
4
|
-
} from "./chunk-Y4WFKJ5P.js";
|
|
4
|
+
} from "./chunk-S7BHLD2V.js";
|
|
5
5
|
import {
|
|
6
6
|
detectAvailableOcr,
|
|
7
7
|
validateOcrMode
|
|
@@ -22,7 +22,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
22
22
|
if (mode !== "auto") {
|
|
23
23
|
validateOcrMode(mode);
|
|
24
24
|
if (mode === "gemini" || mode === "claude" || mode === "codex") {
|
|
25
|
-
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-5BFJRKAZ.js");
|
|
25
|
+
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-XRF6F26E.js");
|
|
26
26
|
const effectiveBatch = batchSize ?? DEFAULT_BATCH_SIZES[mode];
|
|
27
27
|
if (effectiveBatch > 1) {
|
|
28
28
|
logger.log({ level: "info", event: "done", message: "Batch CLI provider \uC120\uD0DD", meta: { mode, batchSize: effectiveBatch } });
|
|
@@ -48,7 +48,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
48
48
|
});
|
|
49
49
|
}
|
|
50
50
|
if (detected === "gemini" || detected === "codex" || detected === "claude") {
|
|
51
|
-
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-5BFJRKAZ.js");
|
|
51
|
+
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-XRF6F26E.js");
|
|
52
52
|
const effectiveBatch = batchSize ?? DEFAULT_BATCH_SIZES[detected];
|
|
53
53
|
if (effectiveBatch > 1) {
|
|
54
54
|
logger.log({ level: "info", event: "done", message: "AUTO: Batch CLI provider \uC120\uD0DD", meta: { mode: detected, batchSize: effectiveBatch } });
|
|
@@ -63,4 +63,4 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
63
63
|
export {
|
|
64
64
|
resolveOcrProvider
|
|
65
65
|
};
|
|
66
|
-
//# sourceMappingURL=resolve-XWYJYKKH.js.map
|
|
66
|
+
//# sourceMappingURL=resolve-ZSUEJK3E.js.map
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
sanitizeError,
|
|
10
10
|
sanitizeHref,
|
|
11
11
|
toArrayBuffer
|
|
12
|
-
} from "./chunk-IJGNPAK2.js";
|
|
12
|
+
} from "./chunk-5CILZHRW.js";
|
|
13
13
|
import "./chunk-ZWE3DS7E.js";
|
|
14
14
|
export {
|
|
15
15
|
KordocError,
|
|
@@ -22,4 +22,4 @@ export {
|
|
|
22
22
|
sanitizeHref,
|
|
23
23
|
toArrayBuffer
|
|
24
24
|
};
|
|
25
|
-
//# sourceMappingURL=utils-RBXHHCLI.js.map
|
|
25
|
+
//# sourceMappingURL=utils-H2BL5GNR.js.map
|
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
import {
|
|
3
3
|
detectFormat,
|
|
4
4
|
parse
|
|
5
|
-
} from "./chunk-QG6BYZMR.js";
|
|
5
|
+
} from "./chunk-25ZYYLVP.js";
|
|
6
6
|
import {
|
|
7
7
|
toArrayBuffer
|
|
8
|
-
} from "./chunk-IJGNPAK2.js";
|
|
8
|
+
} from "./chunk-5CILZHRW.js";
|
|
9
9
|
import "./chunk-MOL7MDBG.js";
|
|
10
|
-
import "./chunk-Y4WFKJ5P.js";
|
|
10
|
+
import "./chunk-S7BHLD2V.js";
|
|
11
11
|
import "./chunk-YW5G6BCJ.js";
|
|
12
12
|
import "./chunk-I6YC6ZGK.js";
|
|
13
13
|
import "./chunk-ZWE3DS7E.js";
|
|
@@ -136,4 +136,4 @@ async function sendWebhook(url, payload) {
|
|
|
136
136
|
export {
|
|
137
137
|
watchDirectory
|
|
138
138
|
};
|
|
139
|
-
//# sourceMappingURL=watch-5CCMTZ7F.js.map
|
|
139
|
+
//# sourceMappingURL=watch-D6ODQLPJ.js.map
|
package/package.json
CHANGED
|
package/dist/batch-provider-5BFJRKAZ.js
DELETED
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import "./chunk-ZWE3DS7E.js";
|
|
3
|
-
|
|
4
|
-
// src/ocr/batch-provider.ts
|
|
5
|
-
import { spawn, execSync } from "child_process";
|
|
6
|
-
import { writeFileSync, readFileSync, unlinkSync, mkdirSync } from "fs";
|
|
7
|
-
import { join } from "path";
|
|
8
|
-
import { tmpdir } from "os";
|
|
9
|
-
var BATCH_OCR_PROMPT = "\uB2E4\uC74C \uBB38\uC11C \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0\uB4E4\uC744 OCR\uD558\uC5EC \uC21C\uC218 Markdown\uC73C\uB85C \uBCC0\uD658\uD558\uC138\uC694.\n\n\uADDC\uCE59:\n- \uAC01 \uD398\uC774\uC9C0 \uACB0\uACFC \uC0AC\uC774\uC5D0 \uBC18\uB4DC\uC2DC \uC774 \uAD6C\uBD84\uC790\uB97C \uC0BD\uC785: <!-- PAGE_BREAK -->\n- \uD14C\uC774\uBE14\uC740 Markdown \uD14C\uC774\uBE14 \uBB38\uBC95 \uC0AC\uC6A9 (| \uAD6C\uBD84, |---|---| \uD5E4\uB354 \uAD6C\uBD84\uC120 \uD3EC\uD568)\n- \uBCD1\uD569\uB41C \uC140\uC740 \uD574\uB2F9 \uC704\uCE58\uC5D0 \uB0B4\uC6A9 \uAE30\uC7AC\n- \uD5E4\uB529\uC740 \uAE00\uC790 \uD06C\uAE30\uC5D0 \uB530\uB77C ## ~ ###### \uC0AC\uC6A9\n- \uB9AC\uC2A4\uD2B8\uB294 - \uB610\uB294 1. \uC0AC\uC6A9\n- \uC774\uBBF8\uC9C0, \uB3C4\uD615 \uB4F1 \uBE44\uD14D\uC2A4\uD2B8 \uC694\uC18C\uB294 \uBB34\uC2DC\n- \uC6D0\uBB38\uC758 \uC77D\uAE30 \uC21C\uC11C\uC640 \uAD6C\uC870\uB97C \uC720\uC9C0\n- ```\uB85C \uAC10\uC2F8\uC9C0 \uB9D0\uACE0 \uC21C\uC218 Markdown\uB9CC \uCD9C\uB825";
|
|
10
|
-
var DEFAULT_BATCH_SIZES = {
|
|
11
|
-
gemini: 5,
|
|
12
|
-
claude: 5,
|
|
13
|
-
codex: 10
|
|
14
|
-
};
|
|
15
|
-
var _batchTempDir = null;
|
|
16
|
-
function getBatchTempDir() {
|
|
17
|
-
if (!_batchTempDir) {
|
|
18
|
-
_batchTempDir = join(process.cwd(), ".kordoc_ocr_tmp");
|
|
19
|
-
mkdirSync(_batchTempDir, { recursive: true });
|
|
20
|
-
if (process.platform === "win32") {
|
|
21
|
-
try {
|
|
22
|
-
execSync(`attrib +h "${_batchTempDir}"`, { stdio: "ignore" });
|
|
23
|
-
} catch {
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
return _batchTempDir;
|
|
28
|
-
}
|
|
29
|
-
function createBatchCliProvider(mode, batchSize) {
|
|
30
|
-
return {
|
|
31
|
-
__batch: true,
|
|
32
|
-
batchSize,
|
|
33
|
-
async processBatch(pages) {
|
|
34
|
-
const results = /* @__PURE__ */ new Map();
|
|
35
|
-
const tempDir = getBatchTempDir();
|
|
36
|
-
const tempFiles = [];
|
|
37
|
-
try {
|
|
38
|
-
for (const { image, pageNum } of pages) {
|
|
39
|
-
const path = join(tempDir, `batch-p${pageNum}.png`);
|
|
40
|
-
writeFileSync(path, image);
|
|
41
|
-
tempFiles.push(path);
|
|
42
|
-
}
|
|
43
|
-
let output;
|
|
44
|
-
if (mode === "codex") {
|
|
45
|
-
output = await callBatchCodexCli(tempFiles);
|
|
46
|
-
} else {
|
|
47
|
-
output = await callBatchCli(mode, tempFiles);
|
|
48
|
-
}
|
|
49
|
-
const cleaned = stripCodeFence(output.trim());
|
|
50
|
-
const parts = cleaned.split(/<!--\s*PAGE_BREAK\s*-->/).map((p) => p.trim()).filter((p) => p.length > 0);
|
|
51
|
-
for (let i = 0; i < pages.length; i++) {
|
|
52
|
-
const pageNum = pages[i].pageNum;
|
|
53
|
-
if (i < parts.length) {
|
|
54
|
-
results.set(pageNum, { markdown: parts[i] });
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
} finally {
|
|
58
|
-
for (const f of tempFiles) {
|
|
59
|
-
try {
|
|
60
|
-
unlinkSync(f);
|
|
61
|
-
} catch {
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
return results;
|
|
66
|
-
}
|
|
67
|
-
};
|
|
68
|
-
}
|
|
69
|
-
function spawnAsync(cmd, args, opts) {
|
|
70
|
-
return new Promise((resolve, reject) => {
|
|
71
|
-
const child = spawn(cmd, args, {
|
|
72
|
-
cwd: opts.cwd,
|
|
73
|
-
env: process.env,
|
|
74
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
75
|
-
shell: process.platform === "win32"
|
|
76
|
-
});
|
|
77
|
-
let stdout = "";
|
|
78
|
-
let stderr = "";
|
|
79
|
-
let killed = false;
|
|
80
|
-
child.stdout.setEncoding("utf-8");
|
|
81
|
-
child.stderr.setEncoding("utf-8");
|
|
82
|
-
child.stdout.on("data", (d) => {
|
|
83
|
-
stdout += d;
|
|
84
|
-
});
|
|
85
|
-
child.stderr.on("data", (d) => {
|
|
86
|
-
stderr += d;
|
|
87
|
-
});
|
|
88
|
-
const timer = setTimeout(() => {
|
|
89
|
-
killed = true;
|
|
90
|
-
if (process.platform === "win32") {
|
|
91
|
-
child.kill();
|
|
92
|
-
} else {
|
|
93
|
-
child.kill("SIGTERM");
|
|
94
|
-
}
|
|
95
|
-
}, opts.timeoutMs);
|
|
96
|
-
if (opts.stdin !== void 0) {
|
|
97
|
-
child.stdin.end(opts.stdin);
|
|
98
|
-
} else {
|
|
99
|
-
child.stdin.end();
|
|
100
|
-
}
|
|
101
|
-
child.on("close", (code) => {
|
|
102
|
-
clearTimeout(timer);
|
|
103
|
-
if (killed) {
|
|
104
|
-
reject(new Error(`\uD0C0\uC784\uC544\uC6C3 (${Math.round(opts.timeoutMs / 1e3)}\uCD08)`));
|
|
105
|
-
} else {
|
|
106
|
-
resolve({ stdout, stderr, exitCode: code ?? 1 });
|
|
107
|
-
}
|
|
108
|
-
});
|
|
109
|
-
child.on("error", (err) => {
|
|
110
|
-
clearTimeout(timer);
|
|
111
|
-
reject(err);
|
|
112
|
-
});
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
async function callBatchCli(mode, imagePaths) {
|
|
116
|
-
const fileRefs = imagePaths.map((p) => `@${p.replace(/\\/g, "/")}`).join("\n");
|
|
117
|
-
const prompt = `${BATCH_OCR_PROMPT}
|
|
118
|
-
|
|
119
|
-
${fileRefs}`;
|
|
120
|
-
let args;
|
|
121
|
-
if (mode === "gemini") {
|
|
122
|
-
const model = process.env.KORDOC_GEMINI_MODEL ?? "gemini-2.5-flash";
|
|
123
|
-
args = ["--prompt", prompt, "--yolo", "--model", model];
|
|
124
|
-
} else {
|
|
125
|
-
args = ["--print", prompt];
|
|
126
|
-
const model = process.env.KORDOC_CLAUDE_MODEL;
|
|
127
|
-
if (model) args.push("--model", model);
|
|
128
|
-
}
|
|
129
|
-
const timeoutMs = 6e4 + imagePaths.length * 2e4;
|
|
130
|
-
const result = await spawnAsync(mode, args, {
|
|
131
|
-
timeoutMs,
|
|
132
|
-
...mode === "claude" ? { cwd: tmpdir() } : {}
|
|
133
|
-
});
|
|
134
|
-
if (result.exitCode !== 0) {
|
|
135
|
-
const errMsg = result.stderr?.trim() || `exit code ${result.exitCode}`;
|
|
136
|
-
throw new Error(`${mode} \uBC30\uCE58 OCR \uC2E4\uD328: ${errMsg}`);
|
|
137
|
-
}
|
|
138
|
-
const output = result.stdout || "";
|
|
139
|
-
checkForLimitError(output, mode);
|
|
140
|
-
return output;
|
|
141
|
-
}
|
|
142
|
-
async function callBatchCodexCli(imagePaths) {
|
|
143
|
-
const outPath = join(tmpdir(), `kordoc-codex-batch-${Date.now()}-${Math.random().toString(36).slice(2)}.txt`);
|
|
144
|
-
try {
|
|
145
|
-
const args = ["exec", BATCH_OCR_PROMPT];
|
|
146
|
-
for (const p of imagePaths) {
|
|
147
|
-
args.push("--image", p);
|
|
148
|
-
}
|
|
149
|
-
args.push("--output-last-message", outPath);
|
|
150
|
-
const model = process.env.KORDOC_CODEX_MODEL;
|
|
151
|
-
if (model) args.push("--model", model);
|
|
152
|
-
const timeoutMs = 6e4 + imagePaths.length * 2e4;
|
|
153
|
-
const result = await spawnAsync("codex", args, {
|
|
154
|
-
timeoutMs,
|
|
155
|
-
stdin: ""
|
|
156
|
-
});
|
|
157
|
-
if (result.exitCode !== 0) {
|
|
158
|
-
const errMsg = result.stderr?.trim() || `exit code ${result.exitCode}`;
|
|
159
|
-
throw new Error(`codex \uBC30\uCE58 OCR \uC2E4\uD328: ${errMsg}`);
|
|
160
|
-
}
|
|
161
|
-
let text;
|
|
162
|
-
try {
|
|
163
|
-
text = readFileSync(outPath, "utf-8");
|
|
164
|
-
} catch {
|
|
165
|
-
text = result.stdout || "";
|
|
166
|
-
}
|
|
167
|
-
checkForLimitError(text, "codex");
|
|
168
|
-
return text;
|
|
169
|
-
} finally {
|
|
170
|
-
try {
|
|
171
|
-
unlinkSync(outPath);
|
|
172
|
-
} catch {
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
function checkForLimitError(output, mode) {
|
|
177
|
-
const lower = output.toLowerCase();
|
|
178
|
-
if (lower.includes("usage limit") || lower.includes("rate limit")) {
|
|
179
|
-
throw new Error(`${mode} \uC0AC\uC6A9\uB7C9/\uC18D\uB3C4 \uC81C\uD55C: ${output.trim().slice(0, 200)}`);
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
function stripCodeFence(text) {
|
|
183
|
-
const match = text.match(/^```(?:markdown|md)?\s*\n([\s\S]*?)\n```\s*$/m);
|
|
184
|
-
return match ? match[1].trim() : text;
|
|
185
|
-
}
|
|
186
|
-
export {
|
|
187
|
-
DEFAULT_BATCH_SIZES,
|
|
188
|
-
createBatchCliProvider
|
|
189
|
-
};
|
|
190
|
-
//# sourceMappingURL=batch-provider-5BFJRKAZ.js.map
|
|
package/dist/batch-provider-5BFJRKAZ.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/ocr/batch-provider.ts"],"sourcesContent":["/**\n * CLI 배치 OCR 프로바이더\n *\n * 여러 페이지 이미지를 단일 CLI 호출로 처리하여 API 호출 수를 대폭 감소.\n * gemini/claude: @file 멀티 참조, codex: --image 멀티 플래그\n *\n * 299페이지 기준:\n * - 기존: CLI 299회 호출 (~30분)\n * - 배치: CLI 3~6회 호출 (~3분)\n */\n\nimport { spawn, execSync } from \"child_process\"\nimport { writeFileSync, readFileSync, unlinkSync, mkdirSync } from \"fs\"\nimport { join } from \"path\"\nimport { tmpdir } from \"os\"\nimport type { StructuredOcrResult, BatchOcrProvider } from \"../types.js\"\n\n/** 배치 OCR 프롬프트 */\nconst BATCH_OCR_PROMPT =\n \"다음 문서 페이지 이미지들을 OCR하여 순수 Markdown으로 변환하세요.\\n\\n\" +\n \"규칙:\\n\" +\n \"- 각 페이지 결과 사이에 반드시 이 구분자를 삽입: <!-- PAGE_BREAK -->\\n\" +\n \"- 테이블은 Markdown 테이블 문법 사용 (| 구분, |---|---| 헤더 구분선 포함)\\n\" +\n \"- 병합된 셀은 해당 위치에 내용 기재\\n\" +\n \"- 헤딩은 글자 크기에 따라 ## ~ ###### 사용\\n\" +\n \"- 리스트는 - 또는 1. 사용\\n\" +\n \"- 이미지, 도형 등 비텍스트 요소는 무시\\n\" +\n \"- 원문의 읽기 순서와 구조를 유지\\n\" +\n \"- ```로 감싸지 말고 순수 Markdown만 출력\"\n\n/** 모드별 기본 배치 크기 (CLI 내부 타임아웃 + 실측 기반)\n *\n * gemini CLI: 10장 이상에서 AbortError 발생 (내부 타임아웃).\n * 5장 배치가 안정적으로 동작 확인 (35초/배치).\n * 299페이지 = 60배치 = 기존 299회 대비 80% 감소.\n */\nexport const DEFAULT_BATCH_SIZES: Record<string, number> = {\n gemini: 5,\n claude: 5,\n codex: 10,\n}\n\n/**\n * 임시 디렉토리 — gemini CLI는 cwd 하위 + gitignore 밖만 @참조 가능\n *\n * 숨김 처리:\n * - macOS/Linux: '.' 접두사로 기본 숨김 (ls -a 로만 표시)\n * - Windows: '.' 
접두사 + attrib +h 로 숨김 속성 부여\n */\nlet _batchTempDir: string | null = null\nfunction getBatchTempDir(): string {\n if (!_batchTempDir) {\n _batchTempDir = join(process.cwd(), \".kordoc_ocr_tmp\")\n mkdirSync(_batchTempDir, { recursive: true })\n // Windows: dot-prefix만으로 숨김 처리 불충분 → attrib +h 추가\n if (process.platform === \"win32\") {\n try { execSync(`attrib +h \"${_batchTempDir}\"`, { stdio: \"ignore\" }) } catch { /* ignore */ }\n }\n }\n return _batchTempDir\n}\n\n/**\n * 배치 CLI 프로바이더 생성\n */\nexport function createBatchCliProvider(\n mode: \"gemini\" | \"claude\" | \"codex\",\n batchSize: number\n): BatchOcrProvider {\n return {\n __batch: true as const,\n batchSize,\n async processBatch(pages) {\n const results = new Map<number, StructuredOcrResult>()\n const tempDir = getBatchTempDir()\n const tempFiles: string[] = []\n\n try {\n // 1. Write all page images to temp files\n for (const { image, pageNum } of pages) {\n const path = join(tempDir, `batch-p${pageNum}.png`)\n writeFileSync(path, image)\n tempFiles.push(path)\n }\n\n // 2. Call CLI with all file references (비동기 — 병렬 배치 실행 가능)\n let output: string\n if (mode === \"codex\") {\n output = await callBatchCodexCli(tempFiles)\n } else {\n output = await callBatchCli(mode, tempFiles)\n }\n\n // 3. Parse response by PAGE_BREAK separator\n const cleaned = stripCodeFence(output.trim())\n const parts = cleaned.split(/<!--\\s*PAGE_BREAK\\s*-->/)\n .map(p => p.trim())\n .filter(p => p.length > 0)\n\n // 4. Map results to page numbers (best-effort if count mismatch)\n for (let i = 0; i < pages.length; i++) {\n const pageNum = pages[i].pageNum\n if (i < parts.length) {\n results.set(pageNum, { markdown: parts[i] })\n }\n // If fewer parts than pages, remaining pages get no result\n }\n } finally {\n // 5. 
Clean up temp files\n for (const f of tempFiles) {\n try { unlinkSync(f) } catch { /* ignore */ }\n }\n }\n\n return results\n },\n }\n}\n\n/**\n * 비동기 CLI 실행 헬퍼 — spawn + Promise 래핑.\n * spawnSync는 이벤트 루프를 차단하여 병렬 배치 실행 불가.\n */\nfunction spawnAsync(\n cmd: string,\n args: string[],\n opts: { timeoutMs: number; cwd?: string; stdin?: string }\n): Promise<{ stdout: string; stderr: string; exitCode: number }> {\n return new Promise((resolve, reject) => {\n const child = spawn(cmd, args, {\n cwd: opts.cwd,\n env: process.env,\n stdio: [\"pipe\", \"pipe\", \"pipe\"],\n shell: process.platform === \"win32\",\n })\n\n let stdout = \"\"\n let stderr = \"\"\n let killed = false\n\n child.stdout.setEncoding(\"utf-8\")\n child.stderr.setEncoding(\"utf-8\")\n child.stdout.on(\"data\", (d: string) => { stdout += d })\n child.stderr.on(\"data\", (d: string) => { stderr += d })\n\n const timer = setTimeout(() => {\n killed = true\n if (process.platform === \"win32\") {\n child.kill()\n } else {\n child.kill(\"SIGTERM\")\n }\n }, opts.timeoutMs)\n\n if (opts.stdin !== undefined) {\n child.stdin.end(opts.stdin)\n } else {\n child.stdin.end()\n }\n\n child.on(\"close\", (code) => {\n clearTimeout(timer)\n if (killed) {\n reject(new Error(`타임아웃 (${Math.round(opts.timeoutMs / 1000)}초)`))\n } else {\n resolve({ stdout, stderr, exitCode: code ?? 1 })\n }\n })\n child.on(\"error\", (err) => {\n clearTimeout(timer)\n reject(err)\n })\n })\n}\n\n/** gemini/claude 배치 호출 (비동기) */\nasync function callBatchCli(mode: \"gemini\" | \"claude\", imagePaths: string[]): Promise<string> {\n const fileRefs = imagePaths.map(p => `@${p.replace(/\\\\/g, \"/\")}`).join(\"\\n\")\n const prompt = `${BATCH_OCR_PROMPT}\\n\\n${fileRefs}`\n\n let args: string[]\n if (mode === \"gemini\") {\n const model = process.env.KORDOC_GEMINI_MODEL ?? 
\"gemini-2.5-flash\"\n args = [\"--prompt\", prompt, \"--yolo\", \"--model\", model]\n } else {\n args = [\"--print\", prompt]\n const model = process.env.KORDOC_CLAUDE_MODEL\n if (model) args.push(\"--model\", model)\n }\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = await spawnAsync(mode, args, {\n timeoutMs,\n ...(mode === \"claude\" ? { cwd: tmpdir() } : {}),\n })\n\n if (result.exitCode !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.exitCode}`\n throw new Error(`${mode} 배치 OCR 실패: ${errMsg}`)\n }\n\n const output = result.stdout || \"\"\n checkForLimitError(output, mode)\n return output\n}\n\n/** codex 배치 호출 (비동기) — --image를 여러 번 지정 */\nasync function callBatchCodexCli(imagePaths: string[]): Promise<string> {\n const outPath = join(tmpdir(), `kordoc-codex-batch-${Date.now()}-${Math.random().toString(36).slice(2)}.txt`)\n try {\n const args = [\"exec\", BATCH_OCR_PROMPT]\n for (const p of imagePaths) {\n args.push(\"--image\", p)\n }\n args.push(\"--output-last-message\", outPath)\n const model = process.env.KORDOC_CODEX_MODEL\n if (model) args.push(\"--model\", model)\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = await spawnAsync(\"codex\", args, {\n timeoutMs,\n stdin: \"\",\n })\n\n if (result.exitCode !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.exitCode}`\n throw new Error(`codex 배치 OCR 실패: ${errMsg}`)\n }\n\n let text: string\n try {\n text = readFileSync(outPath, \"utf-8\")\n } catch {\n text = result.stdout || \"\"\n }\n checkForLimitError(text, \"codex\")\n return text\n } finally {\n try { unlinkSync(outPath) } catch { /* ignore */ }\n }\n}\n\n/**\n * 출력 텍스트에서 사용량·속도 제한 에러 감지.\n * 해당 메시지가 포함된 경우 throw하여 다음 엔진으로 fallback 트리거.\n */\nfunction checkForLimitError(output: string, mode: string): void {\n const lower = output.toLowerCase()\n if (lower.includes(\"usage limit\") || lower.includes(\"rate limit\")) {\n throw new Error(`${mode} 사용량/속도 제한: 
${output.trim().slice(0, 200)}`)\n }\n}\n\n/** LLM 출력에서 코드 펜스 제거 (cli-provider.ts와 동일 로직) */\nfunction stripCodeFence(text: string): string {\n const match = text.match(/^```(?:markdown|md)?\\s*\\n([\\s\\S]*?)\\n```\\s*$/m)\n return match ? match[1].trim() : text\n}\n"],"mappings":";;;;AAWA,SAAS,OAAO,gBAAgB;AAChC,SAAS,eAAe,cAAc,YAAY,iBAAiB;AACnE,SAAS,YAAY;AACrB,SAAS,cAAc;AAIvB,IAAM,mBACJ;AAiBK,IAAM,sBAA8C;AAAA,EACzD,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,OAAO;AACT;AASA,IAAI,gBAA+B;AACnC,SAAS,kBAA0B;AACjC,MAAI,CAAC,eAAe;AAClB,oBAAgB,KAAK,QAAQ,IAAI,GAAG,iBAAiB;AACrD,cAAU,eAAe,EAAE,WAAW,KAAK,CAAC;AAE5C,QAAI,QAAQ,aAAa,SAAS;AAChC,UAAI;AAAE,iBAAS,cAAc,aAAa,KAAK,EAAE,OAAO,SAAS,CAAC;AAAA,MAAE,QAAQ;AAAA,MAAe;AAAA,IAC7F;AAAA,EACF;AACA,SAAO;AACT;AAKO,SAAS,uBACd,MACA,WACkB;AAClB,SAAO;AAAA,IACL,SAAS;AAAA,IACT;AAAA,IACA,MAAM,aAAa,OAAO;AACxB,YAAM,UAAU,oBAAI,IAAiC;AACrD,YAAM,UAAU,gBAAgB;AAChC,YAAM,YAAsB,CAAC;AAE7B,UAAI;AAEF,mBAAW,EAAE,OAAO,QAAQ,KAAK,OAAO;AACtC,gBAAM,OAAO,KAAK,SAAS,UAAU,OAAO,MAAM;AAClD,wBAAc,MAAM,KAAK;AACzB,oBAAU,KAAK,IAAI;AAAA,QACrB;AAGA,YAAI;AACJ,YAAI,SAAS,SAAS;AACpB,mBAAS,MAAM,kBAAkB,SAAS;AAAA,QAC5C,OAAO;AACL,mBAAS,MAAM,aAAa,MAAM,SAAS;AAAA,QAC7C;AAGA,cAAM,UAAU,eAAe,OAAO,KAAK,CAAC;AAC5C,cAAM,QAAQ,QAAQ,MAAM,yBAAyB,EAClD,IAAI,OAAK,EAAE,KAAK,CAAC,EACjB,OAAO,OAAK,EAAE,SAAS,CAAC;AAG3B,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,gBAAM,UAAU,MAAM,CAAC,EAAE;AACzB,cAAI,IAAI,MAAM,QAAQ;AACpB,oBAAQ,IAAI,SAAS,EAAE,UAAU,MAAM,CAAC,EAAE,CAAC;AAAA,UAC7C;AAAA,QAEF;AAAA,MACF,UAAE;AAEA,mBAAW,KAAK,WAAW;AACzB,cAAI;AAAE,uBAAW,CAAC;AAAA,UAAE,QAAQ;AAAA,UAAe;AAAA,QAC7C;AAAA,MACF;AAEA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAMA,SAAS,WACP,KACA,MACA,MAC+D;AAC/D,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,QAAQ,MAAM,KAAK,MAAM;AAAA,MAC7B,KAAK,KAAK;AAAA,MACV,KAAK,QAAQ;AAAA,MACb,OAAO,CAAC,QAAQ,QAAQ,MAAM;AAAA,MAC9B,OAAO,QAAQ,aAAa;AAAA,IAC9B,CAAC;AAED,QAAI,SAAS;AACb,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,UAAM,OAAO,YAAY,OAAO;AAChC,UAAM,OAAO,YAAY,OAAO;AAChC,UAAM,OAAO,GAAG,QAAQ,CAAC,MAAc;AAAE,gBAAU;AAAA,IAAE,CAAC;AACtD,UAAM,OAAO,GAAG,
QAAQ,CAAC,MAAc;AAAE,gBAAU;AAAA,IAAE,CAAC;AAEtD,UAAM,QAAQ,WAAW,MAAM;AAC7B,eAAS;AACT,UAAI,QAAQ,aAAa,SAAS;AAChC,cAAM,KAAK;AAAA,MACb,OAAO;AACL,cAAM,KAAK,SAAS;AAAA,MACtB;AAAA,IACF,GAAG,KAAK,SAAS;AAEjB,QAAI,KAAK,UAAU,QAAW;AAC5B,YAAM,MAAM,IAAI,KAAK,KAAK;AAAA,IAC5B,OAAO;AACL,YAAM,MAAM,IAAI;AAAA,IAClB;AAEA,UAAM,GAAG,SAAS,CAAC,SAAS;AAC1B,mBAAa,KAAK;AAClB,UAAI,QAAQ;AACV,eAAO,IAAI,MAAM,6BAAS,KAAK,MAAM,KAAK,YAAY,GAAI,CAAC,SAAI,CAAC;AAAA,MAClE,OAAO;AACL,gBAAQ,EAAE,QAAQ,QAAQ,UAAU,QAAQ,EAAE,CAAC;AAAA,MACjD;AAAA,IACF,CAAC;AACD,UAAM,GAAG,SAAS,CAAC,QAAQ;AACzB,mBAAa,KAAK;AAClB,aAAO,GAAG;AAAA,IACZ,CAAC;AAAA,EACH,CAAC;AACH;AAGA,eAAe,aAAa,MAA2B,YAAuC;AAC5F,QAAM,WAAW,WAAW,IAAI,OAAK,IAAI,EAAE,QAAQ,OAAO,GAAG,CAAC,EAAE,EAAE,KAAK,IAAI;AAC3E,QAAM,SAAS,GAAG,gBAAgB;AAAA;AAAA,EAAO,QAAQ;AAEjD,MAAI;AACJ,MAAI,SAAS,UAAU;AACrB,UAAM,QAAQ,QAAQ,IAAI,uBAAuB;AACjD,WAAO,CAAC,YAAY,QAAQ,UAAU,WAAW,KAAK;AAAA,EACxD,OAAO;AACL,WAAO,CAAC,WAAW,MAAM;AACzB,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAAA,EACvC;AAEA,QAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,QAAM,SAAS,MAAM,WAAW,MAAM,MAAM;AAAA,IAC1C;AAAA,IACA,GAAI,SAAS,WAAW,EAAE,KAAK,OAAO,EAAE,IAAI,CAAC;AAAA,EAC/C,CAAC;AAED,MAAI,OAAO,aAAa,GAAG;AACzB,UAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,QAAQ;AACpE,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,MAAM,EAAE;AAAA,EAChD;AAEA,QAAM,SAAS,OAAO,UAAU;AAChC,qBAAmB,QAAQ,IAAI;AAC/B,SAAO;AACT;AAGA,eAAe,kBAAkB,YAAuC;AACtE,QAAM,UAAU,KAAK,OAAO,GAAG,sBAAsB,KAAK,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,CAAC,CAAC,MAAM;AAC5G,MAAI;AACF,UAAM,OAAO,CAAC,QAAQ,gBAAgB;AACtC,eAAW,KAAK,YAAY;AAC1B,WAAK,KAAK,WAAW,CAAC;AAAA,IACxB;AACA,SAAK,KAAK,yBAAyB,OAAO;AAC1C,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAErC,UAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,UAAM,SAAS,MAAM,WAAW,SAAS,MAAM;AAAA,MAC7C;AAAA,MACA,OAAO;AAAA,IACT,CAAC;AAED,QAAI,OAAO,aAAa,GAAG;AACzB,YAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,QAAQ;AACpE,YAAM,IAAI,MAAM,wCAAoB,MAAM,EAAE;AAAA,IAC9C;AAEA,QAAI;AACJ,QAAI;AACF,aAAO,aAAa,SAAS,OAAO;AAAA,IACtC,QAAQ;AACN,aAAO,OAAO,UAAU;AAAA,IAC1B;AACA,uBAAmB,MAAM,OAAO;AA
ChC,WAAO;AAAA,EACT,UAAE;AACA,QAAI;AAAE,iBAAW,OAAO;AAAA,IAAE,QAAQ;AAAA,IAAe;AAAA,EACnD;AACF;AAMA,SAAS,mBAAmB,QAAgB,MAAoB;AAC9D,QAAM,QAAQ,OAAO,YAAY;AACjC,MAAI,MAAM,SAAS,aAAa,KAAK,MAAM,SAAS,YAAY,GAAG;AACjE,UAAM,IAAI,MAAM,GAAG,IAAI,kDAAe,OAAO,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,EACrE;AACF;AAGA,SAAS,eAAe,MAAsB;AAC5C,QAAM,QAAQ,KAAK,MAAM,+CAA+C;AACxE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;","names":[]}
|
package/dist/chunk-Y4WFKJ5P.js
DELETED
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
// src/ocr/cli-provider.ts
|
|
4
|
-
import { spawnSync } from "child_process";
|
|
5
|
-
import { writeFileSync, readFileSync, unlinkSync, mkdirSync } from "fs";
|
|
6
|
-
import { join } from "path";
|
|
7
|
-
import { tmpdir } from "os";
|
|
8
|
-
var OCR_PROMPT = `\uC774 PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0\uC5D0\uC11C \uD14D\uC2A4\uD2B8\uC640 \uD14C\uC774\uBE14\uC744 \uCD94\uCD9C\uD558\uC5EC \uC21C\uC218 Markdown\uC73C\uB85C \uBCC0\uD658\uD558\uC138\uC694.
|
|
9
|
-
\uADDC\uCE59:
|
|
10
|
-
- \uD14C\uC774\uBE14\uC740 Markdown \uD14C\uC774\uBE14 \uBB38\uBC95 \uC0AC\uC6A9 (| \uAD6C\uBD84, |---|---| \uD5E4\uB354 \uAD6C\uBD84\uC120 \uD3EC\uD568)
|
|
11
|
-
- \uBCD1\uD569\uB41C \uC140\uC740 \uD574\uB2F9 \uC704\uCE58\uC5D0 \uB0B4\uC6A9 \uAE30\uC7AC
|
|
12
|
-
- \uD5E4\uB529\uC740 \uAE00\uC790 \uD06C\uAE30\uC5D0 \uB530\uB77C ## ~ ###### \uC0AC\uC6A9
|
|
13
|
-
- \uB9AC\uC2A4\uD2B8\uB294 - \uB610\uB294 1. \uC0AC\uC6A9
|
|
14
|
-
- \uC774\uBBF8\uC9C0, \uB3C4\uD615 \uB4F1 \uBE44\uD14D\uC2A4\uD2B8 \uC694\uC18C\uB294 \uBB34\uC2DC
|
|
15
|
-
- \uC6D0\uBB38\uC758 \uC77D\uAE30 \uC21C\uC11C\uC640 \uAD6C\uC870\uB97C \uC720\uC9C0
|
|
16
|
-
- \`\`\`\uB85C \uAC10\uC2F8\uC9C0 \uB9D0\uACE0 \uC21C\uC218 Markdown\uB9CC \uCD9C\uB825`;
|
|
17
|
-
var _tempDir = null;
|
|
18
|
-
function getTempDir() {
|
|
19
|
-
if (!_tempDir) {
|
|
20
|
-
_tempDir = join(process.cwd(), ".kordoc_ocr_tmp");
|
|
21
|
-
mkdirSync(_tempDir, { recursive: true });
|
|
22
|
-
}
|
|
23
|
-
return _tempDir;
|
|
24
|
-
}
|
|
25
|
-
function createCliOcrProvider(mode) {
|
|
26
|
-
return async (pageImage, pageNumber) => {
|
|
27
|
-
const tempPath = join(getTempDir(), `page-${pageNumber}.png`);
|
|
28
|
-
try {
|
|
29
|
-
writeFileSync(tempPath, pageImage);
|
|
30
|
-
let output;
|
|
31
|
-
if (mode === "ollama") {
|
|
32
|
-
output = await callOllamaApi(tempPath);
|
|
33
|
-
} else {
|
|
34
|
-
output = callCli(mode, tempPath);
|
|
35
|
-
}
|
|
36
|
-
return { markdown: stripCodeFence(output.trim()) };
|
|
37
|
-
} finally {
|
|
38
|
-
try {
|
|
39
|
-
unlinkSync(tempPath);
|
|
40
|
-
} catch {
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
};
|
|
44
|
-
}
|
|
45
|
-
function checkForLimitError(output, mode) {
|
|
46
|
-
const lower = output.toLowerCase();
|
|
47
|
-
if (lower.includes("usage limit") || lower.includes("rate limit")) {
|
|
48
|
-
throw new Error(`${mode} \uC0AC\uC6A9\uB7C9/\uC18D\uB3C4 \uC81C\uD55C: ${output.trim().slice(0, 200)}`);
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
function callCli(mode, imagePath) {
|
|
52
|
-
if (mode === "codex") {
|
|
53
|
-
return callCodexCli(imagePath);
|
|
54
|
-
}
|
|
55
|
-
const args = buildCliArgs(mode, imagePath);
|
|
56
|
-
const result = spawnSync(mode, args, {
|
|
57
|
-
encoding: "utf-8",
|
|
58
|
-
timeout: 6e5,
|
|
59
|
-
maxBuffer: 10 * 1024 * 1024,
|
|
60
|
-
shell: process.platform === "win32",
|
|
61
|
-
// claude: /tmp에서 실행하여 프로젝트 CLAUDE.md의 규칙 간섭 방지
|
|
62
|
-
...mode === "claude" ? { cwd: tmpdir() } : {}
|
|
63
|
-
});
|
|
64
|
-
if (result.error) {
|
|
65
|
-
throw new Error(`${mode} CLI \uC2E4\uD589 \uC2E4\uD328: ${result.error.message}`);
|
|
66
|
-
}
|
|
67
|
-
if (result.status !== 0) {
|
|
68
|
-
const errMsg = result.stderr?.trim() || `exit code ${result.status}`;
|
|
69
|
-
throw new Error(`${mode} OCR \uC2E4\uD328: ${errMsg}`);
|
|
70
|
-
}
|
|
71
|
-
const output = result.stdout || "";
|
|
72
|
-
checkForLimitError(output, mode);
|
|
73
|
-
return output;
|
|
74
|
-
}
|
|
75
|
-
function callCodexCli(imagePath) {
|
|
76
|
-
const outPath = join(tmpdir(), `kordoc-codex-out-${Date.now()}.txt`);
|
|
77
|
-
try {
|
|
78
|
-
const args = ["exec", OCR_PROMPT, "--image", imagePath, "--output-last-message", outPath];
|
|
79
|
-
const model = process.env.KORDOC_CODEX_MODEL;
|
|
80
|
-
if (model) args.push("--model", model);
|
|
81
|
-
const result = spawnSync("codex", args, {
|
|
82
|
-
encoding: "utf-8",
|
|
83
|
-
timeout: 18e4,
|
|
84
|
-
maxBuffer: 10 * 1024 * 1024,
|
|
85
|
-
input: "",
|
|
86
|
-
// stdin EOF 즉시 전달 (대화형 입력 차단)
|
|
87
|
-
shell: process.platform === "win32"
|
|
88
|
-
});
|
|
89
|
-
if (result.error) {
|
|
90
|
-
throw new Error(`codex CLI \uC2E4\uD589 \uC2E4\uD328: ${result.error.message}`);
|
|
91
|
-
}
|
|
92
|
-
if (result.status !== 0) {
|
|
93
|
-
const errMsg = result.stderr?.trim() || `exit code ${result.status}`;
|
|
94
|
-
throw new Error(`codex OCR \uC2E4\uD328: ${errMsg}`);
|
|
95
|
-
}
|
|
96
|
-
let text;
|
|
97
|
-
try {
|
|
98
|
-
text = readFileSync(outPath, "utf-8");
|
|
99
|
-
} catch {
|
|
100
|
-
text = result.stdout || "";
|
|
101
|
-
}
|
|
102
|
-
checkForLimitError(text, "codex");
|
|
103
|
-
return text;
|
|
104
|
-
} finally {
|
|
105
|
-
try {
|
|
106
|
-
unlinkSync(outPath);
|
|
107
|
-
} catch {
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
function buildCliArgs(mode, imagePath) {
|
|
112
|
-
const normalizedPath = imagePath.replace(/\\/g, "/");
|
|
113
|
-
const promptWithImage = `${OCR_PROMPT}
|
|
114
|
-
|
|
115
|
-
\uC774\uBBF8\uC9C0: @${normalizedPath}`;
|
|
116
|
-
switch (mode) {
|
|
117
|
-
case "gemini": {
|
|
118
|
-
const args = ["--prompt", promptWithImage, "--yolo"];
|
|
119
|
-
const model = process.env.KORDOC_GEMINI_MODEL;
|
|
120
|
-
if (model) args.push("--model", model);
|
|
121
|
-
return args;
|
|
122
|
-
}
|
|
123
|
-
case "claude": {
|
|
124
|
-
const args = ["--print", promptWithImage];
|
|
125
|
-
const model = process.env.KORDOC_CLAUDE_MODEL;
|
|
126
|
-
if (model) args.push("--model", model);
|
|
127
|
-
return args;
|
|
128
|
-
}
|
|
129
|
-
default:
|
|
130
|
-
throw new Error(`\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 CLI: ${mode}`);
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
async function callOllamaApi(imagePath) {
|
|
134
|
-
const { readFileSync: readFileSync2 } = await import("fs");
|
|
135
|
-
const imageBase64 = readFileSync2(imagePath).toString("base64");
|
|
136
|
-
const model = process.env.KORDOC_OLLAMA_MODEL || "qwen3-vl:8b";
|
|
137
|
-
const host = process.env.KORDOC_OLLAMA_HOST || "http://localhost:11434";
|
|
138
|
-
const timeoutMs = Number(process.env.KORDOC_OLLAMA_TIMEOUT) || 12e4;
|
|
139
|
-
const response = await fetch(`${host}/api/chat`, {
|
|
140
|
-
method: "POST",
|
|
141
|
-
headers: { "Content-Type": "application/json" },
|
|
142
|
-
body: JSON.stringify({
|
|
143
|
-
model,
|
|
144
|
-
messages: [{
|
|
145
|
-
role: "user",
|
|
146
|
-
content: OCR_PROMPT,
|
|
147
|
-
images: [imageBase64]
|
|
148
|
-
}],
|
|
149
|
-
stream: false
|
|
150
|
-
}),
|
|
151
|
-
signal: AbortSignal.timeout(timeoutMs)
|
|
152
|
-
});
|
|
153
|
-
if (!response.ok) {
|
|
154
|
-
throw new Error(`Ollama API \uC624\uB958: ${response.status} ${response.statusText}`);
|
|
155
|
-
}
|
|
156
|
-
const data = await response.json();
|
|
157
|
-
return data.message?.content || "";
|
|
158
|
-
}
|
|
159
|
-
function stripCodeFence(text) {
|
|
160
|
-
const match = text.match(/^```(?:markdown|md)?\s*([\s\S]*?)```\s*$/m);
|
|
161
|
-
return match ? match[1].trim() : text;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
export {
|
|
165
|
-
createCliOcrProvider
|
|
166
|
-
};
|
|
167
|
-
//# sourceMappingURL=chunk-Y4WFKJ5P.js.map
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|