@clazic/kordoc 2.5.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -1
- package/dist/batch-provider-XRF6F26E.js +234 -0
- package/dist/batch-provider-XRF6F26E.js.map +1 -0
- package/dist/chunk-S7BHLD2V.js +200 -0
- package/dist/{chunk-Y4WFKJ5P.js.map → chunk-S7BHLD2V.js.map} +1 -1
- package/dist/{chunk-IJGNPAK2.js → chunk-TND4YFBV.js} +2 -2
- package/dist/{chunk-QG6BYZMR.js → chunk-TS3F57LY.js} +160 -8
- package/dist/chunk-TS3F57LY.js.map +1 -0
- package/dist/cli.js +53 -6
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +420 -145
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +71 -2
- package/dist/index.d.ts +71 -2
- package/dist/index.js +407 -135
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +44 -3
- package/dist/mcp.js.map +1 -1
- package/dist/{resolve-XWYJYKKH.js → resolve-ZSUEJK3E.js} +4 -4
- package/dist/{utils-RBXHHCLI.js → utils-F66K7PXH.js} +2 -2
- package/dist/{watch-5CCMTZ7F.js → watch-2S5ULHAM.js} +4 -4
- package/package.json +1 -1
- package/dist/batch-provider-5BFJRKAZ.js +0 -190
- package/dist/batch-provider-5BFJRKAZ.js.map +0 -1
- package/dist/chunk-QG6BYZMR.js.map +0 -1
- package/dist/chunk-Y4WFKJ5P.js +0 -167
- /package/dist/{chunk-IJGNPAK2.js.map → chunk-TND4YFBV.js.map} +0 -0
- /package/dist/{resolve-XWYJYKKH.js.map → resolve-ZSUEJK3E.js.map} +0 -0
- /package/dist/{utils-RBXHHCLI.js.map → utils-F66K7PXH.js.map} +0 -0
- /package/dist/{watch-5CCMTZ7F.js.map → watch-2S5ULHAM.js.map} +0 -0
package/dist/chunk-Y4WFKJ5P.js
DELETED
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
// src/ocr/cli-provider.ts
|
|
4
|
-
import { spawnSync } from "child_process";
|
|
5
|
-
import { writeFileSync, readFileSync, unlinkSync, mkdirSync } from "fs";
|
|
6
|
-
import { join } from "path";
|
|
7
|
-
import { tmpdir } from "os";
|
|
8
|
-
/**
 * Prompt text handed to the OCR backends in this module.
 *
 * The prompt is written in Korean. Approximate English rendering:
 *   "Extract the text and tables from this PDF page image and convert them to pure Markdown.
 *    Rules:
 *    - use Markdown table syntax for tables (| separators, including the |---|---| header rule)
 *    - write the content of merged cells at the corresponding position
 *    - use ## ~ ###### headings according to font size
 *    - use - or 1. for lists
 *    - ignore images, shapes and other non-text elements
 *    - keep the reading order and structure of the original
 *    - do not wrap the answer in ```; output pure Markdown only"
 */
const OCR_PROMPT = [
  "\uC774 PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0\uC5D0\uC11C \uD14D\uC2A4\uD2B8\uC640 \uD14C\uC774\uBE14\uC744 \uCD94\uCD9C\uD558\uC5EC \uC21C\uC218 Markdown\uC73C\uB85C \uBCC0\uD658\uD558\uC138\uC694.",
  "\uADDC\uCE59:",
  "- \uD14C\uC774\uBE14\uC740 Markdown \uD14C\uC774\uBE14 \uBB38\uBC95 \uC0AC\uC6A9 (| \uAD6C\uBD84, |---|---| \uD5E4\uB354 \uAD6C\uBD84\uC120 \uD3EC\uD568)",
  "- \uBCD1\uD569\uB41C \uC140\uC740 \uD574\uB2F9 \uC704\uCE58\uC5D0 \uB0B4\uC6A9 \uAE30\uC7AC",
  "- \uD5E4\uB529\uC740 \uAE00\uC790 \uD06C\uAE30\uC5D0 \uB530\uB77C ## ~ ###### \uC0AC\uC6A9",
  "- \uB9AC\uC2A4\uD2B8\uB294 - \uB610\uB294 1. \uC0AC\uC6A9",
  "- \uC774\uBBF8\uC9C0, \uB3C4\uD615 \uB4F1 \uBE44\uD14D\uC2A4\uD2B8 \uC694\uC18C\uB294 \uBB34\uC2DC",
  "- \uC6D0\uBB38\uC758 \uC77D\uAE30 \uC21C\uC11C\uC640 \uAD6C\uC870\uB97C \uC720\uC9C0",
  "- ```\uB85C \uAC10\uC2F8\uC9C0 \uB9D0\uACE0 \uC21C\uC218 Markdown\uB9CC \uCD9C\uB825",
].join("\n");
|
|
17
|
-
/** Cached path of the scratch directory; stays null until getTempDir() is first called. */
let _tempDir = null;

/**
 * Returns the scratch directory that page images are written to.
 *
 * The path is `<cwd>/.kordoc_ocr_tmp`, resolved against the working directory
 * at the time of the first call and cached afterwards.
 *
 * @returns {string} Path of an existing scratch directory.
 * @throws {Error} Whatever `mkdirSync` throws when the directory cannot be created.
 */
function getTempDir() {
  if (!_tempDir) {
    _tempDir = join(process.cwd(), ".kordoc_ocr_tmp");
  }
  // With `recursive: true` mkdirSync does nothing when the directory already
  // exists, so running it on every access re-creates the directory if it was
  // removed after the path had been cached.
  mkdirSync(_tempDir, { recursive: true });
  return _tempDir;
}
|
|
25
|
-
/**
 * Creates an OCR provider function bound to one backend.
 *
 * @param {string} mode - `"ollama"` sends the page to `callOllamaApi`; every
 *   other value is forwarded to `callCli`.
 * @returns {(pageImage: any, pageNumber: any) => Promise<{ markdown: string }>}
 *   Async function that writes the page image to `page-<pageNumber>.png` in the
 *   scratch directory, runs the backend on that file and resolves with the
 *   trimmed, fence-stripped output. The image file is removed afterwards,
 *   whether the backend succeeded or threw.
 */
function createCliOcrProvider(mode) {
  const useOllama = mode === "ollama";

  return async (pageImage, pageNumber) => {
    const imageFile = join(getTempDir(), `page-${pageNumber}.png`);
    try {
      writeFileSync(imageFile, pageImage);
      const rawOutput = useOllama ? await callOllamaApi(imageFile) : callCli(mode, imageFile);
      const markdown = stripCodeFence(rawOutput.trim());
      return { markdown };
    } finally {
      try {
        unlinkSync(imageFile);
      } catch {
        // Best-effort cleanup: a failure to delete the image is deliberately ignored.
      }
    }
  };
}
|
|
45
|
-
/**
 * Throws when backend output mentions a usage or rate limit.
 *
 * The check is a case-insensitive substring search for "usage limit" or
 * "rate limit".
 *
 * @param {string} output - Text produced by the backend.
 * @param {string} mode - Backend name, used as the prefix of the error message.
 * @returns {void}
 * @throws {Error} When a limit phrase is found; the message carries the first
 *   200 characters of the trimmed output.
 */
function checkForLimitError(output, mode) {
  const haystack = output.toLowerCase();
  const limitPhrases = ["usage limit", "rate limit"];
  const limitHit = limitPhrases.some((phrase) => haystack.includes(phrase));
  if (!limitHit) {
    return;
  }
  const excerpt = output.trim().slice(0, 200);
  throw new Error(`${mode} \uC0AC\uC6A9\uB7C9/\uC18D\uB3C4 \uC81C\uD55C: ${excerpt}`);
}
|
|
51
|
-
/**
 * Runs an OCR CLI synchronously on one image and returns what it printed.
 *
 * `"codex"` is delegated to `callCodexCli`; for any other mode the executable
 * named `mode` is spawned with the arguments from `buildCliArgs`.
 *
 * @param {string} mode - Name of the CLI executable.
 * @param {string} imagePath - Path of the page image.
 * @returns {string} The process's stdout (empty string when there is none).
 * @throws {Error} When the process cannot be run, exits with a non-zero status,
 *   or its output trips `checkForLimitError`.
 */
function callCli(mode, imagePath) {
  if (mode === "codex") {
    return callCodexCli(imagePath);
  }

  const cliArgs = buildCliArgs(mode, imagePath);
  const spawnOptions = {
    encoding: "utf-8",
    timeout: 6e5,
    maxBuffer: 10 * 1024 * 1024,
    // NOTE(review): on Windows the command runs through a shell, so the prompt
    // and path arguments are subject to shell interpretation.
    shell: process.platform === "win32",
  };
  if (mode === "claude") {
    // claude: run from the OS temp directory so that the rules in the project's
    // CLAUDE.md do not interfere.
    spawnOptions.cwd = tmpdir();
  }

  const { error, status, stderr, stdout } = spawnSync(mode, cliArgs, spawnOptions);
  if (error) {
    throw new Error(`${mode} CLI \uC2E4\uD589 \uC2E4\uD328: ${error.message}`);
  }
  if (status !== 0) {
    const reason = stderr?.trim() || `exit code ${status}`;
    throw new Error(`${mode} OCR \uC2E4\uD328: ${reason}`);
  }

  const printed = stdout || "";
  checkForLimitError(printed, mode);
  return printed;
}
|
|
75
|
-
/**
 * Runs `codex exec` on one image and returns the model's reply.
 *
 * The CLI is asked to write its last message to a file in the OS temp
 * directory; when that file cannot be read, stdout is used instead. The file
 * is removed before returning, on success and on failure.
 *
 * @param {string} imagePath - Path of the page image, passed via `--image`.
 * @returns {string} Contents of the last-message file, or stdout as a fallback.
 * @throws {Error} When the process cannot be run, exits with a non-zero status,
 *   or the reply trips `checkForLimitError`.
 */
function callCodexCli(imagePath) {
  const replyFile = join(tmpdir(), `kordoc-codex-out-${Date.now()}.txt`);
  try {
    const codexArgs = ["exec", OCR_PROMPT, "--image", imagePath, "--output-last-message", replyFile];
    const modelOverride = process.env.KORDOC_CODEX_MODEL;
    if (modelOverride) {
      codexArgs.push("--model", modelOverride);
    }

    const { error, status, stderr, stdout } = spawnSync("codex", codexArgs, {
      encoding: "utf-8",
      timeout: 18e4,
      maxBuffer: 10 * 1024 * 1024,
      // Empty stdin delivers EOF immediately, which blocks interactive input.
      input: "",
      shell: process.platform === "win32",
    });
    if (error) {
      throw new Error(`codex CLI \uC2E4\uD589 \uC2E4\uD328: ${error.message}`);
    }
    if (status !== 0) {
      const reason = stderr?.trim() || `exit code ${status}`;
      throw new Error(`codex OCR \uC2E4\uD328: ${reason}`);
    }

    let reply;
    try {
      reply = readFileSync(replyFile, "utf-8");
    } catch {
      // Reply file unreadable: fall back to whatever the process printed.
      reply = stdout || "";
    }
    checkForLimitError(reply, "codex");
    return reply;
  } finally {
    try {
      unlinkSync(replyFile);
    } catch {
      // Best-effort cleanup: the file may never have been created.
    }
  }
}
|
|
111
|
-
/**
 * Builds the argument list for the `gemini` or `claude` CLI.
 *
 * The prompt is `OCR_PROMPT`, a blank line, and an `@<path>` reference to the
 * image with backslashes turned into forward slashes. A `--model` option is
 * appended when the matching environment variable (`KORDOC_GEMINI_MODEL` or
 * `KORDOC_CLAUDE_MODEL`) is set to a non-empty value.
 *
 * @param {string} mode - `"gemini"` or `"claude"`.
 * @param {string} imagePath - Path of the page image.
 * @returns {string[]} Arguments to pass to the CLI.
 * @throws {Error} When `mode` is neither of the supported values.
 */
function buildCliArgs(mode, imagePath) {
  const slashPath = imagePath.replace(/\\/g, "/");
  const promptWithImage = `${OCR_PROMPT}\n\n\uC774\uBBF8\uC9C0: @${slashPath}`;

  let cliArgs;
  let modelOverride;
  if (mode === "gemini") {
    cliArgs = ["--prompt", promptWithImage, "--yolo"];
    modelOverride = process.env.KORDOC_GEMINI_MODEL;
  } else if (mode === "claude") {
    cliArgs = ["--print", promptWithImage];
    modelOverride = process.env.KORDOC_CLAUDE_MODEL;
  } else {
    throw new Error(`\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 CLI: ${mode}`);
  }

  if (modelOverride) {
    cliArgs.push("--model", modelOverride);
  }
  return cliArgs;
}
|
|
133
|
-
/**
 * Sends one page image to an Ollama server's `/api/chat` endpoint and returns
 * the reply text.
 *
 * Configuration is read from the environment:
 * - `KORDOC_OLLAMA_MODEL`   model name (falls back to "qwen3-vl:8b")
 * - `KORDOC_OLLAMA_HOST`    base URL (falls back to "http://localhost:11434")
 * - `KORDOC_OLLAMA_TIMEOUT` request timeout in milliseconds (falls back to 120000)
 *
 * @param {string} imagePath - Path of the page image; it is read and sent base64-encoded.
 * @returns {Promise<string>} `message.content` of the JSON response, or "" when absent.
 * @throws {Error} When the image cannot be read, the request fails or times
 *   out, or the server answers with a non-OK status.
 */
async function callOllamaApi(imagePath) {
  // Uses the module's static `readFileSync` import; no dynamic import of "fs" is needed.
  const imageBase64 = readFileSync(imagePath).toString("base64");
  const model = process.env.KORDOC_OLLAMA_MODEL || "qwen3-vl:8b";

  // Drop trailing slashes so a host such as "http://host:11434/" does not
  // produce a "//api/chat" request path.
  const host = (process.env.KORDOC_OLLAMA_HOST || "http://localhost:11434").replace(/\/+$/, "");

  // AbortSignal.timeout() rejects negative and (in Node.js) non-integer
  // delays, so anything that is not a positive finite number falls back to the
  // default, and fractions are truncated.
  const requestedTimeout = Math.trunc(Number(process.env.KORDOC_OLLAMA_TIMEOUT));
  const timeoutMs = Number.isFinite(requestedTimeout) && requestedTimeout > 0 ? requestedTimeout : 12e4;

  const response = await fetch(`${host}/api/chat`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model,
      messages: [{
        role: "user",
        content: OCR_PROMPT,
        images: [imageBase64],
      }],
      stream: false,
    }),
    signal: AbortSignal.timeout(timeoutMs),
  });
  if (!response.ok) {
    throw new Error(`Ollama API \uC624\uB958: ${response.status} ${response.statusText}`);
  }
  const data = await response.json();
  return data.message?.content || "";
}
|
|
159
|
-
/**
 * Removes a Markdown code fence that wraps the ENTIRE text.
 *
 * Only an outer fence is stripped: the text (ignoring surrounding whitespace)
 * must start with ``` — optionally tagged `markdown` or `md` — and end with
 * ```. Text that merely contains a fenced block somewhere inside it is
 * returned unchanged, so a code block in the middle of a page no longer
 * replaces the rest of the page.
 *
 * @param {string} text - Model output.
 * @returns {string} The trimmed content of the outer fence, or `text` itself
 *   when it is not wrapped in one.
 */
function stripCodeFence(text) {
  // No `m` flag: `^` and `$` must anchor to the whole string. With `m` they
  // match at every line boundary, which made the pattern pick up the first
  // fenced block anywhere in the text and cut wrapped output short at the
  // first inner closing fence.
  const match = text.match(/^\s*```(?:markdown|md)?\s*([\s\S]*?)```\s*$/);
  return match ? match[1].trim() : text;
}
|
|
163
|
-
|
|
164
|
-
export {
|
|
165
|
-
createCliOcrProvider
|
|
166
|
-
};
|
|
167
|
-
//# sourceMappingURL=chunk-Y4WFKJ5P.js.map
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|