@clazic/kordoc 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/{chunk-JJMA5HGQ.js → chunk-3NF22UFF.js} +3 -3
- package/dist/{chunk-XWET7ONC.js → chunk-7MXQWWUW.js} +2 -2
- package/dist/cli.js +5 -5
- package/dist/index.cjs +29 -44
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +27 -42
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{resolve-Y3KMGD3R.js → resolve-Z4DEPDUS.js} +27 -35
- package/dist/resolve-Z4DEPDUS.js.map +1 -0
- package/dist/{tesseract-provider-MZ37ZKQW.js → tesseract-provider-UNJOI25M.js} +3 -10
- package/dist/tesseract-provider-UNJOI25M.js.map +1 -0
- package/dist/{utils-4NP2VUFW.js → utils-I4UIMOH7.js} +2 -2
- package/dist/{watch-4VVWG2WC.js → watch-XPLMUIZB.js} +3 -3
- package/package.json +2 -1
- package/dist/resolve-Y3KMGD3R.js.map +0 -1
- package/dist/tesseract-provider-MZ37ZKQW.js.map +0 -1
- /package/dist/{chunk-JJMA5HGQ.js.map → chunk-3NF22UFF.js.map} +0 -0
- /package/dist/{chunk-XWET7ONC.js.map → chunk-7MXQWWUW.js.map} +0 -0
- /package/dist/{utils-4NP2VUFW.js.map → utils-I4UIMOH7.js.map} +0 -0
- /package/dist/{watch-4VVWG2WC.js.map → watch-XPLMUIZB.js.map} +0 -0
package/README.md
CHANGED
|
@@ -190,7 +190,7 @@ kordoc scan.pdf --ocr gemini # gemini CLI 강제 지정
|
|
|
190
190
|
kordoc scan.pdf --ocr claude # claude CLI 강제 지정
|
|
191
191
|
kordoc scan.pdf --ocr codex # codex CLI 강제 지정
|
|
192
192
|
kordoc scan.pdf --ocr ollama # Ollama REST API 사용 (기본 모델: gemma4:27b)
|
|
193
|
-
kordoc scan.pdf --ocr tesseract # tesseract.js 사용 (
|
|
193
|
+
kordoc scan.pdf --ocr tesseract # 내장 tesseract.js 사용 (별도 설치 불필요)
|
|
194
194
|
kordoc scan.pdf --ocr off # OCR 비활성화
|
|
195
195
|
```
|
|
196
196
|
|
|
@@ -238,7 +238,7 @@ kordoc scan.pdf --ocr gemini # gemini CLI 지정
|
|
|
238
238
|
kordoc scan.pdf --ocr claude # claude CLI 지정
|
|
239
239
|
kordoc scan.pdf --ocr codex # codex CLI 지정
|
|
240
240
|
kordoc scan.pdf --ocr ollama # Ollama (KORDOC_OLLAMA_MODEL 환경변수로 모델 지정)
|
|
241
|
-
kordoc scan.pdf --ocr tesseract # tesseract.js (
|
|
241
|
+
kordoc scan.pdf --ocr tesseract # 내장 tesseract.js (별도 설치 불필요)
|
|
242
242
|
|
|
243
243
|
# Markdown → 문서 변환
|
|
244
244
|
kordoc convert 보고서.md # → 보고서.hwpx (기본)
|
|
@@ -6,7 +6,7 @@ import {
|
|
|
6
6
|
precheckZipSize,
|
|
7
7
|
sanitizeHref,
|
|
8
8
|
toArrayBuffer
|
|
9
|
-
} from "./chunk-XWET7ONC.js";
|
|
9
|
+
} from "./chunk-7MXQWWUW.js";
|
|
10
10
|
import {
|
|
11
11
|
parsePageRange
|
|
12
12
|
} from "./chunk-MOL7MDBG.js";
|
|
@@ -5406,7 +5406,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
5406
5406
|
const ocrMode = options?.ocrMode;
|
|
5407
5407
|
if (!ocrProvider && ocrMode && ocrMode !== "off") {
|
|
5408
5408
|
try {
|
|
5409
|
-
const { resolveOcrProvider } = await import("./resolve-Y3KMGD3R.js");
|
|
5409
|
+
const { resolveOcrProvider } = await import("./resolve-Z4DEPDUS.js");
|
|
5410
5410
|
ocrProvider = await resolveOcrProvider(ocrMode, warnings);
|
|
5411
5411
|
} catch (resolveErr) {
|
|
5412
5412
|
if (ocrMode !== "auto") {
|
|
@@ -9614,4 +9614,4 @@ export {
|
|
|
9614
9614
|
cfb/cfb.js:
|
|
9615
9615
|
(*! crc32.js (C) 2014-present SheetJS -- http://sheetjs.com *)
|
|
9616
9616
|
*/
|
|
9617
|
-
//# sourceMappingURL=chunk-JJMA5HGQ.js.map
|
|
9617
|
+
//# sourceMappingURL=chunk-3NF22UFF.js.map
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/utils.ts
|
|
4
|
-
var VERSION = true ? "2.2.0" : "0.0.0-dev";
|
|
4
|
+
var VERSION = true ? "2.2.1" : "0.0.0-dev";
|
|
5
5
|
function toArrayBuffer(buf) {
|
|
6
6
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
7
7
|
return buf.buffer;
|
|
@@ -90,4 +90,4 @@ export {
|
|
|
90
90
|
sanitizeHref,
|
|
91
91
|
classifyError
|
|
92
92
|
};
|
|
93
|
-
//# sourceMappingURL=chunk-XWET7ONC.js.map
|
|
93
|
+
//# sourceMappingURL=chunk-7MXQWWUW.js.map
|
package/dist/cli.js
CHANGED
|
@@ -4,11 +4,11 @@ import {
|
|
|
4
4
|
markdownToHwpx,
|
|
5
5
|
markdownToXlsx,
|
|
6
6
|
parse
|
|
7
|
-
} from "./chunk-JJMA5HGQ.js";
|
|
7
|
+
} from "./chunk-3NF22UFF.js";
|
|
8
8
|
import {
|
|
9
9
|
VERSION,
|
|
10
10
|
toArrayBuffer
|
|
11
|
-
} from "./chunk-XWET7ONC.js";
|
|
11
|
+
} from "./chunk-7MXQWWUW.js";
|
|
12
12
|
import "./chunk-MOL7MDBG.js";
|
|
13
13
|
import "./chunk-ZWE3DS7E.js";
|
|
14
14
|
|
|
@@ -121,7 +121,7 @@ program.enablePositionalOptions().name("kordoc").description("\uBAA8\uB450 \uD30
|
|
|
121
121
|
saveImages(absPath);
|
|
122
122
|
}
|
|
123
123
|
} catch (err) {
|
|
124
|
-
const { sanitizeError } = await import("./utils-4NP2VUFW.js");
|
|
124
|
+
const { sanitizeError } = await import("./utils-I4UIMOH7.js");
|
|
125
125
|
process.stderr.write(`
|
|
126
126
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
127
127
|
`);
|
|
@@ -195,7 +195,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
195
195
|
`));
|
|
196
196
|
}
|
|
197
197
|
} catch (err) {
|
|
198
|
-
const { sanitizeError } = await import("./utils-4NP2VUFW.js");
|
|
198
|
+
const { sanitizeError } = await import("./utils-I4UIMOH7.js");
|
|
199
199
|
process.stderr.write(` FAIL
|
|
200
200
|
`);
|
|
201
201
|
process.stderr.write(` \u2192 ${sanitizeError(err)}
|
|
@@ -204,7 +204,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
204
204
|
}
|
|
205
205
|
});
|
|
206
206
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
207
|
-
const { watchDirectory } = await import("./watch-4VVWG2WC.js");
|
|
207
|
+
const { watchDirectory } = await import("./watch-XPLMUIZB.js");
|
|
208
208
|
await watchDirectory({
|
|
209
209
|
dir,
|
|
210
210
|
outDir: opts.outDir,
|
package/dist/index.cjs
CHANGED
|
@@ -1961,8 +1961,7 @@ function detectAvailableOcr() {
|
|
|
1961
1961
|
for (const cli of CLI_PRIORITY) {
|
|
1962
1962
|
if (isCliInstalled(cli)) return cli;
|
|
1963
1963
|
}
|
|
1964
|
-
|
|
1965
|
-
return null;
|
|
1964
|
+
return "tesseract";
|
|
1966
1965
|
}
|
|
1967
1966
|
function isCliInstalled(name) {
|
|
1968
1967
|
try {
|
|
@@ -1973,25 +1972,8 @@ function isCliInstalled(name) {
|
|
|
1973
1972
|
return false;
|
|
1974
1973
|
}
|
|
1975
1974
|
}
|
|
1976
|
-
function isTesseractAvailable() {
|
|
1977
|
-
try {
|
|
1978
|
-
const require2 = (0, import_module.createRequire)(import_meta.url);
|
|
1979
|
-
require2.resolve("tesseract.js");
|
|
1980
|
-
return true;
|
|
1981
|
-
} catch {
|
|
1982
|
-
return false;
|
|
1983
|
-
}
|
|
1984
|
-
}
|
|
1985
1975
|
function validateOcrMode(mode) {
|
|
1986
|
-
if (mode === "auto" || mode === "off") return;
|
|
1987
|
-
if (mode === "tesseract") {
|
|
1988
|
-
if (!isTesseractAvailable()) {
|
|
1989
|
-
throw new Error(
|
|
1990
|
-
"tesseract.js\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4.\n\uC124\uCE58: npm install tesseract.js"
|
|
1991
|
-
);
|
|
1992
|
-
}
|
|
1993
|
-
return;
|
|
1994
|
-
}
|
|
1976
|
+
if (mode === "auto" || mode === "off" || mode === "tesseract") return;
|
|
1995
1977
|
if (!isCliInstalled(mode)) {
|
|
1996
1978
|
throw new Error(`'${mode}' CLI\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4.
|
|
1997
1979
|
${getInstallGuide(mode)}`);
|
|
@@ -2006,13 +1988,22 @@ function getInstallGuide(mode) {
|
|
|
2006
1988
|
};
|
|
2007
1989
|
return guides[mode] || `'${mode}'\uC744(\uB97C) \uC124\uCE58\uD574\uC8FC\uC138\uC694.`;
|
|
2008
1990
|
}
|
|
2009
|
-
|
|
1991
|
+
function getTesseractFallbackMessage() {
|
|
1992
|
+
return [
|
|
1993
|
+
"\uC124\uCE58\uB41C AI CLI\uAC00 \uC5C6\uC5B4 \uB0B4\uC7A5 tesseract.js\uB85C OCR\uC744 \uC218\uD589\uD569\uB2C8\uB2E4.",
|
|
1994
|
+
"\uB354 \uB098\uC740 \uD488\uC9C8(\uD14C\uC774\uBE14/\uD5E4\uB529 \uAD6C\uC870 \uBCF4\uC874)\uC744 \uC704\uD574 AI CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4:",
|
|
1995
|
+
"",
|
|
1996
|
+
" [\uAD8C\uC7A5] Gemini CLI: https://ai.google.dev/gemini-api/docs/cli",
|
|
1997
|
+
" Claude CLI: npm install -g @anthropic-ai/claude-code",
|
|
1998
|
+
" Codex CLI: npm install -g @openai/codex",
|
|
1999
|
+
" Ollama: brew install ollama (+ ollama pull gemma4:27b)"
|
|
2000
|
+
].join("\n");
|
|
2001
|
+
}
|
|
2002
|
+
var import_child_process, CLI_PRIORITY;
|
|
2010
2003
|
var init_auto_detect = __esm({
|
|
2011
2004
|
"src/ocr/auto-detect.ts"() {
|
|
2012
2005
|
"use strict";
|
|
2013
2006
|
import_child_process = require("child_process");
|
|
2014
|
-
import_module = require("module");
|
|
2015
|
-
import_meta = {};
|
|
2016
2007
|
CLI_PRIORITY = ["gemini", "claude", "codex", "ollama"];
|
|
2017
2008
|
}
|
|
2018
2009
|
});
|
|
@@ -2122,15 +2113,7 @@ __export(tesseract_provider_exports, {
|
|
|
2122
2113
|
createTesseractProvider: () => createTesseractProvider
|
|
2123
2114
|
});
|
|
2124
2115
|
async function createTesseractProvider() {
|
|
2125
|
-
|
|
2126
|
-
try {
|
|
2127
|
-
tesseract = await import("tesseract.js");
|
|
2128
|
-
} catch {
|
|
2129
|
-
throw new Error(
|
|
2130
|
-
"tesseract.js\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4.\n\uC124\uCE58: npm install tesseract.js"
|
|
2131
|
-
);
|
|
2132
|
-
}
|
|
2133
|
-
const worker = await tesseract.createWorker("kor+eng");
|
|
2116
|
+
const worker = await (0, import_tesseract.createWorker)("kor+eng");
|
|
2134
2117
|
let terminated = false;
|
|
2135
2118
|
const provider = async (pageImage, _pageNumber, _mimeType) => {
|
|
2136
2119
|
const { data } = await worker.recognize(pageImage);
|
|
@@ -2144,9 +2127,11 @@ async function createTesseractProvider() {
|
|
|
2144
2127
|
};
|
|
2145
2128
|
return provider;
|
|
2146
2129
|
}
|
|
2130
|
+
var import_tesseract;
|
|
2147
2131
|
var init_tesseract_provider = __esm({
|
|
2148
2132
|
"src/ocr/tesseract-provider.ts"() {
|
|
2149
2133
|
"use strict";
|
|
2134
|
+
import_tesseract = require("tesseract.js");
|
|
2150
2135
|
}
|
|
2151
2136
|
});
|
|
2152
2137
|
|
|
@@ -2168,20 +2153,20 @@ async function resolveOcrProvider(mode, warnings) {
|
|
|
2168
2153
|
return createCliOcrProvider(mode);
|
|
2169
2154
|
}
|
|
2170
2155
|
const detected = detectAvailableOcr();
|
|
2171
|
-
if (!detected) {
|
|
2172
|
-
throw new Error("\uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uB3C4\uAD6C\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
2173
|
-
}
|
|
2174
2156
|
if (detected !== "gemini") {
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2157
|
+
if (detected === "tesseract") {
|
|
2158
|
+
warnings?.push({
|
|
2159
|
+
message: getTesseractFallbackMessage(),
|
|
2160
|
+
code: "OCR_CLI_FALLBACK"
|
|
2161
|
+
});
|
|
2162
|
+
} else {
|
|
2163
|
+
warnings?.push({
|
|
2164
|
+
message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (gemini CLI\uAC00 \uC5C6\uC5B4 fallback). \uB354 \uB098\uC740 \uD488\uC9C8\uC744 \uC704\uD574 gemini CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.`,
|
|
2165
|
+
code: "OCR_CLI_FALLBACK"
|
|
2166
|
+
});
|
|
2167
|
+
}
|
|
2179
2168
|
}
|
|
2180
2169
|
if (detected === "tesseract") {
|
|
2181
|
-
warnings?.push({
|
|
2182
|
-
message: "tesseract.js\uB294 \uD14C\uC774\uBE14 \uAD6C\uC870\uB97C \uBCF5\uC6D0\uD558\uC9C0 \uBABB\uD569\uB2C8\uB2E4. Vision LLM CLI(gemini/claude/codex) \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.",
|
|
2183
|
-
code: "OCR_CLI_FALLBACK"
|
|
2184
|
-
});
|
|
2185
2170
|
const { createTesseractProvider: createTesseractProvider2 } = await Promise.resolve().then(() => (init_tesseract_provider(), tesseract_provider_exports));
|
|
2186
2171
|
return createTesseractProvider2();
|
|
2187
2172
|
}
|
|
@@ -2443,7 +2428,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
2443
2428
|
var import_xmldom = require("@xmldom/xmldom");
|
|
2444
2429
|
|
|
2445
2430
|
// src/utils.ts
|
|
2446
|
-
var VERSION = true ? "2.2.0" : "0.0.0-dev";
|
|
2431
|
+
var VERSION = true ? "2.2.1" : "0.0.0-dev";
|
|
2447
2432
|
function toArrayBuffer(buf) {
|
|
2448
2433
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
2449
2434
|
return buf.buffer;
|