@clazic/kordoc 2.4.3 → 2.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auto-detect-2YGFYQCN.js +15 -0
- package/dist/{chunk-IAU7NTTA.js → chunk-5AXJRBBK.js} +55 -39
- package/dist/chunk-5AXJRBBK.js.map +1 -0
- package/dist/chunk-7NOZFYH6.js +63 -0
- package/dist/chunk-7NOZFYH6.js.map +1 -0
- package/dist/{chunk-HOUVJPR7.js → chunk-KEDUF24M.js} +2 -2
- package/dist/cli.js +6 -6
- package/dist/index.cjs +66 -35
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +66 -35
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +3 -3
- package/dist/{resolve-UOAOPQ4H.js → resolve-TZVGVOVD.js} +6 -47
- package/dist/resolve-TZVGVOVD.js.map +1 -0
- package/dist/{utils-PYEEPTPM.js → utils-BB2CDSTB.js} +2 -2
- package/dist/utils-BB2CDSTB.js.map +1 -0
- package/dist/{watch-IQLSW2OB.js → watch-6QVK32X7.js} +4 -4
- package/package.json +1 -1
- package/dist/chunk-IAU7NTTA.js.map +0 -1
- package/dist/resolve-UOAOPQ4H.js.map +0 -1
- /package/dist/{utils-PYEEPTPM.js.map → auto-detect-2YGFYQCN.js.map} +0 -0
- /package/dist/{chunk-HOUVJPR7.js.map → chunk-KEDUF24M.js.map} +0 -0
- /package/dist/{watch-IQLSW2OB.js.map → watch-6QVK32X7.js.map} +0 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/ocr/auto-detect.ts
|
|
4
|
+
import { execSync } from "child_process";
|
|
5
|
+
var CLI_PRIORITY = ["codex", "gemini", "claude", "ollama"];
|
|
6
|
+
function detectAvailableOcr() {
|
|
7
|
+
for (const cli of CLI_PRIORITY) {
|
|
8
|
+
if (isCliInstalled(cli)) return cli;
|
|
9
|
+
}
|
|
10
|
+
return "tesseract";
|
|
11
|
+
}
|
|
12
|
+
function isCliInstalled(name) {
|
|
13
|
+
try {
|
|
14
|
+
const cmd = process.platform === "win32" ? "where" : "which";
|
|
15
|
+
execSync(`${cmd} ${name}`, { stdio: "ignore", timeout: 3e3 });
|
|
16
|
+
return true;
|
|
17
|
+
} catch {
|
|
18
|
+
return false;
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
function getAutoFallbackChain() {
|
|
22
|
+
const chain = [];
|
|
23
|
+
for (const cli of CLI_PRIORITY) {
|
|
24
|
+
if (isCliInstalled(cli)) chain.push(cli);
|
|
25
|
+
}
|
|
26
|
+
chain.push("tesseract");
|
|
27
|
+
return chain;
|
|
28
|
+
}
|
|
29
|
+
function validateOcrMode(mode) {
|
|
30
|
+
if (mode === "auto" || mode === "off" || mode === "tesseract") return;
|
|
31
|
+
if (!isCliInstalled(mode)) {
|
|
32
|
+
throw new Error(`'${mode}' CLI\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4.
|
|
33
|
+
${getInstallGuide(mode)}`);
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
function getInstallGuide(mode) {
|
|
37
|
+
const guides = {
|
|
38
|
+
gemini: "\uC124\uCE58: https://ai.google.dev/gemini-api/docs/cli",
|
|
39
|
+
claude: "\uC124\uCE58: npm install -g @anthropic-ai/claude-code \uB610\uB294 https://claude.ai/code",
|
|
40
|
+
codex: "\uC124\uCE58: npm install -g @openai/codex \uB610\uB294 https://github.com/openai/codex",
|
|
41
|
+
ollama: "\uC124\uCE58: brew install ollama \uB610\uB294 https://ollama.com/download"
|
|
42
|
+
};
|
|
43
|
+
return guides[mode] || `'${mode}'\uC744(\uB97C) \uC124\uCE58\uD574\uC8FC\uC138\uC694.`;
|
|
44
|
+
}
|
|
45
|
+
function getTesseractFallbackMessage() {
|
|
46
|
+
return [
|
|
47
|
+
"\uC124\uCE58\uB41C AI CLI\uAC00 \uC5C6\uC5B4 \uB0B4\uC7A5 tesseract.js\uB85C OCR\uC744 \uC218\uD589\uD569\uB2C8\uB2E4.",
|
|
48
|
+
"\uB354 \uB098\uC740 \uD488\uC9C8(\uD14C\uC774\uBE14/\uD5E4\uB529 \uAD6C\uC870 \uBCF4\uC874)\uC744 \uC704\uD574 AI CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4:",
|
|
49
|
+
"",
|
|
50
|
+
" [\uAD8C\uC7A5] Codex CLI: npm install -g @openai/codex",
|
|
51
|
+
" Gemini CLI: https://ai.google.dev/gemini-api/docs/cli",
|
|
52
|
+
" Claude CLI: npm install -g @anthropic-ai/claude-code",
|
|
53
|
+
" Ollama: brew install ollama (+ ollama pull gemma4:27b)"
|
|
54
|
+
].join("\n");
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export {
|
|
58
|
+
detectAvailableOcr,
|
|
59
|
+
getAutoFallbackChain,
|
|
60
|
+
validateOcrMode,
|
|
61
|
+
getTesseractFallbackMessage
|
|
62
|
+
};
|
|
63
|
+
//# sourceMappingURL=chunk-7NOZFYH6.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/ocr/auto-detect.ts"],"sourcesContent":["/**\n * OCR CLI 자동 탐색\n *\n * 탐색 순서: codex → gemini → claude → ollama → tesseract.js\n * CLI는 which(unix) / where(win) 명령어로 PATH 존재 확인.\n * tesseract.js는 bundled 의존성이므로 항상 사용 가능 (최후 fallback).\n */\n\nimport { execSync } from \"child_process\"\nimport type { OcrMode } from \"../types.js\"\n\n/** CLI 탐색 우선순위 */\nconst CLI_PRIORITY = [\"codex\", \"gemini\", \"claude\", \"ollama\"] as const\n\n/**\n * 시스템에 설치된 OCR 도구를 우선순위대로 탐색.\n * tesseract.js는 bundled 의존성이므로 CLI를 찾지 못해도 항상 \"tesseract\" 반환.\n * @returns 사용 가능한 OcrMode (null 반환 없음)\n */\nexport function detectAvailableOcr(): OcrMode {\n // 1. CLI 프로그램 탐색 (codex → gemini → claude → ollama)\n for (const cli of CLI_PRIORITY) {\n if (isCliInstalled(cli)) return cli\n }\n\n // 2. tesseract.js — bundled 의존성, 항상 사용 가능\n return \"tesseract\"\n}\n\n/**\n * 특정 CLI가 시스템 PATH에 있는지 확인.\n * which(unix) 또는 where(win32) 사용.\n */\nfunction isCliInstalled(name: string): boolean {\n try {\n const cmd = process.platform === \"win32\" ? 
\"where\" : \"which\"\n execSync(`${cmd} ${name}`, { stdio: \"ignore\", timeout: 3000 })\n return true\n } catch {\n return false\n }\n}\n\n/**\n * auto 모드에서 시도할 fallback 체인 반환.\n * 설치된 CLI만 포함하며, tesseract는 항상 마지막에 추가.\n */\nexport function getAutoFallbackChain(): OcrMode[] {\n const chain: OcrMode[] = []\n for (const cli of CLI_PRIORITY) {\n if (isCliInstalled(cli)) chain.push(cli)\n }\n chain.push(\"tesseract\")\n return chain\n}\n\n/**\n * 수동 지정된 OcrMode 유효성 검증.\n * --ocr gemini 등 강제 지정 시 호출.\n * @throws 해당 CLI가 설치되지 않은 경우 Error (tesseract는 항상 통과)\n */\nexport function validateOcrMode(mode: OcrMode): void {\n if (mode === \"auto\" || mode === \"off\" || mode === \"tesseract\") return\n\n if (!isCliInstalled(mode)) {\n throw new Error(`'${mode}' CLI가 설치되지 않았습니다.\\n${getInstallGuide(mode)}`)\n }\n}\n\n/** CLI별 설치 안내 메시지 */\nfunction getInstallGuide(mode: string): string {\n const guides: Record<string, string> = {\n gemini: \"설치: https://ai.google.dev/gemini-api/docs/cli\",\n claude: \"설치: npm install -g @anthropic-ai/claude-code 또는 https://claude.ai/code\",\n codex: \"설치: npm install -g @openai/codex 또는 https://github.com/openai/codex\",\n ollama: \"설치: brew install ollama 또는 https://ollama.com/download\",\n }\n return guides[mode] || `'${mode}'을(를) 설치해주세요.`\n}\n\n/**\n * AI CLI가 없어 tesseract.js로 fallback할 때 표시할 안내 메시지.\n */\nexport function getTesseractFallbackMessage(): string {\n return [\n \"설치된 AI CLI가 없어 내장 tesseract.js로 OCR을 수행합니다.\",\n \"더 나은 품질(테이블/헤딩 구조 보존)을 위해 AI CLI 설치를 권장합니다:\",\n \"\",\n \" [권장] Codex CLI: npm install -g @openai/codex\",\n \" Gemini CLI: https://ai.google.dev/gemini-api/docs/cli\",\n \" Claude CLI: npm install -g @anthropic-ai/claude-code\",\n \" Ollama: brew install ollama (+ ollama pull gemma4:27b)\",\n 
].join(\"\\n\")\n}\n"],"mappings":";;;AAQA,SAAS,gBAAgB;AAIzB,IAAM,eAAe,CAAC,SAAS,UAAU,UAAU,QAAQ;AAOpD,SAAS,qBAA8B;AAE5C,aAAW,OAAO,cAAc;AAC9B,QAAI,eAAe,GAAG,EAAG,QAAO;AAAA,EAClC;AAGA,SAAO;AACT;AAMA,SAAS,eAAe,MAAuB;AAC7C,MAAI;AACF,UAAM,MAAM,QAAQ,aAAa,UAAU,UAAU;AACrD,aAAS,GAAG,GAAG,IAAI,IAAI,IAAI,EAAE,OAAO,UAAU,SAAS,IAAK,CAAC;AAC7D,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAMO,SAAS,uBAAkC;AAChD,QAAM,QAAmB,CAAC;AAC1B,aAAW,OAAO,cAAc;AAC9B,QAAI,eAAe,GAAG,EAAG,OAAM,KAAK,GAAG;AAAA,EACzC;AACA,QAAM,KAAK,WAAW;AACtB,SAAO;AACT;AAOO,SAAS,gBAAgB,MAAqB;AACnD,MAAI,SAAS,UAAU,SAAS,SAAS,SAAS,YAAa;AAE/D,MAAI,CAAC,eAAe,IAAI,GAAG;AACzB,UAAM,IAAI,MAAM,IAAI,IAAI;AAAA,EAAuB,gBAAgB,IAAI,CAAC,EAAE;AAAA,EACxE;AACF;AAGA,SAAS,gBAAgB,MAAsB;AAC7C,QAAM,SAAiC;AAAA,IACrC,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,OAAQ;AAAA,IACR,QAAQ;AAAA,EACV;AACA,SAAO,OAAO,IAAI,KAAK,IAAI,IAAI;AACjC;AAKO,SAAS,8BAAsC;AACpD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;","names":[]}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/utils.ts
|
|
4
|
-
var VERSION = true ? "2.4.3" : "0.0.0-dev";
|
|
4
|
+
var VERSION = true ? "2.4.4" : "0.0.0-dev";
|
|
5
5
|
function toArrayBuffer(buf) {
|
|
6
6
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
7
7
|
return buf.buffer;
|
|
@@ -90,4 +90,4 @@ export {
|
|
|
90
90
|
sanitizeHref,
|
|
91
91
|
classifyError
|
|
92
92
|
};
|
|
93
|
-
//# sourceMappingURL=chunk-HOUVJPR7.js.map
|
|
93
|
+
//# sourceMappingURL=chunk-KEDUF24M.js.map
|
package/dist/cli.js
CHANGED
|
@@ -4,15 +4,15 @@ import {
|
|
|
4
4
|
markdownToHwpx,
|
|
5
5
|
markdownToXlsx,
|
|
6
6
|
parse
|
|
7
|
-
} from "./chunk-IAU7NTTA.js";
|
|
7
|
+
} from "./chunk-5AXJRBBK.js";
|
|
8
|
+
import "./chunk-4PP34NVQ.js";
|
|
8
9
|
import {
|
|
9
10
|
VERSION,
|
|
10
11
|
toArrayBuffer
|
|
11
|
-
} from "./chunk-HOUVJPR7.js";
|
|
12
|
+
} from "./chunk-KEDUF24M.js";
|
|
12
13
|
import "./chunk-MOL7MDBG.js";
|
|
13
14
|
import "./chunk-7FMKAV4P.js";
|
|
14
15
|
import "./chunk-JOGAFNIL.js";
|
|
15
|
-
import "./chunk-4PP34NVQ.js";
|
|
16
16
|
import "./chunk-ZWE3DS7E.js";
|
|
17
17
|
|
|
18
18
|
// src/cli.ts
|
|
@@ -137,7 +137,7 @@ async function runParse(files, opts) {
|
|
|
137
137
|
saveImages(absPath);
|
|
138
138
|
}
|
|
139
139
|
} catch (err) {
|
|
140
|
-
const { sanitizeError } = await import("./utils-PYEEPTPM.js");
|
|
140
|
+
const { sanitizeError } = await import("./utils-BB2CDSTB.js");
|
|
141
141
|
process.stderr.write(`
|
|
142
142
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
143
143
|
`);
|
|
@@ -221,7 +221,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
221
221
|
`));
|
|
222
222
|
}
|
|
223
223
|
} catch (err) {
|
|
224
|
-
const { sanitizeError } = await import("./utils-PYEEPTPM.js");
|
|
224
|
+
const { sanitizeError } = await import("./utils-BB2CDSTB.js");
|
|
225
225
|
process.stderr.write(` FAIL
|
|
226
226
|
`);
|
|
227
227
|
process.stderr.write(` \u2192 ${sanitizeError(err)}
|
|
@@ -230,7 +230,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
230
230
|
}
|
|
231
231
|
});
|
|
232
232
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
233
|
-
const { watchDirectory } = await import("./watch-IQLSW2OB.js");
|
|
233
|
+
const { watchDirectory } = await import("./watch-6QVK32X7.js");
|
|
234
234
|
await watchDirectory({
|
|
235
235
|
dir,
|
|
236
236
|
outDir: opts.outDir,
|
package/dist/index.cjs
CHANGED
|
@@ -1957,6 +1957,13 @@ var require_cfb = __commonJS({
|
|
|
1957
1957
|
});
|
|
1958
1958
|
|
|
1959
1959
|
// src/ocr/auto-detect.ts
|
|
1960
|
+
var auto_detect_exports = {};
|
|
1961
|
+
__export(auto_detect_exports, {
|
|
1962
|
+
detectAvailableOcr: () => detectAvailableOcr,
|
|
1963
|
+
getAutoFallbackChain: () => getAutoFallbackChain,
|
|
1964
|
+
getTesseractFallbackMessage: () => getTesseractFallbackMessage,
|
|
1965
|
+
validateOcrMode: () => validateOcrMode
|
|
1966
|
+
});
|
|
1960
1967
|
function detectAvailableOcr() {
|
|
1961
1968
|
for (const cli of CLI_PRIORITY) {
|
|
1962
1969
|
if (isCliInstalled(cli)) return cli;
|
|
@@ -1972,6 +1979,14 @@ function isCliInstalled(name) {
|
|
|
1972
1979
|
return false;
|
|
1973
1980
|
}
|
|
1974
1981
|
}
|
|
1982
|
+
function getAutoFallbackChain() {
|
|
1983
|
+
const chain = [];
|
|
1984
|
+
for (const cli of CLI_PRIORITY) {
|
|
1985
|
+
if (isCliInstalled(cli)) chain.push(cli);
|
|
1986
|
+
}
|
|
1987
|
+
chain.push("tesseract");
|
|
1988
|
+
return chain;
|
|
1989
|
+
}
|
|
1975
1990
|
function validateOcrMode(mode) {
|
|
1976
1991
|
if (mode === "auto" || mode === "off" || mode === "tesseract") return;
|
|
1977
1992
|
if (!isCliInstalled(mode)) {
|
|
@@ -2810,7 +2825,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
2810
2825
|
var import_xmldom = require("@xmldom/xmldom");
|
|
2811
2826
|
|
|
2812
2827
|
// src/utils.ts
|
|
2813
|
-
var VERSION = true ? "2.4.3" : "0.0.0-dev";
|
|
2828
|
+
var VERSION = true ? "2.4.4" : "0.0.0-dev";
|
|
2814
2829
|
function toArrayBuffer(buf) {
|
|
2815
2830
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
2816
2831
|
return buf.buffer;
|
|
@@ -6362,53 +6377,69 @@ async function parsePdfDocument(buffer, options) {
|
|
|
6362
6377
|
}
|
|
6363
6378
|
const parsedPageCount = parsedPages || (pageFilter ? pageFilter.size : effectivePageCount);
|
|
6364
6379
|
if (isImageBased) {
|
|
6365
|
-
let ocrProvider = options?.ocr ?? null;
|
|
6366
6380
|
const ocrMode = options?.ocrMode ?? "auto";
|
|
6367
|
-
|
|
6368
|
-
|
|
6369
|
-
|
|
6370
|
-
|
|
6371
|
-
const batchSize = options?.ocrBatchSize;
|
|
6372
|
-
ocrProvider = await resolveOcrProvider2(ocrMode, warnings, concurrency, batchSize);
|
|
6373
|
-
} catch (resolveErr) {
|
|
6374
|
-
if (ocrMode !== "auto") {
|
|
6375
|
-
throw Object.assign(
|
|
6376
|
-
new KordocError(resolveErr instanceof Error ? resolveErr.message : "OCR \uD504\uB85C\uBC14\uC774\uB354 \uCD08\uAE30\uD654 \uC2E4\uD328"),
|
|
6377
|
-
{ isImageBased: true }
|
|
6378
|
-
);
|
|
6379
|
-
}
|
|
6380
|
-
}
|
|
6381
|
+
const concurrency = options?.ocrConcurrency ?? 1;
|
|
6382
|
+
const batchSize = options?.ocrBatchSize;
|
|
6383
|
+
if (ocrMode === "off") {
|
|
6384
|
+
throw Object.assign(new KordocError(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`), { isImageBased: true });
|
|
6381
6385
|
}
|
|
6382
|
-
|
|
6383
|
-
|
|
6386
|
+
const { resolveOcrProvider: resolveOcrProvider2 } = await Promise.resolve().then(() => (init_resolve(), resolve_exports));
|
|
6387
|
+
const { ocrPages: ocrPages2 } = await Promise.resolve().then(() => (init_provider(), provider_exports));
|
|
6388
|
+
const tryProvider = async (provider) => {
|
|
6384
6389
|
try {
|
|
6385
|
-
|
|
6386
|
-
const concurrency = options?.ocrConcurrency ?? 1;
|
|
6387
|
-
ocrBlocks = await ocrPages2(doc, ocrProvider, pageFilter, effectivePageCount, warnings, concurrency, options?.onProgress);
|
|
6390
|
+
return await ocrPages2(doc, provider, pageFilter, effectivePageCount, warnings, concurrency, options?.onProgress);
|
|
6388
6391
|
} catch {
|
|
6392
|
+
return [];
|
|
6389
6393
|
} finally {
|
|
6390
|
-
const terminable = ocrProvider;
|
|
6394
|
+
const terminable = provider;
|
|
6391
6395
|
if (typeof terminable.terminate === "function") {
|
|
6392
6396
|
await terminable.terminate().catch(() => {
|
|
6393
6397
|
});
|
|
6394
6398
|
}
|
|
6395
6399
|
}
|
|
6396
|
-
|
|
6397
|
-
|
|
6398
|
-
|
|
6399
|
-
|
|
6400
|
-
|
|
6401
|
-
|
|
6402
|
-
|
|
6403
|
-
|
|
6404
|
-
|
|
6400
|
+
};
|
|
6401
|
+
let ocrBlocks = [];
|
|
6402
|
+
if (options?.ocr) {
|
|
6403
|
+
ocrBlocks = await tryProvider(options.ocr);
|
|
6404
|
+
} else if (ocrMode === "auto") {
|
|
6405
|
+
const { getAutoFallbackChain: getAutoFallbackChain2 } = await Promise.resolve().then(() => (init_auto_detect(), auto_detect_exports));
|
|
6406
|
+
for (const mode of getAutoFallbackChain2()) {
|
|
6407
|
+
try {
|
|
6408
|
+
const provider = await resolveOcrProvider2(mode, warnings, concurrency, batchSize);
|
|
6409
|
+
const blocks2 = await tryProvider(provider);
|
|
6410
|
+
if (blocks2.length > 0) {
|
|
6411
|
+
ocrBlocks = blocks2;
|
|
6412
|
+
break;
|
|
6413
|
+
}
|
|
6414
|
+
warnings.push({ message: `OCR: '${mode}' \uACB0\uACFC \uC5C6\uC74C, \uB2E4\uC74C \uC5D4\uC9C4\uC73C\uB85C \uC2DC\uB3C4`, code: "OCR_CLI_FALLBACK" });
|
|
6415
|
+
} catch {
|
|
6416
|
+
}
|
|
6417
|
+
}
|
|
6418
|
+
} else {
|
|
6419
|
+
try {
|
|
6420
|
+
const provider = await resolveOcrProvider2(ocrMode, warnings, concurrency, batchSize);
|
|
6421
|
+
ocrBlocks = await tryProvider(provider);
|
|
6422
|
+
} catch (resolveErr) {
|
|
6423
|
+
throw Object.assign(
|
|
6424
|
+
new KordocError(resolveErr instanceof Error ? resolveErr.message : "OCR \uD504\uB85C\uBC14\uC774\uB354 \uCD08\uAE30\uD654 \uC2E4\uD328"),
|
|
6425
|
+
{ isImageBased: true }
|
|
6426
|
+
);
|
|
6405
6427
|
}
|
|
6406
6428
|
}
|
|
6407
|
-
if (
|
|
6408
|
-
|
|
6429
|
+
if (ocrBlocks.length > 0) {
|
|
6430
|
+
const ocrMarkdown = blocksToMarkdown(ocrBlocks);
|
|
6431
|
+
return {
|
|
6432
|
+
markdown: ocrMarkdown,
|
|
6433
|
+
blocks: ocrBlocks,
|
|
6434
|
+
metadata,
|
|
6435
|
+
warnings: warnings.length > 0 ? warnings : void 0,
|
|
6436
|
+
isImageBased: true
|
|
6437
|
+
};
|
|
6409
6438
|
}
|
|
6410
|
-
|
|
6411
|
-
|
|
6439
|
+
throw Object.assign(
|
|
6440
|
+
new KordocError(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF \u2014 OCR \uC2E4\uD328 (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`),
|
|
6441
|
+
{ isImageBased: true }
|
|
6442
|
+
);
|
|
6412
6443
|
}
|
|
6413
6444
|
if (options?.removeHeaderFooter !== false && parsedPageCount >= 3) {
|
|
6414
6445
|
const removed = removeHeaderFooterBlocks(blocks, pageHeights, warnings);
|