@clazic/kordoc 2.4.2 → 2.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/mcp.js CHANGED
@@ -10,17 +10,17 @@ import {
10
10
  markdownToHwpx,
11
11
  markdownToXlsx,
12
12
  parse
13
- } from "./chunk-I3HO5HLQ.js";
13
+ } from "./chunk-5AXJRBBK.js";
14
+ import "./chunk-4PP34NVQ.js";
14
15
  import {
15
16
  KordocError,
16
17
  VERSION,
17
18
  sanitizeError,
18
19
  toArrayBuffer
19
- } from "./chunk-CMZPKEJ7.js";
20
+ } from "./chunk-KEDUF24M.js";
20
21
  import "./chunk-MOL7MDBG.js";
21
22
  import "./chunk-7FMKAV4P.js";
22
23
  import "./chunk-JOGAFNIL.js";
23
- import "./chunk-4PP34NVQ.js";
24
24
  import "./chunk-ZWE3DS7E.js";
25
25
 
26
26
  // src/mcp.ts
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ createCliOcrProvider
4
+ } from "./chunk-JOGAFNIL.js";
5
+ import {
6
+ detectAvailableOcr,
7
+ getTesseractFallbackMessage,
8
+ validateOcrMode
9
+ } from "./chunk-7NOZFYH6.js";
10
+ import "./chunk-ZWE3DS7E.js";
11
+
12
+ // src/ocr/resolve.ts
13
/**
 * Build the OCR provider that corresponds to the requested mode.
 *
 * - "off": always throws.
 * - "auto": uses whatever `detectAvailableOcr()` reports and, when that is
 *   not "codex", pushes an `OCR_CLI_FALLBACK` warning (if a warnings array
 *   was supplied).
 * - any other mode: checked with `validateOcrMode()` and then used directly.
 *
 * @param mode - Requested OCR mode.
 * @param warnings - Optional array that receives fallback warnings.
 * @param concurrency - Values > 1 select the pooled tesseract provider;
 *   only consulted on the tesseract path.
 * @param batchSize - Overrides `DEFAULT_BATCH_SIZES[engine]` for
 *   gemini/claude/codex; a resulting size > 1 selects the batch provider.
 * @returns The provider created by the matching factory.
 * @throws Error when mode is "off"; anything `validateOcrMode` throws.
 */
async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
  if (mode === "off") {
    throw new Error("OCR\uC774 \uBE44\uD65C\uC131\uD654\uB418\uC5B4 \uC788\uC2B5\uB2C8\uB2E4 (--ocr off).");
  }

  // Shared provider construction for both explicit and auto-detected engines.
  const buildProvider = async (engine) => {
    if (engine === "tesseract") {
      // Loaded lazily so the tesseract chunk is only pulled in when needed.
      const tesseract = await import("./tesseract-provider-MNMZPSGF.js");
      return concurrency && concurrency > 1
        ? tesseract.createTesseractPoolProvider(concurrency)
        : tesseract.createTesseractProvider();
    }

    if (["gemini", "claude", "codex"].includes(engine)) {
      const batch = await import("./batch-provider-PNDCSGQW.js");
      const effectiveBatch = batchSize ?? batch.DEFAULT_BATCH_SIZES[engine];
      return effectiveBatch > 1
        ? batch.createBatchCliProvider(engine, effectiveBatch)
        : createCliOcrProvider(engine);
    }

    // Every remaining engine goes through the plain CLI provider.
    return createCliOcrProvider(engine);
  };

  // Explicitly selected engine: validate, then build.
  if (mode !== "auto") {
    validateOcrMode(mode);
    return buildProvider(mode);
  }

  // Auto mode: pick the detected engine and report any fallback from codex.
  const detected = detectAvailableOcr();
  if (detected === "tesseract") {
    warnings?.push({
      message: getTesseractFallbackMessage(),
      code: "OCR_CLI_FALLBACK"
    });
  } else if (detected !== "codex") {
    warnings?.push({
      message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (codex CLI\uAC00 \uC5C6\uC5B4 fallback). \uB354 \uB098\uC740 \uD488\uC9C8\uC744 \uC704\uD574 codex CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.`,
      code: "OCR_CLI_FALLBACK"
    });
  }
  return buildProvider(detected);
}
67
+ export {
68
+ resolveOcrProvider
69
+ };
70
+ //# sourceMappingURL=resolve-TZVGVOVD.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/ocr/resolve.ts"],"sourcesContent":["/**\n * OCR 프로바이더 팩토리\n *\n * ocrMode에 따라 적절한 OcrProvider를 생성하여 반환.\n * - \"auto\": 설치된 CLI 자동 탐색 (gemini → claude → codex → ollama → tesseract)\n * tesseract.js는 bundled 의존성이므로 항상 사용 가능 (null 반환 없음)\n * - 특정 CLI: 해당 CLI 사용 (미설치 시 에러)\n * - \"tesseract\": 내장 tesseract.js 직접 사용\n * - \"off\": 에러 throw\n */\n\nimport type { OcrMode, OcrProvider, ParseWarning, BatchOcrProvider } from \"../types.js\"\nimport { detectAvailableOcr, validateOcrMode, getTesseractFallbackMessage } from \"./auto-detect.js\"\nimport { createCliOcrProvider } from \"./cli-provider.js\"\n\n/**\n * ocrMode에 따라 OcrProvider를 생성.\n *\n * @param mode - OCR 모드\n * @param warnings - 경고 수집 배열 (fallback 발생 시 경고 추가)\n * @param concurrency - 병렬 처리 수 (tesseract 전용, 기본: 1=순차)\n * @returns OcrProvider 함수\n * @throws mode=\"off\"이거나 지정 CLI 미설치 시 Error\n */\nexport async function resolveOcrProvider(\n mode: OcrMode,\n warnings?: ParseWarning[],\n concurrency?: number,\n batchSize?: number\n): Promise<OcrProvider | BatchOcrProvider> {\n if (mode === \"off\") {\n throw new Error(\"OCR이 비활성화되어 있습니다 (--ocr off).\")\n }\n\n // ── 수동 지정 모드 ──────────────────────────────────\n if (mode !== \"auto\") {\n validateOcrMode(mode) // tesseract는 항상 통과\n\n if (mode === \"tesseract\") {\n const { createTesseractProvider, createTesseractPoolProvider } = await import(\"./tesseract-provider.js\")\n // concurrency > 1이면 워커 풀 사용, 그 외 단일 워커 사용\n if (concurrency && concurrency > 1) {\n return createTesseractPoolProvider(concurrency)\n }\n return createTesseractProvider()\n }\n\n // gemini/claude/codex: 배치 크기 > 1이면 배치 프로바이더 사용\n if (mode === \"gemini\" || mode === \"claude\" || mode === \"codex\") {\n const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import(\"./batch-provider.js\")\n const effectiveBatch = batchSize ?? 
DEFAULT_BATCH_SIZES[mode]\n if (effectiveBatch > 1) {\n return createBatchCliProvider(mode, effectiveBatch)\n }\n return createCliOcrProvider(mode)\n }\n\n // CLI 프로바이더는 rate limit 보호를 위해 concurrency 무시 (항상 순차)\n return createCliOcrProvider(mode)\n }\n\n // ── 자동 탐색 모드 ───────────────────────────────────\n // detectAvailableOcr()는 항상 값을 반환 (tesseract fallback으로 null 없음)\n const detected = detectAvailableOcr()\n\n // codex가 아닌 경우 fallback 경고\n if (detected !== \"codex\") {\n if (detected === \"tesseract\") {\n // 내장 tesseract로 fallback — 구조 복원 제한 안내\n warnings?.push({\n message: getTesseractFallbackMessage(),\n code: \"OCR_CLI_FALLBACK\",\n })\n } else {\n warnings?.push({\n message: `OCR: '${detected}' 사용 중 (codex CLI가 없어 fallback). 더 나은 품질을 위해 codex CLI 설치를 권장합니다.`,\n code: \"OCR_CLI_FALLBACK\",\n })\n }\n }\n\n if (detected === \"tesseract\") {\n const { createTesseractProvider, createTesseractPoolProvider } = await import(\"./tesseract-provider.js\")\n // concurrency > 1이면 워커 풀 사용, 그 외 단일 워커 사용\n if (concurrency && concurrency > 1) {\n return createTesseractPoolProvider(concurrency)\n }\n return createTesseractProvider()\n }\n\n // gemini/claude/codex: 배치 크기 > 1이면 배치 프로바이더 사용\n if (detected === \"gemini\" || detected === \"codex\" || detected === \"claude\") {\n const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import(\"./batch-provider.js\")\n const effectiveBatch = batchSize ?? 
DEFAULT_BATCH_SIZES[detected]\n if (effectiveBatch > 1) {\n return createBatchCliProvider(detected, effectiveBatch)\n }\n return createCliOcrProvider(detected)\n }\n\n // CLI 프로바이더는 rate limit 보호를 위해 concurrency 무시 (항상 순차)\n return createCliOcrProvider(detected)\n}\n"],"mappings":";;;;;;;;;;;;AAwBA,eAAsB,mBACpB,MACA,UACA,aACA,WACyC;AACzC,MAAI,SAAS,OAAO;AAClB,UAAM,IAAI,MAAM,sFAA+B;AAAA,EACjD;AAGA,MAAI,SAAS,QAAQ;AACnB,oBAAgB,IAAI;AAEpB,QAAI,SAAS,aAAa;AACxB,YAAM,EAAE,yBAAyB,4BAA4B,IAAI,MAAM,OAAO,kCAAyB;AAEvG,UAAI,eAAe,cAAc,GAAG;AAClC,eAAO,4BAA4B,WAAW;AAAA,MAChD;AACA,aAAO,wBAAwB;AAAA,IACjC;AAGA,QAAI,SAAS,YAAY,SAAS,YAAY,SAAS,SAAS;AAC9D,YAAM,EAAE,wBAAwB,oBAAoB,IAAI,MAAM,OAAO,8BAAqB;AAC1F,YAAM,iBAAiB,aAAa,oBAAoB,IAAI;AAC5D,UAAI,iBAAiB,GAAG;AACtB,eAAO,uBAAuB,MAAM,cAAc;AAAA,MACpD;AACA,aAAO,qBAAqB,IAAI;AAAA,IAClC;AAGA,WAAO,qBAAqB,IAAI;AAAA,EAClC;AAIA,QAAM,WAAW,mBAAmB;AAGpC,MAAI,aAAa,SAAS;AACxB,QAAI,aAAa,aAAa;AAE5B,gBAAU,KAAK;AAAA,QACb,SAAS,4BAA4B;AAAA,QACrC,MAAM;AAAA,MACR,CAAC;AAAA,IACH,OAAO;AACL,gBAAU,KAAK;AAAA,QACb,SAAS,SAAS,QAAQ;AAAA,QAC1B,MAAM;AAAA,MACR,CAAC;AAAA,IACH;AAAA,EACF;AAEA,MAAI,aAAa,aAAa;AAC5B,UAAM,EAAE,yBAAyB,4BAA4B,IAAI,MAAM,OAAO,kCAAyB;AAEvG,QAAI,eAAe,cAAc,GAAG;AAClC,aAAO,4BAA4B,WAAW;AAAA,IAChD;AACA,WAAO,wBAAwB;AAAA,EACjC;AAGA,MAAI,aAAa,YAAY,aAAa,WAAW,aAAa,UAAU;AAC1E,UAAM,EAAE,wBAAwB,oBAAoB,IAAI,MAAM,OAAO,8BAAqB;AAC1F,UAAM,iBAAiB,aAAa,oBAAoB,QAAQ;AAChE,QAAI,iBAAiB,GAAG;AACtB,aAAO,uBAAuB,UAAU,cAAc;AAAA,IACxD;AACA,WAAO,qBAAqB,QAAQ;AAAA,EACtC;AAGA,SAAO,qBAAqB,QAAQ;AACtC;","names":[]}
@@ -8,7 +8,7 @@ import {
8
8
  sanitizeError,
9
9
  sanitizeHref,
10
10
  toArrayBuffer
11
- } from "./chunk-CMZPKEJ7.js";
11
+ } from "./chunk-KEDUF24M.js";
12
12
  import "./chunk-ZWE3DS7E.js";
13
13
  export {
14
14
  KordocError,
@@ -20,4 +20,4 @@ export {
20
20
  sanitizeHref,
21
21
  toArrayBuffer
22
22
  };
23
- //# sourceMappingURL=utils-BRQCU3AW.js.map
23
+ //# sourceMappingURL=utils-BB2CDSTB.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
@@ -2,14 +2,14 @@
2
2
  import {
3
3
  detectFormat,
4
4
  parse
5
- } from "./chunk-I3HO5HLQ.js";
5
+ } from "./chunk-5AXJRBBK.js";
6
+ import "./chunk-4PP34NVQ.js";
6
7
  import {
7
8
  toArrayBuffer
8
- } from "./chunk-CMZPKEJ7.js";
9
+ } from "./chunk-KEDUF24M.js";
9
10
  import "./chunk-MOL7MDBG.js";
10
11
  import "./chunk-7FMKAV4P.js";
11
12
  import "./chunk-JOGAFNIL.js";
12
- import "./chunk-4PP34NVQ.js";
13
13
  import "./chunk-ZWE3DS7E.js";
14
14
 
15
15
  // src/watch.ts
@@ -129,4 +129,4 @@ async function sendWebhook(url, payload) {
129
129
  export {
130
130
  watchDirectory
131
131
  };
132
- //# sourceMappingURL=watch-SWG6JGKP.js.map
132
+ //# sourceMappingURL=watch-6QVK32X7.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@clazic/kordoc",
3
- "version": "2.4.2",
3
+ "version": "2.4.4",
4
4
  "description": "Parse Korean documents (HWP, HWPX, PDF, XLSX, DOCX) to Markdown",
5
5
  "type": "module",
6
6
  "exports": {