@clazic/kordoc 2.2.8 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env node
2
+ import "./chunk-ZWE3DS7E.js";
3
+
4
+ // src/ocr/batch-provider.ts
5
+ import { spawnSync } from "child_process";
6
+ import { writeFileSync, readFileSync, unlinkSync, mkdirSync } from "fs";
7
+ import { join } from "path";
8
+ import { tmpdir } from "os";
9
/**
 * Prompt (in Korean) sent with every batch OCR request.
 *
 * It asks the model to OCR the attached page images into plain Markdown,
 * to insert the `<!-- PAGE_BREAK -->` separator between page results, and
 * not to wrap the answer in a code fence. The text is assembled one prompt
 * line per array entry; the joined string is what gets sent.
 */
var BATCH_OCR_PROMPT = [
  // Task statement, followed by a blank line.
  "\uB2E4\uC74C \uBB38\uC11C \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0\uB4E4\uC744 OCR\uD558\uC5EC \uC21C\uC218 Markdown\uC73C\uB85C \uBCC0\uD658\uD558\uC138\uC694.",
  "",
  // "Rules:" header and the individual rules.
  "\uADDC\uCE59:",
  "- \uAC01 \uD398\uC774\uC9C0 \uACB0\uACFC \uC0AC\uC774\uC5D0 \uBC18\uB4DC\uC2DC \uC774 \uAD6C\uBD84\uC790\uB97C \uC0BD\uC785: <!-- PAGE_BREAK -->",
  "- \uD14C\uC774\uBE14\uC740 Markdown \uD14C\uC774\uBE14 \uBB38\uBC95 \uC0AC\uC6A9 (| \uAD6C\uBD84, |---|---| \uD5E4\uB354 \uAD6C\uBD84\uC120 \uD3EC\uD568)",
  "- \uBCD1\uD569\uB41C \uC140\uC740 \uD574\uB2F9 \uC704\uCE58\uC5D0 \uB0B4\uC6A9 \uAE30\uC7AC",
  "- \uD5E4\uB529\uC740 \uAE00\uC790 \uD06C\uAE30\uC5D0 \uB530\uB77C ## ~ ###### \uC0AC\uC6A9",
  "- \uB9AC\uC2A4\uD2B8\uB294 - \uB610\uB294 1. \uC0AC\uC6A9",
  "- \uC774\uBBF8\uC9C0, \uB3C4\uD615 \uB4F1 \uBE44\uD14D\uC2A4\uD2B8 \uC694\uC18C\uB294 \uBB34\uC2DC",
  "- \uC6D0\uBB38\uC758 \uC77D\uAE30 \uC21C\uC11C\uC640 \uAD6C\uC870\uB97C \uC720\uC9C0",
  "- ```\uB85C \uAC10\uC2F8\uC9C0 \uB9D0\uACE0 \uC21C\uC218 Markdown\uB9CC \uCD9C\uB825"
].join("\n");

/**
 * Default number of page images sent per CLI invocation, keyed by CLI mode.
 *
 * Per the upstream TypeScript source comment, the gemini value was chosen
 * because batches of 10 or more images hit an AbortError inside the gemini
 * CLI, while batches of 5 were measured to be stable.
 */
var DEFAULT_BATCH_SIZES = { gemini: 5, claude: 5, codex: 10 };
15
/** Cached path of the batch scratch directory; resolved on first use. */
var _batchTempDir = null;

/**
 * Returns the directory that batch page images are written to, creating it
 * when needed.
 *
 * The path is `<cwd>/_kordoc_ocr_tmp`, resolved against the working directory
 * at the time of the first call and cached afterwards. Per the upstream source
 * comment, it lives under the working directory because the gemini CLI can
 * only `@`-reference files below its cwd.
 *
 * The directory is (re-)ensured on every call rather than only the first one:
 * with `recursive: true`, `mkdirSync` does nothing when the directory already
 * exists, and this way a directory that was removed between batches (for
 * example by a cleanup of the working tree during a long run) is re-created
 * instead of making every later image write fail.
 *
 * @returns {string} Path of the existing scratch directory.
 */
function getBatchTempDir() {
  if (!_batchTempDir) {
    _batchTempDir = join(process.cwd(), "_kordoc_ocr_tmp");
  }
  mkdirSync(_batchTempDir, { recursive: true });
  return _batchTempDir;
}
23
/**
 * Splits cleaned batch output into Markdown segments aligned with the
 * requested pages.
 *
 * The prompt asks for a separator *between* pages, so N pages should yield N
 * segments. When the segment count matches the page count (after dropping
 * empty segments caused by a stray leading/trailing separator), the segments
 * are returned positionally, empty ones included, so that a blank page does
 * not shift the text of every following page onto the wrong page number.
 * When the counts do not match, empty segments are dropped and the caller
 * maps what is left on a best-effort basis.
 *
 * @param {string} cleaned Model output with any wrapping code fence removed.
 * @param {number} pageCount Number of pages that were sent in the batch.
 * @returns {string[]} Trimmed segments, index-aligned with the batch pages
 *   when the counts match.
 */
function splitBatchPages(cleaned, pageCount) {
  const segments = cleaned.split(/<!--\s*PAGE_BREAK\s*-->/).map((segment) => segment.trim());
  while (segments.length > pageCount && segments[segments.length - 1] === "") {
    segments.pop();
  }
  while (segments.length > pageCount && segments[0] === "") {
    segments.shift();
  }
  if (segments.length === pageCount) {
    return segments;
  }
  return segments.filter((segment) => segment.length > 0);
}

/**
 * Creates a batch OCR provider that sends several page images to a CLI tool
 * in a single invocation.
 *
 * @param {string} mode CLI to use; "codex" goes through `callBatchCodexCli`,
 *   any other value is passed to `callBatchCli`.
 * @param {number} batchSize Exposed unchanged as the provider's `batchSize`.
 * @returns {{__batch: true, batchSize: number, processBatch: Function}}
 *   Provider whose `processBatch(pages)` takes `{ image, pageNum }` entries
 *   and resolves to a `Map` from page number to `{ markdown }`. Pages for
 *   which the model returned no text are absent from the map. Errors thrown
 *   by the CLI call propagate to the caller.
 */
function createBatchCliProvider(mode, batchSize) {
  return {
    __batch: true,
    batchSize,
    async processBatch(pages) {
      const results = /* @__PURE__ */ new Map();
      const tempDir = getBatchTempDir();
      const tempFiles = [];
      try {
        for (const { image, pageNum } of pages) {
          // The pid keeps two processes that share a working directory from
          // overwriting (and later deleting) each other's page images.
          const path = join(tempDir, `batch-${process.pid}-p${pageNum}.png`);
          // Register before writing so a partially written file is still
          // removed by the cleanup below if the write throws.
          tempFiles.push(path);
          writeFileSync(path, image);
        }
        const output = mode === "codex" ? callBatchCodexCli(tempFiles) : callBatchCli(mode, tempFiles);
        const parts = splitBatchPages(stripCodeFence(output.trim()), pages.length);
        pages.forEach(({ pageNum }, index) => {
          const markdown = parts[index];
          // Missing or empty segment: leave the page without a result.
          if (markdown) {
            results.set(pageNum, { markdown });
          }
        });
      } finally {
        for (const file of tempFiles) {
          try {
            unlinkSync(file);
          } catch {
            // Best-effort cleanup: the file may never have been created.
          }
        }
      }
      return results;
    }
  };
}
63
/**
 * Runs one batch OCR request through the gemini or claude CLI and returns
 * its standard output.
 *
 * Every image is referenced in the prompt as an `@<path>` line appended
 * after the shared batch prompt. The call is synchronous and blocks until
 * the CLI exits or the timeout (60 s plus 20 s per image) expires.
 *
 * @param {string} mode Executable name; "gemini" selects the gemini argument
 *   layout, any other value uses the `--print` layout.
 * @param {string[]} imagePaths Paths of the page images to reference.
 * @returns {string} The CLI's stdout, or "" when it produced none.
 * @throws {Error} When the process cannot be run (including timeout) or
 *   exits with a non-zero status.
 */
function callBatchCli(mode, imagePaths) {
  const isGemini = mode === "gemini";
  const references = imagePaths.map((file) => "@" + file).join("\n");
  const prompt = [BATCH_OCR_PROMPT, references].join("\n\n");

  // NOTE(review): `--yolo` presumably lets the gemini CLI run without
  // confirmation prompts — confirm against the gemini CLI documentation.
  const args = isGemini ? ["--prompt", prompt, "--yolo"] : ["--print", prompt];
  const modelOverride = isGemini ? process.env.KORDOC_GEMINI_MODEL : process.env.KORDOC_CLAUDE_MODEL;
  if (modelOverride) {
    args.push("--model", modelOverride);
  }

  const spawnOptions = {
    encoding: "utf-8",
    timeout: 60000 + imagePaths.length * 20000,
    // 50MB, to leave room for large batch output.
    maxBuffer: 50 * 1024 * 1024
  };
  if (mode === "claude") {
    // Only the claude CLI is started from the OS temp directory.
    spawnOptions.cwd = tmpdir();
  }

  const { error, status, stderr, stdout } = spawnSync(mode, args, spawnOptions);
  if (error) {
    throw new Error(`${mode} \uBC30\uCE58 OCR \uC2E4\uD328: ${error.message}`);
  }
  if (status !== 0) {
    const reason = stderr?.trim() || `exit code ${status}`;
    throw new Error(`${mode} \uBC30\uCE58 OCR \uC2E4\uD328: ${reason}`);
  }
  return stdout || "";
}
95
/**
 * Runs one batch OCR request through `codex exec`, attaching every page image
 * with its own `--image` flag, and returns the model's answer.
 *
 * The answer is read from the file passed to `--output-last-message`; if that
 * file cannot be read, the process's stdout is returned instead. The output
 * file is always removed afterwards. The call is synchronous and blocks until
 * codex exits or the timeout (60 s plus 20 s per image) expires.
 *
 * @param {string[]} imagePaths Paths of the page images to attach.
 * @returns {string} The model output, or "" when none was produced.
 * @throws {Error} When codex cannot be run (including timeout) or exits with
 *   a non-zero status.
 */
function callBatchCodexCli(imagePaths) {
  // A timestamp alone is predictable and can collide when several processes
  // start a batch in the same millisecond; the pid and a random suffix make
  // the name unique per call in the shared OS temp directory.
  const uniqueSuffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
  const outPath = join(tmpdir(), `kordoc-codex-batch-${uniqueSuffix}.txt`);
  try {
    const args = ["exec", BATCH_OCR_PROMPT];
    for (const imagePath of imagePaths) {
      args.push("--image", imagePath);
    }
    args.push("--output-last-message", outPath);
    const model = process.env.KORDOC_CODEX_MODEL;
    if (model) {
      args.push("--model", model);
    }
    const timeoutMs = 6e4 + imagePaths.length * 2e4;
    const result = spawnSync("codex", args, {
      encoding: "utf-8",
      timeout: timeoutMs,
      maxBuffer: 50 * 1024 * 1024,
      // Empty stdin — presumably so codex does not wait for piped input;
      // confirm against the codex CLI documentation.
      input: ""
    });
    if (result.error) {
      throw new Error(`codex \uBC30\uCE58 OCR \uC2E4\uD328: ${result.error.message}`);
    }
    if (result.status !== 0) {
      const errMsg = result.stderr?.trim() || `exit code ${result.status}`;
      throw new Error(`codex \uBC30\uCE58 OCR \uC2E4\uD328: ${errMsg}`);
    }
    try {
      return readFileSync(outPath, "utf-8");
    } catch {
      // No readable output file: fall back to whatever codex printed.
      return result.stdout || "";
    }
  } finally {
    try {
      unlinkSync(outPath);
    } catch {
      // Best-effort cleanup: the file may never have been created.
    }
  }
}
131
/**
 * Removes a Markdown code fence that wraps the *entire* model output.
 *
 * Only a fence that opens on the first line (optionally tagged `markdown` or
 * `md`) and closes on the last line is stripped. The pattern is deliberately
 * not multiline: with the `m` flag, `^` and `$` match at every line, so the
 * first fenced code block anywhere inside the document would be extracted and
 * all text around it (including other pages of a batch) silently discarded.
 *
 * @param {string} text Raw model output.
 * @returns {string} The trimmed fence contents when the whole text is
 *   wrapped in a fence; otherwise `text` unchanged.
 */
function stripCodeFence(text) {
  const wrapped = /^```(?:markdown|md)?\s*\n([\s\S]*?)\n```\s*$/.exec(text.trim());
  return wrapped ? wrapped[1].trim() : text;
}
135
+ export {
136
+ DEFAULT_BATCH_SIZES,
137
+ createBatchCliProvider
138
+ };
139
+ //# sourceMappingURL=batch-provider-FUCIIS4M.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/ocr/batch-provider.ts"],"sourcesContent":["/**\n * CLI 배치 OCR 프로바이더\n *\n * 여러 페이지 이미지를 단일 CLI 호출로 처리하여 API 호출 수를 대폭 감소.\n * gemini/claude: @file 멀티 참조, codex: --image 멀티 플래그\n *\n * 299페이지 기준:\n * - 기존: CLI 299회 호출 (~30분)\n * - 배치: CLI 3~6회 호출 (~3분)\n */\n\nimport { spawnSync } from \"child_process\"\nimport { writeFileSync, readFileSync, unlinkSync, mkdirSync } from \"fs\"\nimport { join } from \"path\"\nimport { tmpdir } from \"os\"\nimport type { StructuredOcrResult, BatchOcrProvider } from \"../types.js\"\n\n/** 배치 OCR 프롬프트 */\nconst BATCH_OCR_PROMPT =\n \"다음 문서 페이지 이미지들을 OCR하여 순수 Markdown으로 변환하세요.\\n\\n\" +\n \"규칙:\\n\" +\n \"- 각 페이지 결과 사이에 반드시 이 구분자를 삽입: <!-- PAGE_BREAK -->\\n\" +\n \"- 테이블은 Markdown 테이블 문법 사용 (| 구분, |---|---| 헤더 구분선 포함)\\n\" +\n \"- 병합된 셀은 해당 위치에 내용 기재\\n\" +\n \"- 헤딩은 글자 크기에 따라 ## ~ ###### 사용\\n\" +\n \"- 리스트는 - 또는 1. 사용\\n\" +\n \"- 이미지, 도형 등 비텍스트 요소는 무시\\n\" +\n \"- 원문의 읽기 순서와 구조를 유지\\n\" +\n \"- ```로 감싸지 말고 순수 Markdown만 출력\"\n\n/** 모드별 기본 배치 크기 (CLI 내부 타임아웃 + 실측 기반)\n *\n * gemini CLI: 10장 이상에서 AbortError 발생 (내부 타임아웃).\n * 5장 배치가 안정적으로 동작 확인 (35초/배치).\n * 299페이지 = 60배치 = 기존 299회 대비 80% 감소.\n */\nexport const DEFAULT_BATCH_SIZES: Record<string, number> = {\n gemini: 5,\n claude: 5,\n codex: 10,\n}\n\n/** 임시 디렉토리 — gemini CLI는 cwd 하위 + gitignore 밖만 @참조 가능 */\nlet _batchTempDir: string | null = null\nfunction getBatchTempDir(): string {\n if (!_batchTempDir) {\n _batchTempDir = join(process.cwd(), \"_kordoc_ocr_tmp\")\n mkdirSync(_batchTempDir, { recursive: true })\n }\n return _batchTempDir\n}\n\n/**\n * 배치 CLI 프로바이더 생성\n */\nexport function createBatchCliProvider(\n mode: \"gemini\" | \"claude\" | \"codex\",\n batchSize: number\n): BatchOcrProvider {\n return {\n __batch: true as const,\n batchSize,\n async processBatch(pages) {\n const results = new Map<number, StructuredOcrResult>()\n const tempDir = getBatchTempDir()\n const tempFiles: string[] = []\n\n try {\n // 1. 
Write all page images to temp files\n for (const { image, pageNum } of pages) {\n const path = join(tempDir, `batch-p${pageNum}.png`)\n writeFileSync(path, image)\n tempFiles.push(path)\n }\n\n // 2. Call CLI with all file references\n let output: string\n if (mode === \"codex\") {\n output = callBatchCodexCli(tempFiles)\n } else {\n output = callBatchCli(mode, tempFiles)\n }\n\n // 3. Parse response by PAGE_BREAK separator\n const cleaned = stripCodeFence(output.trim())\n const parts = cleaned.split(/<!--\\s*PAGE_BREAK\\s*-->/)\n .map(p => p.trim())\n .filter(p => p.length > 0)\n\n // 4. Map results to page numbers (best-effort if count mismatch)\n for (let i = 0; i < pages.length; i++) {\n const pageNum = pages[i].pageNum\n if (i < parts.length) {\n results.set(pageNum, { markdown: parts[i] })\n }\n // If fewer parts than pages, remaining pages get no result\n }\n } finally {\n // 5. Clean up temp files\n for (const f of tempFiles) {\n try { unlinkSync(f) } catch { /* ignore */ }\n }\n }\n\n return results\n },\n }\n}\n\n/** gemini/claude 배치 호출 */\nfunction callBatchCli(mode: \"gemini\" | \"claude\", imagePaths: string[]): string {\n const fileRefs = imagePaths.map(p => `@${p}`).join(\"\\n\")\n const prompt = `${BATCH_OCR_PROMPT}\\n\\n${fileRefs}`\n\n let args: string[]\n if (mode === \"gemini\") {\n args = [\"--prompt\", prompt, \"--yolo\"]\n const model = process.env.KORDOC_GEMINI_MODEL\n if (model) args.push(\"--model\", model)\n } else {\n // claude\n args = [\"--print\", prompt]\n const model = process.env.KORDOC_CLAUDE_MODEL\n if (model) args.push(\"--model\", model)\n }\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = spawnSync(mode, args, {\n encoding: \"utf-8\",\n timeout: timeoutMs,\n maxBuffer: 50 * 1024 * 1024, // 50MB (large batch output)\n ...(mode === \"claude\" ? 
{ cwd: tmpdir() } : {}),\n })\n\n if (result.error) {\n throw new Error(`${mode} 배치 OCR 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`${mode} 배치 OCR 실패: ${errMsg}`)\n }\n\n return result.stdout || \"\"\n}\n\n/** codex 배치 호출 — --image를 여러 번 지정 */\nfunction callBatchCodexCli(imagePaths: string[]): string {\n const outPath = join(tmpdir(), `kordoc-codex-batch-${Date.now()}.txt`)\n try {\n const args = [\"exec\", BATCH_OCR_PROMPT]\n for (const p of imagePaths) {\n args.push(\"--image\", p)\n }\n args.push(\"--output-last-message\", outPath)\n const model = process.env.KORDOC_CODEX_MODEL\n if (model) args.push(\"--model\", model)\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = spawnSync(\"codex\", args, {\n encoding: \"utf-8\",\n timeout: timeoutMs,\n maxBuffer: 50 * 1024 * 1024,\n input: \"\",\n })\n\n if (result.error) {\n throw new Error(`codex 배치 OCR 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`codex 배치 OCR 실패: ${errMsg}`)\n }\n\n try {\n return readFileSync(outPath, \"utf-8\")\n } catch {\n return result.stdout || \"\"\n }\n } finally {\n try { unlinkSync(outPath) } catch { /* ignore */ }\n }\n}\n\n/** LLM 출력에서 코드 펜스 제거 (cli-provider.ts와 동일 로직) */\nfunction stripCodeFence(text: string): string {\n const match = text.match(/^```(?:markdown|md)?\\s*\\n([\\s\\S]*?)\\n```\\s*$/m)\n return match ? 
match[1].trim() : text\n}\n"],"mappings":";;;;AAWA,SAAS,iBAAiB;AAC1B,SAAS,eAAe,cAAc,YAAY,iBAAiB;AACnE,SAAS,YAAY;AACrB,SAAS,cAAc;AAIvB,IAAM,mBACJ;AAiBK,IAAM,sBAA8C;AAAA,EACzD,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,OAAO;AACT;AAGA,IAAI,gBAA+B;AACnC,SAAS,kBAA0B;AACjC,MAAI,CAAC,eAAe;AAClB,oBAAgB,KAAK,QAAQ,IAAI,GAAG,iBAAiB;AACrD,cAAU,eAAe,EAAE,WAAW,KAAK,CAAC;AAAA,EAC9C;AACA,SAAO;AACT;AAKO,SAAS,uBACd,MACA,WACkB;AAClB,SAAO;AAAA,IACL,SAAS;AAAA,IACT;AAAA,IACA,MAAM,aAAa,OAAO;AACxB,YAAM,UAAU,oBAAI,IAAiC;AACrD,YAAM,UAAU,gBAAgB;AAChC,YAAM,YAAsB,CAAC;AAE7B,UAAI;AAEF,mBAAW,EAAE,OAAO,QAAQ,KAAK,OAAO;AACtC,gBAAM,OAAO,KAAK,SAAS,UAAU,OAAO,MAAM;AAClD,wBAAc,MAAM,KAAK;AACzB,oBAAU,KAAK,IAAI;AAAA,QACrB;AAGA,YAAI;AACJ,YAAI,SAAS,SAAS;AACpB,mBAAS,kBAAkB,SAAS;AAAA,QACtC,OAAO;AACL,mBAAS,aAAa,MAAM,SAAS;AAAA,QACvC;AAGA,cAAM,UAAU,eAAe,OAAO,KAAK,CAAC;AAC5C,cAAM,QAAQ,QAAQ,MAAM,yBAAyB,EAClD,IAAI,OAAK,EAAE,KAAK,CAAC,EACjB,OAAO,OAAK,EAAE,SAAS,CAAC;AAG3B,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,gBAAM,UAAU,MAAM,CAAC,EAAE;AACzB,cAAI,IAAI,MAAM,QAAQ;AACpB,oBAAQ,IAAI,SAAS,EAAE,UAAU,MAAM,CAAC,EAAE,CAAC;AAAA,UAC7C;AAAA,QAEF;AAAA,MACF,UAAE;AAEA,mBAAW,KAAK,WAAW;AACzB,cAAI;AAAE,uBAAW,CAAC;AAAA,UAAE,QAAQ;AAAA,UAAe;AAAA,QAC7C;AAAA,MACF;AAEA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAGA,SAAS,aAAa,MAA2B,YAA8B;AAC7E,QAAM,WAAW,WAAW,IAAI,OAAK,IAAI,CAAC,EAAE,EAAE,KAAK,IAAI;AACvD,QAAM,SAAS,GAAG,gBAAgB;AAAA;AAAA,EAAO,QAAQ;AAEjD,MAAI;AACJ,MAAI,SAAS,UAAU;AACrB,WAAO,CAAC,YAAY,QAAQ,QAAQ;AACpC,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAAA,EACvC,OAAO;AAEL,WAAO,CAAC,WAAW,MAAM;AACzB,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAAA,EACvC;AAEA,QAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,QAAM,SAAS,UAAU,MAAM,MAAM;AAAA,IACnC,UAAU;AAAA,IACV,SAAS;AAAA,IACT,WAAW,KAAK,OAAO;AAAA;AAAA,IACvB,GAAI,SAAS,WAAW,EAAE,KAAK,OAAO,EAAE,IAAI,CAAC;AAAA,EAC/C,CAAC;AAED,MAAI,OAAO,OAAO;AAChB,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,OAAO,MAAM,OAAO,EAAE;AAAA,EAC9D;AACA,MAAI,OAAO,WAAW,GAAG;AACvB,UAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,
MAAM,EAAE;AAAA,EAChD;AAEA,SAAO,OAAO,UAAU;AAC1B;AAGA,SAAS,kBAAkB,YAA8B;AACvD,QAAM,UAAU,KAAK,OAAO,GAAG,sBAAsB,KAAK,IAAI,CAAC,MAAM;AACrE,MAAI;AACF,UAAM,OAAO,CAAC,QAAQ,gBAAgB;AACtC,eAAW,KAAK,YAAY;AAC1B,WAAK,KAAK,WAAW,CAAC;AAAA,IACxB;AACA,SAAK,KAAK,yBAAyB,OAAO;AAC1C,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAErC,UAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,UAAM,SAAS,UAAU,SAAS,MAAM;AAAA,MACtC,UAAU;AAAA,MACV,SAAS;AAAA,MACT,WAAW,KAAK,OAAO;AAAA,MACvB,OAAO;AAAA,IACT,CAAC;AAED,QAAI,OAAO,OAAO;AAChB,YAAM,IAAI,MAAM,wCAAoB,OAAO,MAAM,OAAO,EAAE;AAAA,IAC5D;AACA,QAAI,OAAO,WAAW,GAAG;AACvB,YAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,YAAM,IAAI,MAAM,wCAAoB,MAAM,EAAE;AAAA,IAC9C;AAEA,QAAI;AACF,aAAO,aAAa,SAAS,OAAO;AAAA,IACtC,QAAQ;AACN,aAAO,OAAO,UAAU;AAAA,IAC1B;AAAA,EACF,UAAE;AACA,QAAI;AAAE,iBAAW,OAAO;AAAA,IAAE,QAAQ;AAAA,IAAe;AAAA,EACnD;AACF;AAGA,SAAS,eAAe,MAAsB;AAC5C,QAAM,QAAQ,KAAK,MAAM,+CAA+C;AACxE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;","names":[]}
@@ -6,7 +6,7 @@ import {
6
6
  precheckZipSize,
7
7
  sanitizeHref,
8
8
  toArrayBuffer
9
- } from "./chunk-6KLTURMA.js";
9
+ } from "./chunk-WWILSVMJ.js";
10
10
  import {
11
11
  parsePageRange
12
12
  } from "./chunk-MOL7MDBG.js";
@@ -5406,9 +5406,10 @@ async function parsePdfDocument(buffer, options) {
5406
5406
  const ocrMode = options?.ocrMode;
5407
5407
  if (!ocrProvider && ocrMode && ocrMode !== "off") {
5408
5408
  try {
5409
- const { resolveOcrProvider } = await import("./resolve-UFUJEPCJ.js");
5409
+ const { resolveOcrProvider } = await import("./resolve-LBFYRHJI.js");
5410
5410
  const concurrency = options?.ocrConcurrency ?? 1;
5411
- ocrProvider = await resolveOcrProvider(ocrMode, warnings, concurrency);
5411
+ const batchSize = options?.ocrBatchSize;
5412
+ ocrProvider = await resolveOcrProvider(ocrMode, warnings, concurrency, batchSize);
5412
5413
  } catch (resolveErr) {
5413
5414
  if (ocrMode !== "auto") {
5414
5415
  throw Object.assign(
@@ -5419,21 +5420,28 @@ async function parsePdfDocument(buffer, options) {
5419
5420
  }
5420
5421
  }
5421
5422
  if (ocrProvider) {
5423
+ let ocrBlocks = [];
5422
5424
  try {
5423
- const { ocrPages } = await import("./provider-I3XGSVL6.js");
5425
+ const { ocrPages } = await import("./provider-OBY3XFSZ.js");
5424
5426
  const concurrency = options?.ocrConcurrency ?? 1;
5425
- const ocrBlocks = await ocrPages(doc, ocrProvider, pageFilter, effectivePageCount, warnings, concurrency);
5426
- if (ocrBlocks.length > 0) {
5427
- const ocrMarkdown = blocksToMarkdown(ocrBlocks);
5428
- return {
5429
- markdown: ocrMarkdown,
5430
- blocks: ocrBlocks,
5431
- metadata,
5432
- warnings: warnings.length > 0 ? warnings : void 0,
5433
- isImageBased: true
5434
- };
5435
- }
5427
+ ocrBlocks = await ocrPages(doc, ocrProvider, pageFilter, effectivePageCount, warnings, concurrency, options?.onProgress);
5436
5428
  } catch {
5429
+ } finally {
5430
+ const terminable = ocrProvider;
5431
+ if (typeof terminable.terminate === "function") {
5432
+ await terminable.terminate().catch(() => {
5433
+ });
5434
+ }
5435
+ }
5436
+ if (ocrBlocks.length > 0) {
5437
+ const ocrMarkdown = blocksToMarkdown(ocrBlocks);
5438
+ return {
5439
+ markdown: ocrMarkdown,
5440
+ blocks: ocrBlocks,
5441
+ metadata,
5442
+ warnings: warnings.length > 0 ? warnings : void 0,
5443
+ isImageBased: true
5444
+ };
5437
5445
  }
5438
5446
  }
5439
5447
  if (ocrMode === "off") {
@@ -9616,4 +9624,4 @@ export {
9616
9624
  cfb/cfb.js:
9617
9625
  (*! crc32.js (C) 2014-present SheetJS -- http://sheetjs.com *)
9618
9626
  */
9619
- //# sourceMappingURL=chunk-FC6BQOWD.js.map
9627
+ //# sourceMappingURL=chunk-2ZGLFZCN.js.map