@clazic/kordoc 2.2.7 → 2.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -4,11 +4,11 @@ import {
4
4
  markdownToHwpx,
5
5
  markdownToXlsx,
6
6
  parse
7
- } from "./chunk-UFIRSH5G.js";
7
+ } from "./chunk-FC6BQOWD.js";
8
8
  import {
9
9
  VERSION,
10
10
  toArrayBuffer
11
- } from "./chunk-3CNYQD23.js";
11
+ } from "./chunk-6KLTURMA.js";
12
12
  import "./chunk-MOL7MDBG.js";
13
13
  import "./chunk-ZWE3DS7E.js";
14
14
 
@@ -60,6 +60,10 @@ async function runParse(files, opts) {
60
60
  } else {
61
61
  parseOptions.ocrMode = "auto";
62
62
  }
63
+ if (opts.ocrJobs) {
64
+ const n = parseInt(opts.ocrJobs, 10);
65
+ if (n > 0) parseOptions.ocrConcurrency = n;
66
+ }
63
67
  if (!opts.silent) {
64
68
  parseOptions.onProgress = (current, total) => {
65
69
  process.stderr.write(`\r[kordoc] ${filePrefix}${fileName} (${format}) [${current}/${total}]`);
@@ -121,7 +125,7 @@ async function runParse(files, opts) {
121
125
  saveImages(absPath);
122
126
  }
123
127
  } catch (err) {
124
- const { sanitizeError } = await import("./utils-3EDZ5QEH.js");
128
+ const { sanitizeError } = await import("./utils-JRBHPKTC.js");
125
129
  process.stderr.write(`
126
130
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
127
131
  `);
@@ -130,7 +134,7 @@ async function runParse(files, opts) {
130
134
  }
131
135
  }
132
136
  function addParseOptions(cmd) {
133
- return cmd.option("-o, --output <path>", "\uCD9C\uB825 \uD30C\uC77C \uACBD\uB85C (\uB2E8\uC77C \uD30C\uC77C \uC2DC)").option("-d, --out-dir <dir>", "\uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC (\uB2E4\uC911 \uD30C\uC77C \uC2DC)").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704 (\uC608: 1-3, 1,3,5)").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown (\uAE30\uBCF8) \uB610\uB294 json", "markdown").option("--no-header-footer", "PDF \uBA38\uB9AC\uAE00/\uBC14\uB2E5\uAE00 \uC790\uB3D9 \uC81C\uAC70").option("--image-dir <dir>", "\uC774\uBBF8\uC9C0 \uC800\uC7A5 \uD3F4\uB354 (\uAE30\uBCF8: \uC785\uB825 \uD30C\uC77C\uBA85_images \uD3F4\uB354)").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").option("--ocr <mode>", "OCR \uBAA8\uB4DC: auto(\uAE30\uBCF8), gemini, claude, codex, ollama, tesseract, off");
137
+ return cmd.option("-o, --output <path>", "\uCD9C\uB825 \uD30C\uC77C \uACBD\uB85C (\uB2E8\uC77C \uD30C\uC77C \uC2DC)").option("-d, --out-dir <dir>", "\uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC (\uB2E4\uC911 \uD30C\uC77C \uC2DC)").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704 (\uC608: 1-3, 1,3,5)").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown (\uAE30\uBCF8) \uB610\uB294 json", "markdown").option("--no-header-footer", "PDF \uBA38\uB9AC\uAE00/\uBC14\uB2E5\uAE00 \uC790\uB3D9 \uC81C\uAC70").option("--image-dir <dir>", "\uC774\uBBF8\uC9C0 \uC800\uC7A5 \uD3F4\uB354 (\uAE30\uBCF8: \uC785\uB825 \uD30C\uC77C\uBA85_images \uD3F4\uB354)").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").option("--ocr <mode>", "OCR \uBAA8\uB4DC: auto(\uAE30\uBCF8), gemini, claude, codex, ollama, tesseract, off").option("--ocr-jobs <n>", "OCR \uBCD1\uB82C \uCC98\uB9AC \uC218 (\uAE30\uBCF8: CPU \uCF54\uC5B4 \uC218, tesseract \uC804\uC6A9)");
134
138
  }
135
139
  program.enablePositionalOptions().name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\uACA0\uB2E4 \u2014 HWP, HWPX, PDF, XLSX, DOCX \u2192 Markdown").version(VERSION);
136
140
  addParseOptions(
@@ -205,7 +209,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
205
209
  `));
206
210
  }
207
211
  } catch (err) {
208
- const { sanitizeError } = await import("./utils-3EDZ5QEH.js");
212
+ const { sanitizeError } = await import("./utils-JRBHPKTC.js");
209
213
  process.stderr.write(` FAIL
210
214
  `);
211
215
  process.stderr.write(` \u2192 ${sanitizeError(err)}
@@ -214,7 +218,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
214
218
  }
215
219
  });
216
220
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
217
- const { watchDirectory } = await import("./watch-BDL7I557.js");
221
+ const { watchDirectory } = await import("./watch-JANDW746.js");
218
222
  await watchDirectory({
219
223
  dir,
220
224
  outDir: opts.outDir,
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\n\nimport { readFileSync, writeFileSync, mkdirSync, statSync, existsSync, readdirSync } from \"fs\"\nimport { basename, resolve, extname } from \"path\"\nimport { Command } from \"commander\"\nimport { parse, detectFormat, markdownToHwpx, markdownToXlsx } from \"./index.js\"\nimport type { ParseOptions, OcrMode } from \"./types.js\"\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\n\nconst program = new Command()\n\n/** 공통 parse 옵션 타입 */\ninterface ParseOpts {\n output?: string\n outDir?: string\n pages?: string\n format: string\n headerFooter: boolean\n imageDir?: string\n silent?: boolean\n ocr?: string\n}\n\n/** parse 액션 공통 구현 — 루트 커맨드와 `parse` 서브커맨드가 공유 */\nasync function runParse(files: string[], opts: ParseOpts) {\n const validFormats = [\"markdown\", \"json\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\n process.exit(1)\n }\n for (let fi = 0; fi < files.length; fi++) {\n const filePath = files[fi]\n const absPath = resolve(filePath)\n const fileName = basename(absPath)\n const filePrefix = files.length > 1 ? `[${fi + 1}/${files.length}] ` : \"\"\n\n try {\n const fileSize = statSync(absPath).size\n if (fileSize > 500 * 1024 * 1024) {\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\n process.exitCode = 1\n continue\n }\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const format = detectFormat(arrayBuffer)\n\n if (!opts.silent) {\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\n }\n\n const parseOptions: ParseOptions = {}\n if (opts.pages) parseOptions.pages = opts.pages as string\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\n\n // OCR 모드: CLI 기본값 \"auto\" (라이브러리 API는 undefined 유지)\n const validOcrModes = [\"auto\", \"gemini\", \"claude\", \"codex\", \"ollama\", \"tesseract\", \"off\"]\n if (opts.ocr) {\n if (!validOcrModes.includes(opts.ocr)) {\n process.stderr.write(`[kordoc] 지원하지 않는 OCR 모드: ${opts.ocr}\\n`)\n process.stderr.write(` 사용 가능: ${validOcrModes.join(\", \")}\\n`)\n process.exit(1)\n }\n parseOptions.ocrMode = opts.ocr as OcrMode\n } else {\n parseOptions.ocrMode = \"auto\"\n }\n\n if (!opts.silent) {\n parseOptions.onProgress = (current: number, total: number) => {\n process.stderr.write(`\\r[kordoc] ${filePrefix}${fileName} (${format}) [${current}/${total}]`)\n }\n }\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${result.error}\\n`)\n process.exitCode = 1\n continue\n }\n\n if (!opts.silent) process.stderr.write(` OK\\n`)\n\n // 이미지 기반 PDF OCR 결과 표시\n if (!opts.silent && result.success && result.isImageBased) {\n process.stderr.write(` → 이미지 기반 PDF — OCR 처리됨\\n`)\n }\n\n // 경고 표시\n if (!opts.silent && result.success && result.warnings?.length) {\n for (const w of result.warnings) {\n process.stderr.write(` ⚠ ${w.message}\\n`)\n }\n }\n\n let markdown = result.markdown\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\n if (opts.outDir && result.images?.length) {\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"![image](images/image_\")\n }\n const output = opts.format === \"json\"\n ? JSON.stringify(result, null, 2)\n : markdown\n\n // 이미지 저장: 출력 MD 파일 기준 폴더 사용 (convert와 일치)\n const saveImages = (outFilePath: string) => {\n if (!result.images?.length) return\n const stem = basename(outFilePath).replace(/\\.[^.]+$/, \"\")\n const defaultDir = resolve(outFilePath, \"..\", stem + \"_images\")\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : defaultDir\n mkdirSync(imgDir, { recursive: true })\n for (const img of result.images) {\n writeFileSync(resolve(imgDir, img.filename), img.data)\n }\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\n }\n\n if (opts.output && files.length === 1) {\n writeFileSync(opts.output, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\n saveImages(resolve(opts.output))\n } else if (opts.outDir) {\n mkdirSync(opts.outDir, { recursive: true })\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\n saveImages(outPath)\n } else {\n process.stdout.write(output + \"\\n\")\n saveImages(absPath) // stdout 출력 시 입력 파일 기준\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\n process.exitCode = 1\n }\n }\n}\n\n/** 공통 parse 옵션 등록 헬퍼 */\nfunction addParseOptions(cmd: Command): Command {\n return cmd\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\n .option(\"--image-dir <dir>\", \"이미지 저장 폴더 (기본: 입력 파일명_images 폴더)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .option(\"--ocr <mode>\", \"OCR 모드: auto(기본), gemini, claude, codex, ollama, tesseract, off\")\n}\n\nprogram\n .enablePositionalOptions()\n .name(\"kordoc\")\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\n .version(VERSION)\n\n// `kordoc parse <files>` 서브커맨드 (권장)\naddParseOptions(\n program\n .command(\"parse\")\n .description(\"파일을 마크다운으로 파싱 (HWP, HWPX, PDF, XLSX, DOCX)\")\n .argument(\"<files...>\", \"변환할 파일 경로\")\n).action(runParse)\n\n// `kordoc <files>` 루트 커맨드 (하위 호환)\naddParseOptions(\n program\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\n).action(runParse)\n\nprogram\n .command(\"convert <input>\")\n .description(\"마크다운 파일을 HWPX 또는 XLSX로 변환\")\n .option(\"-f, --format <type>\", \"출력 포맷: hwpx | xlsx\", \"hwpx\")\n .option(\"-o, --output <path>\", \"출력 파일 경로 (기본: 입력명 + 포맷 확장자)\")\n .option(\"--image-dir <dir>\", \"이미지 폴더 경로 (기본: 입력 MD 파일명_images 폴더)\")\n .option(\"--images\", \"이미지 포함 (기본: 생략, 레이아웃 문제 방지)\")\n .option(\"--template <path>\", \"HWPX 템플릿 파일 경로 (hwpx 전용)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (input: string, opts) => {\n const validFormats = [\"hwpx\", \"xlsx\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 포맷: ${opts.format} (hwpx 또는 xlsx)\\n`)\n process.exit(1)\n }\n\n const absInput = resolve(input)\n if (!existsSync(absInput)) {\n process.stderr.write(`[kordoc] 파일을 찾을 수 없습니다: ${input}\\n`)\n process.exit(1)\n }\n\n const stem = basename(absInput).replace(/\\.[^.]+$/, \"\")\n const outPath = opts.output\n ? resolve(opts.output)\n : resolve(absInput, \"..\", `${stem}.${opts.format}`)\n\n if (!opts.silent) process.stderr.write(`[kordoc] ${basename(absInput)} → ${basename(outPath)} ...`)\n\n try {\n const markdown = readFileSync(absInput, \"utf-8\")\n\n // 이미지 폴더에서 이미지 로드 (--images 플래그 필요)\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : resolve(absInput, \"..\", stem + \"_images\")\n const images: import(\"./types.js\").ExtractedImage[] = []\n if (opts.images && existsSync(imgDir)) {\n const mimeMap: Record<string, string> = {\n png: \"image/png\", jpg: \"image/jpeg\", jpeg: \"image/jpeg\",\n gif: \"image/gif\", bmp: \"image/bmp\",\n }\n for (const entry of readdirSync(imgDir, { withFileTypes: true })) {\n if (!entry.isFile()) continue\n const fname = entry.name\n const ext = extname(fname).slice(1).toLowerCase()\n if (!mimeMap[ext]) continue\n const data = readFileSync(resolve(imgDir, fname))\n images.push({ filename: fname, data: new Uint8Array(data), mimeType: mimeMap[ext] })\n }\n if (!opts.silent) process.stderr.write(` → 이미지 ${images.length}개 로드\\n`)\n }\n\n const warnings: string[] = []\n\n let buf: ArrayBuffer\n if (opts.format === \"xlsx\") {\n if (opts.template && !opts.silent) {\n process.stderr.write(`\\n[kordoc] 경고: --template은 hwpx 전용입니다. 무시됩니다.\\n`)\n }\n buf = await markdownToXlsx(markdown, { warnings, images: images.length ? images : undefined })\n } else {\n let templateArrayBuffer: ArrayBuffer | undefined\n if (opts.template) {\n const tmplBuf = readFileSync(resolve(opts.template))\n templateArrayBuffer = tmplBuf.buffer.slice(tmplBuf.byteOffset, tmplBuf.byteOffset + tmplBuf.byteLength)\n }\n buf = await markdownToHwpx(markdown, { warnings, images: images.length ? images : undefined, templateArrayBuffer })\n }\n\n writeFileSync(outPath, Buffer.from(buf))\n\n if (!opts.silent) {\n process.stderr.write(` OK\\n`)\n process.stderr.write(` → ${outPath}\\n`)\n if (warnings.length) warnings.forEach(w => process.stderr.write(` ${w}\\n`))\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${sanitizeError(err)}\\n`)\n process.exit(1)\n }\n })\n\nprogram\n .command(\"watch <dir>\")\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (dir: string, opts) => {\n const { watchDirectory } = await import(\"./watch.js\")\n await watchDirectory({\n dir,\n outDir: opts.outDir,\n webhook: opts.webhook,\n format: opts.format,\n pages: opts.pages,\n silent: opts.silent,\n })\n })\n\nprogram.parse()\n"],"mappings":";;;;;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,UAAU,YAAY,mBAAmB;AAC1F,SAAS,UAAU,SAAS,eAAe;AAC3C,SAAS,eAAe;AAKxB,IAAM,UAAU,IAAI,QAAQ;AAe5B,eAAe,SAAS,OAAiB,MAAiB;AACxD,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AAEA,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AAGnE,YAAM,gBAAgB,CAAC,QAAQ,UAAU,UAAU,SAAS,UAAU,aAAa,KAAK;AACxF,UAAI,KAAK,KAAK;AACZ,YAAI,CAAC,cAAc,SAAS,KAAK,GAAG,GAAG;AACrC,kBAAQ,OAAO,MAAM,oEAA4B,KAAK,GAAG;AAAA,CAAI;AAC7D,kBAAQ,OAAO,MAAM,gCAAY,cAAc,KAAK,IAAI,CAAC;AAAA,CAAI;AAC7D,kBAAQ,KAAK,CAAC;AAAA,QAChB;AACA,qBAAa,UAAU,KAAK;AAAA,MAC9B,OAAO;AACL,qBAAa,UAAU;AAAA,MACzB;AAEA,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,kBAAQ,OAAO,MAAM,cAAc,UAAU,GAAG,QAAQ,KAAK,MAAM,MAAM,OAAO,IAAI,KAAK,GAAG;AAAA,QAC9F;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAG9C,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,cAAc;AACzD,gBAAQ,OAAO,MAAM;AAAA,CAA4B;AAAA,MACnD;AAGA,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,UAAU,QAAQ;AAC7D,mBAAW,KAAK,OAAO,UAAU;AAC/B,kBAAQ,OAAO,MAAM,YAAO,EAAE,OAAO;AAAA,CAAI;AAAA,QAC3C;AAAA,MACF;AAEA,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,gBAAwB;AAC1C,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,OAAO,SAAS,WAAW,EAAE,QAAQ,YAAY,EAAE;AACzD,cAAM,aAAa,QAAQ,aAAa,MAAM,OAAO,SAAS;AAC9D,cAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI;AACxD,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,MAAM,CAAC;AAAA,MACjC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,OAAO;AAAA,MACpB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAClC,mBAAW,OAAO;AAAA,MACpB;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF;AAGA,SAAS,gBAAgB,KAAuB;AAC9C,SAAO,IACJ,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,qBAAqB,kHAAkC,EAC9D,OAAO,YAAY,oDAAY,EAC/B,OAAO,gBAAgB,qFAAiE;AAC7F;AAEA,QACG,wBAAwB,EACxB,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO;AAGlB;AAAA,EACE,QACG,QAAQ,OAAO,EACf,YAAY,mGAA4C,EACxD,SAAS,cAAc,8CAAW;AACvC,EAAE,OAAO,QAAQ;AAGjB;AAAA,EACE,QACG,SAAS,cAAc,2EAAwC;AACpE,EAAE,OAAO,QAAQ;AAEjB,QACG,QAAQ,iBAAiB,EACzB,YAAY,uFAA2B,EACvC,OAAO,uBAAuB,0CAAsB,MAAM,EAC1D,OAAO,uBAAuB,6GAA6B,EAC3D,OAAO,qBAAqB,qHAAqC,EACjE,OAAO,YAAY,kHAA6B,EAChD,OAAO,qBAAqB,uEAA0B,EACtD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAe,SAAS;AACrC,QAAM,eAAe,CAAC,QAAQ,MAAM;AACpC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAmB;AAC3E,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,WAAW,QAAQ,KAAK;AAC9B,MAAI,CAAC,WAAW,QAAQ,GAAG;AACzB,YAAQ,OAAO,MAAM,6EAA2B,KAAK;AAAA,CAAI;AACzD,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,OAAO,SAAS,QAAQ,EAAE,QAAQ,YAAY,EAAE;AACtD,QAAM,UAAU,KAAK,SACjB,QAAQ,KAAK,MAAM,IACnB,QAAQ,UAAU,MAAM,GAAG,IAAI,IAAI,KAAK,MAAM,EAAE;AAEpD,MAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAY,SAAS,QAAQ,CAAC,WAAM,SAAS,OAAO,CAAC,MAAM;AAElG,MAAI;AACF,UAAM,WAAW,aAAa,UAAU,OAAO;AAG/C,UAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI,QAAQ,UAAU,MAAM,OAAO,SAAS;AAChG,UAAM,SAAgD,CAAC;AACvD,QAAI,KAAK,UAAU,WAAW,MAAM,GAAG;AACrC,YAAM,UAAkC;AAAA,QACtC,KAAK;AAAA,QAAa,KAAK;AAAA,QAAc,MAAM;AAAA,QAC3C,KAAK;AAAA,QAAa,KAAK;AAAA,MACzB;AACA,iBAAW,SAAS,YAAY,QAAQ,EAAE,eAAe,KAAK,CAAC,GAAG;AAChE,YAAI,CAAC,MAAM,OAAO,EAAG;AACrB,cAAM,QAAQ,MAAM;AACpB,cAAM,MAAM,QAAQ,KAAK,EAAE,MAAM,CAAC,EAAE,YAAY;AAChD,YAAI,CAAC,QAAQ,GAAG,EAAG;AACnB,cAAM,OAAO,aAAa,QAAQ,QAAQ,KAAK,CAAC;AAChD,eAAO,KAAK,EAAE,UAAU,OAAO,MAAM,IAAI,WAAW,IAAI,GAAG,UAAU,QAAQ,GAAG,EAAE,CAAC;AAAA,MACrF;AACA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,+BAAW,OAAO,MAAM;AAAA,CAAQ;AAAA,IACzE;AAEA,UAAM,WAAqB,CAAC;AAE5B,QAAI;AACJ,QAAI,KAAK,WAAW,QAAQ;AAC1B,UAAI,KAAK,YAAY,CAAC,KAAK,QAAQ;AACjC,gBAAQ,OAAO,MAAM;AAAA;AAAA,CAAiD;AAAA,MACxE;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,OAAU,CAAC;AAAA,IAC/F,OAAO;AACL,UAAI;AACJ,UAAI,KAAK,UAAU;AACjB,cAAM,UAAU,aAAa,QAAQ,KAAK,QAAQ,CAAC;AACnD,8BAAsB,QAAQ,OAAO,MAAM,QAAQ,YAAY,QAAQ,aAAa,QAAQ,UAAU;AAAA,MACxG;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,QAAW,oBAAoB,CAAC;AAAA,IACpH;AAEA,kBAAc,SAAS,OAAO,KAAK,GAAG,CAAC;AAEvC,QAAI,CAAC,KAAK,QAAQ;AAChB,cAAQ,OAAO,MAAM;AAAA,CAAO;AAC5B,cAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACvC,UAAI,SAAS,OAAQ,UAAS,QAAQ,OAAK,QAAQ,OAAO,MAAM,KAAK,CAAC;AAAA,CAAI,CAAC;AAAA,IAC7E;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,YAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,YAAQ,OAAO,MAAM,YAAO,cAAc,GAAG,CAAC;AAAA,CAAI;AAClD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAEH,QAAQ,MAAM;","names":[]}
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\n\nimport { readFileSync, writeFileSync, mkdirSync, statSync, existsSync, readdirSync } from \"fs\"\nimport { basename, resolve, extname } from \"path\"\nimport { Command } from \"commander\"\nimport { parse, detectFormat, markdownToHwpx, markdownToXlsx } from \"./index.js\"\nimport type { ParseOptions, OcrMode } from \"./types.js\"\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\n\nconst program = new Command()\n\n/** 공통 parse 옵션 타입 */\ninterface ParseOpts {\n output?: string\n outDir?: string\n pages?: string\n format: string\n headerFooter: boolean\n imageDir?: string\n silent?: boolean\n ocr?: string\n ocrJobs?: string\n}\n\n/** parse 액션 공통 구현 — 루트 커맨드와 `parse` 서브커맨드가 공유 */\nasync function runParse(files: string[], opts: ParseOpts) {\n const validFormats = [\"markdown\", \"json\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\n process.exit(1)\n }\n for (let fi = 0; fi < files.length; fi++) {\n const filePath = files[fi]\n const absPath = resolve(filePath)\n const fileName = basename(absPath)\n const filePrefix = files.length > 1 ? `[${fi + 1}/${files.length}] ` : \"\"\n\n try {\n const fileSize = statSync(absPath).size\n if (fileSize > 500 * 1024 * 1024) {\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\n process.exitCode = 1\n continue\n }\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const format = detectFormat(arrayBuffer)\n\n if (!opts.silent) {\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\n }\n\n const parseOptions: ParseOptions = {}\n if (opts.pages) parseOptions.pages = opts.pages as string\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\n\n // OCR 모드: CLI 기본값 \"auto\" (라이브러리 API는 undefined 유지)\n const validOcrModes = [\"auto\", \"gemini\", \"claude\", \"codex\", \"ollama\", \"tesseract\", \"off\"]\n if (opts.ocr) {\n if (!validOcrModes.includes(opts.ocr)) {\n process.stderr.write(`[kordoc] 지원하지 않는 OCR 모드: ${opts.ocr}\\n`)\n process.stderr.write(` 사용 가능: ${validOcrModes.join(\", \")}\\n`)\n process.exit(1)\n }\n parseOptions.ocrMode = opts.ocr as OcrMode\n } else {\n parseOptions.ocrMode = \"auto\"\n }\n\n // OCR 병렬 처리 수 (--ocr-jobs)\n if (opts.ocrJobs) {\n const n = parseInt(opts.ocrJobs, 10)\n if (n > 0) parseOptions.ocrConcurrency = n\n }\n\n if (!opts.silent) {\n parseOptions.onProgress = (current: number, total: number) => {\n process.stderr.write(`\\r[kordoc] ${filePrefix}${fileName} (${format}) [${current}/${total}]`)\n }\n }\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${result.error}\\n`)\n process.exitCode = 1\n continue\n }\n\n if (!opts.silent) process.stderr.write(` OK\\n`)\n\n // 이미지 기반 PDF OCR 결과 표시\n if (!opts.silent && result.success && result.isImageBased) {\n process.stderr.write(` → 이미지 기반 PDF — OCR 처리됨\\n`)\n }\n\n // 경고 표시\n if (!opts.silent && result.success && result.warnings?.length) {\n for (const w of result.warnings) {\n process.stderr.write(` ⚠ ${w.message}\\n`)\n }\n }\n\n let markdown = result.markdown\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\n if (opts.outDir && result.images?.length) {\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"![image](images/image_\")\n }\n const output = opts.format === \"json\"\n ? JSON.stringify(result, null, 2)\n : markdown\n\n // 이미지 저장: 출력 MD 파일 기준 폴더 사용 (convert와 일치)\n const saveImages = (outFilePath: string) => {\n if (!result.images?.length) return\n const stem = basename(outFilePath).replace(/\\.[^.]+$/, \"\")\n const defaultDir = resolve(outFilePath, \"..\", stem + \"_images\")\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : defaultDir\n mkdirSync(imgDir, { recursive: true })\n for (const img of result.images) {\n writeFileSync(resolve(imgDir, img.filename), img.data)\n }\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\n }\n\n if (opts.output && files.length === 1) {\n writeFileSync(opts.output, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\n saveImages(resolve(opts.output))\n } else if (opts.outDir) {\n mkdirSync(opts.outDir, { recursive: true })\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\n saveImages(outPath)\n } else {\n process.stdout.write(output + \"\\n\")\n saveImages(absPath) // stdout 출력 시 입력 파일 기준\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\n process.exitCode = 1\n }\n }\n}\n\n/** 공통 parse 옵션 등록 헬퍼 */\nfunction addParseOptions(cmd: Command): Command {\n return cmd\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\n .option(\"--image-dir <dir>\", \"이미지 저장 폴더 (기본: 입력 파일명_images 폴더)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .option(\"--ocr <mode>\", \"OCR 모드: auto(기본), gemini, claude, codex, ollama, tesseract, off\")\n .option(\"--ocr-jobs <n>\", \"OCR 병렬 처리 수 (기본: CPU 코어 수, tesseract 전용)\")\n}\n\nprogram\n .enablePositionalOptions()\n .name(\"kordoc\")\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\n .version(VERSION)\n\n// `kordoc parse <files>` 서브커맨드 (권장)\naddParseOptions(\n program\n .command(\"parse\")\n .description(\"파일을 마크다운으로 파싱 (HWP, HWPX, PDF, XLSX, DOCX)\")\n .argument(\"<files...>\", \"변환할 파일 경로\")\n).action(runParse)\n\n// `kordoc <files>` 루트 커맨드 (하위 호환)\naddParseOptions(\n program\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\n).action(runParse)\n\nprogram\n .command(\"convert <input>\")\n .description(\"마크다운 파일을 HWPX 또는 XLSX로 변환\")\n .option(\"-f, --format <type>\", \"출력 포맷: hwpx | xlsx\", \"hwpx\")\n .option(\"-o, --output <path>\", \"출력 파일 경로 (기본: 입력명 + 포맷 확장자)\")\n .option(\"--image-dir <dir>\", \"이미지 폴더 경로 (기본: 입력 MD 파일명_images 폴더)\")\n .option(\"--images\", \"이미지 포함 (기본: 생략, 레이아웃 문제 방지)\")\n .option(\"--template <path>\", \"HWPX 템플릿 파일 경로 (hwpx 전용)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (input: string, opts) => {\n const validFormats = [\"hwpx\", \"xlsx\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 포맷: ${opts.format} (hwpx 또는 xlsx)\\n`)\n process.exit(1)\n }\n\n const absInput = resolve(input)\n if (!existsSync(absInput)) {\n process.stderr.write(`[kordoc] 파일을 찾을 수 없습니다: ${input}\\n`)\n process.exit(1)\n }\n\n const stem = basename(absInput).replace(/\\.[^.]+$/, \"\")\n const outPath = opts.output\n ? resolve(opts.output)\n : resolve(absInput, \"..\", `${stem}.${opts.format}`)\n\n if (!opts.silent) process.stderr.write(`[kordoc] ${basename(absInput)} → ${basename(outPath)} ...`)\n\n try {\n const markdown = readFileSync(absInput, \"utf-8\")\n\n // 이미지 폴더에서 이미지 로드 (--images 플래그 필요)\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : resolve(absInput, \"..\", stem + \"_images\")\n const images: import(\"./types.js\").ExtractedImage[] = []\n if (opts.images && existsSync(imgDir)) {\n const mimeMap: Record<string, string> = {\n png: \"image/png\", jpg: \"image/jpeg\", jpeg: \"image/jpeg\",\n gif: \"image/gif\", bmp: \"image/bmp\",\n }\n for (const entry of readdirSync(imgDir, { withFileTypes: true })) {\n if (!entry.isFile()) continue\n const fname = entry.name\n const ext = extname(fname).slice(1).toLowerCase()\n if (!mimeMap[ext]) continue\n const data = readFileSync(resolve(imgDir, fname))\n images.push({ filename: fname, data: new Uint8Array(data), mimeType: mimeMap[ext] })\n }\n if (!opts.silent) process.stderr.write(` → 이미지 ${images.length}개 로드\\n`)\n }\n\n const warnings: string[] = []\n\n let buf: ArrayBuffer\n if (opts.format === \"xlsx\") {\n if (opts.template && !opts.silent) {\n process.stderr.write(`\\n[kordoc] 경고: --template은 hwpx 전용입니다. 무시됩니다.\\n`)\n }\n buf = await markdownToXlsx(markdown, { warnings, images: images.length ? images : undefined })\n } else {\n let templateArrayBuffer: ArrayBuffer | undefined\n if (opts.template) {\n const tmplBuf = readFileSync(resolve(opts.template))\n templateArrayBuffer = tmplBuf.buffer.slice(tmplBuf.byteOffset, tmplBuf.byteOffset + tmplBuf.byteLength)\n }\n buf = await markdownToHwpx(markdown, { warnings, images: images.length ? images : undefined, templateArrayBuffer })\n }\n\n writeFileSync(outPath, Buffer.from(buf))\n\n if (!opts.silent) {\n process.stderr.write(` OK\\n`)\n process.stderr.write(` → ${outPath}\\n`)\n if (warnings.length) warnings.forEach(w => process.stderr.write(` ${w}\\n`))\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${sanitizeError(err)}\\n`)\n process.exit(1)\n }\n })\n\nprogram\n .command(\"watch <dir>\")\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (dir: string, opts) => {\n const { watchDirectory } = await import(\"./watch.js\")\n await watchDirectory({\n dir,\n outDir: opts.outDir,\n webhook: opts.webhook,\n format: opts.format,\n pages: opts.pages,\n silent: opts.silent,\n })\n })\n\nprogram.parse()\n"],"mappings":";;;;;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,UAAU,YAAY,mBAAmB;AAC1F,SAAS,UAAU,SAAS,eAAe;AAC3C,SAAS,eAAe;AAKxB,IAAM,UAAU,IAAI,QAAQ;AAgB5B,eAAe,SAAS,OAAiB,MAAiB;AACxD,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AAEA,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AAGnE,YAAM,gBAAgB,CAAC,QAAQ,UAAU,UAAU,SAAS,UAAU,aAAa,KAAK;AACxF,UAAI,KAAK,KAAK;AACZ,YAAI,CAAC,cAAc,SAAS,KAAK,GAAG,GAAG;AACrC,kBAAQ,OAAO,MAAM,oEAA4B,KAAK,GAAG;AAAA,CAAI;AAC7D,kBAAQ,OAAO,MAAM,gCAAY,cAAc,KAAK,IAAI,CAAC;AAAA,CAAI;AAC7D,kBAAQ,KAAK,CAAC;AAAA,QAChB;AACA,qBAAa,UAAU,KAAK;AAAA,MAC9B,OAAO;AACL,qBAAa,UAAU;AAAA,MACzB;AAGA,UAAI,KAAK,SAAS;AAChB,cAAM,IAAI,SAAS,KAAK,SAAS,EAAE;AACnC,YAAI,IAAI,EAAG,cAAa,iBAAiB;AAAA,MAC3C;AAEA,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,kBAAQ,OAAO,MAAM,cAAc,UAAU,GAAG,QAAQ,KAAK,MAAM,MAAM,OAAO,IAAI,KAAK,GAAG;AAAA,QAC9F;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAG9C,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,cAAc;AACzD,gBAAQ,OAAO,MAAM;AAAA,CAA4B;AAAA,MACnD;AAGA,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,UAAU,QAAQ;AAC7D,mBAAW,KAAK,OAAO,UAAU;AAC/B,kBAAQ,OAAO,MAAM,YAAO,EAAE,OAAO;AAAA,CAAI;AAAA,QAC3C;AAAA,MACF;AAEA,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,gBAAwB;AAC1C,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,OAAO,SAAS,WAAW,EAAE,QAAQ,YAAY,EAAE;AACzD,cAAM,aAAa,QAAQ,aAAa,MAAM,OAAO,SAAS;AAC9D,cAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI;AACxD,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,MAAM,CAAC;AAAA,MACjC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,OAAO;AAAA,MACpB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAClC,mBAAW,OAAO;AAAA,MACpB;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF;AAGA,SAAS,gBAAgB,KAAuB;AAC9C,SAAO,IACJ,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,qBAAqB,kHAAkC,EAC9D,OAAO,YAAY,oDAAY,EAC/B,OAAO,gBAAgB,qFAAiE,EACxF,OAAO,kBAAkB,sGAA0C;AACxE;AAEA,QACG,wBAAwB,EACxB,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO;AAGlB;AAAA,EACE,QACG,QAAQ,OAAO,EACf,YAAY,mGAA4C,EACxD,SAAS,cAAc,8CAAW;AACvC,EAAE,OAAO,QAAQ;AAGjB;AAAA,EACE,QACG,SAAS,cAAc,2EAAwC;AACpE,EAAE,OAAO,QAAQ;AAEjB,QACG,QAAQ,iBAAiB,EACzB,YAAY,uFAA2B,EACvC,OAAO,uBAAuB,0CAAsB,MAAM,EAC1D,OAAO,uBAAuB,6GAA6B,EAC3D,OAAO,qBAAqB,qHAAqC,EACjE,OAAO,YAAY,kHAA6B,EAChD,OAAO,qBAAqB,uEAA0B,EACtD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAe,SAAS;AACrC,QAAM,eAAe,CAAC,QAAQ,MAAM;AACpC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAmB;AAC3E,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,WAAW,QAAQ,KAAK;AAC9B,MAAI,CAAC,WAAW,QAAQ,GAAG;AACzB,YAAQ,OAAO,MAAM,6EAA2B,KAAK;AAAA,CAAI;AACzD,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,OAAO,SAAS,QAAQ,EAAE,QAAQ,YAAY,EAAE;AACtD,QAAM,UAAU,KAAK,SACjB,QAAQ,KAAK,MAAM,IACnB,QAAQ,UAAU,MAAM,GAAG,IAAI,IAAI,KAAK,MAAM,EAAE;AAEpD,MAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAY,SAAS,QAAQ,CAAC,WAAM,SAAS,OAAO,CAAC,MAAM;AAElG,MAAI;AACF,UAAM,WAAW,aAAa,UAAU,OAAO;AAG/C,UAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI,QAAQ,UAAU,MAAM,OAAO,SAAS;AAChG,UAAM,SAAgD,CAAC;AACvD,QAAI,KAAK,UAAU,WAAW,MAAM,GAAG;AACrC,YAAM,UAAkC;AAAA,QACtC,KAAK;AAAA,QAAa,KAAK;AAAA,QAAc,MAAM;AAAA,QAC3C,KAAK;AAAA,QAAa,KAAK;AAAA,MACzB;AACA,iBAAW,SAAS,YAAY,QAAQ,EAAE,eAAe,KAAK,CAAC,GAAG;AAChE,YAAI,CAAC,MAAM,OAAO,EAAG;AACrB,cAAM,QAAQ,MAAM;AACpB,cAAM,MAAM,QAAQ,KAAK,EAAE,MAAM,CAAC,EAAE,YAAY;AAChD,YAAI,CAAC,QAAQ,GAAG,EAAG;AACnB,cAAM,OAAO,aAAa,QAAQ,QAAQ,KAAK,CAAC;AAChD,eAAO,KAAK,EAAE,UAAU,OAAO,MAAM,IAAI,WAAW,IAAI,GAAG,UAAU,QAAQ,GAAG,EAAE,CAAC;AAAA,MACrF;AACA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,+BAAW,OAAO,MAAM;AAAA,CAAQ;AAAA,IACzE;AAEA,UAAM,WAAqB,CAAC;AAE5B,QAAI;AACJ,QAAI,KAAK,WAAW,QAAQ;AAC1B,UAAI,KAAK,YAAY,CAAC,KAAK,QAAQ;AACjC,gBAAQ,OAAO,MAAM;AAAA;AAAA,CAAiD;AAAA,MACxE;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,OAAU,CAAC;AAAA,IAC/F,OAAO;AACL,UAAI;AACJ,UAAI,KAAK,UAAU;AACjB,cAAM,UAAU,aAAa,QAAQ,KAAK,QAAQ,CAAC;AACnD,8BAAsB,QAAQ,OAAO,MAAM,QAAQ,YAAY,QAAQ,aAAa,QAAQ,UAAU;AAAA,MACxG;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,QAAW,oBAAoB,CAAC;AAAA,IACpH;AAEA,kBAAc,SAAS,OAAO,KAAK,GAAG,CAAC;AAEvC,QAAI,CAAC,KAAK,QAAQ;AAChB,cAAQ,OAAO,MAAM;AAAA,CAAO;AAC5B,cAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACvC,UAAI,SAAS,OAAQ,UAAS,QAAQ,OAAK,QAAQ,OAAO,MAAM,KAAK,CAAC;AAAA,CAAI,CAAC;AAAA,IAC7E;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,YAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,YAAQ,OAAO,MAAM,YAAO,cAAc,GAAG,CAAC;AAAA,CAAI;AAClD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAEH,QAAQ,MAAM;","names":[]}
package/dist/index.cjs CHANGED
@@ -2156,6 +2156,7 @@ var init_cli_provider = __esm({
2156
2156
  // src/ocr/tesseract-provider.ts
2157
2157
  var tesseract_provider_exports = {};
2158
2158
  __export(tesseract_provider_exports, {
2159
+ createTesseractPoolProvider: () => createTesseractPoolProvider,
2159
2160
  createTesseractProvider: () => createTesseractProvider
2160
2161
  });
2161
2162
  async function createTesseractProvider() {
@@ -2173,6 +2174,37 @@ async function createTesseractProvider() {
2173
2174
  };
2174
2175
  return provider;
2175
2176
  }
2177
+ async function createTesseractPoolProvider(concurrency) {
2178
+ const workers = await Promise.all(
2179
+ Array.from({ length: concurrency }, () => (0, import_tesseract.createWorker)("kor+eng"))
2180
+ );
2181
+ const idle = [...workers];
2182
+ const waitQueue = [];
2183
+ function acquire() {
2184
+ if (idle.length > 0) return Promise.resolve(idle.pop());
2185
+ return new Promise((resolve) => waitQueue.push(resolve));
2186
+ }
2187
+ function release(w) {
2188
+ if (waitQueue.length > 0) {
2189
+ waitQueue.shift()(w);
2190
+ } else {
2191
+ idle.push(w);
2192
+ }
2193
+ }
2194
+ const provider = async (pageImage, _pageNumber, _mimeType) => {
2195
+ const w = await acquire();
2196
+ try {
2197
+ const { data } = await w.recognize(pageImage);
2198
+ return data.text;
2199
+ } finally {
2200
+ release(w);
2201
+ }
2202
+ };
2203
+ provider.terminate = async () => {
2204
+ await Promise.all(workers.map((w) => w.terminate()));
2205
+ };
2206
+ return provider;
2207
+ }
2176
2208
  var import_tesseract;
2177
2209
  var init_tesseract_provider = __esm({
2178
2210
  "src/ocr/tesseract-provider.ts"() {
@@ -2186,14 +2218,17 @@ var resolve_exports = {};
2186
2218
  __export(resolve_exports, {
2187
2219
  resolveOcrProvider: () => resolveOcrProvider
2188
2220
  });
2189
- async function resolveOcrProvider(mode, warnings) {
2221
+ async function resolveOcrProvider(mode, warnings, concurrency) {
2190
2222
  if (mode === "off") {
2191
2223
  throw new Error("OCR\uC774 \uBE44\uD65C\uC131\uD654\uB418\uC5B4 \uC788\uC2B5\uB2C8\uB2E4 (--ocr off).");
2192
2224
  }
2193
2225
  if (mode !== "auto") {
2194
2226
  validateOcrMode(mode);
2195
2227
  if (mode === "tesseract") {
2196
- const { createTesseractProvider: createTesseractProvider2 } = await Promise.resolve().then(() => (init_tesseract_provider(), tesseract_provider_exports));
2228
+ const { createTesseractProvider: createTesseractProvider2, createTesseractPoolProvider: createTesseractPoolProvider2 } = await Promise.resolve().then(() => (init_tesseract_provider(), tesseract_provider_exports));
2229
+ if (concurrency && concurrency > 1) {
2230
+ return createTesseractPoolProvider2(concurrency);
2231
+ }
2197
2232
  return createTesseractProvider2();
2198
2233
  }
2199
2234
  return createCliOcrProvider(mode);
@@ -2213,7 +2248,10 @@ async function resolveOcrProvider(mode, warnings) {
2213
2248
  }
2214
2249
  }
2215
2250
  if (detected === "tesseract") {
2216
- const { createTesseractProvider: createTesseractProvider2 } = await Promise.resolve().then(() => (init_tesseract_provider(), tesseract_provider_exports));
2251
+ const { createTesseractProvider: createTesseractProvider2, createTesseractPoolProvider: createTesseractPoolProvider2 } = await Promise.resolve().then(() => (init_tesseract_provider(), tesseract_provider_exports));
2252
+ if (concurrency && concurrency > 1) {
2253
+ return createTesseractPoolProvider2(concurrency);
2254
+ }
2217
2255
  return createTesseractProvider2();
2218
2256
  }
2219
2257
  return createCliOcrProvider(detected);
@@ -2351,32 +2389,77 @@ var provider_exports = {};
2351
2389
  __export(provider_exports, {
2352
2390
  ocrPages: () => ocrPages
2353
2391
  });
2354
- async function ocrPages(doc, provider, pageFilter, effectivePageCount, warnings) {
2392
+ async function runWithConcurrency(tasks, limit) {
2393
+ const results = new Array(tasks.length);
2394
+ let nextIndex = 0;
2395
+ async function worker() {
2396
+ while (nextIndex < tasks.length) {
2397
+ const idx = nextIndex++;
2398
+ results[idx] = await tasks[idx]();
2399
+ }
2400
+ }
2401
+ await Promise.all(Array.from({ length: Math.min(limit, tasks.length) }, () => worker()));
2402
+ return results;
2403
+ }
2404
+ function ocrResultToBlocks(result, pageNum) {
2405
+ const pageBlocks = [];
2406
+ if (typeof result === "string") {
2407
+ if (result.trim()) {
2408
+ pageBlocks.push({ type: "paragraph", text: result.trim(), pageNumber: pageNum });
2409
+ }
2410
+ } else if (result && typeof result === "object" && "markdown" in result) {
2411
+ const structured = result;
2412
+ if (structured.markdown.trim()) {
2413
+ const converted = markdownToBlocks(structured.markdown, pageNum);
2414
+ for (const b of converted) pageBlocks.push(b);
2415
+ }
2416
+ }
2417
+ return pageBlocks;
2418
+ }
2419
+ async function ocrPages(doc, provider, pageFilter, effectivePageCount, warnings, concurrency = 1) {
2355
2420
  const blocks = [];
2421
+ if (concurrency <= 1) {
2422
+ for (let i = 1; i <= effectivePageCount; i++) {
2423
+ if (pageFilter && !pageFilter.has(i)) continue;
2424
+ const page = await doc.getPage(i);
2425
+ try {
2426
+ const imageData = await renderPageToPng(page);
2427
+ const result = await provider(imageData, i, "image/png");
2428
+ for (const b of ocrResultToBlocks(result, i)) blocks.push(b);
2429
+ } catch (err) {
2430
+ warnings?.push({
2431
+ page: i,
2432
+ message: `\uD398\uC774\uC9C0 ${i} OCR \uC2E4\uD328: ${err instanceof Error ? err.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`,
2433
+ code: "OCR_PAGE_FAILED"
2434
+ });
2435
+ }
2436
+ }
2437
+ return blocks;
2438
+ }
2439
+ const pageNumbers = [];
2356
2440
  for (let i = 1; i <= effectivePageCount; i++) {
2357
2441
  if (pageFilter && !pageFilter.has(i)) continue;
2358
- const page = await doc.getPage(i);
2442
+ pageNumbers.push(i);
2443
+ }
2444
+ const tasks = pageNumbers.map((pageNum) => async () => {
2359
2445
  try {
2446
+ const page = await doc.getPage(pageNum);
2360
2447
  const imageData = await renderPageToPng(page);
2361
- const result = await provider(imageData, i, "image/png");
2362
- if (typeof result === "string") {
2363
- if (result.trim()) {
2364
- blocks.push({ type: "paragraph", text: result.trim(), pageNumber: i });
2365
- }
2366
- } else if (result && typeof result === "object" && "markdown" in result) {
2367
- const structured = result;
2368
- if (structured.markdown.trim()) {
2369
- const pageBlocks = markdownToBlocks(structured.markdown, i);
2370
- for (const b of pageBlocks) blocks.push(b);
2371
- }
2372
- }
2448
+ const result = await provider(imageData, pageNum, "image/png");
2449
+ return { pageNum, pageBlocks: ocrResultToBlocks(result, pageNum) };
2373
2450
  } catch (err) {
2374
2451
  warnings?.push({
2375
- page: i,
2376
- message: `\uD398\uC774\uC9C0 ${i} OCR \uC2E4\uD328: ${err instanceof Error ? err.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`,
2452
+ page: pageNum,
2453
+ message: `\uD398\uC774\uC9C0 ${pageNum} OCR \uC2E4\uD328: ${err instanceof Error ? err.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`,
2377
2454
  code: "OCR_PAGE_FAILED"
2378
2455
  });
2456
+ return null;
2379
2457
  }
2458
+ });
2459
+ const taskResults = await runWithConcurrency(tasks, concurrency);
2460
+ for (const item of taskResults) {
2461
+ if (!item) continue;
2462
+ for (const b of item.pageBlocks) blocks.push(b);
2380
2463
  }
2381
2464
  return blocks;
2382
2465
  }
@@ -2468,7 +2551,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
2468
2551
  var import_xmldom = require("@xmldom/xmldom");
2469
2552
 
2470
2553
  // src/utils.ts
2471
- var VERSION = true ? "2.2.7" : "0.0.0-dev";
2554
+ var VERSION = true ? "2.2.8" : "0.0.0-dev";
2472
2555
  function toArrayBuffer(buf) {
2473
2556
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
2474
2557
  return buf.buffer;
@@ -5985,7 +6068,8 @@ async function parsePdfDocument(buffer, options) {
5985
6068
  if (!ocrProvider && ocrMode && ocrMode !== "off") {
5986
6069
  try {
5987
6070
  const { resolveOcrProvider: resolveOcrProvider2 } = await Promise.resolve().then(() => (init_resolve(), resolve_exports));
5988
- ocrProvider = await resolveOcrProvider2(ocrMode, warnings);
6071
+ const concurrency = options?.ocrConcurrency ?? 1;
6072
+ ocrProvider = await resolveOcrProvider2(ocrMode, warnings, concurrency);
5989
6073
  } catch (resolveErr) {
5990
6074
  if (ocrMode !== "auto") {
5991
6075
  throw Object.assign(
@@ -5998,7 +6082,8 @@ async function parsePdfDocument(buffer, options) {
5998
6082
  if (ocrProvider) {
5999
6083
  try {
6000
6084
  const { ocrPages: ocrPages2 } = await Promise.resolve().then(() => (init_provider(), provider_exports));
6001
- const ocrBlocks = await ocrPages2(doc, ocrProvider, pageFilter, effectivePageCount, warnings);
6085
+ const concurrency = options?.ocrConcurrency ?? 1;
6086
+ const ocrBlocks = await ocrPages2(doc, ocrProvider, pageFilter, effectivePageCount, warnings, concurrency);
6002
6087
  if (ocrBlocks.length > 0) {
6003
6088
  const ocrMarkdown = blocksToMarkdown(ocrBlocks);
6004
6089
  return {