@clazic/kordoc 2.2.8 → 2.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -4,17 +4,18 @@ import {
4
4
  markdownToHwpx,
5
5
  markdownToXlsx,
6
6
  parse
7
- } from "./chunk-FC6BQOWD.js";
7
+ } from "./chunk-OL2NDK3E.js";
8
8
  import {
9
9
  VERSION,
10
10
  toArrayBuffer
11
- } from "./chunk-6KLTURMA.js";
11
+ } from "./chunk-FF5M4SDK.js";
12
12
  import "./chunk-MOL7MDBG.js";
13
13
  import "./chunk-ZWE3DS7E.js";
14
14
 
15
15
  // src/cli.ts
16
16
  import { readFileSync, writeFileSync, mkdirSync, statSync, existsSync, readdirSync } from "fs";
17
17
  import { basename, resolve, extname } from "path";
18
+ import { cpus } from "os";
18
19
  import { Command } from "commander";
19
20
  var program = new Command();
20
21
  async function runParse(files, opts) {
@@ -63,6 +64,8 @@ async function runParse(files, opts) {
63
64
  if (opts.ocrJobs) {
64
65
  const n = parseInt(opts.ocrJobs, 10);
65
66
  if (n > 0) parseOptions.ocrConcurrency = n;
67
+ } else if (parseOptions.ocrMode === "tesseract") {
68
+ parseOptions.ocrConcurrency = cpus().length;
66
69
  }
67
70
  if (!opts.silent) {
68
71
  parseOptions.onProgress = (current, total) => {
@@ -125,7 +128,7 @@ async function runParse(files, opts) {
125
128
  saveImages(absPath);
126
129
  }
127
130
  } catch (err) {
128
- const { sanitizeError } = await import("./utils-JRBHPKTC.js");
131
+ const { sanitizeError } = await import("./utils-CU26KLDC.js");
129
132
  process.stderr.write(`
130
133
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
131
134
  `);
@@ -209,7 +212,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
209
212
  `));
210
213
  }
211
214
  } catch (err) {
212
- const { sanitizeError } = await import("./utils-JRBHPKTC.js");
215
+ const { sanitizeError } = await import("./utils-CU26KLDC.js");
213
216
  process.stderr.write(` FAIL
214
217
  `);
215
218
  process.stderr.write(` \u2192 ${sanitizeError(err)}
@@ -218,7 +221,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
218
221
  }
219
222
  });
220
223
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
221
- const { watchDirectory } = await import("./watch-JANDW746.js");
224
+ const { watchDirectory } = await import("./watch-Z6SH4KRB.js");
222
225
  await watchDirectory({
223
226
  dir,
224
227
  outDir: opts.outDir,
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\n\nimport { readFileSync, writeFileSync, mkdirSync, statSync, existsSync, readdirSync } from \"fs\"\nimport { basename, resolve, extname } from \"path\"\nimport { Command } from \"commander\"\nimport { parse, detectFormat, markdownToHwpx, markdownToXlsx } from \"./index.js\"\nimport type { ParseOptions, OcrMode } from \"./types.js\"\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\n\nconst program = new Command()\n\n/** 공통 parse 옵션 타입 */\ninterface ParseOpts {\n output?: string\n outDir?: string\n pages?: string\n format: string\n headerFooter: boolean\n imageDir?: string\n silent?: boolean\n ocr?: string\n ocrJobs?: string\n}\n\n/** parse 액션 공통 구현 — 루트 커맨드와 `parse` 서브커맨드가 공유 */\nasync function runParse(files: string[], opts: ParseOpts) {\n const validFormats = [\"markdown\", \"json\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\n process.exit(1)\n }\n for (let fi = 0; fi < files.length; fi++) {\n const filePath = files[fi]\n const absPath = resolve(filePath)\n const fileName = basename(absPath)\n const filePrefix = files.length > 1 ? `[${fi + 1}/${files.length}] ` : \"\"\n\n try {\n const fileSize = statSync(absPath).size\n if (fileSize > 500 * 1024 * 1024) {\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\n process.exitCode = 1\n continue\n }\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const format = detectFormat(arrayBuffer)\n\n if (!opts.silent) {\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\n }\n\n const parseOptions: ParseOptions = {}\n if (opts.pages) parseOptions.pages = opts.pages as string\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\n\n // OCR 모드: CLI 기본값 \"auto\" (라이브러리 API는 undefined 유지)\n const validOcrModes = [\"auto\", \"gemini\", \"claude\", \"codex\", \"ollama\", \"tesseract\", \"off\"]\n if (opts.ocr) {\n if (!validOcrModes.includes(opts.ocr)) {\n process.stderr.write(`[kordoc] 지원하지 않는 OCR 모드: ${opts.ocr}\\n`)\n process.stderr.write(` 사용 가능: ${validOcrModes.join(\", \")}\\n`)\n process.exit(1)\n }\n parseOptions.ocrMode = opts.ocr as OcrMode\n } else {\n parseOptions.ocrMode = \"auto\"\n }\n\n // OCR 병렬 처리 수 (--ocr-jobs)\n if (opts.ocrJobs) {\n const n = parseInt(opts.ocrJobs, 10)\n if (n > 0) parseOptions.ocrConcurrency = n\n }\n\n if (!opts.silent) {\n parseOptions.onProgress = (current: number, total: number) => {\n process.stderr.write(`\\r[kordoc] ${filePrefix}${fileName} (${format}) [${current}/${total}]`)\n }\n }\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${result.error}\\n`)\n process.exitCode = 1\n continue\n }\n\n if (!opts.silent) process.stderr.write(` OK\\n`)\n\n // 이미지 기반 PDF OCR 결과 표시\n if (!opts.silent && result.success && result.isImageBased) {\n process.stderr.write(` → 이미지 기반 PDF — OCR 처리됨\\n`)\n }\n\n // 경고 표시\n if (!opts.silent && result.success && result.warnings?.length) {\n for (const w of result.warnings) {\n process.stderr.write(` ⚠ ${w.message}\\n`)\n }\n }\n\n let markdown = result.markdown\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\n if (opts.outDir && result.images?.length) {\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"![image](images/image_\")\n }\n const output = opts.format === \"json\"\n ? JSON.stringify(result, null, 2)\n : markdown\n\n // 이미지 저장: 출력 MD 파일 기준 폴더 사용 (convert와 일치)\n const saveImages = (outFilePath: string) => {\n if (!result.images?.length) return\n const stem = basename(outFilePath).replace(/\\.[^.]+$/, \"\")\n const defaultDir = resolve(outFilePath, \"..\", stem + \"_images\")\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : defaultDir\n mkdirSync(imgDir, { recursive: true })\n for (const img of result.images) {\n writeFileSync(resolve(imgDir, img.filename), img.data)\n }\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\n }\n\n if (opts.output && files.length === 1) {\n writeFileSync(opts.output, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\n saveImages(resolve(opts.output))\n } else if (opts.outDir) {\n mkdirSync(opts.outDir, { recursive: true })\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\n saveImages(outPath)\n } else {\n process.stdout.write(output + \"\\n\")\n saveImages(absPath) // stdout 출력 시 입력 파일 기준\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\n process.exitCode = 1\n }\n }\n}\n\n/** 공통 parse 옵션 등록 헬퍼 */\nfunction addParseOptions(cmd: Command): Command {\n return cmd\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\n .option(\"--image-dir <dir>\", \"이미지 저장 폴더 (기본: 입력 파일명_images 폴더)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .option(\"--ocr <mode>\", \"OCR 모드: auto(기본), gemini, claude, codex, ollama, tesseract, off\")\n .option(\"--ocr-jobs <n>\", \"OCR 병렬 처리 수 (기본: CPU 코어 수, tesseract 전용)\")\n}\n\nprogram\n .enablePositionalOptions()\n .name(\"kordoc\")\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\n .version(VERSION)\n\n// `kordoc parse <files>` 서브커맨드 (권장)\naddParseOptions(\n program\n .command(\"parse\")\n .description(\"파일을 마크다운으로 파싱 (HWP, HWPX, PDF, XLSX, DOCX)\")\n .argument(\"<files...>\", \"변환할 파일 경로\")\n).action(runParse)\n\n// `kordoc <files>` 루트 커맨드 (하위 호환)\naddParseOptions(\n program\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\n).action(runParse)\n\nprogram\n .command(\"convert <input>\")\n .description(\"마크다운 파일을 HWPX 또는 XLSX로 변환\")\n .option(\"-f, --format <type>\", \"출력 포맷: hwpx | xlsx\", \"hwpx\")\n .option(\"-o, --output <path>\", \"출력 파일 경로 (기본: 입력명 + 포맷 확장자)\")\n .option(\"--image-dir <dir>\", \"이미지 폴더 경로 (기본: 입력 MD 파일명_images 폴더)\")\n .option(\"--images\", \"이미지 포함 (기본: 생략, 레이아웃 문제 방지)\")\n .option(\"--template <path>\", \"HWPX 템플릿 파일 경로 (hwpx 전용)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (input: string, opts) => {\n const validFormats = [\"hwpx\", \"xlsx\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 포맷: ${opts.format} (hwpx 또는 xlsx)\\n`)\n process.exit(1)\n }\n\n const absInput = resolve(input)\n if (!existsSync(absInput)) {\n process.stderr.write(`[kordoc] 파일을 찾을 수 없습니다: ${input}\\n`)\n process.exit(1)\n }\n\n const stem = basename(absInput).replace(/\\.[^.]+$/, \"\")\n const outPath = opts.output\n ? resolve(opts.output)\n : resolve(absInput, \"..\", `${stem}.${opts.format}`)\n\n if (!opts.silent) process.stderr.write(`[kordoc] ${basename(absInput)} → ${basename(outPath)} ...`)\n\n try {\n const markdown = readFileSync(absInput, \"utf-8\")\n\n // 이미지 폴더에서 이미지 로드 (--images 플래그 필요)\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : resolve(absInput, \"..\", stem + \"_images\")\n const images: import(\"./types.js\").ExtractedImage[] = []\n if (opts.images && existsSync(imgDir)) {\n const mimeMap: Record<string, string> = {\n png: \"image/png\", jpg: \"image/jpeg\", jpeg: \"image/jpeg\",\n gif: \"image/gif\", bmp: \"image/bmp\",\n }\n for (const entry of readdirSync(imgDir, { withFileTypes: true })) {\n if (!entry.isFile()) continue\n const fname = entry.name\n const ext = extname(fname).slice(1).toLowerCase()\n if (!mimeMap[ext]) continue\n const data = readFileSync(resolve(imgDir, fname))\n images.push({ filename: fname, data: new Uint8Array(data), mimeType: mimeMap[ext] })\n }\n if (!opts.silent) process.stderr.write(` → 이미지 ${images.length}개 로드\\n`)\n }\n\n const warnings: string[] = []\n\n let buf: ArrayBuffer\n if (opts.format === \"xlsx\") {\n if (opts.template && !opts.silent) {\n process.stderr.write(`\\n[kordoc] 경고: --template은 hwpx 전용입니다. 무시됩니다.\\n`)\n }\n buf = await markdownToXlsx(markdown, { warnings, images: images.length ? images : undefined })\n } else {\n let templateArrayBuffer: ArrayBuffer | undefined\n if (opts.template) {\n const tmplBuf = readFileSync(resolve(opts.template))\n templateArrayBuffer = tmplBuf.buffer.slice(tmplBuf.byteOffset, tmplBuf.byteOffset + tmplBuf.byteLength)\n }\n buf = await markdownToHwpx(markdown, { warnings, images: images.length ? images : undefined, templateArrayBuffer })\n }\n\n writeFileSync(outPath, Buffer.from(buf))\n\n if (!opts.silent) {\n process.stderr.write(` OK\\n`)\n process.stderr.write(` → ${outPath}\\n`)\n if (warnings.length) warnings.forEach(w => process.stderr.write(` ${w}\\n`))\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${sanitizeError(err)}\\n`)\n process.exit(1)\n }\n })\n\nprogram\n .command(\"watch <dir>\")\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (dir: string, opts) => {\n const { watchDirectory } = await import(\"./watch.js\")\n await watchDirectory({\n dir,\n outDir: opts.outDir,\n webhook: opts.webhook,\n format: opts.format,\n pages: opts.pages,\n silent: opts.silent,\n })\n })\n\nprogram.parse()\n"],"mappings":";;;;;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,UAAU,YAAY,mBAAmB;AAC1F,SAAS,UAAU,SAAS,eAAe;AAC3C,SAAS,eAAe;AAKxB,IAAM,UAAU,IAAI,QAAQ;AAgB5B,eAAe,SAAS,OAAiB,MAAiB;AACxD,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AAEA,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AAGnE,YAAM,gBAAgB,CAAC,QAAQ,UAAU,UAAU,SAAS,UAAU,aAAa,KAAK;AACxF,UAAI,KAAK,KAAK;AACZ,YAAI,CAAC,cAAc,SAAS,KAAK,GAAG,GAAG;AACrC,kBAAQ,OAAO,MAAM,oEAA4B,KAAK,GAAG;AAAA,CAAI;AAC7D,kBAAQ,OAAO,MAAM,gCAAY,cAAc,KAAK,IAAI,CAAC;AAAA,CAAI;AAC7D,kBAAQ,KAAK,CAAC;AAAA,QAChB;AACA,qBAAa,UAAU,KAAK;AAAA,MAC9B,OAAO;AACL,qBAAa,UAAU;AAAA,MACzB;AAGA,UAAI,KAAK,SAAS;AAChB,cAAM,IAAI,SAAS,KAAK,SAAS,EAAE;AACnC,YAAI,IAAI,EAAG,cAAa,iBAAiB;AAAA,MAC3C;AAEA,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,kBAAQ,OAAO,MAAM,cAAc,UAAU,GAAG,QAAQ,KAAK,MAAM,MAAM,OAAO,IAAI,KAAK,GAAG;AAAA,QAC9F;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAG9C,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,cAAc;AACzD,gBAAQ,OAAO,MAAM;AAAA,CAA4B;AAAA,MACnD;AAGA,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,UAAU,QAAQ;AAC7D,mBAAW,KAAK,OAAO,UAAU;AAC/B,kBAAQ,OAAO,MAAM,YAAO,EAAE,OAAO;AAAA,CAAI;AAAA,QAC3C;AAAA,MACF;AAEA,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,gBAAwB;AAC1C,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,OAAO,SAAS,WAAW,EAAE,QAAQ,YAAY,EAAE;AACzD,cAAM,aAAa,QAAQ,aAAa,MAAM,OAAO,SAAS;AAC9D,cAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI;AACxD,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,MAAM,CAAC;AAAA,MACjC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,OAAO;AAAA,MACpB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAClC,mBAAW,OAAO;AAAA,MACpB;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF;AAGA,SAAS,gBAAgB,KAAuB;AAC9C,SAAO,IACJ,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,qBAAqB,kHAAkC,EAC9D,OAAO,YAAY,oDAAY,EAC/B,OAAO,gBAAgB,qFAAiE,EACxF,OAAO,kBAAkB,sGAA0C;AACxE;AAEA,QACG,wBAAwB,EACxB,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO;AAGlB;AAAA,EACE,QACG,QAAQ,OAAO,EACf,YAAY,mGAA4C,EACxD,SAAS,cAAc,8CAAW;AACvC,EAAE,OAAO,QAAQ;AAGjB;AAAA,EACE,QACG,SAAS,cAAc,2EAAwC;AACpE,EAAE,OAAO,QAAQ;AAEjB,QACG,QAAQ,iBAAiB,EACzB,YAAY,uFAA2B,EACvC,OAAO,uBAAuB,0CAAsB,MAAM,EAC1D,OAAO,uBAAuB,6GAA6B,EAC3D,OAAO,qBAAqB,qHAAqC,EACjE,OAAO,YAAY,kHAA6B,EAChD,OAAO,qBAAqB,uEAA0B,EACtD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAe,SAAS;AACrC,QAAM,eAAe,CAAC,QAAQ,MAAM;AACpC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAmB;AAC3E,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,WAAW,QAAQ,KAAK;AAC9B,MAAI,CAAC,WAAW,QAAQ,GAAG;AACzB,YAAQ,OAAO,MAAM,6EAA2B,KAAK;AAAA,CAAI;AACzD,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,OAAO,SAAS,QAAQ,EAAE,QAAQ,YAAY,EAAE;AACtD,QAAM,UAAU,KAAK,SACjB,QAAQ,KAAK,MAAM,IACnB,QAAQ,UAAU,MAAM,GAAG,IAAI,IAAI,KAAK,MAAM,EAAE;AAEpD,MAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAY,SAAS,QAAQ,CAAC,WAAM,SAAS,OAAO,CAAC,MAAM;AAElG,MAAI;AACF,UAAM,WAAW,aAAa,UAAU,OAAO;AAG/C,UAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI,QAAQ,UAAU,MAAM,OAAO,SAAS;AAChG,UAAM,SAAgD,CAAC;AACvD,QAAI,KAAK,UAAU,WAAW,MAAM,GAAG;AACrC,YAAM,UAAkC;AAAA,QACtC,KAAK;AAAA,QAAa,KAAK;AAAA,QAAc,MAAM;AAAA,QAC3C,KAAK;AAAA,QAAa,KAAK;AAAA,MACzB;AACA,iBAAW,SAAS,YAAY,QAAQ,EAAE,eAAe,KAAK,CAAC,GAAG;AAChE,YAAI,CAAC,MAAM,OAAO,EAAG;AACrB,cAAM,QAAQ,MAAM;AACpB,cAAM,MAAM,QAAQ,KAAK,EAAE,MAAM,CAAC,EAAE,YAAY;AAChD,YAAI,CAAC,QAAQ,GAAG,EAAG;AACnB,cAAM,OAAO,aAAa,QAAQ,QAAQ,KAAK,CAAC;AAChD,eAAO,KAAK,EAAE,UAAU,OAAO,MAAM,IAAI,WAAW,IAAI,GAAG,UAAU,QAAQ,GAAG,EAAE,CAAC;AAAA,MACrF;AACA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,+BAAW,OAAO,MAAM;AAAA,CAAQ;AAAA,IACzE;AAEA,UAAM,WAAqB,CAAC;AAE5B,QAAI;AACJ,QAAI,KAAK,WAAW,QAAQ;AAC1B,UAAI,KAAK,YAAY,CAAC,KAAK,QAAQ;AACjC,gBAAQ,OAAO,MAAM;AAAA;AAAA,CAAiD;AAAA,MACxE;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,OAAU,CAAC;AAAA,IAC/F,OAAO;AACL,UAAI;AACJ,UAAI,KAAK,UAAU;AACjB,cAAM,UAAU,aAAa,QAAQ,KAAK,QAAQ,CAAC;AACnD,8BAAsB,QAAQ,OAAO,MAAM,QAAQ,YAAY,QAAQ,aAAa,QAAQ,UAAU;AAAA,MACxG;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,QAAW,oBAAoB,CAAC;AAAA,IACpH;AAEA,kBAAc,SAAS,OAAO,KAAK,GAAG,CAAC;AAEvC,QAAI,CAAC,KAAK,QAAQ;AAChB,cAAQ,OAAO,MAAM;AAAA,CAAO;AAC5B,cAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACvC,UAAI,SAAS,OAAQ,UAAS,QAAQ,OAAK,QAAQ,OAAO,MAAM,KAAK,CAAC;AAAA,CAAI,CAAC;AAAA,IAC7E;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,YAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,YAAQ,OAAO,MAAM,YAAO,cAAc,GAAG,CAAC;AAAA,CAAI;AAClD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAEH,QAAQ,MAAM;","names":[]}
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\n\nimport { readFileSync, writeFileSync, mkdirSync, statSync, existsSync, readdirSync } from \"fs\"\nimport { basename, resolve, extname } from \"path\"\nimport { cpus } from \"os\"\nimport { Command } from \"commander\"\nimport { parse, detectFormat, markdownToHwpx, markdownToXlsx } from \"./index.js\"\nimport type { ParseOptions, OcrMode } from \"./types.js\"\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\n\nconst program = new Command()\n\n/** 공통 parse 옵션 타입 */\ninterface ParseOpts {\n output?: string\n outDir?: string\n pages?: string\n format: string\n headerFooter: boolean\n imageDir?: string\n silent?: boolean\n ocr?: string\n ocrJobs?: string\n}\n\n/** parse 액션 공통 구현 — 루트 커맨드와 `parse` 서브커맨드가 공유 */\nasync function runParse(files: string[], opts: ParseOpts) {\n const validFormats = [\"markdown\", \"json\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\n process.exit(1)\n }\n for (let fi = 0; fi < files.length; fi++) {\n const filePath = files[fi]\n const absPath = resolve(filePath)\n const fileName = basename(absPath)\n const filePrefix = files.length > 1 ? `[${fi + 1}/${files.length}] ` : \"\"\n\n try {\n const fileSize = statSync(absPath).size\n if (fileSize > 500 * 1024 * 1024) {\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\n process.exitCode = 1\n continue\n }\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const format = detectFormat(arrayBuffer)\n\n if (!opts.silent) {\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\n }\n\n const parseOptions: ParseOptions = {}\n if (opts.pages) parseOptions.pages = opts.pages as string\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\n\n // OCR 모드: CLI 기본값 \"auto\" (라이브러리 API는 undefined 유지)\n const validOcrModes = [\"auto\", \"gemini\", \"claude\", \"codex\", \"ollama\", \"tesseract\", \"off\"]\n if (opts.ocr) {\n if (!validOcrModes.includes(opts.ocr)) {\n process.stderr.write(`[kordoc] 지원하지 않는 OCR 모드: ${opts.ocr}\\n`)\n process.stderr.write(` 사용 가능: ${validOcrModes.join(\", \")}\\n`)\n process.exit(1)\n }\n parseOptions.ocrMode = opts.ocr as OcrMode\n } else {\n parseOptions.ocrMode = \"auto\"\n }\n\n // OCR 병렬 처리 수 (--ocr-jobs): tesseract 기본값은 CPU 코어 수\n if (opts.ocrJobs) {\n const n = parseInt(opts.ocrJobs, 10)\n if (n > 0) parseOptions.ocrConcurrency = n\n } else if (parseOptions.ocrMode === \"tesseract\") {\n parseOptions.ocrConcurrency = cpus().length\n }\n\n if (!opts.silent) {\n parseOptions.onProgress = (current: number, total: number) => {\n process.stderr.write(`\\r[kordoc] ${filePrefix}${fileName} (${format}) [${current}/${total}]`)\n }\n }\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${result.error}\\n`)\n process.exitCode = 1\n continue\n }\n\n if (!opts.silent) process.stderr.write(` OK\\n`)\n\n // 이미지 기반 PDF OCR 결과 표시\n if (!opts.silent && result.success && result.isImageBased) {\n process.stderr.write(` → 이미지 기반 PDF — OCR 처리됨\\n`)\n }\n\n // 경고 표시\n if (!opts.silent && result.success && result.warnings?.length) {\n for (const w of result.warnings) {\n process.stderr.write(` ⚠ ${w.message}\\n`)\n }\n }\n\n let markdown = result.markdown\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\n if (opts.outDir && result.images?.length) {\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"![image](images/image_\")\n }\n const output = opts.format === \"json\"\n ? JSON.stringify(result, null, 2)\n : markdown\n\n // 이미지 저장: 출력 MD 파일 기준 폴더 사용 (convert와 일치)\n const saveImages = (outFilePath: string) => {\n if (!result.images?.length) return\n const stem = basename(outFilePath).replace(/\\.[^.]+$/, \"\")\n const defaultDir = resolve(outFilePath, \"..\", stem + \"_images\")\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : defaultDir\n mkdirSync(imgDir, { recursive: true })\n for (const img of result.images) {\n writeFileSync(resolve(imgDir, img.filename), img.data)\n }\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\n }\n\n if (opts.output && files.length === 1) {\n writeFileSync(opts.output, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\n saveImages(resolve(opts.output))\n } else if (opts.outDir) {\n mkdirSync(opts.outDir, { recursive: true })\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\n saveImages(outPath)\n } else {\n process.stdout.write(output + \"\\n\")\n saveImages(absPath) // stdout 출력 시 입력 파일 기준\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\n process.exitCode = 1\n }\n }\n}\n\n/** 공통 parse 옵션 등록 헬퍼 */\nfunction addParseOptions(cmd: Command): Command {\n return cmd\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\n .option(\"--image-dir <dir>\", \"이미지 저장 폴더 (기본: 입력 파일명_images 폴더)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .option(\"--ocr <mode>\", \"OCR 모드: auto(기본), gemini, claude, codex, ollama, tesseract, off\")\n .option(\"--ocr-jobs <n>\", \"OCR 병렬 처리 수 (기본: CPU 코어 수, tesseract 전용)\")\n}\n\nprogram\n .enablePositionalOptions()\n .name(\"kordoc\")\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\n .version(VERSION)\n\n// `kordoc parse <files>` 서브커맨드 (권장)\naddParseOptions(\n program\n .command(\"parse\")\n .description(\"파일을 마크다운으로 파싱 (HWP, HWPX, PDF, XLSX, DOCX)\")\n .argument(\"<files...>\", \"변환할 파일 경로\")\n).action(runParse)\n\n// `kordoc <files>` 루트 커맨드 (하위 호환)\naddParseOptions(\n program\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\n).action(runParse)\n\nprogram\n .command(\"convert <input>\")\n .description(\"마크다운 파일을 HWPX 또는 XLSX로 변환\")\n .option(\"-f, --format <type>\", \"출력 포맷: hwpx | xlsx\", \"hwpx\")\n .option(\"-o, --output <path>\", \"출력 파일 경로 (기본: 입력명 + 포맷 확장자)\")\n .option(\"--image-dir <dir>\", \"이미지 폴더 경로 (기본: 입력 MD 파일명_images 폴더)\")\n .option(\"--images\", \"이미지 포함 (기본: 생략, 레이아웃 문제 방지)\")\n .option(\"--template <path>\", \"HWPX 템플릿 파일 경로 (hwpx 전용)\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (input: string, opts) => {\n const validFormats = [\"hwpx\", \"xlsx\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 포맷: ${opts.format} (hwpx 또는 xlsx)\\n`)\n process.exit(1)\n }\n\n const absInput = resolve(input)\n if (!existsSync(absInput)) {\n process.stderr.write(`[kordoc] 파일을 찾을 수 없습니다: ${input}\\n`)\n process.exit(1)\n }\n\n const stem = basename(absInput).replace(/\\.[^.]+$/, \"\")\n const outPath = opts.output\n ? resolve(opts.output)\n : resolve(absInput, \"..\", `${stem}.${opts.format}`)\n\n if (!opts.silent) process.stderr.write(`[kordoc] ${basename(absInput)} → ${basename(outPath)} ...`)\n\n try {\n const markdown = readFileSync(absInput, \"utf-8\")\n\n // 이미지 폴더에서 이미지 로드 (--images 플래그 필요)\n const imgDir = opts.imageDir ? resolve(opts.imageDir) : resolve(absInput, \"..\", stem + \"_images\")\n const images: import(\"./types.js\").ExtractedImage[] = []\n if (opts.images && existsSync(imgDir)) {\n const mimeMap: Record<string, string> = {\n png: \"image/png\", jpg: \"image/jpeg\", jpeg: \"image/jpeg\",\n gif: \"image/gif\", bmp: \"image/bmp\",\n }\n for (const entry of readdirSync(imgDir, { withFileTypes: true })) {\n if (!entry.isFile()) continue\n const fname = entry.name\n const ext = extname(fname).slice(1).toLowerCase()\n if (!mimeMap[ext]) continue\n const data = readFileSync(resolve(imgDir, fname))\n images.push({ filename: fname, data: new Uint8Array(data), mimeType: mimeMap[ext] })\n }\n if (!opts.silent) process.stderr.write(` → 이미지 ${images.length}개 로드\\n`)\n }\n\n const warnings: string[] = []\n\n let buf: ArrayBuffer\n if (opts.format === \"xlsx\") {\n if (opts.template && !opts.silent) {\n process.stderr.write(`\\n[kordoc] 경고: --template은 hwpx 전용입니다. 무시됩니다.\\n`)\n }\n buf = await markdownToXlsx(markdown, { warnings, images: images.length ? images : undefined })\n } else {\n let templateArrayBuffer: ArrayBuffer | undefined\n if (opts.template) {\n const tmplBuf = readFileSync(resolve(opts.template))\n templateArrayBuffer = tmplBuf.buffer.slice(tmplBuf.byteOffset, tmplBuf.byteOffset + tmplBuf.byteLength)\n }\n buf = await markdownToHwpx(markdown, { warnings, images: images.length ? images : undefined, templateArrayBuffer })\n }\n\n writeFileSync(outPath, Buffer.from(buf))\n\n if (!opts.silent) {\n process.stderr.write(` OK\\n`)\n process.stderr.write(` → ${outPath}\\n`)\n if (warnings.length) warnings.forEach(w => process.stderr.write(` ${w}\\n`))\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${sanitizeError(err)}\\n`)\n process.exit(1)\n }\n })\n\nprogram\n .command(\"watch <dir>\")\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (dir: string, opts) => {\n const { watchDirectory } = await import(\"./watch.js\")\n await watchDirectory({\n dir,\n outDir: opts.outDir,\n webhook: opts.webhook,\n format: opts.format,\n pages: opts.pages,\n silent: opts.silent,\n })\n })\n\nprogram.parse()\n"],"mappings":";;;;;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,UAAU,YAAY,mBAAmB;AAC1F,SAAS,UAAU,SAAS,eAAe;AAC3C,SAAS,YAAY;AACrB,SAAS,eAAe;AAKxB,IAAM,UAAU,IAAI,QAAQ;AAgB5B,eAAe,SAAS,OAAiB,MAAiB;AACxD,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AAEA,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AAGnE,YAAM,gBAAgB,CAAC,QAAQ,UAAU,UAAU,SAAS,UAAU,aAAa,KAAK;AACxF,UAAI,KAAK,KAAK;AACZ,YAAI,CAAC,cAAc,SAAS,KAAK,GAAG,GAAG;AACrC,kBAAQ,OAAO,MAAM,oEAA4B,KAAK,GAAG;AAAA,CAAI;AAC7D,kBAAQ,OAAO,MAAM,gCAAY,cAAc,KAAK,IAAI,CAAC;AAAA,CAAI;AAC7D,kBAAQ,KAAK,CAAC;AAAA,QAChB;AACA,qBAAa,UAAU,KAAK;AAAA,MAC9B,OAAO;AACL,qBAAa,UAAU;AAAA,MACzB;AAGA,UAAI,KAAK,SAAS;AAChB,cAAM,IAAI,SAAS,KAAK,SAAS,EAAE;AACnC,YAAI,IAAI,EAAG,cAAa,iBAAiB;AAAA,MAC3C,WAAW,aAAa,YAAY,aAAa;AAC/C,qBAAa,iBAAiB,KAAK,EAAE;AAAA,MACvC;AAEA,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,kBAAQ,OAAO,MAAM,cAAc,UAAU,GAAG,QAAQ,KAAK,MAAM,MAAM,OAAO,IAAI,KAAK,GAAG;AAAA,QAC9F;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAG9C,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,cAAc;AACzD,gBAAQ,OAAO,MAAM;AAAA,CAA4B;AAAA,MACnD;AAGA,UAAI,CAAC,KAAK,UAAU,OAAO,WAAW,OAAO,UAAU,QAAQ;AAC7D,mBAAW,KAAK,OAAO,UAAU;AAC/B,kBAAQ,OAAO,MAAM,YAAO,EAAE,OAAO;AAAA,CAAI;AAAA,QAC3C;AAAA,MACF;AAEA,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,gBAAwB;AAC1C,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,OAAO,SAAS,WAAW,EAAE,QAAQ,YAAY,EAAE;AACzD,cAAM,aAAa,QAAQ,aAAa,MAAM,OAAO,SAAS;AAC9D,cAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI;AACxD,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,MAAM,CAAC;AAAA,MACjC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,OAAO;AAAA,MACpB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAClC,mBAAW,OAAO;AAAA,MACpB;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF;AAGA,SAAS,gBAAgB,KAAuB;AAC9C,SAAO,IACJ,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,qBAAqB,kHAAkC,EAC9D,OAAO,YAAY,oDAAY,EAC/B,OAAO,gBAAgB,qFAAiE,EACxF,OAAO,kBAAkB,sGAA0C;AACxE;AAEA,QACG,wBAAwB,EACxB,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO;AAGlB;AAAA,EACE,QACG,QAAQ,OAAO,EACf,YAAY,mGAA4C,EACxD,SAAS,cAAc,8CAAW;AACvC,EAAE,OAAO,QAAQ;AAGjB;AAAA,EACE,QACG,SAAS,cAAc,2EAAwC;AACpE,EAAE,OAAO,QAAQ;AAEjB,QACG,QAAQ,iBAAiB,EACzB,YAAY,uFAA2B,EACvC,OAAO,uBAAuB,0CAAsB,MAAM,EAC1D,OAAO,uBAAuB,6GAA6B,EAC3D,OAAO,qBAAqB,qHAAqC,EACjE,OAAO,YAAY,kHAA6B,EAChD,OAAO,qBAAqB,uEAA0B,EACtD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAe,SAAS;AACrC,QAAM,eAAe,CAAC,QAAQ,MAAM;AACpC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAmB;AAC3E,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,WAAW,QAAQ,KAAK;AAC9B,MAAI,CAAC,WAAW,QAAQ,GAAG;AACzB,YAAQ,OAAO,MAAM,6EAA2B,KAAK;AAAA,CAAI;AACzD,YAAQ,KAAK,CAAC;AAAA,EAChB;AAEA,QAAM,OAAO,SAAS,QAAQ,EAAE,QAAQ,YAAY,EAAE;AACtD,QAAM,UAAU,KAAK,SACjB,QAAQ,KAAK,MAAM,IACnB,QAAQ,UAAU,MAAM,GAAG,IAAI,IAAI,KAAK,MAAM,EAAE;AAEpD,MAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAY,SAAS,QAAQ,CAAC,WAAM,SAAS,OAAO,CAAC,MAAM;AAElG,MAAI;AACF,UAAM,WAAW,aAAa,UAAU,OAAO;AAG/C,UAAM,SAAS,KAAK,WAAW,QAAQ,KAAK,QAAQ,IAAI,QAAQ,UAAU,MAAM,OAAO,SAAS;AAChG,UAAM,SAAgD,CAAC;AACvD,QAAI,KAAK,UAAU,WAAW,MAAM,GAAG;AACrC,YAAM,UAAkC;AAAA,QACtC,KAAK;AAAA,QAAa,KAAK;AAAA,QAAc,MAAM;AAAA,QAC3C,KAAK;AAAA,QAAa,KAAK;AAAA,MACzB;AACA,iBAAW,SAAS,YAAY,QAAQ,EAAE,eAAe,KAAK,CAAC,GAAG;AAChE,YAAI,CAAC,MAAM,OAAO,EAAG;AACrB,cAAM,QAAQ,MAAM;AACpB,cAAM,MAAM,QAAQ,KAAK,EAAE,MAAM,CAAC,EAAE,YAAY;AAChD,YAAI,CAAC,QAAQ,GAAG,EAAG;AACnB,cAAM,OAAO,aAAa,QAAQ,QAAQ,KAAK,CAAC;AAChD,eAAO,KAAK,EAAE,UAAU,OAAO,MAAM,IAAI,WAAW,IAAI,GAAG,UAAU,QAAQ,GAAG,EAAE,CAAC;AAAA,MACrF;AACA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,+BAAW,OAAO,MAAM;AAAA,CAAQ;AAAA,IACzE;AAEA,UAAM,WAAqB,CAAC;AAE5B,QAAI;AACJ,QAAI,KAAK,WAAW,QAAQ;AAC1B,UAAI,KAAK,YAAY,CAAC,KAAK,QAAQ;AACjC,gBAAQ,OAAO,MAAM;AAAA;AAAA,CAAiD;AAAA,MACxE;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,OAAU,CAAC;AAAA,IAC/F,OAAO;AACL,UAAI;AACJ,UAAI,KAAK,UAAU;AACjB,cAAM,UAAU,aAAa,QAAQ,KAAK,QAAQ,CAAC;AACnD,8BAAsB,QAAQ,OAAO,MAAM,QAAQ,YAAY,QAAQ,aAAa,QAAQ,UAAU;AAAA,MACxG;AACA,YAAM,MAAM,eAAe,UAAU,EAAE,UAAU,QAAQ,OAAO,SAAS,SAAS,QAAW,oBAAoB,CAAC;AAAA,IACpH;AAEA,kBAAc,SAAS,OAAO,KAAK,GAAG,CAAC;AAEvC,QAAI,CAAC,KAAK,QAAQ;AAChB,cAAQ,OAAO,MAAM;AAAA,CAAO;AAC5B,cAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACvC,UAAI,SAAS,OAAQ,UAAS,QAAQ,OAAK,QAAQ,OAAO,MAAM,KAAK,CAAC;AAAA,CAAI,CAAC;AAAA,IAC7E;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,YAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,YAAQ,OAAO,MAAM,YAAO,cAAc,GAAG,CAAC;AAAA,CAAI;AAClD,YAAQ,KAAK,CAAC;AAAA,EAChB;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAEH,QAAQ,MAAM;","names":[]}
package/dist/index.cjs CHANGED
@@ -2551,7 +2551,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
2551
2551
  var import_xmldom = require("@xmldom/xmldom");
2552
2552
 
2553
2553
  // src/utils.ts
2554
- var VERSION = true ? "2.2.8" : "0.0.0-dev";
2554
+ var VERSION = true ? "2.2.9" : "0.0.0-dev";
2555
2555
  function toArrayBuffer(buf) {
2556
2556
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
2557
2557
  return buf.buffer;
@@ -6080,21 +6080,28 @@ async function parsePdfDocument(buffer, options) {
6080
6080
  }
6081
6081
  }
6082
6082
  if (ocrProvider) {
6083
+ let ocrBlocks = [];
6083
6084
  try {
6084
6085
  const { ocrPages: ocrPages2 } = await Promise.resolve().then(() => (init_provider(), provider_exports));
6085
6086
  const concurrency = options?.ocrConcurrency ?? 1;
6086
- const ocrBlocks = await ocrPages2(doc, ocrProvider, pageFilter, effectivePageCount, warnings, concurrency);
6087
- if (ocrBlocks.length > 0) {
6088
- const ocrMarkdown = blocksToMarkdown(ocrBlocks);
6089
- return {
6090
- markdown: ocrMarkdown,
6091
- blocks: ocrBlocks,
6092
- metadata,
6093
- warnings: warnings.length > 0 ? warnings : void 0,
6094
- isImageBased: true
6095
- };
6096
- }
6087
+ ocrBlocks = await ocrPages2(doc, ocrProvider, pageFilter, effectivePageCount, warnings, concurrency);
6097
6088
  } catch {
6089
+ } finally {
6090
+ const terminable = ocrProvider;
6091
+ if (typeof terminable.terminate === "function") {
6092
+ await terminable.terminate().catch(() => {
6093
+ });
6094
+ }
6095
+ }
6096
+ if (ocrBlocks.length > 0) {
6097
+ const ocrMarkdown = blocksToMarkdown(ocrBlocks);
6098
+ return {
6099
+ markdown: ocrMarkdown,
6100
+ blocks: ocrBlocks,
6101
+ metadata,
6102
+ warnings: warnings.length > 0 ? warnings : void 0,
6103
+ isImageBased: true
6104
+ };
6098
6105
  }
6099
6106
  }
6100
6107
  if (ocrMode === "off") {