@clazic/kordoc 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/mcp.js CHANGED
@@ -10,13 +10,13 @@ import {
10
10
  markdownToHwpx,
11
11
  markdownToXlsx,
12
12
  parse
13
- } from "./chunk-2ZGLFZCN.js";
13
+ } from "./chunk-ZOEUKD77.js";
14
14
  import {
15
15
  KordocError,
16
16
  VERSION,
17
17
  sanitizeError,
18
18
  toArrayBuffer
19
- } from "./chunk-WWILSVMJ.js";
19
+ } from "./chunk-W5KUC23B.js";
20
20
  import "./chunk-MOL7MDBG.js";
21
21
  import "./chunk-ZWE3DS7E.js";
22
22
 
@@ -150,7 +150,7 @@ function isBatchProvider(p) {
150
150
  async function ocrPages(doc, provider, pageFilter, effectivePageCount, warnings, concurrency = 1, onProgress) {
151
151
  const blocks = [];
152
152
  if (isBatchProvider(provider)) {
153
- return ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, onProgress);
153
+ return ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, concurrency, onProgress);
154
154
  }
155
155
  if (concurrency <= 1) {
156
156
  for (let i = 1; i <= effectivePageCount; i++) {
@@ -197,8 +197,7 @@ async function ocrPages(doc, provider, pageFilter, effectivePageCount, warnings,
197
197
  }
198
198
  return blocks;
199
199
  }
200
- async function ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, onProgress) {
201
- const blocks = [];
200
+ async function ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, concurrency = 1, onProgress) {
202
201
  const pageNumbers = [];
203
202
  for (let i = 1; i <= effectivePageCount; i++) {
204
203
  if (pageFilter && !pageFilter.has(i)) continue;
@@ -215,16 +214,16 @@ async function ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warn
215
214
  batches.push(pageImages.slice(i, i + provider.batchSize));
216
215
  }
217
216
  let processed = 0;
218
- for (const batch of batches) {
217
+ const batchTasks = batches.map((batch, batchIdx) => async () => {
218
+ const pageBlocks = [];
219
219
  try {
220
220
  const results = await provider.processBatch(batch);
221
221
  for (const { pageNum } of batch) {
222
222
  const result = results.get(pageNum);
223
- if (result) {
224
- for (const b of ocrResultToBlocks(result, pageNum)) blocks.push(b);
225
- }
226
- processed++;
227
- onProgress?.(processed, pageNumbers.length);
223
+ pageBlocks.push({
224
+ pageNum,
225
+ blocks: result ? ocrResultToBlocks(result, pageNum) : []
226
+ });
228
227
  }
229
228
  } catch (err) {
230
229
  const range = `${batch[0].pageNum}-${batch[batch.length - 1].pageNum}`;
@@ -232,8 +231,20 @@ async function ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warn
232
231
  message: `\uBC30\uCE58 OCR \uC2E4\uD328 (\uD398\uC774\uC9C0 ${range}): ${err instanceof Error ? err.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`,
233
232
  code: "OCR_PAGE_FAILED"
234
233
  });
235
- processed += batch.length;
236
- onProgress?.(processed, pageNumbers.length);
234
+ for (const { pageNum } of batch) {
235
+ pageBlocks.push({ pageNum, blocks: [] });
236
+ }
237
+ }
238
+ processed += batch.length;
239
+ onProgress?.(processed, pageNumbers.length);
240
+ return { batchIdx, pageBlocks };
241
+ });
242
+ const effectiveConcurrency = Math.max(1, concurrency);
243
+ const batchResults = await runWithConcurrency(batchTasks, effectiveConcurrency);
244
+ const blocks = [];
245
+ for (const result of batchResults) {
246
+ for (const { blocks: pageBlks } of result.pageBlocks) {
247
+ for (const b of pageBlks) blocks.push(b);
237
248
  }
238
249
  }
239
250
  return blocks;
@@ -250,4 +261,4 @@ async function renderPageToPng(page) {
250
261
  export {
251
262
  ocrPages
252
263
  };
253
- //# sourceMappingURL=provider-OBY3XFSZ.js.map
264
+ //# sourceMappingURL=provider-WYHC4NHI.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/ocr/markdown-to-blocks.ts","../src/ocr/provider.ts"],"sourcesContent":["/**\n * Markdown → IRBlock[] 역파싱\n *\n * Vision LLM(gemini/claude/codex 등)이 반환한 Markdown 문자열을\n * kordoc의 IRBlock[] 중간 표현으로 변환.\n * 기존 blocksToMarkdown()의 역방향 처리.\n */\n\nimport type { IRBlock, IRTable, IRCell } from \"../types.js\"\n\n/**\n * Markdown 문자열을 IRBlock[] 배열로 변환.\n *\n * 지원 요소:\n * - 헤딩: # ~ ######\n * - 테이블: | col1 | col2 | (파이프 구분, |---|---| 구분선 포함)\n * - 순서/비순서 리스트: - / 1.\n * - 구분선: ---, ***, ___\n * - 일반 텍스트 (paragraph)\n */\nexport function markdownToBlocks(markdown: string, pageNumber: number): IRBlock[] {\n const blocks: IRBlock[] = []\n const lines = markdown.split(\"\\n\")\n let i = 0\n\n while (i < lines.length) {\n const line = lines[i]\n\n // 빈 줄 스킵\n if (line.trim() === \"\") {\n i++\n continue\n }\n\n // 1. 헤딩: # ~ ######\n const headingMatch = line.match(/^(#{1,6})\\s+(.+)$/)\n if (headingMatch) {\n blocks.push({\n type: \"heading\",\n level: headingMatch[1].length,\n text: headingMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 2. 구분선: ---, ***, ___\n if (/^[-*_]{3,}\\s*$/.test(line.trim())) {\n blocks.push({ type: \"separator\", pageNumber })\n i++\n continue\n }\n\n // 3. 테이블: | 로 시작하는 연속 행 수집\n if (line.trim().startsWith(\"|\")) {\n const tableLines: string[] = []\n while (i < lines.length && lines[i].trim().startsWith(\"|\")) {\n tableLines.push(lines[i])\n i++\n }\n const table = parseMarkdownTable(tableLines)\n if (table) {\n blocks.push({ type: \"table\", table, pageNumber })\n }\n continue\n }\n\n // 4. 비순서 리스트: -, *, +\n const ulMatch = line.match(/^(\\s*)[-*+]\\s+(.+)$/)\n if (ulMatch) {\n blocks.push({\n type: \"list\",\n listType: \"unordered\",\n text: ulMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 5. 순서 리스트: 1.\n const olMatch = line.match(/^(\\s*)\\d+\\.\\s+(.+)$/)\n if (olMatch) {\n blocks.push({\n type: \"list\",\n listType: \"ordered\",\n text: olMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 6. 일반 텍스트 — 구조적 행이 나올 때까지 병합\n const paraLines: string[] = []\n while (i < lines.length && lines[i].trim() !== \"\" && !isStructuralLine(lines[i])) {\n paraLines.push(lines[i].trim())\n i++\n }\n if (paraLines.length > 0) {\n blocks.push({\n type: \"paragraph\",\n text: paraLines.join(\"\\n\"),\n pageNumber,\n })\n }\n }\n\n return blocks\n}\n\n/**\n * 구조적 행 판별 — paragraph 병합 중단 트리거.\n */\nfunction isStructuralLine(line: string): boolean {\n if (/^#{1,6}\\s+/.test(line)) return true\n if (line.trim().startsWith(\"|\")) return true\n if (/^[-*_]{3,}\\s*$/.test(line.trim())) return true\n if (/^\\s*[-*+]\\s+/.test(line)) return true\n if (/^\\s*\\d+\\.\\s+/.test(line)) return true\n return false\n}\n\n/**\n * Markdown 테이블 행 배열을 IRTable로 변환.\n *\n * 구분선 행(|---|---|)은 제거 후 데이터 행만 파싱.\n * hasHeader: 구분선이 있었으면 true.\n */\nfunction parseMarkdownTable(lines: string[]): IRTable | null {\n const hasSeparator = lines.some(line => /^\\|[\\s:|-]+\\|$/.test(line.trim()))\n\n const rows: IRCell[][] = []\n let maxCols = 0\n\n for (const line of lines) {\n // 구분선 행 스킵: |---|---| 패턴\n if (/^\\|\\s*:?-+:?\\s*(\\|\\s*:?-+:?\\s*)+\\|?\\s*$/.test(line.trim())) continue\n\n const parts = line.split(\"|\")\n // 앞뒤 빈 요소 제거 (| 로 시작/종료하는 행)\n const cells: IRCell[] = parts\n .slice(1, parts[parts.length - 1].trim() === \"\" ? -1 : undefined)\n .map(cell => ({\n text: cell.trim(),\n colSpan: 1,\n rowSpan: 1,\n }))\n\n if (cells.length > 0) {\n rows.push(cells)\n maxCols = Math.max(maxCols, cells.length)\n }\n }\n\n if (rows.length === 0) return null\n\n // 열 수 통일 (부족한 셀은 빈 셀로 채움)\n for (const row of rows) {\n while (row.length < maxCols) {\n row.push({ text: \"\", colSpan: 1, rowSpan: 1 })\n }\n }\n\n return {\n rows: rows.length,\n cols: maxCols,\n cells: rows,\n hasHeader: hasSeparator && rows.length > 1,\n }\n}\n","/**\n * OCR 프로바이더 브릿지 — PDF 페이지를 이미지로 렌더링하여 OCR 호출\n *\n * kordoc은 OCR 라이브러리를 번들하지 않음.\n * 사용자가 OcrProvider 함수를 제공하면 이미지 기반 PDF도 텍스트 추출 가능.\n *\n * @example\n * ```ts\n * import { parse } from \"kordoc\"\n *\n * const result = await parse(buffer, {\n * ocr: async (pageImage, pageNumber, mimeType) => {\n * // Tesseract, Claude Vision, Google Vision 등 사용\n * return await myOcrService.recognize(pageImage)\n * }\n * })\n * ```\n */\n\nimport type { OcrProvider, IRBlock, ParseWarning, StructuredOcrResult, BatchOcrProvider } from \"../types.js\"\nimport { markdownToBlocks } from \"./markdown-to-blocks.js\"\n\n/**\n * 동시 실행 수를 제한한 병렬 태스크 실행 헬퍼.\n *\n * limit개의 워커를 만들어 tasks 배열을 순서대로 처리.\n * 각 워커는 완료되는 즉시 다음 태스크를 가져가므로 순서가 보존됨.\n *\n * @param tasks - 실행할 비동기 함수 배열\n * @param limit - 최대 동시 실행 수\n * @returns 입력 순서와 동일한 결과 배열\n */\nasync function runWithConcurrency<T>(\n tasks: (() => Promise<T>)[],\n limit: number\n): Promise<T[]> {\n const results: T[] = new Array(tasks.length)\n let nextIndex = 0\n\n // 각 워커는 처리할 태스크가 없을 때까지 반복\n async function worker() {\n while (nextIndex < tasks.length) {\n const idx = nextIndex++\n results[idx] = await tasks[idx]()\n }\n }\n\n // limit개 워커를 동시 실행 (tasks가 limit보다 적으면 tasks 수만큼)\n await Promise.all(Array.from({ length: Math.min(limit, tasks.length) }, () => worker()))\n return results\n}\n\n/**\n * OCR 결과(string | StructuredOcrResult)를 IRBlock[]으로 변환.\n */\nfunction ocrResultToBlocks(result: string | StructuredOcrResult, pageNum: number): IRBlock[] {\n const pageBlocks: IRBlock[] = []\n if (typeof result === \"string\") {\n // 순수 텍스트 → paragraph 블록\n if (result.trim()) {\n pageBlocks.push({ type: \"paragraph\", text: result.trim(), pageNumber: pageNum })\n }\n } else if (result && typeof result === \"object\" && \"markdown\" in result) {\n // 구조화된 결과 → Markdown → IRBlock[]\n const structured = result as StructuredOcrResult\n if (structured.markdown.trim()) {\n const converted = markdownToBlocks(structured.markdown, pageNum)\n for (const b of converted) pageBlocks.push(b)\n }\n }\n return pageBlocks\n}\n\n/** BatchOcrProvider 타입 가드 */\nfunction isBatchProvider(p: unknown): p is BatchOcrProvider {\n return !!p && typeof p === \"object\" && \"__batch\" in p && (p as BatchOcrProvider).__batch === true\n}\n\n/**\n * 이미지 기반 PDF 페이지에 OCR을 적용하여 IRBlock[] 반환.\n *\n * pdfjs page 객체에서 viewport + render를 통해 PNG 생성 후\n * 사용자 제공 OcrProvider 호출.\n *\n * - string 반환: 단순 텍스트 → paragraph 블록\n * - StructuredOcrResult 반환: Markdown → markdownToBlocks()로 구조화\n * - concurrency > 1: 병렬 처리 (워커 풀 프로바이더 권장)\n *\n * canvas 미설치 시 pdfjs render 불가하므로 에러 반환.\n */\nexport async function ocrPages(\n doc: { numPages: number; getPage(n: number): Promise<PdfPageProxy> },\n provider: OcrProvider | BatchOcrProvider,\n pageFilter: Set<number> | null,\n effectivePageCount: number,\n warnings?: ParseWarning[],\n concurrency: number = 1, // 기본값 1 = 순차 처리 (하위 호환)\n onProgress?: (current: number, total: number) => void\n): Promise<IRBlock[]> {\n const blocks: IRBlock[] = []\n\n // ── 배치 처리 (BatchOcrProvider) ────────────────────\n if (isBatchProvider(provider)) {\n return ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, concurrency, onProgress)\n }\n\n // ── 순차 처리 (concurrency === 1) ────────────────────\n if (concurrency <= 1) {\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n const page = await doc.getPage(i)\n try {\n const imageData = await renderPageToPng(page)\n const result = await provider(imageData, i, \"image/png\")\n for (const b of ocrResultToBlocks(result, i)) blocks.push(b)\n } catch (err) {\n // 개별 페이지 실패 시 경고 발행 후 계속 진행\n warnings?.push({\n page: i,\n message: `페이지 ${i} OCR 실패: ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n }\n }\n return blocks\n }\n\n // ── 병렬 처리 (concurrency > 1) ──────────────────────\n // 처리 대상 페이지 번호 수집\n const pageNumbers: number[] = []\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n pageNumbers.push(i)\n }\n\n // 각 페이지에 대한 태스크 생성 (에러는 개별 캐치)\n const tasks = pageNumbers.map(pageNum => async (): Promise<{ pageNum: number; pageBlocks: IRBlock[] } | null> => {\n try {\n const page = await doc.getPage(pageNum)\n const imageData = await renderPageToPng(page)\n const result = await provider(imageData, pageNum, \"image/png\")\n return { pageNum, pageBlocks: ocrResultToBlocks(result, pageNum) }\n } catch (err) {\n // 개별 페이지 실패 시 경고 발행 후 null 반환\n warnings?.push({\n page: pageNum,\n message: `페이지 ${pageNum} OCR 실패: ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n return null\n }\n })\n\n // 병렬 실행 — concurrency 수만큼 동시 처리\n const taskResults = await runWithConcurrency(tasks, concurrency)\n\n // 결과를 페이지 번호 순서대로 합산 (pageNumbers 순서 = 오름차순 보장)\n for (const item of taskResults) {\n if (!item) continue\n for (const b of item.pageBlocks) blocks.push(b)\n }\n\n return blocks\n}\n\n/**\n * 배치 OCR 처리 — BatchOcrProvider를 사용하여 N페이지씩 묶어 처리.\n *\n * concurrency > 1이면 여러 배치를 동시에 실행하여 속도 향상.\n * 예: 5페이지/배치 × 4 동시 = 20페이지 동시 처리.\n */\nasync function ocrPagesBatch(\n doc: { numPages: number; getPage(n: number): Promise<PdfPageProxy> },\n provider: BatchOcrProvider,\n pageFilter: Set<number> | null,\n effectivePageCount: number,\n warnings?: ParseWarning[],\n concurrency: number = 1,\n onProgress?: (current: number, total: number) => void\n): Promise<IRBlock[]> {\n // 1. 대상 페이지 번호 수집\n const pageNumbers: number[] = []\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n pageNumbers.push(i)\n }\n\n // 2. 모든 페이지를 PNG로 렌더링\n const pageImages: Array<{image: Uint8Array, pageNum: number}> = []\n for (const pageNum of pageNumbers) {\n const page = await doc.getPage(pageNum)\n const image = await renderPageToPng(page)\n pageImages.push({ image, pageNum })\n }\n\n // 3. batchSize개씩 그룹핑\n const batches: Array<typeof pageImages> = []\n for (let i = 0; i < pageImages.length; i += provider.batchSize) {\n batches.push(pageImages.slice(i, i + provider.batchSize))\n }\n\n // 4. 배치 태스크 생성 — 각 배치의 결과를 {batchIdx, pageBlocks} 형태로 반환\n let processed = 0\n type BatchResult = { batchIdx: number; pageBlocks: Array<{pageNum: number; blocks: IRBlock[]}> }\n\n const batchTasks = batches.map((batch, batchIdx) => async (): Promise<BatchResult> => {\n const pageBlocks: Array<{pageNum: number; blocks: IRBlock[]}> = []\n try {\n const results = await provider.processBatch(batch)\n for (const { pageNum } of batch) {\n const result = results.get(pageNum)\n pageBlocks.push({\n pageNum,\n blocks: result ? ocrResultToBlocks(result, pageNum) : [],\n })\n }\n } catch (err) {\n const range = `${batch[0].pageNum}-${batch[batch.length - 1].pageNum}`\n warnings?.push({\n message: `배치 OCR 실패 (페이지 ${range}): ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n // 실패 배치는 빈 결과\n for (const { pageNum } of batch) {\n pageBlocks.push({ pageNum, blocks: [] })\n }\n }\n // 진행률 갱신 (병렬 실행 중 atomic하지 않지만 표시용으로 충분)\n processed += batch.length\n onProgress?.(processed, pageNumbers.length)\n return { batchIdx, pageBlocks }\n })\n\n // 5. 병렬 실행 — concurrency개 배치를 동시 처리\n const effectiveConcurrency = Math.max(1, concurrency)\n const batchResults = await runWithConcurrency(batchTasks, effectiveConcurrency)\n\n // 6. 배치 순서대로 블록 합산 (페이지 순서 보존)\n const blocks: IRBlock[] = []\n for (const result of batchResults) {\n for (const { blocks: pageBlks } of result.pageBlocks) {\n for (const b of pageBlks) blocks.push(b)\n }\n }\n\n return blocks\n}\n\ninterface PdfPageProxy {\n getViewport(params: { scale: number }): { width: number; height: number }\n render(params: { canvasContext: unknown; viewport: unknown }): { promise: Promise<void> }\n}\n\n/**\n * PDF 페이지를 PNG로 렌더링.\n * @napi-rs/canvas 사용 (kordoc 번들 의존성, 별도 설치 불필요)\n */\nasync function renderPageToPng(page: PdfPageProxy): Promise<Uint8Array> {\n const { createCanvas } = await import(\"@napi-rs/canvas\")\n\n const scale = 2.0 // 300 DPI 근사\n const viewport = page.getViewport({ scale })\n const canvas = createCanvas(Math.floor(viewport.width), Math.floor(viewport.height))\n const ctx = canvas.getContext(\"2d\")\n\n await page.render({ canvasContext: ctx as unknown, viewport }).promise\n return new Uint8Array(canvas.toBuffer(\"image/png\"))\n}\n"],"mappings":";;;;AAoBO,SAAS,iBAAiB,UAAkB,YAA+B;AAChF,QAAM,SAAoB,CAAC;AAC3B,QAAM,QAAQ,SAAS,MAAM,IAAI;AACjC,MAAI,IAAI;AAER,SAAO,IAAI,MAAM,QAAQ;AACvB,UAAM,OAAO,MAAM,CAAC;AAGpB,QAAI,KAAK,KAAK,MAAM,IAAI;AACtB;AACA;AAAA,IACF;AAGA,UAAM,eAAe,KAAK,MAAM,mBAAmB;AACnD,QAAI,cAAc;AAChB,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,OAAO,aAAa,CAAC,EAAE;AAAA,QACvB,MAAM,aAAa,CAAC,EAAE,KAAK;AAAA,QAC3B;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,QAAI,iBAAiB,KAAK,KAAK,KAAK,CAAC,GAAG;AACtC,aAAO,KAAK,EAAE,MAAM,aAAa,WAAW,CAAC;AAC7C;AACA;AAAA,IACF;AAGA,QAAI,KAAK,KAAK,EAAE,WAAW,GAAG,GAAG;AAC/B,YAAM,aAAuB,CAAC;AAC9B,aAAO,IAAI,MAAM,UAAU,MAAM,CAAC,EAAE,KAAK,EAAE,WAAW,GAAG,GAAG;AAC1D,mBAAW,KAAK,MAAM,CAAC,CAAC;AACxB;AAAA,MACF;AACA,YAAM,QAAQ,mBAAmB,UAAU;AAC3C,UAAI,OAAO;AACT,eAAO,KAAK,EAAE,MAAM,SAAS,OAAO,WAAW,CAAC;AAAA,MAClD;AACA;AAAA,IACF;AAGA,UAAM,UAAU,KAAK,MAAM,qBAAqB;AAChD,QAAI,SAAS;AACX,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,UAAU;AAAA,QACV,MAAM,QAAQ,CAAC,EAAE,KAAK;AAAA,QACtB;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,UAAM,UAAU,KAAK,MAAM,qBAAqB;AAChD,QAAI,SAAS;AACX,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,UAAU;AAAA,QACV,MAAM,QAAQ,CAAC,EAAE,KAAK;AAAA,QACtB;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,UAAM,YAAsB,CAAC;AAC7B,WAAO,IAAI,MAAM,UAAU,MAAM,CAAC,EAAE,KAAK,MAAM,MAAM,CAAC,iBAAiB,MAAM,CAAC,CAAC,GAAG;AAChF,gBAAU,KAAK,MAAM,CAAC,EAAE,KAAK,CAAC;AAC9B;AAAA,IACF;AACA,QAAI,UAAU,SAAS,GAAG;AACxB,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,MAAM,UAAU,KAAK,IAAI;AAAA,QACzB;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO;AACT;AAKA,SAAS,iBAAiB,MAAuB;AAC/C,MAAI,aAAa,KAAK,IAAI,EAAG,QAAO;AACpC,MAAI,KAAK,KAAK,EAAE,WAAW,GAAG,EAAG,QAAO;AACxC,MAAI,iBAAiB,KAAK,KAAK,KAAK,CAAC,EAAG,QAAO;AAC/C,MAAI,eAAe,KAAK,IAAI,EAAG,QAAO;AACtC,MAAI,eAAe,KAAK,IAAI,EAAG,QAAO;AACtC,SAAO;AACT;AAQA,SAAS,mBAAmB,OAAiC;AAC3D,QAAM,eAAe,MAAM,KAAK,UAAQ,iBAAiB,KAAK,KAAK,KAAK,CAAC,CAAC;AAE1E,QAAM,OAAmB,CAAC;AAC1B,MAAI,UAAU;AAEd,aAAW,QAAQ,OAAO;AAExB,QAAI,0CAA0C,KAAK,KAAK,KAAK,CAAC,EAAG;AAEjE,UAAM,QAAQ,KAAK,MAAM,GAAG;AAE5B,UAAM,QAAkB,MACrB,MAAM,GAAG,MAAM,MAAM,SAAS,CAAC,EAAE,KAAK,MAAM,KAAK,KAAK,MAAS,EAC/D,IAAI,WAAS;AAAA,MACZ,MAAM,KAAK,KAAK;AAAA,MAChB,SAAS;AAAA,MACT,SAAS;AAAA,IACX,EAAE;AAEJ,QAAI,MAAM,SAAS,GAAG;AACpB,WAAK,KAAK,KAAK;AACf,gBAAU,KAAK,IAAI,SAAS,MAAM,MAAM;AAAA,IAC1C;AAAA,EACF;AAEA,MAAI,KAAK,WAAW,EAAG,QAAO;AAG9B,aAAW,OAAO,MAAM;AACtB,WAAO,IAAI,SAAS,SAAS;AAC3B,UAAI,KAAK,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE,CAAC;AAAA,IAC/C;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM,KAAK;AAAA,IACX,MAAM;AAAA,IACN,OAAO;AAAA,IACP,WAAW,gBAAgB,KAAK,SAAS;AAAA,EAC3C;AACF;;;AC3IA,eAAe,mBACb,OACA,OACc;AACd,QAAM,UAAe,IAAI,MAAM,MAAM,MAAM;AAC3C,MAAI,YAAY;AAGhB,iBAAe,SAAS;AACtB,WAAO,YAAY,MAAM,QAAQ;AAC/B,YAAM,MAAM;AACZ,cAAQ,GAAG,IAAI,MAAM,MAAM,GAAG,EAAE;AAAA,IAClC;AAAA,EACF;AAGA,QAAM,QAAQ,IAAI,MAAM,KAAK,EAAE,QAAQ,KAAK,IAAI,OAAO,MAAM,MAAM,EAAE,GAAG,MAAM,OAAO,CAAC,CAAC;AACvF,SAAO;AACT;AAKA,SAAS,kBAAkB,QAAsC,SAA4B;AAC3F,QAAM,aAAwB,CAAC;AAC/B,MAAI,OAAO,WAAW,UAAU;AAE9B,QAAI,OAAO,KAAK,GAAG;AACjB,iBAAW,KAAK,EAAE,MAAM,aAAa,MAAM,OAAO,KAAK,GAAG,YAAY,QAAQ,CAAC;AAAA,IACjF;AAAA,EACF,WAAW,UAAU,OAAO,WAAW,YAAY,cAAc,QAAQ;AAEvE,UAAM,aAAa;AACnB,QAAI,WAAW,SAAS,KAAK,GAAG;AAC9B,YAAM,YAAY,iBAAiB,WAAW,UAAU,OAAO;AAC/D,iBAAW,KAAK,UAAW,YAAW,KAAK,CAAC;AAAA,IAC9C;AAAA,EACF;AACA,SAAO;AACT;AAGA,SAAS,gBAAgB,GAAmC;AAC1D,SAAO,CAAC,CAAC,KAAK,OAAO,MAAM,YAAY,aAAa,KAAM,EAAuB,YAAY;AAC/F;AAcA,eAAsB,SACpB,KACA,UACA,YACA,oBACA,UACA,cAAsB,GACtB,YACoB;AACpB,QAAM,SAAoB,CAAC;AAG3B,MAAI,gBAAgB,QAAQ,GAAG;AAC7B,WAAO,cAAc,KAAK,UAAU,YAAY,oBAAoB,UAAU,aAAa,UAAU;AAAA,EACvG;AAGA,MAAI,eAAe,GAAG;AACpB,aAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,UAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,YAAM,OAAO,MAAM,IAAI,QAAQ,CAAC;AAChC,UAAI;AACF,cAAM,YAAY,MAAM,gBAAgB,IAAI;AAC5C,cAAM,SAAS,MAAM,SAAS,WAAW,GAAG,WAAW;AACvD,mBAAW,KAAK,kBAAkB,QAAQ,CAAC,EAAG,QAAO,KAAK,CAAC;AAAA,MAC7D,SAAS,KAAK;AAEZ,kBAAU,KAAK;AAAA,UACb,MAAM;AAAA,UACN,SAAS,sBAAO,CAAC,sBAAY,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,UAC7E,MAAM;AAAA,QACR,CAAC;AAAA,MACH;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAIA,QAAM,cAAwB,CAAC;AAC/B,WAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,QAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,gBAAY,KAAK,CAAC;AAAA,EACpB;AAGA,QAAM,QAAQ,YAAY,IAAI,aAAW,YAAwE;AAC/G,QAAI;AACF,YAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AACtC,YAAM,YAAY,MAAM,gBAAgB,IAAI;AAC5C,YAAM,SAAS,MAAM,SAAS,WAAW,SAAS,WAAW;AAC7D,aAAO,EAAE,SAAS,YAAY,kBAAkB,QAAQ,OAAO,EAAE;AAAA,IACnE,SAAS,KAAK;AAEZ,gBAAU,KAAK;AAAA,QACb,MAAM;AAAA,QACN,SAAS,sBAAO,OAAO,sBAAY,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,QACnF,MAAM;AAAA,MACR,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF,CAAC;AAGD,QAAM,cAAc,MAAM,mBAAmB,OAAO,WAAW;AAG/D,aAAW,QAAQ,aAAa;AAC9B,QAAI,CAAC,KAAM;AACX,eAAW,KAAK,KAAK,WAAY,QAAO,KAAK,CAAC;AAAA,EAChD;AAEA,SAAO;AACT;AAQA,eAAe,cACb,KACA,UACA,YACA,oBACA,UACA,cAAsB,GACtB,YACoB;AAEpB,QAAM,cAAwB,CAAC;AAC/B,WAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,QAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,gBAAY,KAAK,CAAC;AAAA,EACpB;AAGA,QAAM,aAA0D,CAAC;AACjE,aAAW,WAAW,aAAa;AACjC,UAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AACtC,UAAM,QAAQ,MAAM,gBAAgB,IAAI;AACxC,eAAW,KAAK,EAAE,OAAO,QAAQ,CAAC;AAAA,EACpC;AAGA,QAAM,UAAoC,CAAC;AAC3C,WAAS,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK,SAAS,WAAW;AAC9D,YAAQ,KAAK,WAAW,MAAM,GAAG,IAAI,SAAS,SAAS,CAAC;AAAA,EAC1D;AAGA,MAAI,YAAY;AAGhB,QAAM,aAAa,QAAQ,IAAI,CAAC,OAAO,aAAa,YAAkC;AACpF,UAAM,aAA0D,CAAC;AACjE,QAAI;AACF,YAAM,UAAU,MAAM,SAAS,aAAa,KAAK;AACjD,iBAAW,EAAE,QAAQ,KAAK,OAAO;AAC/B,cAAM,SAAS,QAAQ,IAAI,OAAO;AAClC,mBAAW,KAAK;AAAA,UACd;AAAA,UACA,QAAQ,SAAS,kBAAkB,QAAQ,OAAO,IAAI,CAAC;AAAA,QACzD,CAAC;AAAA,MACH;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,QAAQ,GAAG,MAAM,CAAC,EAAE,OAAO,IAAI,MAAM,MAAM,SAAS,CAAC,EAAE,OAAO;AACpE,gBAAU,KAAK;AAAA,QACb,SAAS,qDAAkB,KAAK,MAAM,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,QACtF,MAAM;AAAA,MACR,CAAC;AAED,iBAAW,EAAE,QAAQ,KAAK,OAAO;AAC/B,mBAAW,KAAK,EAAE,SAAS,QAAQ,CAAC,EAAE,CAAC;AAAA,MACzC;AAAA,IACF;AAEA,iBAAa,MAAM;AACnB,iBAAa,WAAW,YAAY,MAAM;AAC1C,WAAO,EAAE,UAAU,WAAW;AAAA,EAChC,CAAC;AAGD,QAAM,uBAAuB,KAAK,IAAI,GAAG,WAAW;AACpD,QAAM,eAAe,MAAM,mBAAmB,YAAY,oBAAoB;AAG9E,QAAM,SAAoB,CAAC;AAC3B,aAAW,UAAU,cAAc;AACjC,eAAW,EAAE,QAAQ,SAAS,KAAK,OAAO,YAAY;AACpD,iBAAW,KAAK,SAAU,QAAO,KAAK,CAAC;AAAA,IACzC;AAAA,EACF;AAEA,SAAO;AACT;AAWA,eAAe,gBAAgB,MAAyC;AACtE,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,iBAAiB;AAEvD,QAAM,QAAQ;AACd,QAAM,WAAW,KAAK,YAAY,EAAE,MAAM,CAAC;AAC3C,QAAM,SAAS,aAAa,KAAK,MAAM,SAAS,KAAK,GAAG,KAAK,MAAM,SAAS,MAAM,CAAC;AACnF,QAAM,MAAM,OAAO,WAAW,IAAI;AAElC,QAAM,KAAK,OAAO,EAAE,eAAe,KAAgB,SAAS,CAAC,EAAE;AAC/D,SAAO,IAAI,WAAW,OAAO,SAAS,WAAW,CAAC;AACpD;","names":[]}
@@ -201,7 +201,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
201
201
  return createTesseractProvider();
202
202
  }
203
203
  if (mode === "gemini" || mode === "claude" || mode === "codex") {
204
- const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-FUCIIS4M.js");
204
+ const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-PCT4I4LK.js");
205
205
  const effectiveBatch = batchSize ?? DEFAULT_BATCH_SIZES[mode];
206
206
  if (effectiveBatch > 1) {
207
207
  return createBatchCliProvider(mode, effectiveBatch);
@@ -232,7 +232,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
232
232
  return createTesseractProvider();
233
233
  }
234
234
  if (detected === "gemini" || detected === "codex" || detected === "claude") {
235
- const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-FUCIIS4M.js");
235
+ const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-PCT4I4LK.js");
236
236
  const effectiveBatch = batchSize ?? DEFAULT_BATCH_SIZES[detected];
237
237
  if (effectiveBatch > 1) {
238
238
  return createBatchCliProvider(detected, effectiveBatch);
@@ -244,4 +244,4 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
244
244
  export {
245
245
  resolveOcrProvider
246
246
  };
247
- //# sourceMappingURL=resolve-LBFYRHJI.js.map
247
+ //# sourceMappingURL=resolve-4FSAQF2S.js.map
@@ -8,7 +8,7 @@ import {
8
8
  sanitizeError,
9
9
  sanitizeHref,
10
10
  toArrayBuffer
11
- } from "./chunk-WWILSVMJ.js";
11
+ } from "./chunk-W5KUC23B.js";
12
12
  import "./chunk-ZWE3DS7E.js";
13
13
  export {
14
14
  KordocError,
@@ -20,4 +20,4 @@ export {
20
20
  sanitizeHref,
21
21
  toArrayBuffer
22
22
  };
23
- //# sourceMappingURL=utils-QAK24RJS.js.map
23
+ //# sourceMappingURL=utils-HSF5HI5T.js.map
@@ -2,10 +2,10 @@
2
2
  import {
3
3
  detectFormat,
4
4
  parse
5
- } from "./chunk-2ZGLFZCN.js";
5
+ } from "./chunk-ZOEUKD77.js";
6
6
  import {
7
7
  toArrayBuffer
8
- } from "./chunk-WWILSVMJ.js";
8
+ } from "./chunk-W5KUC23B.js";
9
9
  import "./chunk-MOL7MDBG.js";
10
10
  import "./chunk-ZWE3DS7E.js";
11
11
 
@@ -126,4 +126,4 @@ async function sendWebhook(url, payload) {
126
126
  export {
127
127
  watchDirectory
128
128
  };
129
- //# sourceMappingURL=watch-MPHX3QIH.js.map
129
+ //# sourceMappingURL=watch-R2JHXDGF.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@clazic/kordoc",
3
- "version": "2.3.0",
3
+ "version": "2.3.1",
4
4
  "description": "Parse Korean documents (HWP, HWPX, PDF, XLSX, DOCX) to Markdown",
5
5
  "type": "module",
6
6
  "exports": {
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/ocr/batch-provider.ts"],"sourcesContent":["/**\n * CLI 배치 OCR 프로바이더\n *\n * 여러 페이지 이미지를 단일 CLI 호출로 처리하여 API 호출 수를 대폭 감소.\n * gemini/claude: @file 멀티 참조, codex: --image 멀티 플래그\n *\n * 299페이지 기준:\n * - 기존: CLI 299회 호출 (~30분)\n * - 배치: CLI 3~6회 호출 (~3분)\n */\n\nimport { spawnSync } from \"child_process\"\nimport { writeFileSync, readFileSync, unlinkSync, mkdirSync } from \"fs\"\nimport { join } from \"path\"\nimport { tmpdir } from \"os\"\nimport type { StructuredOcrResult, BatchOcrProvider } from \"../types.js\"\n\n/** 배치 OCR 프롬프트 */\nconst BATCH_OCR_PROMPT =\n \"다음 문서 페이지 이미지들을 OCR하여 순수 Markdown으로 변환하세요.\\n\\n\" +\n \"규칙:\\n\" +\n \"- 각 페이지 결과 사이에 반드시 이 구분자를 삽입: <!-- PAGE_BREAK -->\\n\" +\n \"- 테이블은 Markdown 테이블 문법 사용 (| 구분, |---|---| 헤더 구분선 포함)\\n\" +\n \"- 병합된 셀은 해당 위치에 내용 기재\\n\" +\n \"- 헤딩은 글자 크기에 따라 ## ~ ###### 사용\\n\" +\n \"- 리스트는 - 또는 1. 사용\\n\" +\n \"- 이미지, 도형 등 비텍스트 요소는 무시\\n\" +\n \"- 원문의 읽기 순서와 구조를 유지\\n\" +\n \"- ```로 감싸지 말고 순수 Markdown만 출력\"\n\n/** 모드별 기본 배치 크기 (CLI 내부 타임아웃 + 실측 기반)\n *\n * gemini CLI: 10장 이상에서 AbortError 발생 (내부 타임아웃).\n * 5장 배치가 안정적으로 동작 확인 (35초/배치).\n * 299페이지 = 60배치 = 기존 299회 대비 80% 감소.\n */\nexport const DEFAULT_BATCH_SIZES: Record<string, number> = {\n gemini: 5,\n claude: 5,\n codex: 10,\n}\n\n/** 임시 디렉토리 — gemini CLI는 cwd 하위 + gitignore 밖만 @참조 가능 */\nlet _batchTempDir: string | null = null\nfunction getBatchTempDir(): string {\n if (!_batchTempDir) {\n _batchTempDir = join(process.cwd(), \"_kordoc_ocr_tmp\")\n mkdirSync(_batchTempDir, { recursive: true })\n }\n return _batchTempDir\n}\n\n/**\n * 배치 CLI 프로바이더 생성\n */\nexport function createBatchCliProvider(\n mode: \"gemini\" | \"claude\" | \"codex\",\n batchSize: number\n): BatchOcrProvider {\n return {\n __batch: true as const,\n batchSize,\n async processBatch(pages) {\n const results = new Map<number, StructuredOcrResult>()\n const tempDir = getBatchTempDir()\n const tempFiles: string[] = []\n\n try {\n // 1. Write all page images to temp files\n for (const { image, pageNum } of pages) {\n const path = join(tempDir, `batch-p${pageNum}.png`)\n writeFileSync(path, image)\n tempFiles.push(path)\n }\n\n // 2. Call CLI with all file references\n let output: string\n if (mode === \"codex\") {\n output = callBatchCodexCli(tempFiles)\n } else {\n output = callBatchCli(mode, tempFiles)\n }\n\n // 3. Parse response by PAGE_BREAK separator\n const cleaned = stripCodeFence(output.trim())\n const parts = cleaned.split(/<!--\\s*PAGE_BREAK\\s*-->/)\n .map(p => p.trim())\n .filter(p => p.length > 0)\n\n // 4. Map results to page numbers (best-effort if count mismatch)\n for (let i = 0; i < pages.length; i++) {\n const pageNum = pages[i].pageNum\n if (i < parts.length) {\n results.set(pageNum, { markdown: parts[i] })\n }\n // If fewer parts than pages, remaining pages get no result\n }\n } finally {\n // 5. Clean up temp files\n for (const f of tempFiles) {\n try { unlinkSync(f) } catch { /* ignore */ }\n }\n }\n\n return results\n },\n }\n}\n\n/** gemini/claude 배치 호출 */\nfunction callBatchCli(mode: \"gemini\" | \"claude\", imagePaths: string[]): string {\n const fileRefs = imagePaths.map(p => `@${p}`).join(\"\\n\")\n const prompt = `${BATCH_OCR_PROMPT}\\n\\n${fileRefs}`\n\n let args: string[]\n if (mode === \"gemini\") {\n args = [\"--prompt\", prompt, \"--yolo\"]\n const model = process.env.KORDOC_GEMINI_MODEL\n if (model) args.push(\"--model\", model)\n } else {\n // claude\n args = [\"--print\", prompt]\n const model = process.env.KORDOC_CLAUDE_MODEL\n if (model) args.push(\"--model\", model)\n }\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = spawnSync(mode, args, {\n encoding: \"utf-8\",\n timeout: timeoutMs,\n maxBuffer: 50 * 1024 * 1024, // 50MB (large batch output)\n ...(mode === \"claude\" ? { cwd: tmpdir() } : {}),\n })\n\n if (result.error) {\n throw new Error(`${mode} 배치 OCR 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`${mode} 배치 OCR 실패: ${errMsg}`)\n }\n\n return result.stdout || \"\"\n}\n\n/** codex 배치 호출 — --image를 여러 번 지정 */\nfunction callBatchCodexCli(imagePaths: string[]): string {\n const outPath = join(tmpdir(), `kordoc-codex-batch-${Date.now()}.txt`)\n try {\n const args = [\"exec\", BATCH_OCR_PROMPT]\n for (const p of imagePaths) {\n args.push(\"--image\", p)\n }\n args.push(\"--output-last-message\", outPath)\n const model = process.env.KORDOC_CODEX_MODEL\n if (model) args.push(\"--model\", model)\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = spawnSync(\"codex\", args, {\n encoding: \"utf-8\",\n timeout: timeoutMs,\n maxBuffer: 50 * 1024 * 1024,\n input: \"\",\n })\n\n if (result.error) {\n throw new Error(`codex 배치 OCR 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`codex 배치 OCR 실패: ${errMsg}`)\n }\n\n try {\n return readFileSync(outPath, \"utf-8\")\n } catch {\n return result.stdout || \"\"\n }\n } finally {\n try { unlinkSync(outPath) } catch { /* ignore */ }\n }\n}\n\n/** LLM 출력에서 코드 펜스 제거 (cli-provider.ts와 동일 로직) */\nfunction stripCodeFence(text: string): string {\n const match = text.match(/^```(?:markdown|md)?\\s*\\n([\\s\\S]*?)\\n```\\s*$/m)\n return match ? match[1].trim() : text\n}\n"],"mappings":";;;;AAWA,SAAS,iBAAiB;AAC1B,SAAS,eAAe,cAAc,YAAY,iBAAiB;AACnE,SAAS,YAAY;AACrB,SAAS,cAAc;AAIvB,IAAM,mBACJ;AAiBK,IAAM,sBAA8C;AAAA,EACzD,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,OAAO;AACT;AAGA,IAAI,gBAA+B;AACnC,SAAS,kBAA0B;AACjC,MAAI,CAAC,eAAe;AAClB,oBAAgB,KAAK,QAAQ,IAAI,GAAG,iBAAiB;AACrD,cAAU,eAAe,EAAE,WAAW,KAAK,CAAC;AAAA,EAC9C;AACA,SAAO;AACT;AAKO,SAAS,uBACd,MACA,WACkB;AAClB,SAAO;AAAA,IACL,SAAS;AAAA,IACT;AAAA,IACA,MAAM,aAAa,OAAO;AACxB,YAAM,UAAU,oBAAI,IAAiC;AACrD,YAAM,UAAU,gBAAgB;AAChC,YAAM,YAAsB,CAAC;AAE7B,UAAI;AAEF,mBAAW,EAAE,OAAO,QAAQ,KAAK,OAAO;AACtC,gBAAM,OAAO,KAAK,SAAS,UAAU,OAAO,MAAM;AAClD,wBAAc,MAAM,KAAK;AACzB,oBAAU,KAAK,IAAI;AAAA,QACrB;AAGA,YAAI;AACJ,YAAI,SAAS,SAAS;AACpB,mBAAS,kBAAkB,SAAS;AAAA,QACtC,OAAO;AACL,mBAAS,aAAa,MAAM,SAAS;AAAA,QACvC;AAGA,cAAM,UAAU,eAAe,OAAO,KAAK,CAAC;AAC5C,cAAM,QAAQ,QAAQ,MAAM,yBAAyB,EAClD,IAAI,OAAK,EAAE,KAAK,CAAC,EACjB,OAAO,OAAK,EAAE,SAAS,CAAC;AAG3B,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,gBAAM,UAAU,MAAM,CAAC,EAAE;AACzB,cAAI,IAAI,MAAM,QAAQ;AACpB,oBAAQ,IAAI,SAAS,EAAE,UAAU,MAAM,CAAC,EAAE,CAAC;AAAA,UAC7C;AAAA,QAEF;AAAA,MACF,UAAE;AAEA,mBAAW,KAAK,WAAW;AACzB,cAAI;AAAE,uBAAW,CAAC;AAAA,UAAE,QAAQ;AAAA,UAAe;AAAA,QAC7C;AAAA,MACF;AAEA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAGA,SAAS,aAAa,MAA2B,YAA8B;AAC7E,QAAM,WAAW,WAAW,IAAI,OAAK,IAAI,CAAC,EAAE,EAAE,KAAK,IAAI;AACvD,QAAM,SAAS,GAAG,gBAAgB;AAAA;AAAA,EAAO,QAAQ;AAEjD,MAAI;AACJ,MAAI,SAAS,UAAU;AACrB,WAAO,CAAC,YAAY,QAAQ,QAAQ;AACpC,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAAA,EACvC,OAAO;AAEL,WAAO,CAAC,WAAW,MAAM;AACzB,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAAA,EACvC;AAEA,QAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,QAAM,SAAS,UAAU,MAAM,MAAM;AAAA,IACnC,UAAU;AAAA,IACV,SAAS;AAAA,IACT,WAAW,KAAK,OAAO;AAAA;AAAA,IACvB,GAAI,SAAS,WAAW,EAAE,KAAK,OAAO,EAAE,IAAI,CAAC;AAAA,EAC/C,CAAC;AAED,MAAI,OAAO,OAAO;AAChB,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,OAAO,MAAM,OAAO,EAAE;AAAA,EAC9D;AACA,MAAI,OAAO,WAAW,GAAG;AACvB,UAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,MAAM,EAAE;AAAA,EAChD;AAEA,SAAO,OAAO,UAAU;AAC1B;AAGA,SAAS,kBAAkB,YAA8B;AACvD,QAAM,UAAU,KAAK,OAAO,GAAG,sBAAsB,KAAK,IAAI,CAAC,MAAM;AACrE,MAAI;AACF,UAAM,OAAO,CAAC,QAAQ,gBAAgB;AACtC,eAAW,KAAK,YAAY;AAC1B,WAAK,KAAK,WAAW,CAAC;AAAA,IACxB;AACA,SAAK,KAAK,yBAAyB,OAAO;AAC1C,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAErC,UAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,UAAM,SAAS,UAAU,SAAS,MAAM;AAAA,MACtC,UAAU;AAAA,MACV,SAAS;AAAA,MACT,WAAW,KAAK,OAAO;AAAA,MACvB,OAAO;AAAA,IACT,CAAC;AAED,QAAI,OAAO,OAAO;AAChB,YAAM,IAAI,MAAM,wCAAoB,OAAO,MAAM,OAAO,EAAE;AAAA,IAC5D;AACA,QAAI,OAAO,WAAW,GAAG;AACvB,YAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,YAAM,IAAI,MAAM,wCAAoB,MAAM,EAAE;AAAA,IAC9C;AAEA,QAAI;AACF,aAAO,aAAa,SAAS,OAAO;AAAA,IACtC,QAAQ;AACN,aAAO,OAAO,UAAU;AAAA,IAC1B;AAAA,EACF,UAAE;AACA,QAAI;AAAE,iBAAW,OAAO;AAAA,IAAE,QAAQ;AAAA,IAAe;AAAA,EACnD;AACF;AAGA,SAAS,eAAe,MAAsB;AAC5C,QAAM,QAAQ,KAAK,MAAM,+CAA+C;AACxE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;","names":[]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/ocr/markdown-to-blocks.ts","../src/ocr/provider.ts"],"sourcesContent":["/**\n * Markdown → IRBlock[] 역파싱\n *\n * Vision LLM(gemini/claude/codex 등)이 반환한 Markdown 문자열을\n * kordoc의 IRBlock[] 중간 표현으로 변환.\n * 기존 blocksToMarkdown()의 역방향 처리.\n */\n\nimport type { IRBlock, IRTable, IRCell } from \"../types.js\"\n\n/**\n * Markdown 문자열을 IRBlock[] 배열로 변환.\n *\n * 지원 요소:\n * - 헤딩: # ~ ######\n * - 테이블: | col1 | col2 | (파이프 구분, |---|---| 구분선 포함)\n * - 순서/비순서 리스트: - / 1.\n * - 구분선: ---, ***, ___\n * - 일반 텍스트 (paragraph)\n */\nexport function markdownToBlocks(markdown: string, pageNumber: number): IRBlock[] {\n const blocks: IRBlock[] = []\n const lines = markdown.split(\"\\n\")\n let i = 0\n\n while (i < lines.length) {\n const line = lines[i]\n\n // 빈 줄 스킵\n if (line.trim() === \"\") {\n i++\n continue\n }\n\n // 1. 헤딩: # ~ ######\n const headingMatch = line.match(/^(#{1,6})\\s+(.+)$/)\n if (headingMatch) {\n blocks.push({\n type: \"heading\",\n level: headingMatch[1].length,\n text: headingMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 2. 구분선: ---, ***, ___\n if (/^[-*_]{3,}\\s*$/.test(line.trim())) {\n blocks.push({ type: \"separator\", pageNumber })\n i++\n continue\n }\n\n // 3. 테이블: | 로 시작하는 연속 행 수집\n if (line.trim().startsWith(\"|\")) {\n const tableLines: string[] = []\n while (i < lines.length && lines[i].trim().startsWith(\"|\")) {\n tableLines.push(lines[i])\n i++\n }\n const table = parseMarkdownTable(tableLines)\n if (table) {\n blocks.push({ type: \"table\", table, pageNumber })\n }\n continue\n }\n\n // 4. 비순서 리스트: -, *, +\n const ulMatch = line.match(/^(\\s*)[-*+]\\s+(.+)$/)\n if (ulMatch) {\n blocks.push({\n type: \"list\",\n listType: \"unordered\",\n text: ulMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 5. 순서 리스트: 1.\n const olMatch = line.match(/^(\\s*)\\d+\\.\\s+(.+)$/)\n if (olMatch) {\n blocks.push({\n type: \"list\",\n listType: \"ordered\",\n text: olMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 6. 일반 텍스트 — 구조적 행이 나올 때까지 병합\n const paraLines: string[] = []\n while (i < lines.length && lines[i].trim() !== \"\" && !isStructuralLine(lines[i])) {\n paraLines.push(lines[i].trim())\n i++\n }\n if (paraLines.length > 0) {\n blocks.push({\n type: \"paragraph\",\n text: paraLines.join(\"\\n\"),\n pageNumber,\n })\n }\n }\n\n return blocks\n}\n\n/**\n * 구조적 행 판별 — paragraph 병합 중단 트리거.\n */\nfunction isStructuralLine(line: string): boolean {\n if (/^#{1,6}\\s+/.test(line)) return true\n if (line.trim().startsWith(\"|\")) return true\n if (/^[-*_]{3,}\\s*$/.test(line.trim())) return true\n if (/^\\s*[-*+]\\s+/.test(line)) return true\n if (/^\\s*\\d+\\.\\s+/.test(line)) return true\n return false\n}\n\n/**\n * Markdown 테이블 행 배열을 IRTable로 변환.\n *\n * 구분선 행(|---|---|)은 제거 후 데이터 행만 파싱.\n * hasHeader: 구분선이 있었으면 true.\n */\nfunction parseMarkdownTable(lines: string[]): IRTable | null {\n const hasSeparator = lines.some(line => /^\\|[\\s:|-]+\\|$/.test(line.trim()))\n\n const rows: IRCell[][] = []\n let maxCols = 0\n\n for (const line of lines) {\n // 구분선 행 스킵: |---|---| 패턴\n if (/^\\|\\s*:?-+:?\\s*(\\|\\s*:?-+:?\\s*)+\\|?\\s*$/.test(line.trim())) continue\n\n const parts = line.split(\"|\")\n // 앞뒤 빈 요소 제거 (| 로 시작/종료하는 행)\n const cells: IRCell[] = parts\n .slice(1, parts[parts.length - 1].trim() === \"\" ? -1 : undefined)\n .map(cell => ({\n text: cell.trim(),\n colSpan: 1,\n rowSpan: 1,\n }))\n\n if (cells.length > 0) {\n rows.push(cells)\n maxCols = Math.max(maxCols, cells.length)\n }\n }\n\n if (rows.length === 0) return null\n\n // 열 수 통일 (부족한 셀은 빈 셀로 채움)\n for (const row of rows) {\n while (row.length < maxCols) {\n row.push({ text: \"\", colSpan: 1, rowSpan: 1 })\n }\n }\n\n return {\n rows: rows.length,\n cols: maxCols,\n cells: rows,\n hasHeader: hasSeparator && rows.length > 1,\n }\n}\n","/**\n * OCR 프로바이더 브릿지 — PDF 페이지를 이미지로 렌더링하여 OCR 호출\n *\n * kordoc은 OCR 라이브러리를 번들하지 않음.\n * 사용자가 OcrProvider 함수를 제공하면 이미지 기반 PDF도 텍스트 추출 가능.\n *\n * @example\n * ```ts\n * import { parse } from \"kordoc\"\n *\n * const result = await parse(buffer, {\n * ocr: async (pageImage, pageNumber, mimeType) => {\n * // Tesseract, Claude Vision, Google Vision 등 사용\n * return await myOcrService.recognize(pageImage)\n * }\n * })\n * ```\n */\n\nimport type { OcrProvider, IRBlock, ParseWarning, StructuredOcrResult, BatchOcrProvider } from \"../types.js\"\nimport { markdownToBlocks } from \"./markdown-to-blocks.js\"\n\n/**\n * 동시 실행 수를 제한한 병렬 태스크 실행 헬퍼.\n *\n * limit개의 워커를 만들어 tasks 배열을 순서대로 처리.\n * 각 워커는 완료되는 즉시 다음 태스크를 가져가므로 순서가 보존됨.\n *\n * @param tasks - 실행할 비동기 함수 배열\n * @param limit - 최대 동시 실행 수\n * @returns 입력 순서와 동일한 결과 배열\n */\nasync function runWithConcurrency<T>(\n tasks: (() => Promise<T>)[],\n limit: number\n): Promise<T[]> {\n const results: T[] = new Array(tasks.length)\n let nextIndex = 0\n\n // 각 워커는 처리할 태스크가 없을 때까지 반복\n async function worker() {\n while (nextIndex < tasks.length) {\n const idx = nextIndex++\n results[idx] = await tasks[idx]()\n }\n }\n\n // limit개 워커를 동시 실행 (tasks가 limit보다 적으면 tasks 수만큼)\n await Promise.all(Array.from({ length: Math.min(limit, tasks.length) }, () => worker()))\n return results\n}\n\n/**\n * OCR 결과(string | StructuredOcrResult)를 IRBlock[]으로 변환.\n */\nfunction ocrResultToBlocks(result: string | StructuredOcrResult, pageNum: number): IRBlock[] {\n const pageBlocks: IRBlock[] = []\n if (typeof result === \"string\") {\n // 순수 텍스트 → paragraph 블록\n if (result.trim()) {\n pageBlocks.push({ type: \"paragraph\", text: result.trim(), pageNumber: pageNum })\n }\n } else if (result && typeof result === \"object\" && \"markdown\" in result) {\n // 구조화된 결과 → Markdown → IRBlock[]\n const structured = result as StructuredOcrResult\n if (structured.markdown.trim()) {\n const converted = markdownToBlocks(structured.markdown, pageNum)\n for (const b of converted) pageBlocks.push(b)\n }\n }\n return pageBlocks\n}\n\n/** BatchOcrProvider 타입 가드 */\nfunction isBatchProvider(p: unknown): p is BatchOcrProvider {\n return !!p && typeof p === \"object\" && \"__batch\" in p && (p as BatchOcrProvider).__batch === true\n}\n\n/**\n * 이미지 기반 PDF 페이지에 OCR을 적용하여 IRBlock[] 반환.\n *\n * pdfjs page 객체에서 viewport + render를 통해 PNG 생성 후\n * 사용자 제공 OcrProvider 호출.\n *\n * - string 반환: 단순 텍스트 → paragraph 블록\n * - StructuredOcrResult 반환: Markdown → markdownToBlocks()로 구조화\n * - concurrency > 1: 병렬 처리 (워커 풀 프로바이더 권장)\n *\n * canvas 미설치 시 pdfjs render 불가하므로 에러 반환.\n */\nexport async function ocrPages(\n doc: { numPages: number; getPage(n: number): Promise<PdfPageProxy> },\n provider: OcrProvider | BatchOcrProvider,\n pageFilter: Set<number> | null,\n effectivePageCount: number,\n warnings?: ParseWarning[],\n concurrency: number = 1, // 기본값 1 = 순차 처리 (하위 호환)\n onProgress?: (current: number, total: number) => void\n): Promise<IRBlock[]> {\n const blocks: IRBlock[] = []\n\n // ── 배치 처리 (BatchOcrProvider) ────────────────────\n if (isBatchProvider(provider)) {\n return ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, onProgress)\n }\n\n // ── 순차 처리 (concurrency === 1) ────────────────────\n if (concurrency <= 1) {\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n const page = await doc.getPage(i)\n try {\n const imageData = await renderPageToPng(page)\n const result = await provider(imageData, i, \"image/png\")\n for (const b of ocrResultToBlocks(result, i)) blocks.push(b)\n } catch (err) {\n // 개별 페이지 실패 시 경고 발행 후 계속 진행\n warnings?.push({\n page: i,\n message: `페이지 ${i} OCR 실패: ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n }\n }\n return blocks\n }\n\n // ── 병렬 처리 (concurrency > 1) ──────────────────────\n // 처리 대상 페이지 번호 수집\n const pageNumbers: number[] = []\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n pageNumbers.push(i)\n }\n\n // 각 페이지에 대한 태스크 생성 (에러는 개별 캐치)\n const tasks = pageNumbers.map(pageNum => async (): Promise<{ pageNum: number; pageBlocks: IRBlock[] } | null> => {\n try {\n const page = await doc.getPage(pageNum)\n const imageData = await renderPageToPng(page)\n const result = await provider(imageData, pageNum, \"image/png\")\n return { pageNum, pageBlocks: ocrResultToBlocks(result, pageNum) }\n } catch (err) {\n // 개별 페이지 실패 시 경고 발행 후 null 반환\n warnings?.push({\n page: pageNum,\n message: `페이지 ${pageNum} OCR 실패: ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n return null\n }\n })\n\n // 병렬 실행 — concurrency 수만큼 동시 처리\n const taskResults = await runWithConcurrency(tasks, concurrency)\n\n // 결과를 페이지 번호 순서대로 합산 (pageNumbers 순서 = 오름차순 보장)\n for (const item of taskResults) {\n if (!item) continue\n for (const b of item.pageBlocks) blocks.push(b)\n }\n\n return blocks\n}\n\n/**\n * 배치 OCR 처리 — BatchOcrProvider를 사용하여 N페이지씩 묶어 처리.\n */\nasync function ocrPagesBatch(\n doc: { numPages: number; getPage(n: number): Promise<PdfPageProxy> },\n provider: BatchOcrProvider,\n pageFilter: Set<number> | null,\n effectivePageCount: number,\n warnings?: ParseWarning[],\n onProgress?: (current: number, total: number) => void\n): Promise<IRBlock[]> {\n const blocks: IRBlock[] = []\n\n // 1. 대상 페이지 번호 수집\n const pageNumbers: number[] = []\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n pageNumbers.push(i)\n }\n\n // 2. 모든 페이지를 PNG로 렌더링\n const pageImages: Array<{image: Uint8Array, pageNum: number}> = []\n for (const pageNum of pageNumbers) {\n const page = await doc.getPage(pageNum)\n const image = await renderPageToPng(page)\n pageImages.push({ image, pageNum })\n }\n\n // 3. batchSize개씩 그룹핑\n const batches: Array<typeof pageImages> = []\n for (let i = 0; i < pageImages.length; i += provider.batchSize) {\n batches.push(pageImages.slice(i, i + provider.batchSize))\n }\n\n // 4. 각 배치 처리\n let processed = 0\n for (const batch of batches) {\n try {\n const results = await provider.processBatch(batch)\n for (const { pageNum } of batch) {\n const result = results.get(pageNum)\n if (result) {\n for (const b of ocrResultToBlocks(result, pageNum)) blocks.push(b)\n }\n processed++\n onProgress?.(processed, pageNumbers.length)\n }\n } catch (err) {\n // 배치 실패 — 해당 배치 전체 페이지에 경고 추가\n const range = `${batch[0].pageNum}-${batch[batch.length - 1].pageNum}`\n warnings?.push({\n message: `배치 OCR 실패 (페이지 ${range}): ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n processed += batch.length\n onProgress?.(processed, pageNumbers.length)\n }\n }\n\n return blocks\n}\n\ninterface PdfPageProxy {\n getViewport(params: { scale: number }): { width: number; height: number }\n render(params: { canvasContext: unknown; viewport: unknown }): { promise: Promise<void> }\n}\n\n/**\n * PDF 페이지를 PNG로 렌더링.\n * @napi-rs/canvas 사용 (kordoc 번들 의존성, 별도 설치 불필요)\n */\nasync function renderPageToPng(page: PdfPageProxy): Promise<Uint8Array> {\n const { createCanvas } = await import(\"@napi-rs/canvas\")\n\n const scale = 2.0 // 300 DPI 근사\n const viewport = page.getViewport({ scale })\n const canvas = createCanvas(Math.floor(viewport.width), Math.floor(viewport.height))\n const ctx = canvas.getContext(\"2d\")\n\n await page.render({ canvasContext: ctx as unknown, viewport }).promise\n return new Uint8Array(canvas.toBuffer(\"image/png\"))\n}\n"],"mappings":";;;;AAoBO,SAAS,iBAAiB,UAAkB,YAA+B;AAChF,QAAM,SAAoB,CAAC;AAC3B,QAAM,QAAQ,SAAS,MAAM,IAAI;AACjC,MAAI,IAAI;AAER,SAAO,IAAI,MAAM,QAAQ;AACvB,UAAM,OAAO,MAAM,CAAC;AAGpB,QAAI,KAAK,KAAK,MAAM,IAAI;AACtB;AACA;AAAA,IACF;AAGA,UAAM,eAAe,KAAK,MAAM,mBAAmB;AACnD,QAAI,cAAc;AAChB,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,OAAO,aAAa,CAAC,EAAE;AAAA,QACvB,MAAM,aAAa,CAAC,EAAE,KAAK;AAAA,QAC3B;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,QAAI,iBAAiB,KAAK,KAAK,KAAK,CAAC,GAAG;AACtC,aAAO,KAAK,EAAE,MAAM,aAAa,WAAW,CAAC;AAC7C;AACA;AAAA,IACF;AAGA,QAAI,KAAK,KAAK,EAAE,WAAW,GAAG,GAAG;AAC/B,YAAM,aAAuB,CAAC;AAC9B,aAAO,IAAI,MAAM,UAAU,MAAM,CAAC,EAAE,KAAK,EAAE,WAAW,GAAG,GAAG;AAC1D,mBAAW,KAAK,MAAM,CAAC,CAAC;AACxB;AAAA,MACF;AACA,YAAM,QAAQ,mBAAmB,UAAU;AAC3C,UAAI,OAAO;AACT,eAAO,KAAK,EAAE,MAAM,SAAS,OAAO,WAAW,CAAC;AAAA,MAClD;AACA;AAAA,IACF;AAGA,UAAM,UAAU,KAAK,MAAM,qBAAqB;AAChD,QAAI,SAAS;AACX,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,UAAU;AAAA,QACV,MAAM,QAAQ,CAAC,EAAE,KAAK;AAAA,QACtB;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,UAAM,UAAU,KAAK,MAAM,qBAAqB;AAChD,QAAI,SAAS;AACX,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,UAAU;AAAA,QACV,MAAM,QAAQ,CAAC,EAAE,KAAK;AAAA,QACtB;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,UAAM,YAAsB,CAAC;AAC7B,WAAO,IAAI,MAAM,UAAU,MAAM,CAAC,EAAE,KAAK,MAAM,MAAM,CAAC,iBAAiB,MAAM,CAAC,CAAC,GAAG;AAChF,gBAAU,KAAK,MAAM,CAAC,EAAE,KAAK,CAAC;AAC9B;AAAA,IACF;AACA,QAAI,UAAU,SAAS,GAAG;AACxB,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,MAAM,UAAU,KAAK,IAAI;AAAA,QACzB;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO;AACT;AAKA,SAAS,iBAAiB,MAAuB;AAC/C,MAAI,aAAa,KAAK,IAAI,EAAG,QAAO;AACpC,MAAI,KAAK,KAAK,EAAE,WAAW,GAAG,EAAG,QAAO;AACxC,MAAI,iBAAiB,KAAK,KAAK,KAAK,CAAC,EAAG,QAAO;AAC/C,MAAI,eAAe,KAAK,IAAI,EAAG,QAAO;AACtC,MAAI,eAAe,KAAK,IAAI,EAAG,QAAO;AACtC,SAAO;AACT;AAQA,SAAS,mBAAmB,OAAiC;AAC3D,QAAM,eAAe,MAAM,KAAK,UAAQ,iBAAiB,KAAK,KAAK,KAAK,CAAC,CAAC;AAE1E,QAAM,OAAmB,CAAC;AAC1B,MAAI,UAAU;AAEd,aAAW,QAAQ,OAAO;AAExB,QAAI,0CAA0C,KAAK,KAAK,KAAK,CAAC,EAAG;AAEjE,UAAM,QAAQ,KAAK,MAAM,GAAG;AAE5B,UAAM,QAAkB,MACrB,MAAM,GAAG,MAAM,MAAM,SAAS,CAAC,EAAE,KAAK,MAAM,KAAK,KAAK,MAAS,EAC/D,IAAI,WAAS;AAAA,MACZ,MAAM,KAAK,KAAK;AAAA,MAChB,SAAS;AAAA,MACT,SAAS;AAAA,IACX,EAAE;AAEJ,QAAI,MAAM,SAAS,GAAG;AACpB,WAAK,KAAK,KAAK;AACf,gBAAU,KAAK,IAAI,SAAS,MAAM,MAAM;AAAA,IAC1C;AAAA,EACF;AAEA,MAAI,KAAK,WAAW,EAAG,QAAO;AAG9B,aAAW,OAAO,MAAM;AACtB,WAAO,IAAI,SAAS,SAAS;AAC3B,UAAI,KAAK,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE,CAAC;AAAA,IAC/C;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM,KAAK;AAAA,IACX,MAAM;AAAA,IACN,OAAO;AAAA,IACP,WAAW,gBAAgB,KAAK,SAAS;AAAA,EAC3C;AACF;;;AC3IA,eAAe,mBACb,OACA,OACc;AACd,QAAM,UAAe,IAAI,MAAM,MAAM,MAAM;AAC3C,MAAI,YAAY;AAGhB,iBAAe,SAAS;AACtB,WAAO,YAAY,MAAM,QAAQ;AAC/B,YAAM,MAAM;AACZ,cAAQ,GAAG,IAAI,MAAM,MAAM,GAAG,EAAE;AAAA,IAClC;AAAA,EACF;AAGA,QAAM,QAAQ,IAAI,MAAM,KAAK,EAAE,QAAQ,KAAK,IAAI,OAAO,MAAM,MAAM,EAAE,GAAG,MAAM,OAAO,CAAC,CAAC;AACvF,SAAO;AACT;AAKA,SAAS,kBAAkB,QAAsC,SAA4B;AAC3F,QAAM,aAAwB,CAAC;AAC/B,MAAI,OAAO,WAAW,UAAU;AAE9B,QAAI,OAAO,KAAK,GAAG;AACjB,iBAAW,KAAK,EAAE,MAAM,aAAa,MAAM,OAAO,KAAK,GAAG,YAAY,QAAQ,CAAC;AAAA,IACjF;AAAA,EACF,WAAW,UAAU,OAAO,WAAW,YAAY,cAAc,QAAQ;AAEvE,UAAM,aAAa;AACnB,QAAI,WAAW,SAAS,KAAK,GAAG;AAC9B,YAAM,YAAY,iBAAiB,WAAW,UAAU,OAAO;AAC/D,iBAAW,KAAK,UAAW,YAAW,KAAK,CAAC;AAAA,IAC9C;AAAA,EACF;AACA,SAAO;AACT;AAGA,SAAS,gBAAgB,GAAmC;AAC1D,SAAO,CAAC,CAAC,KAAK,OAAO,MAAM,YAAY,aAAa,KAAM,EAAuB,YAAY;AAC/F;AAcA,eAAsB,SACpB,KACA,UACA,YACA,oBACA,UACA,cAAsB,GACtB,YACoB;AACpB,QAAM,SAAoB,CAAC;AAG3B,MAAI,gBAAgB,QAAQ,GAAG;AAC7B,WAAO,cAAc,KAAK,UAAU,YAAY,oBAAoB,UAAU,UAAU;AAAA,EAC1F;AAGA,MAAI,eAAe,GAAG;AACpB,aAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,UAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,YAAM,OAAO,MAAM,IAAI,QAAQ,CAAC;AAChC,UAAI;AACF,cAAM,YAAY,MAAM,gBAAgB,IAAI;AAC5C,cAAM,SAAS,MAAM,SAAS,WAAW,GAAG,WAAW;AACvD,mBAAW,KAAK,kBAAkB,QAAQ,CAAC,EAAG,QAAO,KAAK,CAAC;AAAA,MAC7D,SAAS,KAAK;AAEZ,kBAAU,KAAK;AAAA,UACb,MAAM;AAAA,UACN,SAAS,sBAAO,CAAC,sBAAY,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,UAC7E,MAAM;AAAA,QACR,CAAC;AAAA,MACH;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAIA,QAAM,cAAwB,CAAC;AAC/B,WAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,QAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,gBAAY,KAAK,CAAC;AAAA,EACpB;AAGA,QAAM,QAAQ,YAAY,IAAI,aAAW,YAAwE;AAC/G,QAAI;AACF,YAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AACtC,YAAM,YAAY,MAAM,gBAAgB,IAAI;AAC5C,YAAM,SAAS,MAAM,SAAS,WAAW,SAAS,WAAW;AAC7D,aAAO,EAAE,SAAS,YAAY,kBAAkB,QAAQ,OAAO,EAAE;AAAA,IACnE,SAAS,KAAK;AAEZ,gBAAU,KAAK;AAAA,QACb,MAAM;AAAA,QACN,SAAS,sBAAO,OAAO,sBAAY,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,QACnF,MAAM;AAAA,MACR,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF,CAAC;AAGD,QAAM,cAAc,MAAM,mBAAmB,OAAO,WAAW;AAG/D,aAAW,QAAQ,aAAa;AAC9B,QAAI,CAAC,KAAM;AACX,eAAW,KAAK,KAAK,WAAY,QAAO,KAAK,CAAC;AAAA,EAChD;AAEA,SAAO;AACT;AAKA,eAAe,cACb,KACA,UACA,YACA,oBACA,UACA,YACoB;AACpB,QAAM,SAAoB,CAAC;AAG3B,QAAM,cAAwB,CAAC;AAC/B,WAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,QAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,gBAAY,KAAK,CAAC;AAAA,EACpB;AAGA,QAAM,aAA0D,CAAC;AACjE,aAAW,WAAW,aAAa;AACjC,UAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AACtC,UAAM,QAAQ,MAAM,gBAAgB,IAAI;AACxC,eAAW,KAAK,EAAE,OAAO,QAAQ,CAAC;AAAA,EACpC;AAGA,QAAM,UAAoC,CAAC;AAC3C,WAAS,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK,SAAS,WAAW;AAC9D,YAAQ,KAAK,WAAW,MAAM,GAAG,IAAI,SAAS,SAAS,CAAC;AAAA,EAC1D;AAGA,MAAI,YAAY;AAChB,aAAW,SAAS,SAAS;AAC3B,QAAI;AACF,YAAM,UAAU,MAAM,SAAS,aAAa,KAAK;AACjD,iBAAW,EAAE,QAAQ,KAAK,OAAO;AAC/B,cAAM,SAAS,QAAQ,IAAI,OAAO;AAClC,YAAI,QAAQ;AACV,qBAAW,KAAK,kBAAkB,QAAQ,OAAO,EAAG,QAAO,KAAK,CAAC;AAAA,QACnE;AACA;AACA,qBAAa,WAAW,YAAY,MAAM;AAAA,MAC5C;AAAA,IACF,SAAS,KAAK;AAEZ,YAAM,QAAQ,GAAG,MAAM,CAAC,EAAE,OAAO,IAAI,MAAM,MAAM,SAAS,CAAC,EAAE,OAAO;AACpE,gBAAU,KAAK;AAAA,QACb,SAAS,qDAAkB,KAAK,MAAM,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,QACtF,MAAM;AAAA,MACR,CAAC;AACD,mBAAa,MAAM;AACnB,mBAAa,WAAW,YAAY,MAAM;AAAA,IAC5C;AAAA,EACF;AAEA,SAAO;AACT;AAWA,eAAe,gBAAgB,MAAyC;AACtE,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,iBAAiB;AAEvD,QAAM,QAAQ;AACd,QAAM,WAAW,KAAK,YAAY,EAAE,MAAM,CAAC;AAC3C,QAAM,SAAS,aAAa,KAAK,MAAM,SAAS,KAAK,GAAG,KAAK,MAAM,SAAS,MAAM,CAAC;AACnF,QAAM,MAAM,OAAO,WAAW,IAAI;AAElC,QAAM,KAAK,OAAO,EAAE,eAAe,KAAgB,SAAS,CAAC,EAAE;AAC/D,SAAO,IAAI,WAAW,OAAO,SAAS,WAAW,CAAC;AACpD;","names":[]}