@clazic/kordoc 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{batch-provider-FUCIIS4M.js → batch-provider-PCT4I4LK.js} +57 -27
- package/dist/batch-provider-PCT4I4LK.js.map +1 -0
- package/dist/{chunk-WWILSVMJ.js → chunk-W5KUC23B.js} +2 -2
- package/dist/{chunk-2ZGLFZCN.js → chunk-ZOEUKD77.js} +4 -4
- package/dist/cli.js +7 -5
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +78 -37
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +79 -38
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{provider-OBY3XFSZ.js → provider-WYHC4NHI.js} +23 -12
- package/dist/provider-WYHC4NHI.js.map +1 -0
- package/dist/{resolve-LBFYRHJI.js → resolve-4FSAQF2S.js} +3 -3
- package/dist/{utils-QAK24RJS.js → utils-HSF5HI5T.js} +2 -2
- package/dist/{watch-MPHX3QIH.js → watch-R2JHXDGF.js} +3 -3
- package/package.json +1 -1
- package/dist/batch-provider-FUCIIS4M.js.map +0 -1
- package/dist/provider-OBY3XFSZ.js.map +0 -1
- /package/dist/{chunk-WWILSVMJ.js.map → chunk-W5KUC23B.js.map} +0 -0
- /package/dist/{chunk-2ZGLFZCN.js.map → chunk-ZOEUKD77.js.map} +0 -0
- /package/dist/{resolve-LBFYRHJI.js.map → resolve-4FSAQF2S.js.map} +0 -0
- /package/dist/{utils-QAK24RJS.js.map → utils-HSF5HI5T.js.map} +0 -0
- /package/dist/{watch-MPHX3QIH.js.map → watch-R2JHXDGF.js.map} +0 -0
package/dist/mcp.js
CHANGED
|
@@ -10,13 +10,13 @@ import {
|
|
|
10
10
|
markdownToHwpx,
|
|
11
11
|
markdownToXlsx,
|
|
12
12
|
parse
|
|
13
|
-
} from "./chunk-
|
|
13
|
+
} from "./chunk-ZOEUKD77.js";
|
|
14
14
|
import {
|
|
15
15
|
KordocError,
|
|
16
16
|
VERSION,
|
|
17
17
|
sanitizeError,
|
|
18
18
|
toArrayBuffer
|
|
19
|
-
} from "./chunk-
|
|
19
|
+
} from "./chunk-W5KUC23B.js";
|
|
20
20
|
import "./chunk-MOL7MDBG.js";
|
|
21
21
|
import "./chunk-ZWE3DS7E.js";
|
|
22
22
|
|
|
@@ -150,7 +150,7 @@ function isBatchProvider(p) {
|
|
|
150
150
|
async function ocrPages(doc, provider, pageFilter, effectivePageCount, warnings, concurrency = 1, onProgress) {
|
|
151
151
|
const blocks = [];
|
|
152
152
|
if (isBatchProvider(provider)) {
|
|
153
|
-
return ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, onProgress);
|
|
153
|
+
return ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, concurrency, onProgress);
|
|
154
154
|
}
|
|
155
155
|
if (concurrency <= 1) {
|
|
156
156
|
for (let i = 1; i <= effectivePageCount; i++) {
|
|
@@ -197,8 +197,7 @@ async function ocrPages(doc, provider, pageFilter, effectivePageCount, warnings,
|
|
|
197
197
|
}
|
|
198
198
|
return blocks;
|
|
199
199
|
}
|
|
200
|
-
async function ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, onProgress) {
|
|
201
|
-
const blocks = [];
|
|
200
|
+
async function ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, concurrency = 1, onProgress) {
|
|
202
201
|
const pageNumbers = [];
|
|
203
202
|
for (let i = 1; i <= effectivePageCount; i++) {
|
|
204
203
|
if (pageFilter && !pageFilter.has(i)) continue;
|
|
@@ -215,16 +214,16 @@ async function ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warn
|
|
|
215
214
|
batches.push(pageImages.slice(i, i + provider.batchSize));
|
|
216
215
|
}
|
|
217
216
|
let processed = 0;
|
|
218
|
-
|
|
217
|
+
const batchTasks = batches.map((batch, batchIdx) => async () => {
|
|
218
|
+
const pageBlocks = [];
|
|
219
219
|
try {
|
|
220
220
|
const results = await provider.processBatch(batch);
|
|
221
221
|
for (const { pageNum } of batch) {
|
|
222
222
|
const result = results.get(pageNum);
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
onProgress?.(processed, pageNumbers.length);
|
|
223
|
+
pageBlocks.push({
|
|
224
|
+
pageNum,
|
|
225
|
+
blocks: result ? ocrResultToBlocks(result, pageNum) : []
|
|
226
|
+
});
|
|
228
227
|
}
|
|
229
228
|
} catch (err) {
|
|
230
229
|
const range = `${batch[0].pageNum}-${batch[batch.length - 1].pageNum}`;
|
|
@@ -232,8 +231,20 @@ async function ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warn
|
|
|
232
231
|
message: `\uBC30\uCE58 OCR \uC2E4\uD328 (\uD398\uC774\uC9C0 ${range}): ${err instanceof Error ? err.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`,
|
|
233
232
|
code: "OCR_PAGE_FAILED"
|
|
234
233
|
});
|
|
235
|
-
|
|
236
|
-
|
|
234
|
+
for (const { pageNum } of batch) {
|
|
235
|
+
pageBlocks.push({ pageNum, blocks: [] });
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
processed += batch.length;
|
|
239
|
+
onProgress?.(processed, pageNumbers.length);
|
|
240
|
+
return { batchIdx, pageBlocks };
|
|
241
|
+
});
|
|
242
|
+
const effectiveConcurrency = Math.max(1, concurrency);
|
|
243
|
+
const batchResults = await runWithConcurrency(batchTasks, effectiveConcurrency);
|
|
244
|
+
const blocks = [];
|
|
245
|
+
for (const result of batchResults) {
|
|
246
|
+
for (const { blocks: pageBlks } of result.pageBlocks) {
|
|
247
|
+
for (const b of pageBlks) blocks.push(b);
|
|
237
248
|
}
|
|
238
249
|
}
|
|
239
250
|
return blocks;
|
|
@@ -250,4 +261,4 @@ async function renderPageToPng(page) {
|
|
|
250
261
|
export {
|
|
251
262
|
ocrPages
|
|
252
263
|
};
|
|
253
|
-
//# sourceMappingURL=provider-
|
|
264
|
+
//# sourceMappingURL=provider-WYHC4NHI.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/ocr/markdown-to-blocks.ts","../src/ocr/provider.ts"],"sourcesContent":["/**\n * Markdown → IRBlock[] 역파싱\n *\n * Vision LLM(gemini/claude/codex 등)이 반환한 Markdown 문자열을\n * kordoc의 IRBlock[] 중간 표현으로 변환.\n * 기존 blocksToMarkdown()의 역방향 처리.\n */\n\nimport type { IRBlock, IRTable, IRCell } from \"../types.js\"\n\n/**\n * Markdown 문자열을 IRBlock[] 배열로 변환.\n *\n * 지원 요소:\n * - 헤딩: # ~ ######\n * - 테이블: | col1 | col2 | (파이프 구분, |---|---| 구분선 포함)\n * - 순서/비순서 리스트: - / 1.\n * - 구분선: ---, ***, ___\n * - 일반 텍스트 (paragraph)\n */\nexport function markdownToBlocks(markdown: string, pageNumber: number): IRBlock[] {\n const blocks: IRBlock[] = []\n const lines = markdown.split(\"\\n\")\n let i = 0\n\n while (i < lines.length) {\n const line = lines[i]\n\n // 빈 줄 스킵\n if (line.trim() === \"\") {\n i++\n continue\n }\n\n // 1. 헤딩: # ~ ######\n const headingMatch = line.match(/^(#{1,6})\\s+(.+)$/)\n if (headingMatch) {\n blocks.push({\n type: \"heading\",\n level: headingMatch[1].length,\n text: headingMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 2. 구분선: ---, ***, ___\n if (/^[-*_]{3,}\\s*$/.test(line.trim())) {\n blocks.push({ type: \"separator\", pageNumber })\n i++\n continue\n }\n\n // 3. 테이블: | 로 시작하는 연속 행 수집\n if (line.trim().startsWith(\"|\")) {\n const tableLines: string[] = []\n while (i < lines.length && lines[i].trim().startsWith(\"|\")) {\n tableLines.push(lines[i])\n i++\n }\n const table = parseMarkdownTable(tableLines)\n if (table) {\n blocks.push({ type: \"table\", table, pageNumber })\n }\n continue\n }\n\n // 4. 비순서 리스트: -, *, +\n const ulMatch = line.match(/^(\\s*)[-*+]\\s+(.+)$/)\n if (ulMatch) {\n blocks.push({\n type: \"list\",\n listType: \"unordered\",\n text: ulMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 5. 순서 리스트: 1.\n const olMatch = line.match(/^(\\s*)\\d+\\.\\s+(.+)$/)\n if (olMatch) {\n blocks.push({\n type: \"list\",\n listType: \"ordered\",\n text: olMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 6. 일반 텍스트 — 구조적 행이 나올 때까지 병합\n const paraLines: string[] = []\n while (i < lines.length && lines[i].trim() !== \"\" && !isStructuralLine(lines[i])) {\n paraLines.push(lines[i].trim())\n i++\n }\n if (paraLines.length > 0) {\n blocks.push({\n type: \"paragraph\",\n text: paraLines.join(\"\\n\"),\n pageNumber,\n })\n }\n }\n\n return blocks\n}\n\n/**\n * 구조적 행 판별 — paragraph 병합 중단 트리거.\n */\nfunction isStructuralLine(line: string): boolean {\n if (/^#{1,6}\\s+/.test(line)) return true\n if (line.trim().startsWith(\"|\")) return true\n if (/^[-*_]{3,}\\s*$/.test(line.trim())) return true\n if (/^\\s*[-*+]\\s+/.test(line)) return true\n if (/^\\s*\\d+\\.\\s+/.test(line)) return true\n return false\n}\n\n/**\n * Markdown 테이블 행 배열을 IRTable로 변환.\n *\n * 구분선 행(|---|---|)은 제거 후 데이터 행만 파싱.\n * hasHeader: 구분선이 있었으면 true.\n */\nfunction parseMarkdownTable(lines: string[]): IRTable | null {\n const hasSeparator = lines.some(line => /^\\|[\\s:|-]+\\|$/.test(line.trim()))\n\n const rows: IRCell[][] = []\n let maxCols = 0\n\n for (const line of lines) {\n // 구분선 행 스킵: |---|---| 패턴\n if (/^\\|\\s*:?-+:?\\s*(\\|\\s*:?-+:?\\s*)+\\|?\\s*$/.test(line.trim())) continue\n\n const parts = line.split(\"|\")\n // 앞뒤 빈 요소 제거 (| 로 시작/종료하는 행)\n const cells: IRCell[] = parts\n .slice(1, parts[parts.length - 1].trim() === \"\" ? -1 : undefined)\n .map(cell => ({\n text: cell.trim(),\n colSpan: 1,\n rowSpan: 1,\n }))\n\n if (cells.length > 0) {\n rows.push(cells)\n maxCols = Math.max(maxCols, cells.length)\n }\n }\n\n if (rows.length === 0) return null\n\n // 열 수 통일 (부족한 셀은 빈 셀로 채움)\n for (const row of rows) {\n while (row.length < maxCols) {\n row.push({ text: \"\", colSpan: 1, rowSpan: 1 })\n }\n }\n\n return {\n rows: rows.length,\n cols: maxCols,\n cells: rows,\n hasHeader: hasSeparator && rows.length > 1,\n }\n}\n","/**\n * OCR 프로바이더 브릿지 — PDF 페이지를 이미지로 렌더링하여 OCR 호출\n *\n * kordoc은 OCR 라이브러리를 번들하지 않음.\n * 사용자가 OcrProvider 함수를 제공하면 이미지 기반 PDF도 텍스트 추출 가능.\n *\n * @example\n * ```ts\n * import { parse } from \"kordoc\"\n *\n * const result = await parse(buffer, {\n * ocr: async (pageImage, pageNumber, mimeType) => {\n * // Tesseract, Claude Vision, Google Vision 등 사용\n * return await myOcrService.recognize(pageImage)\n * }\n * })\n * ```\n */\n\nimport type { OcrProvider, IRBlock, ParseWarning, StructuredOcrResult, BatchOcrProvider } from \"../types.js\"\nimport { markdownToBlocks } from \"./markdown-to-blocks.js\"\n\n/**\n * 동시 실행 수를 제한한 병렬 태스크 실행 헬퍼.\n *\n * limit개의 워커를 만들어 tasks 배열을 순서대로 처리.\n * 각 워커는 완료되는 즉시 다음 태스크를 가져가므로 순서가 보존됨.\n *\n * @param tasks - 실행할 비동기 함수 배열\n * @param limit - 최대 동시 실행 수\n * @returns 입력 순서와 동일한 결과 배열\n */\nasync function runWithConcurrency<T>(\n tasks: (() => Promise<T>)[],\n limit: number\n): Promise<T[]> {\n const results: T[] = new Array(tasks.length)\n let nextIndex = 0\n\n // 각 워커는 처리할 태스크가 없을 때까지 반복\n async function worker() {\n while (nextIndex < tasks.length) {\n const idx = nextIndex++\n results[idx] = await tasks[idx]()\n }\n }\n\n // limit개 워커를 동시 실행 (tasks가 limit보다 적으면 tasks 수만큼)\n await Promise.all(Array.from({ length: Math.min(limit, tasks.length) }, () => worker()))\n return results\n}\n\n/**\n * OCR 결과(string | StructuredOcrResult)를 IRBlock[]으로 변환.\n */\nfunction ocrResultToBlocks(result: string | StructuredOcrResult, pageNum: number): IRBlock[] {\n const pageBlocks: IRBlock[] = []\n if (typeof result === \"string\") {\n // 순수 텍스트 → paragraph 블록\n if (result.trim()) {\n pageBlocks.push({ type: \"paragraph\", text: result.trim(), pageNumber: pageNum })\n }\n } else if (result && typeof result === \"object\" && \"markdown\" in result) {\n // 구조화된 결과 → Markdown → IRBlock[]\n const structured = result as StructuredOcrResult\n if (structured.markdown.trim()) {\n const converted = markdownToBlocks(structured.markdown, pageNum)\n for (const b of converted) pageBlocks.push(b)\n }\n }\n return pageBlocks\n}\n\n/** BatchOcrProvider 타입 가드 */\nfunction isBatchProvider(p: unknown): p is BatchOcrProvider {\n return !!p && typeof p === \"object\" && \"__batch\" in p && (p as BatchOcrProvider).__batch === true\n}\n\n/**\n * 이미지 기반 PDF 페이지에 OCR을 적용하여 IRBlock[] 반환.\n *\n * pdfjs page 객체에서 viewport + render를 통해 PNG 생성 후\n * 사용자 제공 OcrProvider 호출.\n *\n * - string 반환: 단순 텍스트 → paragraph 블록\n * - StructuredOcrResult 반환: Markdown → markdownToBlocks()로 구조화\n * - concurrency > 1: 병렬 처리 (워커 풀 프로바이더 권장)\n *\n * canvas 미설치 시 pdfjs render 불가하므로 에러 반환.\n */\nexport async function ocrPages(\n doc: { numPages: number; getPage(n: number): Promise<PdfPageProxy> },\n provider: OcrProvider | BatchOcrProvider,\n pageFilter: Set<number> | null,\n effectivePageCount: number,\n warnings?: ParseWarning[],\n concurrency: number = 1, // 기본값 1 = 순차 처리 (하위 호환)\n onProgress?: (current: number, total: number) => void\n): Promise<IRBlock[]> {\n const blocks: IRBlock[] = []\n\n // ── 배치 처리 (BatchOcrProvider) ────────────────────\n if (isBatchProvider(provider)) {\n return ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, concurrency, onProgress)\n }\n\n // ── 순차 처리 (concurrency === 1) ────────────────────\n if (concurrency <= 1) {\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n const page = await doc.getPage(i)\n try {\n const imageData = await renderPageToPng(page)\n const result = await provider(imageData, i, \"image/png\")\n for (const b of ocrResultToBlocks(result, i)) blocks.push(b)\n } catch (err) {\n // 개별 페이지 실패 시 경고 발행 후 계속 진행\n warnings?.push({\n page: i,\n message: `페이지 ${i} OCR 실패: ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n }\n }\n return blocks\n }\n\n // ── 병렬 처리 (concurrency > 1) ──────────────────────\n // 처리 대상 페이지 번호 수집\n const pageNumbers: number[] = []\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n pageNumbers.push(i)\n }\n\n // 각 페이지에 대한 태스크 생성 (에러는 개별 캐치)\n const tasks = pageNumbers.map(pageNum => async (): Promise<{ pageNum: number; pageBlocks: IRBlock[] } | null> => {\n try {\n const page = await doc.getPage(pageNum)\n const imageData = await renderPageToPng(page)\n const result = await provider(imageData, pageNum, \"image/png\")\n return { pageNum, pageBlocks: ocrResultToBlocks(result, pageNum) }\n } catch (err) {\n // 개별 페이지 실패 시 경고 발행 후 null 반환\n warnings?.push({\n page: pageNum,\n message: `페이지 ${pageNum} OCR 실패: ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n return null\n }\n })\n\n // 병렬 실행 — concurrency 수만큼 동시 처리\n const taskResults = await runWithConcurrency(tasks, concurrency)\n\n // 결과를 페이지 번호 순서대로 합산 (pageNumbers 순서 = 오름차순 보장)\n for (const item of taskResults) {\n if (!item) continue\n for (const b of item.pageBlocks) blocks.push(b)\n }\n\n return blocks\n}\n\n/**\n * 배치 OCR 처리 — BatchOcrProvider를 사용하여 N페이지씩 묶어 처리.\n *\n * concurrency > 1이면 여러 배치를 동시에 실행하여 속도 향상.\n * 예: 5페이지/배치 × 4 동시 = 20페이지 동시 처리.\n */\nasync function ocrPagesBatch(\n doc: { numPages: number; getPage(n: number): Promise<PdfPageProxy> },\n provider: BatchOcrProvider,\n pageFilter: Set<number> | null,\n effectivePageCount: number,\n warnings?: ParseWarning[],\n concurrency: number = 1,\n onProgress?: (current: number, total: number) => void\n): Promise<IRBlock[]> {\n // 1. 대상 페이지 번호 수집\n const pageNumbers: number[] = []\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n pageNumbers.push(i)\n }\n\n // 2. 모든 페이지를 PNG로 렌더링\n const pageImages: Array<{image: Uint8Array, pageNum: number}> = []\n for (const pageNum of pageNumbers) {\n const page = await doc.getPage(pageNum)\n const image = await renderPageToPng(page)\n pageImages.push({ image, pageNum })\n }\n\n // 3. batchSize개씩 그룹핑\n const batches: Array<typeof pageImages> = []\n for (let i = 0; i < pageImages.length; i += provider.batchSize) {\n batches.push(pageImages.slice(i, i + provider.batchSize))\n }\n\n // 4. 배치 태스크 생성 — 각 배치의 결과를 {batchIdx, pageBlocks} 형태로 반환\n let processed = 0\n type BatchResult = { batchIdx: number; pageBlocks: Array<{pageNum: number; blocks: IRBlock[]}> }\n\n const batchTasks = batches.map((batch, batchIdx) => async (): Promise<BatchResult> => {\n const pageBlocks: Array<{pageNum: number; blocks: IRBlock[]}> = []\n try {\n const results = await provider.processBatch(batch)\n for (const { pageNum } of batch) {\n const result = results.get(pageNum)\n pageBlocks.push({\n pageNum,\n blocks: result ? ocrResultToBlocks(result, pageNum) : [],\n })\n }\n } catch (err) {\n const range = `${batch[0].pageNum}-${batch[batch.length - 1].pageNum}`\n warnings?.push({\n message: `배치 OCR 실패 (페이지 ${range}): ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n // 실패 배치는 빈 결과\n for (const { pageNum } of batch) {\n pageBlocks.push({ pageNum, blocks: [] })\n }\n }\n // 진행률 갱신 (병렬 실행 중 atomic하지 않지만 표시용으로 충분)\n processed += batch.length\n onProgress?.(processed, pageNumbers.length)\n return { batchIdx, pageBlocks }\n })\n\n // 5. 병렬 실행 — concurrency개 배치를 동시 처리\n const effectiveConcurrency = Math.max(1, concurrency)\n const batchResults = await runWithConcurrency(batchTasks, effectiveConcurrency)\n\n // 6. 배치 순서대로 블록 합산 (페이지 순서 보존)\n const blocks: IRBlock[] = []\n for (const result of batchResults) {\n for (const { blocks: pageBlks } of result.pageBlocks) {\n for (const b of pageBlks) blocks.push(b)\n }\n }\n\n return blocks\n}\n\ninterface PdfPageProxy {\n getViewport(params: { scale: number }): { width: number; height: number }\n render(params: { canvasContext: unknown; viewport: unknown }): { promise: Promise<void> }\n}\n\n/**\n * PDF 페이지를 PNG로 렌더링.\n * @napi-rs/canvas 사용 (kordoc 번들 의존성, 별도 설치 불필요)\n */\nasync function renderPageToPng(page: PdfPageProxy): Promise<Uint8Array> {\n const { createCanvas } = await import(\"@napi-rs/canvas\")\n\n const scale = 2.0 // 300 DPI 근사\n const viewport = page.getViewport({ scale })\n const canvas = createCanvas(Math.floor(viewport.width), Math.floor(viewport.height))\n const ctx = canvas.getContext(\"2d\")\n\n await page.render({ canvasContext: ctx as unknown, viewport }).promise\n return new Uint8Array(canvas.toBuffer(\"image/png\"))\n}\n"],"mappings":";;;;AAoBO,SAAS,iBAAiB,UAAkB,YAA+B;AAChF,QAAM,SAAoB,CAAC;AAC3B,QAAM,QAAQ,SAAS,MAAM,IAAI;AACjC,MAAI,IAAI;AAER,SAAO,IAAI,MAAM,QAAQ;AACvB,UAAM,OAAO,MAAM,CAAC;AAGpB,QAAI,KAAK,KAAK,MAAM,IAAI;AACtB;AACA;AAAA,IACF;AAGA,UAAM,eAAe,KAAK,MAAM,mBAAmB;AACnD,QAAI,cAAc;AAChB,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,OAAO,aAAa,CAAC,EAAE;AAAA,QACvB,MAAM,aAAa,CAAC,EAAE,KAAK;AAAA,QAC3B;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,QAAI,iBAAiB,KAAK,KAAK,KAAK,CAAC,GAAG;AACtC,aAAO,KAAK,EAAE,MAAM,aAAa,WAAW,CAAC;AAC7C;AACA;AAAA,IACF;AAGA,QAAI,KAAK,KAAK,EAAE,WAAW,GAAG,GAAG;AAC/B,YAAM,aAAuB,CAAC;AAC9B,aAAO,IAAI,MAAM,UAAU,MAAM,CAAC,EAAE,KAAK,EAAE,WAAW,GAAG,GAAG;AAC1D,mBAAW,KAAK,MAAM,CAAC,CAAC;AACxB;AAAA,MACF;AACA,YAAM,QAAQ,mBAAmB,UAAU;AAC3C,UAAI,OAAO;AACT,eAAO,KAAK,EAAE,MAAM,SAAS,OAAO,WAAW,CAAC;AAAA,MAClD;AACA;AAAA,IACF;AAGA,UAAM,UAAU,KAAK,MAAM,qBAAqB;AAChD,QAAI,SAAS;AACX,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,UAAU;AAAA,QACV,MAAM,QAAQ,CAAC,EAAE,KAAK;AAAA,QACtB;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,UAAM,UAAU,KAAK,MAAM,qBAAqB;AAChD,QAAI,SAAS;AACX,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,UAAU;AAAA,QACV,MAAM,QAAQ,CAAC,EAAE,KAAK;AAAA,QACtB;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,UAAM,YAAsB,CAAC;AAC7B,WAAO,IAAI,MAAM,UAAU,MAAM,CAAC,EAAE,KAAK,MAAM,MAAM,CAAC,iBAAiB,MAAM,CAAC,CAAC,GAAG;AAChF,gBAAU,KAAK,MAAM,CAAC,EAAE,KAAK,CAAC;AAC9B;AAAA,IACF;AACA,QAAI,UAAU,SAAS,GAAG;AACxB,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,MAAM,UAAU,KAAK,IAAI;AAAA,QACzB;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO;AACT;AAKA,SAAS,iBAAiB,MAAuB;AAC/C,MAAI,aAAa,KAAK,IAAI,EAAG,QAAO;AACpC,MAAI,KAAK,KAAK,EAAE,WAAW,GAAG,EAAG,QAAO;AACxC,MAAI,iBAAiB,KAAK,KAAK,KAAK,CAAC,EAAG,QAAO;AAC/C,MAAI,eAAe,KAAK,IAAI,EAAG,QAAO;AACtC,MAAI,eAAe,KAAK,IAAI,EAAG,QAAO;AACtC,SAAO;AACT;AAQA,SAAS,mBAAmB,OAAiC;AAC3D,QAAM,eAAe,MAAM,KAAK,UAAQ,iBAAiB,KAAK,KAAK,KAAK,CAAC,CAAC;AAE1E,QAAM,OAAmB,CAAC;AAC1B,MAAI,UAAU;AAEd,aAAW,QAAQ,OAAO;AAExB,QAAI,0CAA0C,KAAK,KAAK,KAAK,CAAC,EAAG;AAEjE,UAAM,QAAQ,KAAK,MAAM,GAAG;AAE5B,UAAM,QAAkB,MACrB,MAAM,GAAG,MAAM,MAAM,SAAS,CAAC,EAAE,KAAK,MAAM,KAAK,KAAK,MAAS,EAC/D,IAAI,WAAS;AAAA,MACZ,MAAM,KAAK,KAAK;AAAA,MAChB,SAAS;AAAA,MACT,SAAS;AAAA,IACX,EAAE;AAEJ,QAAI,MAAM,SAAS,GAAG;AACpB,WAAK,KAAK,KAAK;AACf,gBAAU,KAAK,IAAI,SAAS,MAAM,MAAM;AAAA,IAC1C;AAAA,EACF;AAEA,MAAI,KAAK,WAAW,EAAG,QAAO;AAG9B,aAAW,OAAO,MAAM;AACtB,WAAO,IAAI,SAAS,SAAS;AAC3B,UAAI,KAAK,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE,CAAC;AAAA,IAC/C;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM,KAAK;AAAA,IACX,MAAM;AAAA,IACN,OAAO;AAAA,IACP,WAAW,gBAAgB,KAAK,SAAS;AAAA,EAC3C;AACF;;;AC3IA,eAAe,mBACb,OACA,OACc;AACd,QAAM,UAAe,IAAI,MAAM,MAAM,MAAM;AAC3C,MAAI,YAAY;AAGhB,iBAAe,SAAS;AACtB,WAAO,YAAY,MAAM,QAAQ;AAC/B,YAAM,MAAM;AACZ,cAAQ,GAAG,IAAI,MAAM,MAAM,GAAG,EAAE;AAAA,IAClC;AAAA,EACF;AAGA,QAAM,QAAQ,IAAI,MAAM,KAAK,EAAE,QAAQ,KAAK,IAAI,OAAO,MAAM,MAAM,EAAE,GAAG,MAAM,OAAO,CAAC,CAAC;AACvF,SAAO;AACT;AAKA,SAAS,kBAAkB,QAAsC,SAA4B;AAC3F,QAAM,aAAwB,CAAC;AAC/B,MAAI,OAAO,WAAW,UAAU;AAE9B,QAAI,OAAO,KAAK,GAAG;AACjB,iBAAW,KAAK,EAAE,MAAM,aAAa,MAAM,OAAO,KAAK,GAAG,YAAY,QAAQ,CAAC;AAAA,IACjF;AAAA,EACF,WAAW,UAAU,OAAO,WAAW,YAAY,cAAc,QAAQ;AAEvE,UAAM,aAAa;AACnB,QAAI,WAAW,SAAS,KAAK,GAAG;AAC9B,YAAM,YAAY,iBAAiB,WAAW,UAAU,OAAO;AAC/D,iBAAW,KAAK,UAAW,YAAW,KAAK,CAAC;AAAA,IAC9C;AAAA,EACF;AACA,SAAO;AACT;AAGA,SAAS,gBAAgB,GAAmC;AAC1D,SAAO,CAAC,CAAC,KAAK,OAAO,MAAM,YAAY,aAAa,KAAM,EAAuB,YAAY;AAC/F;AAcA,eAAsB,SACpB,KACA,UACA,YACA,oBACA,UACA,cAAsB,GACtB,YACoB;AACpB,QAAM,SAAoB,CAAC;AAG3B,MAAI,gBAAgB,QAAQ,GAAG;AAC7B,WAAO,cAAc,KAAK,UAAU,YAAY,oBAAoB,UAAU,aAAa,UAAU;AAAA,EACvG;AAGA,MAAI,eAAe,GAAG;AACpB,aAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,UAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,YAAM,OAAO,MAAM,IAAI,QAAQ,CAAC;AAChC,UAAI;AACF,cAAM,YAAY,MAAM,gBAAgB,IAAI;AAC5C,cAAM,SAAS,MAAM,SAAS,WAAW,GAAG,WAAW;AACvD,mBAAW,KAAK,kBAAkB,QAAQ,CAAC,EAAG,QAAO,KAAK,CAAC;AAAA,MAC7D,SAAS,KAAK;AAEZ,kBAAU,KAAK;AAAA,UACb,MAAM;AAAA,UACN,SAAS,sBAAO,CAAC,sBAAY,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,UAC7E,MAAM;AAAA,QACR,CAAC;AAAA,MACH;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAIA,QAAM,cAAwB,CAAC;AAC/B,WAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,QAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,gBAAY,KAAK,CAAC;AAAA,EACpB;AAGA,QAAM,QAAQ,YAAY,IAAI,aAAW,YAAwE;AAC/G,QAAI;AACF,YAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AACtC,YAAM,YAAY,MAAM,gBAAgB,IAAI;AAC5C,YAAM,SAAS,MAAM,SAAS,WAAW,SAAS,WAAW;AAC7D,aAAO,EAAE,SAAS,YAAY,kBAAkB,QAAQ,OAAO,EAAE;AAAA,IACnE,SAAS,KAAK;AAEZ,gBAAU,KAAK;AAAA,QACb,MAAM;AAAA,QACN,SAAS,sBAAO,OAAO,sBAAY,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,QACnF,MAAM;AAAA,MACR,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF,CAAC;AAGD,QAAM,cAAc,MAAM,mBAAmB,OAAO,WAAW;AAG/D,aAAW,QAAQ,aAAa;AAC9B,QAAI,CAAC,KAAM;AACX,eAAW,KAAK,KAAK,WAAY,QAAO,KAAK,CAAC;AAAA,EAChD;AAEA,SAAO;AACT;AAQA,eAAe,cACb,KACA,UACA,YACA,oBACA,UACA,cAAsB,GACtB,YACoB;AAEpB,QAAM,cAAwB,CAAC;AAC/B,WAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,QAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,gBAAY,KAAK,CAAC;AAAA,EACpB;AAGA,QAAM,aAA0D,CAAC;AACjE,aAAW,WAAW,aAAa;AACjC,UAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AACtC,UAAM,QAAQ,MAAM,gBAAgB,IAAI;AACxC,eAAW,KAAK,EAAE,OAAO,QAAQ,CAAC;AAAA,EACpC;AAGA,QAAM,UAAoC,CAAC;AAC3C,WAAS,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK,SAAS,WAAW;AAC9D,YAAQ,KAAK,WAAW,MAAM,GAAG,IAAI,SAAS,SAAS,CAAC;AAAA,EAC1D;AAGA,MAAI,YAAY;AAGhB,QAAM,aAAa,QAAQ,IAAI,CAAC,OAAO,aAAa,YAAkC;AACpF,UAAM,aAA0D,CAAC;AACjE,QAAI;AACF,YAAM,UAAU,MAAM,SAAS,aAAa,KAAK;AACjD,iBAAW,EAAE,QAAQ,KAAK,OAAO;AAC/B,cAAM,SAAS,QAAQ,IAAI,OAAO;AAClC,mBAAW,KAAK;AAAA,UACd;AAAA,UACA,QAAQ,SAAS,kBAAkB,QAAQ,OAAO,IAAI,CAAC;AAAA,QACzD,CAAC;AAAA,MACH;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,QAAQ,GAAG,MAAM,CAAC,EAAE,OAAO,IAAI,MAAM,MAAM,SAAS,CAAC,EAAE,OAAO;AACpE,gBAAU,KAAK;AAAA,QACb,SAAS,qDAAkB,KAAK,MAAM,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,QACtF,MAAM;AAAA,MACR,CAAC;AAED,iBAAW,EAAE,QAAQ,KAAK,OAAO;AAC/B,mBAAW,KAAK,EAAE,SAAS,QAAQ,CAAC,EAAE,CAAC;AAAA,MACzC;AAAA,IACF;AAEA,iBAAa,MAAM;AACnB,iBAAa,WAAW,YAAY,MAAM;AAC1C,WAAO,EAAE,UAAU,WAAW;AAAA,EAChC,CAAC;AAGD,QAAM,uBAAuB,KAAK,IAAI,GAAG,WAAW;AACpD,QAAM,eAAe,MAAM,mBAAmB,YAAY,oBAAoB;AAG9E,QAAM,SAAoB,CAAC;AAC3B,aAAW,UAAU,cAAc;AACjC,eAAW,EAAE,QAAQ,SAAS,KAAK,OAAO,YAAY;AACpD,iBAAW,KAAK,SAAU,QAAO,KAAK,CAAC;AAAA,IACzC;AAAA,EACF;AAEA,SAAO;AACT;AAWA,eAAe,gBAAgB,MAAyC;AACtE,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,iBAAiB;AAEvD,QAAM,QAAQ;AACd,QAAM,WAAW,KAAK,YAAY,EAAE,MAAM,CAAC;AAC3C,QAAM,SAAS,aAAa,KAAK,MAAM,SAAS,KAAK,GAAG,KAAK,MAAM,SAAS,MAAM,CAAC;AACnF,QAAM,MAAM,OAAO,WAAW,IAAI;AAElC,QAAM,KAAK,OAAO,EAAE,eAAe,KAAgB,SAAS,CAAC,EAAE;AAC/D,SAAO,IAAI,WAAW,OAAO,SAAS,WAAW,CAAC;AACpD;","names":[]}
|
|
@@ -201,7 +201,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
201
201
|
return createTesseractProvider();
|
|
202
202
|
}
|
|
203
203
|
if (mode === "gemini" || mode === "claude" || mode === "codex") {
|
|
204
|
-
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-
|
|
204
|
+
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-PCT4I4LK.js");
|
|
205
205
|
const effectiveBatch = batchSize ?? DEFAULT_BATCH_SIZES[mode];
|
|
206
206
|
if (effectiveBatch > 1) {
|
|
207
207
|
return createBatchCliProvider(mode, effectiveBatch);
|
|
@@ -232,7 +232,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
232
232
|
return createTesseractProvider();
|
|
233
233
|
}
|
|
234
234
|
if (detected === "gemini" || detected === "codex" || detected === "claude") {
|
|
235
|
-
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-
|
|
235
|
+
const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-PCT4I4LK.js");
|
|
236
236
|
const effectiveBatch = batchSize ?? DEFAULT_BATCH_SIZES[detected];
|
|
237
237
|
if (effectiveBatch > 1) {
|
|
238
238
|
return createBatchCliProvider(detected, effectiveBatch);
|
|
@@ -244,4 +244,4 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
244
244
|
export {
|
|
245
245
|
resolveOcrProvider
|
|
246
246
|
};
|
|
247
|
-
//# sourceMappingURL=resolve-
|
|
247
|
+
//# sourceMappingURL=resolve-4FSAQF2S.js.map
|
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
sanitizeError,
|
|
9
9
|
sanitizeHref,
|
|
10
10
|
toArrayBuffer
|
|
11
|
-
} from "./chunk-
|
|
11
|
+
} from "./chunk-W5KUC23B.js";
|
|
12
12
|
import "./chunk-ZWE3DS7E.js";
|
|
13
13
|
export {
|
|
14
14
|
KordocError,
|
|
@@ -20,4 +20,4 @@ export {
|
|
|
20
20
|
sanitizeHref,
|
|
21
21
|
toArrayBuffer
|
|
22
22
|
};
|
|
23
|
-
//# sourceMappingURL=utils-
|
|
23
|
+
//# sourceMappingURL=utils-HSF5HI5T.js.map
|
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
import {
|
|
3
3
|
detectFormat,
|
|
4
4
|
parse
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-ZOEUKD77.js";
|
|
6
6
|
import {
|
|
7
7
|
toArrayBuffer
|
|
8
|
-
} from "./chunk-
|
|
8
|
+
} from "./chunk-W5KUC23B.js";
|
|
9
9
|
import "./chunk-MOL7MDBG.js";
|
|
10
10
|
import "./chunk-ZWE3DS7E.js";
|
|
11
11
|
|
|
@@ -126,4 +126,4 @@ async function sendWebhook(url, payload) {
|
|
|
126
126
|
export {
|
|
127
127
|
watchDirectory
|
|
128
128
|
};
|
|
129
|
-
//# sourceMappingURL=watch-
|
|
129
|
+
//# sourceMappingURL=watch-R2JHXDGF.js.map
|
package/package.json
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/ocr/batch-provider.ts"],"sourcesContent":["/**\n * CLI 배치 OCR 프로바이더\n *\n * 여러 페이지 이미지를 단일 CLI 호출로 처리하여 API 호출 수를 대폭 감소.\n * gemini/claude: @file 멀티 참조, codex: --image 멀티 플래그\n *\n * 299페이지 기준:\n * - 기존: CLI 299회 호출 (~30분)\n * - 배치: CLI 3~6회 호출 (~3분)\n */\n\nimport { spawnSync } from \"child_process\"\nimport { writeFileSync, readFileSync, unlinkSync, mkdirSync } from \"fs\"\nimport { join } from \"path\"\nimport { tmpdir } from \"os\"\nimport type { StructuredOcrResult, BatchOcrProvider } from \"../types.js\"\n\n/** 배치 OCR 프롬프트 */\nconst BATCH_OCR_PROMPT =\n \"다음 문서 페이지 이미지들을 OCR하여 순수 Markdown으로 변환하세요.\\n\\n\" +\n \"규칙:\\n\" +\n \"- 각 페이지 결과 사이에 반드시 이 구분자를 삽입: <!-- PAGE_BREAK -->\\n\" +\n \"- 테이블은 Markdown 테이블 문법 사용 (| 구분, |---|---| 헤더 구분선 포함)\\n\" +\n \"- 병합된 셀은 해당 위치에 내용 기재\\n\" +\n \"- 헤딩은 글자 크기에 따라 ## ~ ###### 사용\\n\" +\n \"- 리스트는 - 또는 1. 사용\\n\" +\n \"- 이미지, 도형 등 비텍스트 요소는 무시\\n\" +\n \"- 원문의 읽기 순서와 구조를 유지\\n\" +\n \"- ```로 감싸지 말고 순수 Markdown만 출력\"\n\n/** 모드별 기본 배치 크기 (CLI 내부 타임아웃 + 실측 기반)\n *\n * gemini CLI: 10장 이상에서 AbortError 발생 (내부 타임아웃).\n * 5장 배치가 안정적으로 동작 확인 (35초/배치).\n * 299페이지 = 60배치 = 기존 299회 대비 80% 감소.\n */\nexport const DEFAULT_BATCH_SIZES: Record<string, number> = {\n gemini: 5,\n claude: 5,\n codex: 10,\n}\n\n/** 임시 디렉토리 — gemini CLI는 cwd 하위 + gitignore 밖만 @참조 가능 */\nlet _batchTempDir: string | null = null\nfunction getBatchTempDir(): string {\n if (!_batchTempDir) {\n _batchTempDir = join(process.cwd(), \"_kordoc_ocr_tmp\")\n mkdirSync(_batchTempDir, { recursive: true })\n }\n return _batchTempDir\n}\n\n/**\n * 배치 CLI 프로바이더 생성\n */\nexport function createBatchCliProvider(\n mode: \"gemini\" | \"claude\" | \"codex\",\n batchSize: number\n): BatchOcrProvider {\n return {\n __batch: true as const,\n batchSize,\n async processBatch(pages) {\n const results = new Map<number, StructuredOcrResult>()\n const tempDir = getBatchTempDir()\n const tempFiles: string[] = []\n\n try {\n // 1. Write all page images to temp files\n for (const { image, pageNum } of pages) {\n const path = join(tempDir, `batch-p${pageNum}.png`)\n writeFileSync(path, image)\n tempFiles.push(path)\n }\n\n // 2. Call CLI with all file references\n let output: string\n if (mode === \"codex\") {\n output = callBatchCodexCli(tempFiles)\n } else {\n output = callBatchCli(mode, tempFiles)\n }\n\n // 3. Parse response by PAGE_BREAK separator\n const cleaned = stripCodeFence(output.trim())\n const parts = cleaned.split(/<!--\\s*PAGE_BREAK\\s*-->/)\n .map(p => p.trim())\n .filter(p => p.length > 0)\n\n // 4. Map results to page numbers (best-effort if count mismatch)\n for (let i = 0; i < pages.length; i++) {\n const pageNum = pages[i].pageNum\n if (i < parts.length) {\n results.set(pageNum, { markdown: parts[i] })\n }\n // If fewer parts than pages, remaining pages get no result\n }\n } finally {\n // 5. Clean up temp files\n for (const f of tempFiles) {\n try { unlinkSync(f) } catch { /* ignore */ }\n }\n }\n\n return results\n },\n }\n}\n\n/** gemini/claude 배치 호출 */\nfunction callBatchCli(mode: \"gemini\" | \"claude\", imagePaths: string[]): string {\n const fileRefs = imagePaths.map(p => `@${p}`).join(\"\\n\")\n const prompt = `${BATCH_OCR_PROMPT}\\n\\n${fileRefs}`\n\n let args: string[]\n if (mode === \"gemini\") {\n args = [\"--prompt\", prompt, \"--yolo\"]\n const model = process.env.KORDOC_GEMINI_MODEL\n if (model) args.push(\"--model\", model)\n } else {\n // claude\n args = [\"--print\", prompt]\n const model = process.env.KORDOC_CLAUDE_MODEL\n if (model) args.push(\"--model\", model)\n }\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = spawnSync(mode, args, {\n encoding: \"utf-8\",\n timeout: timeoutMs,\n maxBuffer: 50 * 1024 * 1024, // 50MB (large batch output)\n ...(mode === \"claude\" ? { cwd: tmpdir() } : {}),\n })\n\n if (result.error) {\n throw new Error(`${mode} 배치 OCR 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`${mode} 배치 OCR 실패: ${errMsg}`)\n }\n\n return result.stdout || \"\"\n}\n\n/** codex 배치 호출 — --image를 여러 번 지정 */\nfunction callBatchCodexCli(imagePaths: string[]): string {\n const outPath = join(tmpdir(), `kordoc-codex-batch-${Date.now()}.txt`)\n try {\n const args = [\"exec\", BATCH_OCR_PROMPT]\n for (const p of imagePaths) {\n args.push(\"--image\", p)\n }\n args.push(\"--output-last-message\", outPath)\n const model = process.env.KORDOC_CODEX_MODEL\n if (model) args.push(\"--model\", model)\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = spawnSync(\"codex\", args, {\n encoding: \"utf-8\",\n timeout: timeoutMs,\n maxBuffer: 50 * 1024 * 1024,\n input: \"\",\n })\n\n if (result.error) {\n throw new Error(`codex 배치 OCR 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`codex 배치 OCR 실패: ${errMsg}`)\n }\n\n try {\n return readFileSync(outPath, \"utf-8\")\n } catch {\n return result.stdout || \"\"\n }\n } finally {\n try { unlinkSync(outPath) } catch { /* ignore */ }\n }\n}\n\n/** LLM 출력에서 코드 펜스 제거 (cli-provider.ts와 동일 로직) */\nfunction stripCodeFence(text: string): string {\n const match = text.match(/^```(?:markdown|md)?\\s*\\n([\\s\\S]*?)\\n```\\s*$/m)\n return match ? match[1].trim() : text\n}\n"],"mappings":";;;;AAWA,SAAS,iBAAiB;AAC1B,SAAS,eAAe,cAAc,YAAY,iBAAiB;AACnE,SAAS,YAAY;AACrB,SAAS,cAAc;AAIvB,IAAM,mBACJ;AAiBK,IAAM,sBAA8C;AAAA,EACzD,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,OAAO;AACT;AAGA,IAAI,gBAA+B;AACnC,SAAS,kBAA0B;AACjC,MAAI,CAAC,eAAe;AAClB,oBAAgB,KAAK,QAAQ,IAAI,GAAG,iBAAiB;AACrD,cAAU,eAAe,EAAE,WAAW,KAAK,CAAC;AAAA,EAC9C;AACA,SAAO;AACT;AAKO,SAAS,uBACd,MACA,WACkB;AAClB,SAAO;AAAA,IACL,SAAS;AAAA,IACT;AAAA,IACA,MAAM,aAAa,OAAO;AACxB,YAAM,UAAU,oBAAI,IAAiC;AACrD,YAAM,UAAU,gBAAgB;AAChC,YAAM,YAAsB,CAAC;AAE7B,UAAI;AAEF,mBAAW,EAAE,OAAO,QAAQ,KAAK,OAAO;AACtC,gBAAM,OAAO,KAAK,SAAS,UAAU,OAAO,MAAM;AAClD,wBAAc,MAAM,KAAK;AACzB,oBAAU,KAAK,IAAI;AAAA,QACrB;AAGA,YAAI;AACJ,YAAI,SAAS,SAAS;AACpB,mBAAS,kBAAkB,SAAS;AAAA,QACtC,OAAO;AACL,mBAAS,aAAa,MAAM,SAAS;AAAA,QACvC;AAGA,cAAM,UAAU,eAAe,OAAO,KAAK,CAAC;AAC5C,cAAM,QAAQ,QAAQ,MAAM,yBAAyB,EAClD,IAAI,OAAK,EAAE,KAAK,CAAC,EACjB,OAAO,OAAK,EAAE,SAAS,CAAC;AAG3B,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,gBAAM,UAAU,MAAM,CAAC,EAAE;AACzB,cAAI,IAAI,MAAM,QAAQ;AACpB,oBAAQ,IAAI,SAAS,EAAE,UAAU,MAAM,CAAC,EAAE,CAAC;AAAA,UAC7C;AAAA,QAEF;AAAA,MACF,UAAE;AAEA,mBAAW,KAAK,WAAW;AACzB,cAAI;AAAE,uBAAW,CAAC;AAAA,UAAE,QAAQ;AAAA,UAAe;AAAA,QAC7C;AAAA,MACF;AAEA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAGA,SAAS,aAAa,MAA2B,YAA8B;AAC7E,QAAM,WAAW,WAAW,IAAI,OAAK,IAAI,CAAC,EAAE,EAAE,KAAK,IAAI;AACvD,QAAM,SAAS,GAAG,gBAAgB;AAAA;AAAA,EAAO,QAAQ;AAEjD,MAAI;AACJ,MAAI,SAAS,UAAU;AACrB,WAAO,CAAC,YAAY,QAAQ,QAAQ;AACpC,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAAA,EACvC,OAAO;AAEL,WAAO,CAAC,WAAW,MAAM;AACzB,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAAA,EACvC;AAEA,QAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,QAAM,SAAS,UAAU,MAAM,MAAM;AAAA,IACnC,UAAU;AAAA,IACV,SAAS;AAAA,IACT,WAAW,KAAK,OAAO;AAAA;AAAA,IACvB,GAAI,SAAS,WAAW,EAAE,KAAK,OAAO,EAAE,IAAI,CAAC;AAAA,EAC/C,CAAC;AAED,MAAI,OAAO,OAAO;AAChB,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,OAAO,MAAM,OAAO,EAAE;AAAA,EAC9D;AACA,MAAI,OAAO,WAAW,GAAG;AACvB,UAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,MAAM,EAAE;AAAA,EAChD;AAEA,SAAO,OAAO,UAAU;AAC1B;AAGA,SAAS,kBAAkB,YAA8B;AACvD,QAAM,UAAU,KAAK,OAAO,GAAG,sBAAsB,KAAK,IAAI,CAAC,MAAM;AACrE,MAAI;AACF,UAAM,OAAO,CAAC,QAAQ,gBAAgB;AACtC,eAAW,KAAK,YAAY;AAC1B,WAAK,KAAK,WAAW,CAAC;AAAA,IACxB;AACA,SAAK,KAAK,yBAAyB,OAAO;AAC1C,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAErC,UAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,UAAM,SAAS,UAAU,SAAS,MAAM;AAAA,MACtC,UAAU;AAAA,MACV,SAAS;AAAA,MACT,WAAW,KAAK,OAAO;AAAA,MACvB,OAAO;AAAA,IACT,CAAC;AAED,QAAI,OAAO,OAAO;AAChB,YAAM,IAAI,MAAM,wCAAoB,OAAO,MAAM,OAAO,EAAE;AAAA,IAC5D;AACA,QAAI,OAAO,WAAW,GAAG;AACvB,YAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,YAAM,IAAI,MAAM,wCAAoB,MAAM,EAAE;AAAA,IAC9C;AAEA,QAAI;AACF,aAAO,aAAa,SAAS,OAAO;AAAA,IACtC,QAAQ;AACN,aAAO,OAAO,UAAU;AAAA,IAC1B;AAAA,EACF,UAAE;AACA,QAAI;AAAE,iBAAW,OAAO;AAAA,IAAE,QAAQ;AAAA,IAAe;AAAA,EACnD;AACF;AAGA,SAAS,eAAe,MAAsB;AAC5C,QAAM,QAAQ,KAAK,MAAM,+CAA+C;AACxE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;","names":[]}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/ocr/markdown-to-blocks.ts","../src/ocr/provider.ts"],"sourcesContent":["/**\n * Markdown → IRBlock[] 역파싱\n *\n * Vision LLM(gemini/claude/codex 등)이 반환한 Markdown 문자열을\n * kordoc의 IRBlock[] 중간 표현으로 변환.\n * 기존 blocksToMarkdown()의 역방향 처리.\n */\n\nimport type { IRBlock, IRTable, IRCell } from \"../types.js\"\n\n/**\n * Markdown 문자열을 IRBlock[] 배열로 변환.\n *\n * 지원 요소:\n * - 헤딩: # ~ ######\n * - 테이블: | col1 | col2 | (파이프 구분, |---|---| 구분선 포함)\n * - 순서/비순서 리스트: - / 1.\n * - 구분선: ---, ***, ___\n * - 일반 텍스트 (paragraph)\n */\nexport function markdownToBlocks(markdown: string, pageNumber: number): IRBlock[] {\n const blocks: IRBlock[] = []\n const lines = markdown.split(\"\\n\")\n let i = 0\n\n while (i < lines.length) {\n const line = lines[i]\n\n // 빈 줄 스킵\n if (line.trim() === \"\") {\n i++\n continue\n }\n\n // 1. 헤딩: # ~ ######\n const headingMatch = line.match(/^(#{1,6})\\s+(.+)$/)\n if (headingMatch) {\n blocks.push({\n type: \"heading\",\n level: headingMatch[1].length,\n text: headingMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 2. 구분선: ---, ***, ___\n if (/^[-*_]{3,}\\s*$/.test(line.trim())) {\n blocks.push({ type: \"separator\", pageNumber })\n i++\n continue\n }\n\n // 3. 테이블: | 로 시작하는 연속 행 수집\n if (line.trim().startsWith(\"|\")) {\n const tableLines: string[] = []\n while (i < lines.length && lines[i].trim().startsWith(\"|\")) {\n tableLines.push(lines[i])\n i++\n }\n const table = parseMarkdownTable(tableLines)\n if (table) {\n blocks.push({ type: \"table\", table, pageNumber })\n }\n continue\n }\n\n // 4. 비순서 리스트: -, *, +\n const ulMatch = line.match(/^(\\s*)[-*+]\\s+(.+)$/)\n if (ulMatch) {\n blocks.push({\n type: \"list\",\n listType: \"unordered\",\n text: ulMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 5. 순서 리스트: 1.\n const olMatch = line.match(/^(\\s*)\\d+\\.\\s+(.+)$/)\n if (olMatch) {\n blocks.push({\n type: \"list\",\n listType: \"ordered\",\n text: olMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 6. 일반 텍스트 — 구조적 행이 나올 때까지 병합\n const paraLines: string[] = []\n while (i < lines.length && lines[i].trim() !== \"\" && !isStructuralLine(lines[i])) {\n paraLines.push(lines[i].trim())\n i++\n }\n if (paraLines.length > 0) {\n blocks.push({\n type: \"paragraph\",\n text: paraLines.join(\"\\n\"),\n pageNumber,\n })\n }\n }\n\n return blocks\n}\n\n/**\n * 구조적 행 판별 — paragraph 병합 중단 트리거.\n */\nfunction isStructuralLine(line: string): boolean {\n if (/^#{1,6}\\s+/.test(line)) return true\n if (line.trim().startsWith(\"|\")) return true\n if (/^[-*_]{3,}\\s*$/.test(line.trim())) return true\n if (/^\\s*[-*+]\\s+/.test(line)) return true\n if (/^\\s*\\d+\\.\\s+/.test(line)) return true\n return false\n}\n\n/**\n * Markdown 테이블 행 배열을 IRTable로 변환.\n *\n * 구분선 행(|---|---|)은 제거 후 데이터 행만 파싱.\n * hasHeader: 구분선이 있었으면 true.\n */\nfunction parseMarkdownTable(lines: string[]): IRTable | null {\n const hasSeparator = lines.some(line => /^\\|[\\s:|-]+\\|$/.test(line.trim()))\n\n const rows: IRCell[][] = []\n let maxCols = 0\n\n for (const line of lines) {\n // 구분선 행 스킵: |---|---| 패턴\n if (/^\\|\\s*:?-+:?\\s*(\\|\\s*:?-+:?\\s*)+\\|?\\s*$/.test(line.trim())) continue\n\n const parts = line.split(\"|\")\n // 앞뒤 빈 요소 제거 (| 로 시작/종료하는 행)\n const cells: IRCell[] = parts\n .slice(1, parts[parts.length - 1].trim() === \"\" ? -1 : undefined)\n .map(cell => ({\n text: cell.trim(),\n colSpan: 1,\n rowSpan: 1,\n }))\n\n if (cells.length > 0) {\n rows.push(cells)\n maxCols = Math.max(maxCols, cells.length)\n }\n }\n\n if (rows.length === 0) return null\n\n // 열 수 통일 (부족한 셀은 빈 셀로 채움)\n for (const row of rows) {\n while (row.length < maxCols) {\n row.push({ text: \"\", colSpan: 1, rowSpan: 1 })\n }\n }\n\n return {\n rows: rows.length,\n cols: maxCols,\n cells: rows,\n hasHeader: hasSeparator && rows.length > 1,\n }\n}\n","/**\n * OCR 프로바이더 브릿지 — PDF 페이지를 이미지로 렌더링하여 OCR 호출\n *\n * kordoc은 OCR 라이브러리를 번들하지 않음.\n * 사용자가 OcrProvider 함수를 제공하면 이미지 기반 PDF도 텍스트 추출 가능.\n *\n * @example\n * ```ts\n * import { parse } from \"kordoc\"\n *\n * const result = await parse(buffer, {\n * ocr: async (pageImage, pageNumber, mimeType) => {\n * // Tesseract, Claude Vision, Google Vision 등 사용\n * return await myOcrService.recognize(pageImage)\n * }\n * })\n * ```\n */\n\nimport type { OcrProvider, IRBlock, ParseWarning, StructuredOcrResult, BatchOcrProvider } from \"../types.js\"\nimport { markdownToBlocks } from \"./markdown-to-blocks.js\"\n\n/**\n * 동시 실행 수를 제한한 병렬 태스크 실행 헬퍼.\n *\n * limit개의 워커를 만들어 tasks 배열을 순서대로 처리.\n * 각 워커는 완료되는 즉시 다음 태스크를 가져가므로 순서가 보존됨.\n *\n * @param tasks - 실행할 비동기 함수 배열\n * @param limit - 최대 동시 실행 수\n * @returns 입력 순서와 동일한 결과 배열\n */\nasync function runWithConcurrency<T>(\n tasks: (() => Promise<T>)[],\n limit: number\n): Promise<T[]> {\n const results: T[] = new Array(tasks.length)\n let nextIndex = 0\n\n // 각 워커는 처리할 태스크가 없을 때까지 반복\n async function worker() {\n while (nextIndex < tasks.length) {\n const idx = nextIndex++\n results[idx] = await tasks[idx]()\n }\n }\n\n // limit개 워커를 동시 실행 (tasks가 limit보다 적으면 tasks 수만큼)\n await Promise.all(Array.from({ length: Math.min(limit, tasks.length) }, () => worker()))\n return results\n}\n\n/**\n * OCR 결과(string | StructuredOcrResult)를 IRBlock[]으로 변환.\n */\nfunction ocrResultToBlocks(result: string | StructuredOcrResult, pageNum: number): IRBlock[] {\n const pageBlocks: IRBlock[] = []\n if (typeof result === \"string\") {\n // 순수 텍스트 → paragraph 블록\n if (result.trim()) {\n pageBlocks.push({ type: \"paragraph\", text: result.trim(), pageNumber: pageNum })\n }\n } else if (result && typeof result === \"object\" && \"markdown\" in result) {\n // 구조화된 결과 → Markdown → IRBlock[]\n const structured = result as StructuredOcrResult\n if (structured.markdown.trim()) {\n const converted = markdownToBlocks(structured.markdown, pageNum)\n for (const b of converted) pageBlocks.push(b)\n }\n }\n return pageBlocks\n}\n\n/** BatchOcrProvider 타입 가드 */\nfunction isBatchProvider(p: unknown): p is BatchOcrProvider {\n return !!p && typeof p === \"object\" && \"__batch\" in p && (p as BatchOcrProvider).__batch === true\n}\n\n/**\n * 이미지 기반 PDF 페이지에 OCR을 적용하여 IRBlock[] 반환.\n *\n * pdfjs page 객체에서 viewport + render를 통해 PNG 생성 후\n * 사용자 제공 OcrProvider 호출.\n *\n * - string 반환: 단순 텍스트 → paragraph 블록\n * - StructuredOcrResult 반환: Markdown → markdownToBlocks()로 구조화\n * - concurrency > 1: 병렬 처리 (워커 풀 프로바이더 권장)\n *\n * canvas 미설치 시 pdfjs render 불가하므로 에러 반환.\n */\nexport async function ocrPages(\n doc: { numPages: number; getPage(n: number): Promise<PdfPageProxy> },\n provider: OcrProvider | BatchOcrProvider,\n pageFilter: Set<number> | null,\n effectivePageCount: number,\n warnings?: ParseWarning[],\n concurrency: number = 1, // 기본값 1 = 순차 처리 (하위 호환)\n onProgress?: (current: number, total: number) => void\n): Promise<IRBlock[]> {\n const blocks: IRBlock[] = []\n\n // ── 배치 처리 (BatchOcrProvider) ────────────────────\n if (isBatchProvider(provider)) {\n return ocrPagesBatch(doc, provider, pageFilter, effectivePageCount, warnings, onProgress)\n }\n\n // ── 순차 처리 (concurrency === 1) ────────────────────\n if (concurrency <= 1) {\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n const page = await doc.getPage(i)\n try {\n const imageData = await renderPageToPng(page)\n const result = await provider(imageData, i, \"image/png\")\n for (const b of ocrResultToBlocks(result, i)) blocks.push(b)\n } catch (err) {\n // 개별 페이지 실패 시 경고 발행 후 계속 진행\n warnings?.push({\n page: i,\n message: `페이지 ${i} OCR 실패: ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n }\n }\n return blocks\n }\n\n // ── 병렬 처리 (concurrency > 1) ──────────────────────\n // 처리 대상 페이지 번호 수집\n const pageNumbers: number[] = []\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n pageNumbers.push(i)\n }\n\n // 각 페이지에 대한 태스크 생성 (에러는 개별 캐치)\n const tasks = pageNumbers.map(pageNum => async (): Promise<{ pageNum: number; pageBlocks: IRBlock[] } | null> => {\n try {\n const page = await doc.getPage(pageNum)\n const imageData = await renderPageToPng(page)\n const result = await provider(imageData, pageNum, \"image/png\")\n return { pageNum, pageBlocks: ocrResultToBlocks(result, pageNum) }\n } catch (err) {\n // 개별 페이지 실패 시 경고 발행 후 null 반환\n warnings?.push({\n page: pageNum,\n message: `페이지 ${pageNum} OCR 실패: ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n return null\n }\n })\n\n // 병렬 실행 — concurrency 수만큼 동시 처리\n const taskResults = await runWithConcurrency(tasks, concurrency)\n\n // 결과를 페이지 번호 순서대로 합산 (pageNumbers 순서 = 오름차순 보장)\n for (const item of taskResults) {\n if (!item) continue\n for (const b of item.pageBlocks) blocks.push(b)\n }\n\n return blocks\n}\n\n/**\n * 배치 OCR 처리 — BatchOcrProvider를 사용하여 N페이지씩 묶어 처리.\n */\nasync function ocrPagesBatch(\n doc: { numPages: number; getPage(n: number): Promise<PdfPageProxy> },\n provider: BatchOcrProvider,\n pageFilter: Set<number> | null,\n effectivePageCount: number,\n warnings?: ParseWarning[],\n onProgress?: (current: number, total: number) => void\n): Promise<IRBlock[]> {\n const blocks: IRBlock[] = []\n\n // 1. 대상 페이지 번호 수집\n const pageNumbers: number[] = []\n for (let i = 1; i <= effectivePageCount; i++) {\n if (pageFilter && !pageFilter.has(i)) continue\n pageNumbers.push(i)\n }\n\n // 2. 모든 페이지를 PNG로 렌더링\n const pageImages: Array<{image: Uint8Array, pageNum: number}> = []\n for (const pageNum of pageNumbers) {\n const page = await doc.getPage(pageNum)\n const image = await renderPageToPng(page)\n pageImages.push({ image, pageNum })\n }\n\n // 3. batchSize개씩 그룹핑\n const batches: Array<typeof pageImages> = []\n for (let i = 0; i < pageImages.length; i += provider.batchSize) {\n batches.push(pageImages.slice(i, i + provider.batchSize))\n }\n\n // 4. 각 배치 처리\n let processed = 0\n for (const batch of batches) {\n try {\n const results = await provider.processBatch(batch)\n for (const { pageNum } of batch) {\n const result = results.get(pageNum)\n if (result) {\n for (const b of ocrResultToBlocks(result, pageNum)) blocks.push(b)\n }\n processed++\n onProgress?.(processed, pageNumbers.length)\n }\n } catch (err) {\n // 배치 실패 — 해당 배치 전체 페이지에 경고 추가\n const range = `${batch[0].pageNum}-${batch[batch.length - 1].pageNum}`\n warnings?.push({\n message: `배치 OCR 실패 (페이지 ${range}): ${err instanceof Error ? err.message : \"알 수 없는 오류\"}`,\n code: \"OCR_PAGE_FAILED\",\n })\n processed += batch.length\n onProgress?.(processed, pageNumbers.length)\n }\n }\n\n return blocks\n}\n\ninterface PdfPageProxy {\n getViewport(params: { scale: number }): { width: number; height: number }\n render(params: { canvasContext: unknown; viewport: unknown }): { promise: Promise<void> }\n}\n\n/**\n * PDF 페이지를 PNG로 렌더링.\n * @napi-rs/canvas 사용 (kordoc 번들 의존성, 별도 설치 불필요)\n */\nasync function renderPageToPng(page: PdfPageProxy): Promise<Uint8Array> {\n const { createCanvas } = await import(\"@napi-rs/canvas\")\n\n const scale = 2.0 // 300 DPI 근사\n const viewport = page.getViewport({ scale })\n const canvas = createCanvas(Math.floor(viewport.width), Math.floor(viewport.height))\n const ctx = canvas.getContext(\"2d\")\n\n await page.render({ canvasContext: ctx as unknown, viewport }).promise\n return new Uint8Array(canvas.toBuffer(\"image/png\"))\n}\n"],"mappings":";;;;AAoBO,SAAS,iBAAiB,UAAkB,YAA+B;AAChF,QAAM,SAAoB,CAAC;AAC3B,QAAM,QAAQ,SAAS,MAAM,IAAI;AACjC,MAAI,IAAI;AAER,SAAO,IAAI,MAAM,QAAQ;AACvB,UAAM,OAAO,MAAM,CAAC;AAGpB,QAAI,KAAK,KAAK,MAAM,IAAI;AACtB;AACA;AAAA,IACF;AAGA,UAAM,eAAe,KAAK,MAAM,mBAAmB;AACnD,QAAI,cAAc;AAChB,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,OAAO,aAAa,CAAC,EAAE;AAAA,QACvB,MAAM,aAAa,CAAC,EAAE,KAAK;AAAA,QAC3B;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,QAAI,iBAAiB,KAAK,KAAK,KAAK,CAAC,GAAG;AACtC,aAAO,KAAK,EAAE,MAAM,aAAa,WAAW,CAAC;AAC7C;AACA;AAAA,IACF;AAGA,QAAI,KAAK,KAAK,EAAE,WAAW,GAAG,GAAG;AAC/B,YAAM,aAAuB,CAAC;AAC9B,aAAO,IAAI,MAAM,UAAU,MAAM,CAAC,EAAE,KAAK,EAAE,WAAW,GAAG,GAAG;AAC1D,mBAAW,KAAK,MAAM,CAAC,CAAC;AACxB;AAAA,MACF;AACA,YAAM,QAAQ,mBAAmB,UAAU;AAC3C,UAAI,OAAO;AACT,eAAO,KAAK,EAAE,MAAM,SAAS,OAAO,WAAW,CAAC;AAAA,MAClD;AACA;AAAA,IACF;AAGA,UAAM,UAAU,KAAK,MAAM,qBAAqB;AAChD,QAAI,SAAS;AACX,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,UAAU;AAAA,QACV,MAAM,QAAQ,CAAC,EAAE,KAAK;AAAA,QACtB;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,UAAM,UAAU,KAAK,MAAM,qBAAqB;AAChD,QAAI,SAAS;AACX,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,UAAU;AAAA,QACV,MAAM,QAAQ,CAAC,EAAE,KAAK;AAAA,QACtB;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,UAAM,YAAsB,CAAC;AAC7B,WAAO,IAAI,MAAM,UAAU,MAAM,CAAC,EAAE,KAAK,MAAM,MAAM,CAAC,iBAAiB,MAAM,CAAC,CAAC,GAAG;AAChF,gBAAU,KAAK,MAAM,CAAC,EAAE,KAAK,CAAC;AAC9B;AAAA,IACF;AACA,QAAI,UAAU,SAAS,GAAG;AACxB,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,MAAM,UAAU,KAAK,IAAI;AAAA,QACzB;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO;AACT;AAKA,SAAS,iBAAiB,MAAuB;AAC/C,MAAI,aAAa,KAAK,IAAI,EAAG,QAAO;AACpC,MAAI,KAAK,KAAK,EAAE,WAAW,GAAG,EAAG,QAAO;AACxC,MAAI,iBAAiB,KAAK,KAAK,KAAK,CAAC,EAAG,QAAO;AAC/C,MAAI,eAAe,KAAK,IAAI,EAAG,QAAO;AACtC,MAAI,eAAe,KAAK,IAAI,EAAG,QAAO;AACtC,SAAO;AACT;AAQA,SAAS,mBAAmB,OAAiC;AAC3D,QAAM,eAAe,MAAM,KAAK,UAAQ,iBAAiB,KAAK,KAAK,KAAK,CAAC,CAAC;AAE1E,QAAM,OAAmB,CAAC;AAC1B,MAAI,UAAU;AAEd,aAAW,QAAQ,OAAO;AAExB,QAAI,0CAA0C,KAAK,KAAK,KAAK,CAAC,EAAG;AAEjE,UAAM,QAAQ,KAAK,MAAM,GAAG;AAE5B,UAAM,QAAkB,MACrB,MAAM,GAAG,MAAM,MAAM,SAAS,CAAC,EAAE,KAAK,MAAM,KAAK,KAAK,MAAS,EAC/D,IAAI,WAAS;AAAA,MACZ,MAAM,KAAK,KAAK;AAAA,MAChB,SAAS;AAAA,MACT,SAAS;AAAA,IACX,EAAE;AAEJ,QAAI,MAAM,SAAS,GAAG;AACpB,WAAK,KAAK,KAAK;AACf,gBAAU,KAAK,IAAI,SAAS,MAAM,MAAM;AAAA,IAC1C;AAAA,EACF;AAEA,MAAI,KAAK,WAAW,EAAG,QAAO;AAG9B,aAAW,OAAO,MAAM;AACtB,WAAO,IAAI,SAAS,SAAS;AAC3B,UAAI,KAAK,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE,CAAC;AAAA,IAC/C;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM,KAAK;AAAA,IACX,MAAM;AAAA,IACN,OAAO;AAAA,IACP,WAAW,gBAAgB,KAAK,SAAS;AAAA,EAC3C;AACF;;;AC3IA,eAAe,mBACb,OACA,OACc;AACd,QAAM,UAAe,IAAI,MAAM,MAAM,MAAM;AAC3C,MAAI,YAAY;AAGhB,iBAAe,SAAS;AACtB,WAAO,YAAY,MAAM,QAAQ;AAC/B,YAAM,MAAM;AACZ,cAAQ,GAAG,IAAI,MAAM,MAAM,GAAG,EAAE;AAAA,IAClC;AAAA,EACF;AAGA,QAAM,QAAQ,IAAI,MAAM,KAAK,EAAE,QAAQ,KAAK,IAAI,OAAO,MAAM,MAAM,EAAE,GAAG,MAAM,OAAO,CAAC,CAAC;AACvF,SAAO;AACT;AAKA,SAAS,kBAAkB,QAAsC,SAA4B;AAC3F,QAAM,aAAwB,CAAC;AAC/B,MAAI,OAAO,WAAW,UAAU;AAE9B,QAAI,OAAO,KAAK,GAAG;AACjB,iBAAW,KAAK,EAAE,MAAM,aAAa,MAAM,OAAO,KAAK,GAAG,YAAY,QAAQ,CAAC;AAAA,IACjF;AAAA,EACF,WAAW,UAAU,OAAO,WAAW,YAAY,cAAc,QAAQ;AAEvE,UAAM,aAAa;AACnB,QAAI,WAAW,SAAS,KAAK,GAAG;AAC9B,YAAM,YAAY,iBAAiB,WAAW,UAAU,OAAO;AAC/D,iBAAW,KAAK,UAAW,YAAW,KAAK,CAAC;AAAA,IAC9C;AAAA,EACF;AACA,SAAO;AACT;AAGA,SAAS,gBAAgB,GAAmC;AAC1D,SAAO,CAAC,CAAC,KAAK,OAAO,MAAM,YAAY,aAAa,KAAM,EAAuB,YAAY;AAC/F;AAcA,eAAsB,SACpB,KACA,UACA,YACA,oBACA,UACA,cAAsB,GACtB,YACoB;AACpB,QAAM,SAAoB,CAAC;AAG3B,MAAI,gBAAgB,QAAQ,GAAG;AAC7B,WAAO,cAAc,KAAK,UAAU,YAAY,oBAAoB,UAAU,UAAU;AAAA,EAC1F;AAGA,MAAI,eAAe,GAAG;AACpB,aAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,UAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,YAAM,OAAO,MAAM,IAAI,QAAQ,CAAC;AAChC,UAAI;AACF,cAAM,YAAY,MAAM,gBAAgB,IAAI;AAC5C,cAAM,SAAS,MAAM,SAAS,WAAW,GAAG,WAAW;AACvD,mBAAW,KAAK,kBAAkB,QAAQ,CAAC,EAAG,QAAO,KAAK,CAAC;AAAA,MAC7D,SAAS,KAAK;AAEZ,kBAAU,KAAK;AAAA,UACb,MAAM;AAAA,UACN,SAAS,sBAAO,CAAC,sBAAY,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,UAC7E,MAAM;AAAA,QACR,CAAC;AAAA,MACH;AAAA,IACF;AACA,WAAO;AAAA,EACT;AAIA,QAAM,cAAwB,CAAC;AAC/B,WAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,QAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,gBAAY,KAAK,CAAC;AAAA,EACpB;AAGA,QAAM,QAAQ,YAAY,IAAI,aAAW,YAAwE;AAC/G,QAAI;AACF,YAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AACtC,YAAM,YAAY,MAAM,gBAAgB,IAAI;AAC5C,YAAM,SAAS,MAAM,SAAS,WAAW,SAAS,WAAW;AAC7D,aAAO,EAAE,SAAS,YAAY,kBAAkB,QAAQ,OAAO,EAAE;AAAA,IACnE,SAAS,KAAK;AAEZ,gBAAU,KAAK;AAAA,QACb,MAAM;AAAA,QACN,SAAS,sBAAO,OAAO,sBAAY,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,QACnF,MAAM;AAAA,MACR,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF,CAAC;AAGD,QAAM,cAAc,MAAM,mBAAmB,OAAO,WAAW;AAG/D,aAAW,QAAQ,aAAa;AAC9B,QAAI,CAAC,KAAM;AACX,eAAW,KAAK,KAAK,WAAY,QAAO,KAAK,CAAC;AAAA,EAChD;AAEA,SAAO;AACT;AAKA,eAAe,cACb,KACA,UACA,YACA,oBACA,UACA,YACoB;AACpB,QAAM,SAAoB,CAAC;AAG3B,QAAM,cAAwB,CAAC;AAC/B,WAAS,IAAI,GAAG,KAAK,oBAAoB,KAAK;AAC5C,QAAI,cAAc,CAAC,WAAW,IAAI,CAAC,EAAG;AACtC,gBAAY,KAAK,CAAC;AAAA,EACpB;AAGA,QAAM,aAA0D,CAAC;AACjE,aAAW,WAAW,aAAa;AACjC,UAAM,OAAO,MAAM,IAAI,QAAQ,OAAO;AACtC,UAAM,QAAQ,MAAM,gBAAgB,IAAI;AACxC,eAAW,KAAK,EAAE,OAAO,QAAQ,CAAC;AAAA,EACpC;AAGA,QAAM,UAAoC,CAAC;AAC3C,WAAS,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK,SAAS,WAAW;AAC9D,YAAQ,KAAK,WAAW,MAAM,GAAG,IAAI,SAAS,SAAS,CAAC;AAAA,EAC1D;AAGA,MAAI,YAAY;AAChB,aAAW,SAAS,SAAS;AAC3B,QAAI;AACF,YAAM,UAAU,MAAM,SAAS,aAAa,KAAK;AACjD,iBAAW,EAAE,QAAQ,KAAK,OAAO;AAC/B,cAAM,SAAS,QAAQ,IAAI,OAAO;AAClC,YAAI,QAAQ;AACV,qBAAW,KAAK,kBAAkB,QAAQ,OAAO,EAAG,QAAO,KAAK,CAAC;AAAA,QACnE;AACA;AACA,qBAAa,WAAW,YAAY,MAAM;AAAA,MAC5C;AAAA,IACF,SAAS,KAAK;AAEZ,YAAM,QAAQ,GAAG,MAAM,CAAC,EAAE,OAAO,IAAI,MAAM,MAAM,SAAS,CAAC,EAAE,OAAO;AACpE,gBAAU,KAAK;AAAA,QACb,SAAS,qDAAkB,KAAK,MAAM,eAAe,QAAQ,IAAI,UAAU,yCAAW;AAAA,QACtF,MAAM;AAAA,MACR,CAAC;AACD,mBAAa,MAAM;AACnB,mBAAa,WAAW,YAAY,MAAM;AAAA,IAC5C;AAAA,EACF;AAEA,SAAO;AACT;AAWA,eAAe,gBAAgB,MAAyC;AACtE,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,iBAAiB;AAEvD,QAAM,QAAQ;AACd,QAAM,WAAW,KAAK,YAAY,EAAE,MAAM,CAAC;AAC3C,QAAM,SAAS,aAAa,KAAK,MAAM,SAAS,KAAK,GAAG,KAAK,MAAM,SAAS,MAAM,CAAC;AACnF,QAAM,MAAM,OAAO,WAAW,IAAI;AAElC,QAAM,KAAK,OAAO,EAAE,eAAe,KAAgB,SAAS,CAAC,EAAE;AAC/D,SAAO,IAAI,WAAW,OAAO,SAAS,WAAW,CAAC;AACpD;","names":[]}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|