@clazic/kordoc 2.3.0 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/dist/{batch-provider-FUCIIS4M.js → batch-provider-PNDCSGQW.js} +59 -30
  2. package/dist/batch-provider-PNDCSGQW.js.map +1 -0
  3. package/dist/{chunk-2ZGLFZCN.js → chunk-2GFJFTKS.js} +193 -49
  4. package/dist/chunk-2GFJFTKS.js.map +1 -0
  5. package/dist/chunk-4PP34NVQ.js +121 -0
  6. package/dist/chunk-4PP34NVQ.js.map +1 -0
  7. package/dist/{tesseract-provider-WCVJWBUT.js → chunk-7FMKAV4P.js} +4 -4
  8. package/dist/{tesseract-provider-WCVJWBUT.js.map → chunk-7FMKAV4P.js.map} +1 -1
  9. package/dist/chunk-JOGAFNIL.js +153 -0
  10. package/dist/chunk-JOGAFNIL.js.map +1 -0
  11. package/dist/{chunk-WWILSVMJ.js → chunk-STIKJGEA.js} +2 -2
  12. package/dist/cli.js +10 -5
  13. package/dist/cli.js.map +1 -1
  14. package/dist/index.cjs +291 -103
  15. package/dist/index.cjs.map +1 -1
  16. package/dist/index.d.cts +11 -6
  17. package/dist/index.d.ts +11 -6
  18. package/dist/index.js +292 -104
  19. package/dist/index.js.map +1 -1
  20. package/dist/mcp.js +5 -2
  21. package/dist/mcp.js.map +1 -1
  22. package/dist/{provider-OBY3XFSZ.js → provider-HE727F7Z.js} +38 -139
  23. package/dist/provider-HE727F7Z.js.map +1 -0
  24. package/dist/resolve-QA3VACUP.js +111 -0
  25. package/dist/resolve-QA3VACUP.js.map +1 -0
  26. package/dist/tesseract-provider-MNMZPSGF.js +11 -0
  27. package/dist/{utils-QAK24RJS.js → utils-FFUQJTTI.js} +2 -2
  28. package/dist/utils-FFUQJTTI.js.map +1 -0
  29. package/dist/{watch-MPHX3QIH.js → watch-2O32L6IF.js} +6 -3
  30. package/dist/{watch-MPHX3QIH.js.map → watch-2O32L6IF.js.map} +1 -1
  31. package/package.json +1 -1
  32. package/dist/batch-provider-FUCIIS4M.js.map +0 -1
  33. package/dist/chunk-2ZGLFZCN.js.map +0 -1
  34. package/dist/provider-OBY3XFSZ.js.map +0 -1
  35. package/dist/resolve-LBFYRHJI.js +0 -247
  36. package/dist/resolve-LBFYRHJI.js.map +0 -1
  37. /package/dist/{chunk-WWILSVMJ.js.map → chunk-STIKJGEA.js.map} +0 -0
  38. /package/dist/{utils-QAK24RJS.js.map → tesseract-provider-MNMZPSGF.js.map} +0 -0
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/watch.ts"],"sourcesContent":["/** 디렉토리 감시 모드 — 새 문서 자동 변환 + Webhook 알림 */\n\nimport { watch, readFileSync, writeFileSync, mkdirSync, statSync, existsSync } from \"fs\"\nimport { basename, resolve, extname } from \"path\"\nimport { parse, detectFormat } from \"./index.js\"\nimport { toArrayBuffer } from \"./utils.js\"\nimport type { WatchOptions } from \"./types.js\"\n\nconst SUPPORTED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\nconst DEBOUNCE_MS = 1000\n/** 파일 쓰기 완료 판정: 연속 2회 동일 크기 확인 간격 */\nconst STABLE_CHECK_MS = 300\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\n\n/**\n * 디렉토리를 감시하여 새 문서 파일을 자동 변환.\n *\n * @example\n * ```bash\n * kordoc watch ./incoming -d ./output --webhook https://api.example.com/docs\n * ```\n */\nexport async function watchDirectory(options: WatchOptions): Promise<void> {\n const { dir, outDir, webhook, format = \"markdown\", pages, silent } = options\n\n if (!existsSync(dir)) throw new Error(`디렉토리를 찾을 수 없습니다: ${dir}`)\n if (webhook) validateWebhookUrl(webhook)\n if (outDir) mkdirSync(outDir, { recursive: true })\n\n const log = silent ? () => {} : (msg: string) => process.stderr.write(msg + \"\\n\")\n log(`[kordoc watch] 감시 시작: ${resolve(dir)}`)\n if (outDir) log(`[kordoc watch] 출력: ${resolve(outDir)}`)\n if (webhook) log(`[kordoc watch] 웹훅: ${webhook}`)\n\n // 디바운스 맵\n const pending = new Map<string, ReturnType<typeof setTimeout>>()\n\n /** 파일 크기가 안정화될 때까지 대기 (쓰기 완료 감지) */\n const waitForStableSize = async (absPath: string): Promise<number> => {\n let prevSize = statSync(absPath).size\n await new Promise(r => setTimeout(r, STABLE_CHECK_MS))\n if (!existsSync(absPath)) return 0\n const currSize = statSync(absPath).size\n if (currSize !== prevSize) {\n // 크기가 변했으면 한 번 더 대기\n await new Promise(r => setTimeout(r, STABLE_CHECK_MS))\n if (!existsSync(absPath)) return 0\n return statSync(absPath).size\n }\n return currSize\n }\n\n const processFile = async (filePath: string) => {\n const ext = extname(filePath).toLowerCase()\n if (!SUPPORTED_EXTENSIONS.has(ext)) return\n\n const fileName = basename(filePath)\n try {\n const absPath = resolve(dir, filePath)\n // 경로 순회 방지 — 감시 디렉토리 외부 파일 차단\n const realDir = resolve(dir)\n if (!absPath.startsWith(realDir)) return\n if (!existsSync(absPath)) return\n\n const fileSize = await waitForStableSize(absPath)\n if (fileSize > MAX_FILE_SIZE || fileSize === 0) return\n\n log(`[kordoc watch] 변환 중: ${fileName}`)\n\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const parseOptions = pages ? { pages } : undefined\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n log(`[kordoc watch] 실패: ${fileName} — ${result.error}`)\n await sendWebhook(webhook, { file: fileName, format: detectFormat(arrayBuffer), success: false, error: result.error })\n return\n }\n\n const output = format === \"json\" ? JSON.stringify(result, null, 2) : result.markdown\n\n if (outDir) {\n const outExt = format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n log(`[kordoc watch] 완료: ${fileName} → ${basename(outPath)}`)\n } else {\n process.stdout.write(output + \"\\n\")\n }\n\n await sendWebhook(webhook, {\n file: fileName,\n format: result.fileType,\n success: true,\n markdown: format === \"markdown\" ? output.substring(0, 1000) : undefined,\n })\n } catch (err) {\n log(`[kordoc watch] 에러: ${fileName} — ${err instanceof Error ? err.message : err}`)\n }\n }\n\n // fs.watch recursive (Node 18+ Windows/macOS, Node 19+ Linux)\n watch(dir, { recursive: true }, (event, filename) => {\n if (!filename) return\n const filePath = filename.toString()\n\n // 디바운스\n const existing = pending.get(filePath)\n if (existing) clearTimeout(existing)\n pending.set(filePath, setTimeout(() => {\n pending.delete(filePath)\n processFile(filePath).catch(() => {})\n }, DEBOUNCE_MS))\n })\n\n // 프로세스 종료 방지 (Ctrl+C로 종료)\n return new Promise(() => {})\n}\n\n/** Webhook URL 검증 — SSRF 방지: http/https만 허용, localhost/private IP 차단 */\nfunction validateWebhookUrl(url: string): void {\n let parsed: URL\n try {\n parsed = new URL(url)\n } catch {\n throw new Error(`유효하지 않은 webhook URL: ${url}`)\n }\n if (parsed.protocol !== \"http:\" && parsed.protocol !== \"https:\") {\n throw new Error(`허용되지 않는 webhook 프로토콜: ${parsed.protocol}`)\n }\n const hostname = parsed.hostname.toLowerCase()\n if (\n hostname === \"localhost\" ||\n hostname === \"[::1]\" ||\n hostname.startsWith(\"127.\") ||\n hostname.startsWith(\"10.\") ||\n hostname.startsWith(\"192.168.\") ||\n /^172\\.(1[6-9]|2\\d|3[01])\\./.test(hostname) ||\n hostname === \"0.0.0.0\" ||\n hostname.startsWith(\"169.254.\") ||\n hostname.endsWith(\".local\") ||\n // IPv6 사설 대역\n hostname.startsWith(\"[fc\") ||\n hostname.startsWith(\"[fd\") ||\n hostname.startsWith(\"[fe80:\") ||\n hostname === \"[::0]\" ||\n hostname === \"[::]\" ||\n // 클라우드 메타데이터 엔드포인트\n hostname === \"metadata.google.internal\" ||\n hostname === \"metadata.google\" ||\n // 16진수/8진수 IP 인코딩 우회 방지\n /^0x[0-9a-f]+$/i.test(hostname) ||\n /^0[0-7]+$/.test(hostname)\n ) {\n throw new Error(`내부 네트워크 대상 webhook은 허용되지 않습니다: ${hostname}`)\n }\n}\n\nasync function sendWebhook(url: string | undefined, payload: Record<string, unknown>): Promise<void> {\n if (!url) return\n try {\n validateWebhookUrl(url)\n await fetch(url, {\n method: \"POST\",\n headers: { \"Content-Type\": \"application/json\" },\n body: JSON.stringify({ ...payload, timestamp: new Date().toISOString() }),\n })\n } catch {\n // webhook 실패는 조용히 무시\n }\n}\n"],"mappings":";;;;;;;;;;;;AAEA,SAAS,OAAO,cAAc,eAAe,WAAW,UAAU,kBAAkB;AACpF,SAAS,UAAU,SAAS,eAAe;AAK3C,IAAM,uBAAuB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAChF,IAAM,cAAc;AAEpB,IAAM,kBAAkB;AACxB,IAAM,gBAAgB,MAAM,OAAO;AAUnC,eAAsB,eAAe,SAAsC;AACzE,QAAM,EAAE,KAAK,QAAQ,SAAS,SAAS,YAAY,OAAO,OAAO,IAAI;AAErE,MAAI,CAAC,WAAW,GAAG,EAAG,OAAM,IAAI,MAAM,gFAAoB,GAAG,EAAE;AAC/D,MAAI,QAAS,oBAAmB,OAAO;AACvC,MAAI,OAAQ,WAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AAEjD,QAAM,MAAM,SAAS,MAAM;AAAA,EAAC,IAAI,CAAC,QAAgB,QAAQ,OAAO,MAAM,MAAM,IAAI;AAChF,MAAI,6CAAyB,QAAQ,GAAG,CAAC,EAAE;AAC3C,MAAI,OAAQ,KAAI,gCAAsB,QAAQ,MAAM,CAAC,EAAE;AACvD,MAAI,QAAS,KAAI,gCAAsB,OAAO,EAAE;AAGhD,QAAM,UAAU,oBAAI,IAA2C;AAG/D,QAAM,oBAAoB,OAAO,YAAqC;AACpE,QAAI,WAAW,SAAS,OAAO,EAAE;AACjC,UAAM,IAAI,QAAQ,OAAK,WAAW,GAAG,eAAe,CAAC;AACrD,QAAI,CAAC,WAAW,OAAO,EAAG,QAAO;AACjC,UAAM,WAAW,SAAS,OAAO,EAAE;AACnC,QAAI,aAAa,UAAU;AAEzB,YAAM,IAAI,QAAQ,OAAK,WAAW,GAAG,eAAe,CAAC;AACrD,UAAI,CAAC,WAAW,OAAO,EAAG,QAAO;AACjC,aAAO,SAAS,OAAO,EAAE;AAAA,IAC3B;AACA,WAAO;AAAA,EACT;AAEA,QAAM,cAAc,OAAO,aAAqB;AAC9C,UAAM,MAAM,QAAQ,QAAQ,EAAE,YAAY;AAC1C,QAAI,CAAC,qBAAqB,IAAI,GAAG,EAAG;AAEpC,UAAM,WAAW,SAAS,QAAQ;AAClC,QAAI;AACF,YAAM,UAAU,QAAQ,KAAK,QAAQ;AAErC,YAAM,UAAU,QAAQ,GAAG;AAC3B,UAAI,CAAC,QAAQ,WAAW,OAAO,EAAG;AAClC,UAAI,CAAC,WAAW,OAAO,EAAG;AAE1B,YAAM,WAAW,MAAM,kBAAkB,OAAO;AAChD,UAAI,WAAW,iBAAiB,aAAa,EAAG;AAEhD,UAAI,uCAAwB,QAAQ,EAAE;AAEtC,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,eAAe,QAAQ,EAAE,MAAM,IAAI;AACzC,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,YAAI,gCAAsB,QAAQ,WAAM,OAAO,KAAK,EAAE;AACtD,cAAM,YAAY,SAAS,EAAE,MAAM,UAAU,QAAQ,aAAa,WAAW,GAAG,SAAS,OAAO,OAAO,OAAO,MAAM,CAAC;AACrH;AAAA,MACF;AAEA,YAAM,SAAS,WAAW,SAAS,KAAK,UAAU,QAAQ,MAAM,CAAC,IAAI,OAAO;AAE5E,UAAI,QAAQ;AACV,cAAM,SAAS,WAAW,SAAS,UAAU;AAC7C,cAAM,UAAU,QAAQ,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACpE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,gCAAsB,QAAQ,WAAM,SAAS,OAAO,CAAC,EAAE;AAAA,MAC7D,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAEA,YAAM,YAAY,SAAS;AAAA,QACzB,MAAM;AAAA,QACN,QAAQ,OAAO;AAAA,QACf,SAAS;AAAA,QACT,UAAU,WAAW,aAAa,OAAO,UAAU,GAAG,GAAI,IAAI;AAAA,MAChE,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,UAAI,gCAAsB,QAAQ,WAAM,eAAe,QAAQ,IAAI,UAAU,GAAG,EAAE;AAAA,IACpF;AAAA,EACF;AAGA,QAAM,KAAK,EAAE,WAAW,KAAK,GAAG,CAAC,OAAO,aAAa;AACnD,QAAI,CAAC,SAAU;AACf,UAAM,WAAW,SAAS,SAAS;AAGnC,UAAM,WAAW,QAAQ,IAAI,QAAQ;AACrC,QAAI,SAAU,cAAa,QAAQ;AACnC,YAAQ,IAAI,UAAU,WAAW,MAAM;AACrC,cAAQ,OAAO,QAAQ;AACvB,kBAAY,QAAQ,EAAE,MAAM,MAAM;AAAA,MAAC,CAAC;AAAA,IACtC,GAAG,WAAW,CAAC;AAAA,EACjB,CAAC;AAGD,SAAO,IAAI,QAAQ,MAAM;AAAA,EAAC,CAAC;AAC7B;AAGA,SAAS,mBAAmB,KAAmB;AAC7C,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;AAAA,EACtB,QAAQ;AACN,UAAM,IAAI,MAAM,sDAAwB,GAAG,EAAE;AAAA,EAC/C;AACA,MAAI,OAAO,aAAa,WAAW,OAAO,aAAa,UAAU;AAC/D,UAAM,IAAI,MAAM,2EAAyB,OAAO,QAAQ,EAAE;AAAA,EAC5D;AACA,QAAM,WAAW,OAAO,SAAS,YAAY;AAC7C,MACE,aAAa,eACb,aAAa,WACb,SAAS,WAAW,MAAM,KAC1B,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,UAAU,KAC9B,6BAA6B,KAAK,QAAQ,KAC1C,aAAa,aACb,SAAS,WAAW,UAAU,KAC9B,SAAS,SAAS,QAAQ;AAAA,EAE1B,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,QAAQ,KAC5B,aAAa,WACb,aAAa;AAAA,EAEb,aAAa,8BACb,aAAa;AAAA,EAEb,iBAAiB,KAAK,QAAQ,KAC9B,YAAY,KAAK,QAAQ,GACzB;AACA,UAAM,IAAI,MAAM,uHAAkC,QAAQ,EAAE;AAAA,EAC9D;AACF;AAEA,eAAe,YAAY,KAAyB,SAAiD;AACnG,MAAI,CAAC,IAAK;AACV,MAAI;AACF,uBAAmB,GAAG;AACtB,UAAM,MAAM,KAAK;AAAA,MACf,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,EAAE,GAAG,SAAS,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE,CAAC;AAAA,IAC1E,CAAC;AAAA,EACH,QAAQ;AAAA,EAER;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/watch.ts"],"sourcesContent":["/** 디렉토리 감시 모드 — 새 문서 자동 변환 + Webhook 알림 */\n\nimport { watch, readFileSync, writeFileSync, mkdirSync, statSync, existsSync } from \"fs\"\nimport { basename, resolve, extname } from \"path\"\nimport { parse, detectFormat } from \"./index.js\"\nimport { toArrayBuffer } from \"./utils.js\"\nimport type { WatchOptions } from \"./types.js\"\n\nconst SUPPORTED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\nconst DEBOUNCE_MS = 1000\n/** 파일 쓰기 완료 판정: 연속 2회 동일 크기 확인 간격 */\nconst STABLE_CHECK_MS = 300\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\n\n/**\n * 디렉토리를 감시하여 새 문서 파일을 자동 변환.\n *\n * @example\n * ```bash\n * kordoc watch ./incoming -d ./output --webhook https://api.example.com/docs\n * ```\n */\nexport async function watchDirectory(options: WatchOptions): Promise<void> {\n const { dir, outDir, webhook, format = \"markdown\", pages, silent } = options\n\n if (!existsSync(dir)) throw new Error(`디렉토리를 찾을 수 없습니다: ${dir}`)\n if (webhook) validateWebhookUrl(webhook)\n if (outDir) mkdirSync(outDir, { recursive: true })\n\n const log = silent ? () => {} : (msg: string) => process.stderr.write(msg + \"\\n\")\n log(`[kordoc watch] 감시 시작: ${resolve(dir)}`)\n if (outDir) log(`[kordoc watch] 출력: ${resolve(outDir)}`)\n if (webhook) log(`[kordoc watch] 웹훅: ${webhook}`)\n\n // 디바운스 맵\n const pending = new Map<string, ReturnType<typeof setTimeout>>()\n\n /** 파일 크기가 안정화될 때까지 대기 (쓰기 완료 감지) */\n const waitForStableSize = async (absPath: string): Promise<number> => {\n let prevSize = statSync(absPath).size\n await new Promise(r => setTimeout(r, STABLE_CHECK_MS))\n if (!existsSync(absPath)) return 0\n const currSize = statSync(absPath).size\n if (currSize !== prevSize) {\n // 크기가 변했으면 한 번 더 대기\n await new Promise(r => setTimeout(r, STABLE_CHECK_MS))\n if (!existsSync(absPath)) return 0\n return statSync(absPath).size\n }\n return currSize\n }\n\n const processFile = async (filePath: string) => {\n const ext = extname(filePath).toLowerCase()\n if (!SUPPORTED_EXTENSIONS.has(ext)) return\n\n const fileName = basename(filePath)\n try {\n const absPath = resolve(dir, filePath)\n // 경로 순회 방지 — 감시 디렉토리 외부 파일 차단\n const realDir = resolve(dir)\n if (!absPath.startsWith(realDir)) return\n if (!existsSync(absPath)) return\n\n const fileSize = await waitForStableSize(absPath)\n if (fileSize > MAX_FILE_SIZE || fileSize === 0) return\n\n log(`[kordoc watch] 변환 중: ${fileName}`)\n\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const parseOptions = pages ? { pages } : undefined\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n log(`[kordoc watch] 실패: ${fileName} — ${result.error}`)\n await sendWebhook(webhook, { file: fileName, format: detectFormat(arrayBuffer), success: false, error: result.error })\n return\n }\n\n const output = format === \"json\" ? JSON.stringify(result, null, 2) : result.markdown\n\n if (outDir) {\n const outExt = format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n log(`[kordoc watch] 완료: ${fileName} → ${basename(outPath)}`)\n } else {\n process.stdout.write(output + \"\\n\")\n }\n\n await sendWebhook(webhook, {\n file: fileName,\n format: result.fileType,\n success: true,\n markdown: format === \"markdown\" ? output.substring(0, 1000) : undefined,\n })\n } catch (err) {\n log(`[kordoc watch] 에러: ${fileName} — ${err instanceof Error ? err.message : err}`)\n }\n }\n\n // fs.watch recursive (Node 18+ Windows/macOS, Node 19+ Linux)\n watch(dir, { recursive: true }, (event, filename) => {\n if (!filename) return\n const filePath = filename.toString()\n\n // 디바운스\n const existing = pending.get(filePath)\n if (existing) clearTimeout(existing)\n pending.set(filePath, setTimeout(() => {\n pending.delete(filePath)\n processFile(filePath).catch(() => {})\n }, DEBOUNCE_MS))\n })\n\n // 프로세스 종료 방지 (Ctrl+C로 종료)\n return new Promise(() => {})\n}\n\n/** Webhook URL 검증 — SSRF 방지: http/https만 허용, localhost/private IP 차단 */\nfunction validateWebhookUrl(url: string): void {\n let parsed: URL\n try {\n parsed = new URL(url)\n } catch {\n throw new Error(`유효하지 않은 webhook URL: ${url}`)\n }\n if (parsed.protocol !== \"http:\" && parsed.protocol !== \"https:\") {\n throw new Error(`허용되지 않는 webhook 프로토콜: ${parsed.protocol}`)\n }\n const hostname = parsed.hostname.toLowerCase()\n if (\n hostname === \"localhost\" ||\n hostname === \"[::1]\" ||\n hostname.startsWith(\"127.\") ||\n hostname.startsWith(\"10.\") ||\n hostname.startsWith(\"192.168.\") ||\n /^172\\.(1[6-9]|2\\d|3[01])\\./.test(hostname) ||\n hostname === \"0.0.0.0\" ||\n hostname.startsWith(\"169.254.\") ||\n hostname.endsWith(\".local\") ||\n // IPv6 사설 대역\n hostname.startsWith(\"[fc\") ||\n hostname.startsWith(\"[fd\") ||\n hostname.startsWith(\"[fe80:\") ||\n hostname === \"[::0]\" ||\n hostname === \"[::]\" ||\n // 클라우드 메타데이터 엔드포인트\n hostname === \"metadata.google.internal\" ||\n hostname === \"metadata.google\" ||\n // 16진수/8진수 IP 인코딩 우회 방지\n /^0x[0-9a-f]+$/i.test(hostname) ||\n /^0[0-7]+$/.test(hostname)\n ) {\n throw new Error(`내부 네트워크 대상 webhook은 허용되지 않습니다: ${hostname}`)\n }\n}\n\nasync function sendWebhook(url: string | undefined, payload: Record<string, unknown>): Promise<void> {\n if (!url) return\n try {\n validateWebhookUrl(url)\n await fetch(url, {\n method: \"POST\",\n headers: { \"Content-Type\": \"application/json\" },\n body: JSON.stringify({ ...payload, timestamp: new Date().toISOString() }),\n })\n } catch {\n // webhook 실패는 조용히 무시\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;AAEA,SAAS,OAAO,cAAc,eAAe,WAAW,UAAU,kBAAkB;AACpF,SAAS,UAAU,SAAS,eAAe;AAK3C,IAAM,uBAAuB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAChF,IAAM,cAAc;AAEpB,IAAM,kBAAkB;AACxB,IAAM,gBAAgB,MAAM,OAAO;AAUnC,eAAsB,eAAe,SAAsC;AACzE,QAAM,EAAE,KAAK,QAAQ,SAAS,SAAS,YAAY,OAAO,OAAO,IAAI;AAErE,MAAI,CAAC,WAAW,GAAG,EAAG,OAAM,IAAI,MAAM,gFAAoB,GAAG,EAAE;AAC/D,MAAI,QAAS,oBAAmB,OAAO;AACvC,MAAI,OAAQ,WAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AAEjD,QAAM,MAAM,SAAS,MAAM;AAAA,EAAC,IAAI,CAAC,QAAgB,QAAQ,OAAO,MAAM,MAAM,IAAI;AAChF,MAAI,6CAAyB,QAAQ,GAAG,CAAC,EAAE;AAC3C,MAAI,OAAQ,KAAI,gCAAsB,QAAQ,MAAM,CAAC,EAAE;AACvD,MAAI,QAAS,KAAI,gCAAsB,OAAO,EAAE;AAGhD,QAAM,UAAU,oBAAI,IAA2C;AAG/D,QAAM,oBAAoB,OAAO,YAAqC;AACpE,QAAI,WAAW,SAAS,OAAO,EAAE;AACjC,UAAM,IAAI,QAAQ,OAAK,WAAW,GAAG,eAAe,CAAC;AACrD,QAAI,CAAC,WAAW,OAAO,EAAG,QAAO;AACjC,UAAM,WAAW,SAAS,OAAO,EAAE;AACnC,QAAI,aAAa,UAAU;AAEzB,YAAM,IAAI,QAAQ,OAAK,WAAW,GAAG,eAAe,CAAC;AACrD,UAAI,CAAC,WAAW,OAAO,EAAG,QAAO;AACjC,aAAO,SAAS,OAAO,EAAE;AAAA,IAC3B;AACA,WAAO;AAAA,EACT;AAEA,QAAM,cAAc,OAAO,aAAqB;AAC9C,UAAM,MAAM,QAAQ,QAAQ,EAAE,YAAY;AAC1C,QAAI,CAAC,qBAAqB,IAAI,GAAG,EAAG;AAEpC,UAAM,WAAW,SAAS,QAAQ;AAClC,QAAI;AACF,YAAM,UAAU,QAAQ,KAAK,QAAQ;AAErC,YAAM,UAAU,QAAQ,GAAG;AAC3B,UAAI,CAAC,QAAQ,WAAW,OAAO,EAAG;AAClC,UAAI,CAAC,WAAW,OAAO,EAAG;AAE1B,YAAM,WAAW,MAAM,kBAAkB,OAAO;AAChD,UAAI,WAAW,iBAAiB,aAAa,EAAG;AAEhD,UAAI,uCAAwB,QAAQ,EAAE;AAEtC,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,eAAe,QAAQ,EAAE,MAAM,IAAI;AACzC,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,YAAI,gCAAsB,QAAQ,WAAM,OAAO,KAAK,EAAE;AACtD,cAAM,YAAY,SAAS,EAAE,MAAM,UAAU,QAAQ,aAAa,WAAW,GAAG,SAAS,OAAO,OAAO,OAAO,MAAM,CAAC;AACrH;AAAA,MACF;AAEA,YAAM,SAAS,WAAW,SAAS,KAAK,UAAU,QAAQ,MAAM,CAAC,IAAI,OAAO;AAE5E,UAAI,QAAQ;AACV,cAAM,SAAS,WAAW,SAAS,UAAU;AAC7C,cAAM,UAAU,QAAQ,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACpE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,gCAAsB,QAAQ,WAAM,SAAS,OAAO,CAAC,EAAE;AAAA,MAC7D,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAEA,YAAM,YAAY,SAAS;AAAA,QACzB,MAAM;AAAA,QACN,QAAQ,OAAO;AAAA,QACf,SAAS;AAAA,QACT,UAAU,WAAW,aAAa,OAAO,UAAU,GAAG,GAAI,IAAI;AAAA,MAChE,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,UAAI,gCAAsB,QAAQ,WAAM,eAAe,QAAQ,IAAI,UAAU,GAAG,EAAE;AAAA,IACpF;AAAA,EACF;AAGA,QAAM,KAAK,EAAE,WAAW,KAAK,GAAG,CAAC,OAAO,aAAa;AACnD,QAAI,CAAC,SAAU;AACf,UAAM,WAAW,SAAS,SAAS;AAGnC,UAAM,WAAW,QAAQ,IAAI,QAAQ;AACrC,QAAI,SAAU,cAAa,QAAQ;AACnC,YAAQ,IAAI,UAAU,WAAW,MAAM;AACrC,cAAQ,OAAO,QAAQ;AACvB,kBAAY,QAAQ,EAAE,MAAM,MAAM;AAAA,MAAC,CAAC;AAAA,IACtC,GAAG,WAAW,CAAC;AAAA,EACjB,CAAC;AAGD,SAAO,IAAI,QAAQ,MAAM;AAAA,EAAC,CAAC;AAC7B;AAGA,SAAS,mBAAmB,KAAmB;AAC7C,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;AAAA,EACtB,QAAQ;AACN,UAAM,IAAI,MAAM,sDAAwB,GAAG,EAAE;AAAA,EAC/C;AACA,MAAI,OAAO,aAAa,WAAW,OAAO,aAAa,UAAU;AAC/D,UAAM,IAAI,MAAM,2EAAyB,OAAO,QAAQ,EAAE;AAAA,EAC5D;AACA,QAAM,WAAW,OAAO,SAAS,YAAY;AAC7C,MACE,aAAa,eACb,aAAa,WACb,SAAS,WAAW,MAAM,KAC1B,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,UAAU,KAC9B,6BAA6B,KAAK,QAAQ,KAC1C,aAAa,aACb,SAAS,WAAW,UAAU,KAC9B,SAAS,SAAS,QAAQ;AAAA,EAE1B,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,QAAQ,KAC5B,aAAa,WACb,aAAa;AAAA,EAEb,aAAa,8BACb,aAAa;AAAA,EAEb,iBAAiB,KAAK,QAAQ,KAC9B,YAAY,KAAK,QAAQ,GACzB;AACA,UAAM,IAAI,MAAM,uHAAkC,QAAQ,EAAE;AAAA,EAC9D;AACF;AAEA,eAAe,YAAY,KAAyB,SAAiD;AACnG,MAAI,CAAC,IAAK;AACV,MAAI;AACF,uBAAmB,GAAG;AACtB,UAAM,MAAM,KAAK;AAAA,MACf,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,EAAE,GAAG,SAAS,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE,CAAC;AAAA,IAC1E,CAAC;AAAA,EACH,QAAQ;AAAA,EAER;AACF;","names":[]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@clazic/kordoc",
3
- "version": "2.3.0",
3
+ "version": "2.3.2",
4
4
  "description": "Parse Korean documents (HWP, HWPX, PDF, XLSX, DOCX) to Markdown",
5
5
  "type": "module",
6
6
  "exports": {
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/ocr/batch-provider.ts"],"sourcesContent":["/**\n * CLI 배치 OCR 프로바이더\n *\n * 여러 페이지 이미지를 단일 CLI 호출로 처리하여 API 호출 수를 대폭 감소.\n * gemini/claude: @file 멀티 참조, codex: --image 멀티 플래그\n *\n * 299페이지 기준:\n * - 기존: CLI 299회 호출 (~30분)\n * - 배치: CLI 3~6회 호출 (~3분)\n */\n\nimport { spawnSync } from \"child_process\"\nimport { writeFileSync, readFileSync, unlinkSync, mkdirSync } from \"fs\"\nimport { join } from \"path\"\nimport { tmpdir } from \"os\"\nimport type { StructuredOcrResult, BatchOcrProvider } from \"../types.js\"\n\n/** 배치 OCR 프롬프트 */\nconst BATCH_OCR_PROMPT =\n \"다음 문서 페이지 이미지들을 OCR하여 순수 Markdown으로 변환하세요.\\n\\n\" +\n \"규칙:\\n\" +\n \"- 각 페이지 결과 사이에 반드시 이 구분자를 삽입: <!-- PAGE_BREAK -->\\n\" +\n \"- 테이블은 Markdown 테이블 문법 사용 (| 구분, |---|---| 헤더 구분선 포함)\\n\" +\n \"- 병합된 셀은 해당 위치에 내용 기재\\n\" +\n \"- 헤딩은 글자 크기에 따라 ## ~ ###### 사용\\n\" +\n \"- 리스트는 - 또는 1. 사용\\n\" +\n \"- 이미지, 도형 등 비텍스트 요소는 무시\\n\" +\n \"- 원문의 읽기 순서와 구조를 유지\\n\" +\n \"- ```로 감싸지 말고 순수 Markdown만 출력\"\n\n/** 모드별 기본 배치 크기 (CLI 내부 타임아웃 + 실측 기반)\n *\n * gemini CLI: 10장 이상에서 AbortError 발생 (내부 타임아웃).\n * 5장 배치가 안정적으로 동작 확인 (35초/배치).\n * 299페이지 = 60배치 = 기존 299회 대비 80% 감소.\n */\nexport const DEFAULT_BATCH_SIZES: Record<string, number> = {\n gemini: 5,\n claude: 5,\n codex: 10,\n}\n\n/** 임시 디렉토리 — gemini CLI는 cwd 하위 + gitignore 밖만 @참조 가능 */\nlet _batchTempDir: string | null = null\nfunction getBatchTempDir(): string {\n if (!_batchTempDir) {\n _batchTempDir = join(process.cwd(), \"_kordoc_ocr_tmp\")\n mkdirSync(_batchTempDir, { recursive: true })\n }\n return _batchTempDir\n}\n\n/**\n * 배치 CLI 프로바이더 생성\n */\nexport function createBatchCliProvider(\n mode: \"gemini\" | \"claude\" | \"codex\",\n batchSize: number\n): BatchOcrProvider {\n return {\n __batch: true as const,\n batchSize,\n async processBatch(pages) {\n const results = new Map<number, StructuredOcrResult>()\n const tempDir = getBatchTempDir()\n const tempFiles: string[] = []\n\n try {\n // 1. Write all page images to temp files\n for (const { image, pageNum } of pages) {\n const path = join(tempDir, `batch-p${pageNum}.png`)\n writeFileSync(path, image)\n tempFiles.push(path)\n }\n\n // 2. Call CLI with all file references\n let output: string\n if (mode === \"codex\") {\n output = callBatchCodexCli(tempFiles)\n } else {\n output = callBatchCli(mode, tempFiles)\n }\n\n // 3. Parse response by PAGE_BREAK separator\n const cleaned = stripCodeFence(output.trim())\n const parts = cleaned.split(/<!--\\s*PAGE_BREAK\\s*-->/)\n .map(p => p.trim())\n .filter(p => p.length > 0)\n\n // 4. Map results to page numbers (best-effort if count mismatch)\n for (let i = 0; i < pages.length; i++) {\n const pageNum = pages[i].pageNum\n if (i < parts.length) {\n results.set(pageNum, { markdown: parts[i] })\n }\n // If fewer parts than pages, remaining pages get no result\n }\n } finally {\n // 5. Clean up temp files\n for (const f of tempFiles) {\n try { unlinkSync(f) } catch { /* ignore */ }\n }\n }\n\n return results\n },\n }\n}\n\n/** gemini/claude 배치 호출 */\nfunction callBatchCli(mode: \"gemini\" | \"claude\", imagePaths: string[]): string {\n const fileRefs = imagePaths.map(p => `@${p}`).join(\"\\n\")\n const prompt = `${BATCH_OCR_PROMPT}\\n\\n${fileRefs}`\n\n let args: string[]\n if (mode === \"gemini\") {\n args = [\"--prompt\", prompt, \"--yolo\"]\n const model = process.env.KORDOC_GEMINI_MODEL\n if (model) args.push(\"--model\", model)\n } else {\n // claude\n args = [\"--print\", prompt]\n const model = process.env.KORDOC_CLAUDE_MODEL\n if (model) args.push(\"--model\", model)\n }\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = spawnSync(mode, args, {\n encoding: \"utf-8\",\n timeout: timeoutMs,\n maxBuffer: 50 * 1024 * 1024, // 50MB (large batch output)\n ...(mode === \"claude\" ? { cwd: tmpdir() } : {}),\n })\n\n if (result.error) {\n throw new Error(`${mode} 배치 OCR 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`${mode} 배치 OCR 실패: ${errMsg}`)\n }\n\n return result.stdout || \"\"\n}\n\n/** codex 배치 호출 — --image를 여러 번 지정 */\nfunction callBatchCodexCli(imagePaths: string[]): string {\n const outPath = join(tmpdir(), `kordoc-codex-batch-${Date.now()}.txt`)\n try {\n const args = [\"exec\", BATCH_OCR_PROMPT]\n for (const p of imagePaths) {\n args.push(\"--image\", p)\n }\n args.push(\"--output-last-message\", outPath)\n const model = process.env.KORDOC_CODEX_MODEL\n if (model) args.push(\"--model\", model)\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = spawnSync(\"codex\", args, {\n encoding: \"utf-8\",\n timeout: timeoutMs,\n maxBuffer: 50 * 1024 * 1024,\n input: \"\",\n })\n\n if (result.error) {\n throw new Error(`codex 배치 OCR 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`codex 배치 OCR 실패: ${errMsg}`)\n }\n\n try {\n return readFileSync(outPath, \"utf-8\")\n } catch {\n return result.stdout || \"\"\n }\n } finally {\n try { unlinkSync(outPath) } catch { /* ignore */ }\n }\n}\n\n/** LLM 출력에서 코드 펜스 제거 (cli-provider.ts와 동일 로직) */\nfunction stripCodeFence(text: string): string {\n const match = text.match(/^```(?:markdown|md)?\\s*\\n([\\s\\S]*?)\\n```\\s*$/m)\n return match ? match[1].trim() : text\n}\n"],"mappings":";;;;AAWA,SAAS,iBAAiB;AAC1B,SAAS,eAAe,cAAc,YAAY,iBAAiB;AACnE,SAAS,YAAY;AACrB,SAAS,cAAc;AAIvB,IAAM,mBACJ;AAiBK,IAAM,sBAA8C;AAAA,EACzD,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,OAAO;AACT;AAGA,IAAI,gBAA+B;AACnC,SAAS,kBAA0B;AACjC,MAAI,CAAC,eAAe;AAClB,oBAAgB,KAAK,QAAQ,IAAI,GAAG,iBAAiB;AACrD,cAAU,eAAe,EAAE,WAAW,KAAK,CAAC;AAAA,EAC9C;AACA,SAAO;AACT;AAKO,SAAS,uBACd,MACA,WACkB;AAClB,SAAO;AAAA,IACL,SAAS;AAAA,IACT;AAAA,IACA,MAAM,aAAa,OAAO;AACxB,YAAM,UAAU,oBAAI,IAAiC;AACrD,YAAM,UAAU,gBAAgB;AAChC,YAAM,YAAsB,CAAC;AAE7B,UAAI;AAEF,mBAAW,EAAE,OAAO,QAAQ,KAAK,OAAO;AACtC,gBAAM,OAAO,KAAK,SAAS,UAAU,OAAO,MAAM;AAClD,wBAAc,MAAM,KAAK;AACzB,oBAAU,KAAK,IAAI;AAAA,QACrB;AAGA,YAAI;AACJ,YAAI,SAAS,SAAS;AACpB,mBAAS,kBAAkB,SAAS;AAAA,QACtC,OAAO;AACL,mBAAS,aAAa,MAAM,SAAS;AAAA,QACvC;AAGA,cAAM,UAAU,eAAe,OAAO,KAAK,CAAC;AAC5C,cAAM,QAAQ,QAAQ,MAAM,yBAAyB,EAClD,IAAI,OAAK,EAAE,KAAK,CAAC,EACjB,OAAO,OAAK,EAAE,SAAS,CAAC;AAG3B,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,gBAAM,UAAU,MAAM,CAAC,EAAE;AACzB,cAAI,IAAI,MAAM,QAAQ;AACpB,oBAAQ,IAAI,SAAS,EAAE,UAAU,MAAM,CAAC,EAAE,CAAC;AAAA,UAC7C;AAAA,QAEF;AAAA,MACF,UAAE;AAEA,mBAAW,KAAK,WAAW;AACzB,cAAI;AAAE,uBAAW,CAAC;AAAA,UAAE,QAAQ;AAAA,UAAe;AAAA,QAC7C;AAAA,MACF;AAEA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAGA,SAAS,aAAa,MAA2B,YAA8B;AAC7E,QAAM,WAAW,WAAW,IAAI,OAAK,IAAI,CAAC,EAAE,EAAE,KAAK,IAAI;AACvD,QAAM,SAAS,GAAG,gBAAgB;AAAA;AAAA,EAAO,QAAQ;AAEjD,MAAI;AACJ,MAAI,SAAS,UAAU;AACrB,WAAO,CAAC,YAAY,QAAQ,QAAQ;AACpC,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAAA,EACvC,OAAO;AAEL,WAAO,CAAC,WAAW,MAAM;AACzB,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAAA,EACvC;AAEA,QAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,QAAM,SAAS,UAAU,MAAM,MAAM;AAAA,IACnC,UAAU;AAAA,IACV,SAAS;AAAA,IACT,WAAW,KAAK,OAAO;AAAA;AAAA,IACvB,GAAI,SAAS,WAAW,EAAE,KAAK,OAAO,EAAE,IAAI,CAAC;AAAA,EAC/C,CAAC;AAED,MAAI,OAAO,OAAO;AAChB,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,OAAO,MAAM,OAAO,EAAE;AAAA,EAC9D;AACA,MAAI,OAAO,WAAW,GAAG;AACvB,UAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,MAAM,EAAE;AAAA,EAChD;AAEA,SAAO,OAAO,UAAU;AAC1B;AAGA,SAAS,kBAAkB,YAA8B;AACvD,QAAM,UAAU,KAAK,OAAO,GAAG,sBAAsB,KAAK,IAAI,CAAC,MAAM;AACrE,MAAI;AACF,UAAM,OAAO,CAAC,QAAQ,gBAAgB;AACtC,eAAW,KAAK,YAAY;AAC1B,WAAK,KAAK,WAAW,CAAC;AAAA,IACxB;AACA,SAAK,KAAK,yBAAyB,OAAO;AAC1C,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAErC,UAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,UAAM,SAAS,UAAU,SAAS,MAAM;AAAA,MACtC,UAAU;AAAA,MACV,SAAS;AAAA,MACT,WAAW,KAAK,OAAO;AAAA,MACvB,OAAO;AAAA,IACT,CAAC;AAED,QAAI,OAAO,OAAO;AAChB,YAAM,IAAI,MAAM,wCAAoB,OAAO,MAAM,OAAO,EAAE;AAAA,IAC5D;AACA,QAAI,OAAO,WAAW,GAAG;AACvB,YAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,YAAM,IAAI,MAAM,wCAAoB,MAAM,EAAE;AAAA,IAC9C;AAEA,QAAI;AACF,aAAO,aAAa,SAAS,OAAO;AAAA,IACtC,QAAQ;AACN,aAAO,OAAO,UAAU;AAAA,IAC1B;AAAA,EACF,UAAE;AACA,QAAI;AAAE,iBAAW,OAAO;AAAA,IAAE,QAAQ;AAAA,IAAe;AAAA,EACnD;AACF;AAGA,SAAS,eAAe,MAAsB;AAC5C,QAAM,QAAQ,KAAK,MAAM,+CAA+C;AACxE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;","names":[]}