@clazic/kordoc 2.4.7 → 2.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/dist/{batch-provider-XI2EPODC.js → batch-provider-VX7CY6UH.js} +9 -4
  2. package/dist/batch-provider-VX7CY6UH.js.map +1 -0
  3. package/dist/{chunk-5DD75UUX.js → chunk-7ORDFSF4.js} +2 -2
  4. package/dist/{chunk-GITPGCCA.js → chunk-JO37HXVZ.js} +8 -8
  5. package/dist/{chunk-GITPGCCA.js.map → chunk-JO37HXVZ.js.map} +1 -1
  6. package/dist/{chunk-JOGAFNIL.js → chunk-YC2MEB7R.js} +6 -3
  7. package/dist/chunk-YC2MEB7R.js.map +1 -0
  8. package/dist/{chunk-4PP34NVQ.js → chunk-YW5G6BCJ.js} +2 -2
  9. package/dist/chunk-YW5G6BCJ.js.map +1 -0
  10. package/dist/cli.js +9 -8
  11. package/dist/cli.js.map +1 -1
  12. package/dist/index.cjs +17 -9
  13. package/dist/index.cjs.map +1 -1
  14. package/dist/index.js +17 -9
  15. package/dist/index.js.map +1 -1
  16. package/dist/mcp.js +4 -4
  17. package/dist/{provider-7F7NEDTN.js → provider-PYZL2VNN.js} +2 -2
  18. package/dist/{resolve-SUU5Q6DJ.js → resolve-BGOGWG6E.js} +4 -4
  19. package/dist/{utils-7LOIY3O6.js → utils-HAVLKXCA.js} +2 -2
  20. package/dist/{watch-HUIKUPMG.js → watch-YQ3ZQM2I.js} +16 -9
  21. package/dist/watch-YQ3ZQM2I.js.map +1 -0
  22. package/package.json +2 -2
  23. package/dist/batch-provider-XI2EPODC.js.map +0 -1
  24. package/dist/chunk-4PP34NVQ.js.map +0 -1
  25. package/dist/chunk-JOGAFNIL.js.map +0 -1
  26. package/dist/watch-HUIKUPMG.js.map +0 -1
  27. /package/dist/{chunk-5DD75UUX.js.map → chunk-7ORDFSF4.js.map} +0 -0
  28. /package/dist/{provider-7F7NEDTN.js.map → provider-PYZL2VNN.js.map} +0 -0
  29. /package/dist/{resolve-SUU5Q6DJ.js.map → resolve-BGOGWG6E.js.map} +0 -0
  30. /package/dist/{utils-7LOIY3O6.js.map → utils-HAVLKXCA.js.map} +0 -0
package/dist/mcp.js CHANGED
@@ -10,17 +10,17 @@ import {
10
10
  markdownToHwpx,
11
11
  markdownToXlsx,
12
12
  parse
13
- } from "./chunk-GITPGCCA.js";
14
- import "./chunk-4PP34NVQ.js";
13
+ } from "./chunk-JO37HXVZ.js";
14
+ import "./chunk-YW5G6BCJ.js";
15
15
  import {
16
16
  KordocError,
17
17
  VERSION,
18
18
  sanitizeError,
19
19
  toArrayBuffer
20
- } from "./chunk-5DD75UUX.js";
20
+ } from "./chunk-7ORDFSF4.js";
21
21
  import "./chunk-MOL7MDBG.js";
22
22
  import "./chunk-7FMKAV4P.js";
23
- import "./chunk-JOGAFNIL.js";
23
+ import "./chunk-YC2MEB7R.js";
24
24
  import "./chunk-ZWE3DS7E.js";
25
25
 
26
26
  // src/mcp.ts
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  markdownToBlocks
4
- } from "./chunk-4PP34NVQ.js";
4
+ } from "./chunk-YW5G6BCJ.js";
5
5
  import "./chunk-ZWE3DS7E.js";
6
6
 
7
7
  // src/ocr/provider.ts
@@ -164,4 +164,4 @@ async function renderPageToPng(page) {
164
164
  export {
165
165
  ocrPages
166
166
  };
167
- //# sourceMappingURL=provider-7F7NEDTN.js.map
167
+ //# sourceMappingURL=provider-PYZL2VNN.js.map
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  createCliOcrProvider
4
- } from "./chunk-JOGAFNIL.js";
4
+ } from "./chunk-YC2MEB7R.js";
5
5
  import {
6
6
  detectAvailableOcr,
7
7
  getTesseractFallbackMessage,
@@ -24,7 +24,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
24
24
  return createTesseractProvider();
25
25
  }
26
26
  if (mode === "gemini" || mode === "claude" || mode === "codex") {
27
- const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-XI2EPODC.js");
27
+ const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-VX7CY6UH.js");
28
28
  const effectiveBatch = batchSize ?? DEFAULT_BATCH_SIZES[mode];
29
29
  if (effectiveBatch > 1) {
30
30
  return createBatchCliProvider(mode, effectiveBatch);
@@ -55,7 +55,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
55
55
  return createTesseractProvider();
56
56
  }
57
57
  if (detected === "gemini" || detected === "codex" || detected === "claude") {
58
- const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-XI2EPODC.js");
58
+ const { createBatchCliProvider, DEFAULT_BATCH_SIZES } = await import("./batch-provider-VX7CY6UH.js");
59
59
  const effectiveBatch = batchSize ?? DEFAULT_BATCH_SIZES[detected];
60
60
  if (effectiveBatch > 1) {
61
61
  return createBatchCliProvider(detected, effectiveBatch);
@@ -67,4 +67,4 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
67
67
  export {
68
68
  resolveOcrProvider
69
69
  };
70
- //# sourceMappingURL=resolve-SUU5Q6DJ.js.map
70
+ //# sourceMappingURL=resolve-BGOGWG6E.js.map
@@ -8,7 +8,7 @@ import {
8
8
  sanitizeError,
9
9
  sanitizeHref,
10
10
  toArrayBuffer
11
- } from "./chunk-5DD75UUX.js";
11
+ } from "./chunk-7ORDFSF4.js";
12
12
  import "./chunk-ZWE3DS7E.js";
13
13
  export {
14
14
  KordocError,
@@ -20,4 +20,4 @@ export {
20
20
  sanitizeHref,
21
21
  toArrayBuffer
22
22
  };
23
- //# sourceMappingURL=utils-7LOIY3O6.js.map
23
+ //# sourceMappingURL=utils-HAVLKXCA.js.map
@@ -2,19 +2,19 @@
2
2
  import {
3
3
  detectFormat,
4
4
  parse
5
- } from "./chunk-GITPGCCA.js";
6
- import "./chunk-4PP34NVQ.js";
5
+ } from "./chunk-JO37HXVZ.js";
6
+ import "./chunk-YW5G6BCJ.js";
7
7
  import {
8
8
  toArrayBuffer
9
- } from "./chunk-5DD75UUX.js";
9
+ } from "./chunk-7ORDFSF4.js";
10
10
  import "./chunk-MOL7MDBG.js";
11
11
  import "./chunk-7FMKAV4P.js";
12
- import "./chunk-JOGAFNIL.js";
12
+ import "./chunk-YC2MEB7R.js";
13
13
  import "./chunk-ZWE3DS7E.js";
14
14
 
15
15
  // src/watch.ts
16
16
  import { watch, readFileSync, writeFileSync, mkdirSync, statSync, existsSync } from "fs";
17
- import { basename, resolve, extname } from "path";
17
+ import { basename, resolve, extname, sep } from "path";
18
18
  var SUPPORTED_EXTENSIONS = /* @__PURE__ */ new Set([".hwp", ".hwpx", ".pdf", ".xlsx", ".docx"]);
19
19
  var DEBOUNCE_MS = 1e3;
20
20
  var STABLE_CHECK_MS = 300;
@@ -49,7 +49,8 @@ async function watchDirectory(options) {
49
49
  try {
50
50
  const absPath = resolve(dir, filePath);
51
51
  const realDir = resolve(dir);
52
- if (!absPath.startsWith(realDir)) return;
52
+ const dirWithSep = realDir.endsWith(sep) ? realDir : realDir + sep;
53
+ if (!absPath.startsWith(dirWithSep) && absPath !== realDir) return;
53
54
  if (!existsSync(absPath)) return;
54
55
  const fileSize = await waitForStableSize(absPath);
55
56
  if (fileSize > MAX_FILE_SIZE || fileSize === 0) return;
@@ -82,7 +83,7 @@ async function watchDirectory(options) {
82
83
  log(`[kordoc watch] \uC5D0\uB7EC: ${fileName} \u2014 ${err instanceof Error ? err.message : err}`);
83
84
  }
84
85
  };
85
- watch(dir, { recursive: true }, (event, filename) => {
86
+ const handler = (_event, filename) => {
86
87
  if (!filename) return;
87
88
  const filePath = filename.toString();
88
89
  const existing = pending.get(filePath);
@@ -92,7 +93,13 @@ async function watchDirectory(options) {
92
93
  processFile(filePath).catch(() => {
93
94
  });
94
95
  }, DEBOUNCE_MS));
95
- });
96
+ };
97
+ try {
98
+ watch(dir, { recursive: true }, handler);
99
+ } catch {
100
+ process.stderr.write("[kordoc watch] \uACBD\uACE0: \uD558\uC704 \uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \uBBF8\uC9C0\uC6D0 (Node.js 22+ \uB610\uB294 macOS/Windows \uD544\uC694) \u2014 \uCD5C\uC0C1\uC704\uB9CC \uAC10\uC2DC\n");
101
+ watch(dir, handler);
102
+ }
96
103
  return new Promise(() => {
97
104
  });
98
105
  }
@@ -129,4 +136,4 @@ async function sendWebhook(url, payload) {
129
136
  export {
130
137
  watchDirectory
131
138
  };
132
- //# sourceMappingURL=watch-HUIKUPMG.js.map
139
+ //# sourceMappingURL=watch-YQ3ZQM2I.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/watch.ts"],"sourcesContent":["/** 디렉토리 감시 모드 — 새 문서 자동 변환 + Webhook 알림 */\n\nimport { watch, readFileSync, writeFileSync, mkdirSync, statSync, existsSync } from \"fs\"\nimport { basename, resolve, extname, sep } from \"path\"\nimport { parse, detectFormat } from \"./index.js\"\nimport { toArrayBuffer } from \"./utils.js\"\nimport type { WatchOptions } from \"./types.js\"\n\nconst SUPPORTED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\nconst DEBOUNCE_MS = 1000\n/** 파일 쓰기 완료 판정: 연속 2회 동일 크기 확인 간격 */\nconst STABLE_CHECK_MS = 300\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\n\n/**\n * 디렉토리를 감시하여 새 문서 파일을 자동 변환.\n *\n * @example\n * ```bash\n * kordoc watch ./incoming -d ./output --webhook https://api.example.com/docs\n * ```\n */\nexport async function watchDirectory(options: WatchOptions): Promise<void> {\n const { dir, outDir, webhook, format = \"markdown\", pages, silent } = options\n\n if (!existsSync(dir)) throw new Error(`디렉토리를 찾을 수 없습니다: ${dir}`)\n if (webhook) validateWebhookUrl(webhook)\n if (outDir) mkdirSync(outDir, { recursive: true })\n\n const log = silent ? () => {} : (msg: string) => process.stderr.write(msg + \"\\n\")\n log(`[kordoc watch] 감시 시작: ${resolve(dir)}`)\n if (outDir) log(`[kordoc watch] 출력: ${resolve(outDir)}`)\n if (webhook) log(`[kordoc watch] 웹훅: ${webhook}`)\n\n // 디바운스 맵\n const pending = new Map<string, ReturnType<typeof setTimeout>>()\n\n /** 파일 크기가 안정화될 때까지 대기 (쓰기 완료 감지) */\n const waitForStableSize = async (absPath: string): Promise<number> => {\n let prevSize = statSync(absPath).size\n await new Promise(r => setTimeout(r, STABLE_CHECK_MS))\n if (!existsSync(absPath)) return 0\n const currSize = statSync(absPath).size\n if (currSize !== prevSize) {\n // 크기가 변했으면 한 번 더 대기\n await new Promise(r => setTimeout(r, STABLE_CHECK_MS))\n if (!existsSync(absPath)) return 0\n return statSync(absPath).size\n }\n return currSize\n }\n\n const processFile = async (filePath: string) => {\n const ext = extname(filePath).toLowerCase()\n if (!SUPPORTED_EXTENSIONS.has(ext)) return\n\n const fileName = basename(filePath)\n try {\n const absPath = resolve(dir, filePath)\n // 경로 순회 방지 — 감시 디렉토리 외부 파일 차단\n const realDir = resolve(dir)\n const dirWithSep = realDir.endsWith(sep) ? realDir : realDir + sep\n if (!absPath.startsWith(dirWithSep) && absPath !== realDir) return\n if (!existsSync(absPath)) return\n\n const fileSize = await waitForStableSize(absPath)\n if (fileSize > MAX_FILE_SIZE || fileSize === 0) return\n\n log(`[kordoc watch] 변환 중: ${fileName}`)\n\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const parseOptions = pages ? { pages } : undefined\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n log(`[kordoc watch] 실패: ${fileName} — ${result.error}`)\n await sendWebhook(webhook, { file: fileName, format: detectFormat(arrayBuffer), success: false, error: result.error })\n return\n }\n\n const output = format === \"json\" ? JSON.stringify(result, null, 2) : result.markdown\n\n if (outDir) {\n const outExt = format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n log(`[kordoc watch] 완료: ${fileName} → ${basename(outPath)}`)\n } else {\n process.stdout.write(output + \"\\n\")\n }\n\n await sendWebhook(webhook, {\n file: fileName,\n format: result.fileType,\n success: true,\n markdown: format === \"markdown\" ? output.substring(0, 1000) : undefined,\n })\n } catch (err) {\n log(`[kordoc watch] 에러: ${fileName} — ${err instanceof Error ? err.message : err}`)\n }\n }\n\n const handler = (_event: string | null, filename: string | Buffer | null) => {\n if (!filename) return\n const filePath = filename.toString()\n const existing = pending.get(filePath)\n if (existing) clearTimeout(existing)\n pending.set(filePath, setTimeout(() => {\n pending.delete(filePath)\n processFile(filePath).catch(() => {})\n }, DEBOUNCE_MS))\n }\n\n // fs.watch recursive: Node 18+ macOS/Windows, Node 22+ Linux\n // Linux Node 22 미만은 recursive 미지원 → graceful degradation (최상위 디렉토리만 감시)\n try {\n watch(dir, { recursive: true }, handler)\n } catch {\n process.stderr.write(\"[kordoc watch] 경고: 하위 디렉토리 감시 미지원 (Node.js 22+ 또는 macOS/Windows 필요) — 최상위만 감시\\n\")\n watch(dir, handler)\n }\n\n // 프로세스 종료 방지 (Ctrl+C로 종료)\n return new Promise(() => {})\n}\n\n/** Webhook URL 검증 — SSRF 방지: http/https만 허용, localhost/private IP 차단 */\nfunction validateWebhookUrl(url: string): void {\n let parsed: URL\n try {\n parsed = new URL(url)\n } catch {\n throw new Error(`유효하지 않은 webhook URL: ${url}`)\n }\n if (parsed.protocol !== \"http:\" && parsed.protocol !== \"https:\") {\n throw new Error(`허용되지 않는 webhook 프로토콜: ${parsed.protocol}`)\n }\n const hostname = parsed.hostname.toLowerCase()\n if (\n hostname === \"localhost\" ||\n hostname === \"[::1]\" ||\n hostname.startsWith(\"127.\") ||\n hostname.startsWith(\"10.\") ||\n hostname.startsWith(\"192.168.\") ||\n /^172\\.(1[6-9]|2\\d|3[01])\\./.test(hostname) ||\n hostname === \"0.0.0.0\" ||\n hostname.startsWith(\"169.254.\") ||\n hostname.endsWith(\".local\") ||\n // IPv6 사설 대역\n hostname.startsWith(\"[fc\") ||\n hostname.startsWith(\"[fd\") ||\n hostname.startsWith(\"[fe80:\") ||\n hostname === \"[::0]\" ||\n hostname === \"[::]\" ||\n // 클라우드 메타데이터 엔드포인트\n hostname === \"metadata.google.internal\" ||\n hostname === \"metadata.google\" ||\n // 16진수/8진수 IP 인코딩 우회 방지\n /^0x[0-9a-f]+$/i.test(hostname) ||\n /^0[0-7]+$/.test(hostname)\n ) {\n throw new Error(`내부 네트워크 대상 webhook은 허용되지 않습니다: ${hostname}`)\n }\n}\n\nasync function sendWebhook(url: string | undefined, payload: Record<string, unknown>): Promise<void> {\n if (!url) return\n try {\n validateWebhookUrl(url)\n await fetch(url, {\n method: \"POST\",\n headers: { \"Content-Type\": \"application/json\" },\n body: JSON.stringify({ ...payload, timestamp: new Date().toISOString() }),\n })\n } catch {\n // webhook 실패는 조용히 무시\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;AAEA,SAAS,OAAO,cAAc,eAAe,WAAW,UAAU,kBAAkB;AACpF,SAAS,UAAU,SAAS,SAAS,WAAW;AAKhD,IAAM,uBAAuB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAChF,IAAM,cAAc;AAEpB,IAAM,kBAAkB;AACxB,IAAM,gBAAgB,MAAM,OAAO;AAUnC,eAAsB,eAAe,SAAsC;AACzE,QAAM,EAAE,KAAK,QAAQ,SAAS,SAAS,YAAY,OAAO,OAAO,IAAI;AAErE,MAAI,CAAC,WAAW,GAAG,EAAG,OAAM,IAAI,MAAM,gFAAoB,GAAG,EAAE;AAC/D,MAAI,QAAS,oBAAmB,OAAO;AACvC,MAAI,OAAQ,WAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AAEjD,QAAM,MAAM,SAAS,MAAM;AAAA,EAAC,IAAI,CAAC,QAAgB,QAAQ,OAAO,MAAM,MAAM,IAAI;AAChF,MAAI,6CAAyB,QAAQ,GAAG,CAAC,EAAE;AAC3C,MAAI,OAAQ,KAAI,gCAAsB,QAAQ,MAAM,CAAC,EAAE;AACvD,MAAI,QAAS,KAAI,gCAAsB,OAAO,EAAE;AAGhD,QAAM,UAAU,oBAAI,IAA2C;AAG/D,QAAM,oBAAoB,OAAO,YAAqC;AACpE,QAAI,WAAW,SAAS,OAAO,EAAE;AACjC,UAAM,IAAI,QAAQ,OAAK,WAAW,GAAG,eAAe,CAAC;AACrD,QAAI,CAAC,WAAW,OAAO,EAAG,QAAO;AACjC,UAAM,WAAW,SAAS,OAAO,EAAE;AACnC,QAAI,aAAa,UAAU;AAEzB,YAAM,IAAI,QAAQ,OAAK,WAAW,GAAG,eAAe,CAAC;AACrD,UAAI,CAAC,WAAW,OAAO,EAAG,QAAO;AACjC,aAAO,SAAS,OAAO,EAAE;AAAA,IAC3B;AACA,WAAO;AAAA,EACT;AAEA,QAAM,cAAc,OAAO,aAAqB;AAC9C,UAAM,MAAM,QAAQ,QAAQ,EAAE,YAAY;AAC1C,QAAI,CAAC,qBAAqB,IAAI,GAAG,EAAG;AAEpC,UAAM,WAAW,SAAS,QAAQ;AAClC,QAAI;AACF,YAAM,UAAU,QAAQ,KAAK,QAAQ;AAErC,YAAM,UAAU,QAAQ,GAAG;AAC3B,YAAM,aAAa,QAAQ,SAAS,GAAG,IAAI,UAAU,UAAU;AAC/D,UAAI,CAAC,QAAQ,WAAW,UAAU,KAAK,YAAY,QAAS;AAC5D,UAAI,CAAC,WAAW,OAAO,EAAG;AAE1B,YAAM,WAAW,MAAM,kBAAkB,OAAO;AAChD,UAAI,WAAW,iBAAiB,aAAa,EAAG;AAEhD,UAAI,uCAAwB,QAAQ,EAAE;AAEtC,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,eAAe,QAAQ,EAAE,MAAM,IAAI;AACzC,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,YAAI,gCAAsB,QAAQ,WAAM,OAAO,KAAK,EAAE;AACtD,cAAM,YAAY,SAAS,EAAE,MAAM,UAAU,QAAQ,aAAa,WAAW,GAAG,SAAS,OAAO,OAAO,OAAO,MAAM,CAAC;AACrH;AAAA,MACF;AAEA,YAAM,SAAS,WAAW,SAAS,KAAK,UAAU,QAAQ,MAAM,CAAC,IAAI,OAAO;AAE5E,UAAI,QAAQ;AACV,cAAM,SAAS,WAAW,SAAS,UAAU;AAC7C,cAAM,UAAU,QAAQ,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACpE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,gCAAsB,QAAQ,WAAM,SAAS,OAAO,CAAC,EAAE;AAAA,MAC7D,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAEA,YAAM,YAAY,SAAS;AAAA,QACzB,MAAM;AAAA,QACN,QAAQ,OAAO;AAAA,QACf,SAAS;AAAA,QACT,UAAU,WAAW,aAAa,OAAO,UAAU,GAAG,GAAI,IAAI;AAAA,MAChE,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,UAAI,gCAAsB,QAAQ,WAAM,eAAe,QAAQ,IAAI,UAAU,GAAG,EAAE;AAAA,IACpF;AAAA,EACF;AAEA,QAAM,UAAU,CAAC,QAAuB,aAAqC;AAC3E,QAAI,CAAC,SAAU;AACf,UAAM,WAAW,SAAS,SAAS;AACnC,UAAM,WAAW,QAAQ,IAAI,QAAQ;AACrC,QAAI,SAAU,cAAa,QAAQ;AACnC,YAAQ,IAAI,UAAU,WAAW,MAAM;AACrC,cAAQ,OAAO,QAAQ;AACvB,kBAAY,QAAQ,EAAE,MAAM,MAAM;AAAA,MAAC,CAAC;AAAA,IACtC,GAAG,WAAW,CAAC;AAAA,EACjB;AAIA,MAAI;AACF,UAAM,KAAK,EAAE,WAAW,KAAK,GAAG,OAAO;AAAA,EACzC,QAAQ;AACN,YAAQ,OAAO,MAAM,yMAAiF;AACtG,UAAM,KAAK,OAAO;AAAA,EACpB;AAGA,SAAO,IAAI,QAAQ,MAAM;AAAA,EAAC,CAAC;AAC7B;AAGA,SAAS,mBAAmB,KAAmB;AAC7C,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;AAAA,EACtB,QAAQ;AACN,UAAM,IAAI,MAAM,sDAAwB,GAAG,EAAE;AAAA,EAC/C;AACA,MAAI,OAAO,aAAa,WAAW,OAAO,aAAa,UAAU;AAC/D,UAAM,IAAI,MAAM,2EAAyB,OAAO,QAAQ,EAAE;AAAA,EAC5D;AACA,QAAM,WAAW,OAAO,SAAS,YAAY;AAC7C,MACE,aAAa,eACb,aAAa,WACb,SAAS,WAAW,MAAM,KAC1B,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,UAAU,KAC9B,6BAA6B,KAAK,QAAQ,KAC1C,aAAa,aACb,SAAS,WAAW,UAAU,KAC9B,SAAS,SAAS,QAAQ;AAAA,EAE1B,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,QAAQ,KAC5B,aAAa,WACb,aAAa;AAAA,EAEb,aAAa,8BACb,aAAa;AAAA,EAEb,iBAAiB,KAAK,QAAQ,KAC9B,YAAY,KAAK,QAAQ,GACzB;AACA,UAAM,IAAI,MAAM,uHAAkC,QAAQ,EAAE;AAAA,EAC9D;AACF;AAEA,eAAe,YAAY,KAAyB,SAAiD;AACnG,MAAI,CAAC,IAAK;AACV,MAAI;AACF,uBAAmB,GAAG;AACtB,UAAM,MAAM,KAAK;AAAA,MACf,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,EAAE,GAAG,SAAS,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE,CAAC;AAAA,IAC1E,CAAC;AAAA,EACH,QAAQ;AAAA,EAER;AACF;","names":[]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@clazic/kordoc",
3
- "version": "2.4.7",
3
+ "version": "2.4.8",
4
4
  "description": "Parse Korean documents (HWP, HWPX, PDF, XLSX, DOCX) to Markdown",
5
5
  "type": "module",
6
6
  "exports": {
@@ -27,7 +27,7 @@
27
27
  "prepublishOnly": "npm run test && npm run build",
28
28
  "build:worker": "tsup --config tsup.worker.config.ts",
29
29
  "deploy:worker": "npm run build:worker && npx wrangler deploy",
30
- "build:bin": "npm run build && pkg dist-binary/cli-bin.cjs --targets node18-macos-arm64,node18-macos-x64 --output kordoc"
30
+ "build:bin": "npm run build && pkg dist-binary/cli-bin.cjs --targets node18-macos-arm64,node18-macos-x64,node18-win-x64,node18-linux-x64 --output kordoc"
31
31
  },
32
32
  "keywords": [
33
33
  "hwp",
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/ocr/batch-provider.ts"],"sourcesContent":["/**\n * CLI 배치 OCR 프로바이더\n *\n * 여러 페이지 이미지를 단일 CLI 호출로 처리하여 API 호출 수를 대폭 감소.\n * gemini/claude: @file 멀티 참조, codex: --image 멀티 플래그\n *\n * 299페이지 기준:\n * - 기존: CLI 299회 호출 (~30분)\n * - 배치: CLI 3~6회 호출 (~3분)\n */\n\nimport { spawn, execSync } from \"child_process\"\nimport { writeFileSync, readFileSync, unlinkSync, mkdirSync } from \"fs\"\nimport { join } from \"path\"\nimport { tmpdir } from \"os\"\nimport type { StructuredOcrResult, BatchOcrProvider } from \"../types.js\"\n\n/** 배치 OCR 프롬프트 */\nconst BATCH_OCR_PROMPT =\n \"다음 문서 페이지 이미지들을 OCR하여 순수 Markdown으로 변환하세요.\\n\\n\" +\n \"규칙:\\n\" +\n \"- 각 페이지 결과 사이에 반드시 이 구분자를 삽입: <!-- PAGE_BREAK -->\\n\" +\n \"- 테이블은 Markdown 테이블 문법 사용 (| 구분, |---|---| 헤더 구분선 포함)\\n\" +\n \"- 병합된 셀은 해당 위치에 내용 기재\\n\" +\n \"- 헤딩은 글자 크기에 따라 ## ~ ###### 사용\\n\" +\n \"- 리스트는 - 또는 1. 사용\\n\" +\n \"- 이미지, 도형 등 비텍스트 요소는 무시\\n\" +\n \"- 원문의 읽기 순서와 구조를 유지\\n\" +\n \"- ```로 감싸지 말고 순수 Markdown만 출력\"\n\n/** 모드별 기본 배치 크기 (CLI 내부 타임아웃 + 실측 기반)\n *\n * gemini CLI: 10장 이상에서 AbortError 발생 (내부 타임아웃).\n * 5장 배치가 안정적으로 동작 확인 (35초/배치).\n * 299페이지 = 60배치 = 기존 299회 대비 80% 감소.\n */\nexport const DEFAULT_BATCH_SIZES: Record<string, number> = {\n gemini: 5,\n claude: 5,\n codex: 10,\n}\n\n/**\n * 임시 디렉토리 — gemini CLI는 cwd 하위 + gitignore 밖만 @참조 가능\n *\n * 숨김 처리:\n * - macOS/Linux: '.' 접두사로 기본 숨김 (ls -a 로만 표시)\n * - Windows: '.' 접두사 + attrib +h 로 숨김 속성 부여\n */\nlet _batchTempDir: string | null = null\nfunction getBatchTempDir(): string {\n if (!_batchTempDir) {\n _batchTempDir = join(process.cwd(), \".kordoc_ocr_tmp\")\n mkdirSync(_batchTempDir, { recursive: true })\n // Windows: dot-prefix만으로 숨김 처리 불충분 → attrib +h 추가\n if (process.platform === \"win32\") {\n try { execSync(`attrib +h \"${_batchTempDir}\"`, { stdio: \"ignore\" }) } catch { /* ignore */ }\n }\n }\n return _batchTempDir\n}\n\n/**\n * 배치 CLI 프로바이더 생성\n */\nexport function createBatchCliProvider(\n mode: \"gemini\" | \"claude\" | \"codex\",\n batchSize: number\n): BatchOcrProvider {\n return {\n __batch: true as const,\n batchSize,\n async processBatch(pages) {\n const results = new Map<number, StructuredOcrResult>()\n const tempDir = getBatchTempDir()\n const tempFiles: string[] = []\n\n try {\n // 1. Write all page images to temp files\n for (const { image, pageNum } of pages) {\n const path = join(tempDir, `batch-p${pageNum}.png`)\n writeFileSync(path, image)\n tempFiles.push(path)\n }\n\n // 2. Call CLI with all file references (비동기 — 병렬 배치 실행 가능)\n let output: string\n if (mode === \"codex\") {\n output = await callBatchCodexCli(tempFiles)\n } else {\n output = await callBatchCli(mode, tempFiles)\n }\n\n // 3. Parse response by PAGE_BREAK separator\n const cleaned = stripCodeFence(output.trim())\n const parts = cleaned.split(/<!--\\s*PAGE_BREAK\\s*-->/)\n .map(p => p.trim())\n .filter(p => p.length > 0)\n\n // 4. Map results to page numbers (best-effort if count mismatch)\n for (let i = 0; i < pages.length; i++) {\n const pageNum = pages[i].pageNum\n if (i < parts.length) {\n results.set(pageNum, { markdown: parts[i] })\n }\n // If fewer parts than pages, remaining pages get no result\n }\n } finally {\n // 5. Clean up temp files\n for (const f of tempFiles) {\n try { unlinkSync(f) } catch { /* ignore */ }\n }\n }\n\n return results\n },\n }\n}\n\n/**\n * 비동기 CLI 실행 헬퍼 — spawn + Promise 래핑.\n * spawnSync는 이벤트 루프를 차단하여 병렬 배치 실행 불가.\n */\nfunction spawnAsync(\n cmd: string,\n args: string[],\n opts: { timeoutMs: number; cwd?: string; stdin?: string }\n): Promise<{ stdout: string; stderr: string; exitCode: number }> {\n return new Promise((resolve, reject) => {\n const child = spawn(cmd, args, {\n cwd: opts.cwd,\n env: process.env,\n stdio: [\"pipe\", \"pipe\", \"pipe\"],\n })\n\n let stdout = \"\"\n let stderr = \"\"\n let killed = false\n\n child.stdout.setEncoding(\"utf-8\")\n child.stderr.setEncoding(\"utf-8\")\n child.stdout.on(\"data\", (d: string) => { stdout += d })\n child.stderr.on(\"data\", (d: string) => { stderr += d })\n\n const timer = setTimeout(() => {\n killed = true\n child.kill(\"SIGTERM\")\n }, opts.timeoutMs)\n\n if (opts.stdin !== undefined) {\n child.stdin.end(opts.stdin)\n } else {\n child.stdin.end()\n }\n\n child.on(\"close\", (code) => {\n clearTimeout(timer)\n if (killed) {\n reject(new Error(`타임아웃 (${Math.round(opts.timeoutMs / 1000)}초)`))\n } else {\n resolve({ stdout, stderr, exitCode: code ?? 1 })\n }\n })\n child.on(\"error\", (err) => {\n clearTimeout(timer)\n reject(err)\n })\n })\n}\n\n/** gemini/claude 배치 호출 (비동기) */\nasync function callBatchCli(mode: \"gemini\" | \"claude\", imagePaths: string[]): Promise<string> {\n const fileRefs = imagePaths.map(p => `@${p}`).join(\"\\n\")\n const prompt = `${BATCH_OCR_PROMPT}\\n\\n${fileRefs}`\n\n let args: string[]\n if (mode === \"gemini\") {\n const model = process.env.KORDOC_GEMINI_MODEL ?? \"gemini-2.5-flash\"\n args = [\"--prompt\", prompt, \"--yolo\", \"--model\", model]\n } else {\n args = [\"--print\", prompt]\n const model = process.env.KORDOC_CLAUDE_MODEL\n if (model) args.push(\"--model\", model)\n }\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = await spawnAsync(mode, args, {\n timeoutMs,\n ...(mode === \"claude\" ? { cwd: tmpdir() } : {}),\n })\n\n if (result.exitCode !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.exitCode}`\n throw new Error(`${mode} 배치 OCR 실패: ${errMsg}`)\n }\n\n return result.stdout || \"\"\n}\n\n/** codex 배치 호출 (비동기) — --image를 여러 번 지정 */\nasync function callBatchCodexCli(imagePaths: string[]): Promise<string> {\n const outPath = join(tmpdir(), `kordoc-codex-batch-${Date.now()}-${Math.random().toString(36).slice(2)}.txt`)\n try {\n const args = [\"exec\", BATCH_OCR_PROMPT]\n for (const p of imagePaths) {\n args.push(\"--image\", p)\n }\n args.push(\"--output-last-message\", outPath)\n const model = process.env.KORDOC_CODEX_MODEL\n if (model) args.push(\"--model\", model)\n\n const timeoutMs = 60_000 + imagePaths.length * 20_000\n const result = await spawnAsync(\"codex\", args, {\n timeoutMs,\n stdin: \"\",\n })\n\n if (result.exitCode !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.exitCode}`\n throw new Error(`codex 배치 OCR 실패: ${errMsg}`)\n }\n\n try {\n return readFileSync(outPath, \"utf-8\")\n } catch {\n return result.stdout || \"\"\n }\n } finally {\n try { unlinkSync(outPath) } catch { /* ignore */ }\n }\n}\n\n/** LLM 출력에서 코드 펜스 제거 (cli-provider.ts와 동일 로직) */\nfunction stripCodeFence(text: string): string {\n const match = text.match(/^```(?:markdown|md)?\\s*\\n([\\s\\S]*?)\\n```\\s*$/m)\n return match ? match[1].trim() : text\n}\n"],"mappings":";;;;AAWA,SAAS,OAAO,gBAAgB;AAChC,SAAS,eAAe,cAAc,YAAY,iBAAiB;AACnE,SAAS,YAAY;AACrB,SAAS,cAAc;AAIvB,IAAM,mBACJ;AAiBK,IAAM,sBAA8C;AAAA,EACzD,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,OAAO;AACT;AASA,IAAI,gBAA+B;AACnC,SAAS,kBAA0B;AACjC,MAAI,CAAC,eAAe;AAClB,oBAAgB,KAAK,QAAQ,IAAI,GAAG,iBAAiB;AACrD,cAAU,eAAe,EAAE,WAAW,KAAK,CAAC;AAE5C,QAAI,QAAQ,aAAa,SAAS;AAChC,UAAI;AAAE,iBAAS,cAAc,aAAa,KAAK,EAAE,OAAO,SAAS,CAAC;AAAA,MAAE,QAAQ;AAAA,MAAe;AAAA,IAC7F;AAAA,EACF;AACA,SAAO;AACT;AAKO,SAAS,uBACd,MACA,WACkB;AAClB,SAAO;AAAA,IACL,SAAS;AAAA,IACT;AAAA,IACA,MAAM,aAAa,OAAO;AACxB,YAAM,UAAU,oBAAI,IAAiC;AACrD,YAAM,UAAU,gBAAgB;AAChC,YAAM,YAAsB,CAAC;AAE7B,UAAI;AAEF,mBAAW,EAAE,OAAO,QAAQ,KAAK,OAAO;AACtC,gBAAM,OAAO,KAAK,SAAS,UAAU,OAAO,MAAM;AAClD,wBAAc,MAAM,KAAK;AACzB,oBAAU,KAAK,IAAI;AAAA,QACrB;AAGA,YAAI;AACJ,YAAI,SAAS,SAAS;AACpB,mBAAS,MAAM,kBAAkB,SAAS;AAAA,QAC5C,OAAO;AACL,mBAAS,MAAM,aAAa,MAAM,SAAS;AAAA,QAC7C;AAGA,cAAM,UAAU,eAAe,OAAO,KAAK,CAAC;AAC5C,cAAM,QAAQ,QAAQ,MAAM,yBAAyB,EAClD,IAAI,OAAK,EAAE,KAAK,CAAC,EACjB,OAAO,OAAK,EAAE,SAAS,CAAC;AAG3B,iBAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,gBAAM,UAAU,MAAM,CAAC,EAAE;AACzB,cAAI,IAAI,MAAM,QAAQ;AACpB,oBAAQ,IAAI,SAAS,EAAE,UAAU,MAAM,CAAC,EAAE,CAAC;AAAA,UAC7C;AAAA,QAEF;AAAA,MACF,UAAE;AAEA,mBAAW,KAAK,WAAW;AACzB,cAAI;AAAE,uBAAW,CAAC;AAAA,UAAE,QAAQ;AAAA,UAAe;AAAA,QAC7C;AAAA,MACF;AAEA,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAMA,SAAS,WACP,KACA,MACA,MAC+D;AAC/D,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,QAAQ,MAAM,KAAK,MAAM;AAAA,MAC7B,KAAK,KAAK;AAAA,MACV,KAAK,QAAQ;AAAA,MACb,OAAO,CAAC,QAAQ,QAAQ,MAAM;AAAA,IAChC,CAAC;AAED,QAAI,SAAS;AACb,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,UAAM,OAAO,YAAY,OAAO;AAChC,UAAM,OAAO,YAAY,OAAO;AAChC,UAAM,OAAO,GAAG,QAAQ,CAAC,MAAc;AAAE,gBAAU;AAAA,IAAE,CAAC;AACtD,UAAM,OAAO,GAAG,QAAQ,CAAC,MAAc;AAAE,gBAAU;AAAA,IAAE,CAAC;AAEtD,UAAM,QAAQ,WAAW,MAAM;AAC7B,eAAS;AACT,YAAM,KAAK,SAAS;AAAA,IACtB,GAAG,KAAK,SAAS;AAEjB,QAAI,KAAK,UAAU,QAAW;AAC5B,YAAM,MAAM,IAAI,KAAK,KAAK;AAAA,IAC5B,OAAO;AACL,YAAM,MAAM,IAAI;AAAA,IAClB;AAEA,UAAM,GAAG,SAAS,CAAC,SAAS;AAC1B,mBAAa,KAAK;AAClB,UAAI,QAAQ;AACV,eAAO,IAAI,MAAM,6BAAS,KAAK,MAAM,KAAK,YAAY,GAAI,CAAC,SAAI,CAAC;AAAA,MAClE,OAAO;AACL,gBAAQ,EAAE,QAAQ,QAAQ,UAAU,QAAQ,EAAE,CAAC;AAAA,MACjD;AAAA,IACF,CAAC;AACD,UAAM,GAAG,SAAS,CAAC,QAAQ;AACzB,mBAAa,KAAK;AAClB,aAAO,GAAG;AAAA,IACZ,CAAC;AAAA,EACH,CAAC;AACH;AAGA,eAAe,aAAa,MAA2B,YAAuC;AAC5F,QAAM,WAAW,WAAW,IAAI,OAAK,IAAI,CAAC,EAAE,EAAE,KAAK,IAAI;AACvD,QAAM,SAAS,GAAG,gBAAgB;AAAA;AAAA,EAAO,QAAQ;AAEjD,MAAI;AACJ,MAAI,SAAS,UAAU;AACrB,UAAM,QAAQ,QAAQ,IAAI,uBAAuB;AACjD,WAAO,CAAC,YAAY,QAAQ,UAAU,WAAW,KAAK;AAAA,EACxD,OAAO;AACL,WAAO,CAAC,WAAW,MAAM;AACzB,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAAA,EACvC;AAEA,QAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,QAAM,SAAS,MAAM,WAAW,MAAM,MAAM;AAAA,IAC1C;AAAA,IACA,GAAI,SAAS,WAAW,EAAE,KAAK,OAAO,EAAE,IAAI,CAAC;AAAA,EAC/C,CAAC;AAED,MAAI,OAAO,aAAa,GAAG;AACzB,UAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,QAAQ;AACpE,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,MAAM,EAAE;AAAA,EAChD;AAEA,SAAO,OAAO,UAAU;AAC1B;AAGA,eAAe,kBAAkB,YAAuC;AACtE,QAAM,UAAU,KAAK,OAAO,GAAG,sBAAsB,KAAK,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,CAAC,CAAC,MAAM;AAC5G,MAAI;AACF,UAAM,OAAO,CAAC,QAAQ,gBAAgB;AACtC,eAAW,KAAK,YAAY;AAC1B,WAAK,KAAK,WAAW,CAAC;AAAA,IACxB;AACA,SAAK,KAAK,yBAAyB,OAAO;AAC1C,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAErC,UAAM,YAAY,MAAS,WAAW,SAAS;AAC/C,UAAM,SAAS,MAAM,WAAW,SAAS,MAAM;AAAA,MAC7C;AAAA,MACA,OAAO;AAAA,IACT,CAAC;AAED,QAAI,OAAO,aAAa,GAAG;AACzB,YAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,QAAQ;AACpE,YAAM,IAAI,MAAM,wCAAoB,MAAM,EAAE;AAAA,IAC9C;AAEA,QAAI;AACF,aAAO,aAAa,SAAS,OAAO;AAAA,IACtC,QAAQ;AACN,aAAO,OAAO,UAAU;AAAA,IAC1B;AAAA,EACF,UAAE;AACA,QAAI;AAAE,iBAAW,OAAO;AAAA,IAAE,QAAQ;AAAA,IAAe;AAAA,EACnD;AACF;AAGA,SAAS,eAAe,MAAsB;AAC5C,QAAM,QAAQ,KAAK,MAAM,+CAA+C;AACxE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;","names":[]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/ocr/markdown-to-blocks.ts"],"sourcesContent":["/**\n * Markdown → IRBlock[] 역파싱\n *\n * Vision LLM(gemini/claude/codex 등)이 반환한 Markdown 문자열을\n * kordoc의 IRBlock[] 중간 표현으로 변환.\n * 기존 blocksToMarkdown()의 역방향 처리.\n */\n\nimport type { IRBlock, IRTable, IRCell } from \"../types.js\"\n\n/**\n * Markdown 문자열을 IRBlock[] 배열로 변환.\n *\n * 지원 요소:\n * - 헤딩: # ~ ######\n * - 테이블: | col1 | col2 | (파이프 구분, |---|---| 구분선 포함)\n * - 순서/비순서 리스트: - / 1.\n * - 구분선: ---, ***, ___\n * - 일반 텍스트 (paragraph)\n */\nexport function markdownToBlocks(markdown: string, pageNumber: number): IRBlock[] {\n const blocks: IRBlock[] = []\n const lines = markdown.split(\"\\n\")\n let i = 0\n\n while (i < lines.length) {\n const line = lines[i]\n\n // 빈 줄 스킵\n if (line.trim() === \"\") {\n i++\n continue\n }\n\n // 1. 헤딩: # ~ ######\n const headingMatch = line.match(/^(#{1,6})\\s+(.+)$/)\n if (headingMatch) {\n blocks.push({\n type: \"heading\",\n level: headingMatch[1].length,\n text: headingMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 2. 구분선: ---, ***, ___\n if (/^[-*_]{3,}\\s*$/.test(line.trim())) {\n blocks.push({ type: \"separator\", pageNumber })\n i++\n continue\n }\n\n // 3. 테이블: | 로 시작하는 연속 행 수집\n if (line.trim().startsWith(\"|\")) {\n const tableLines: string[] = []\n while (i < lines.length && lines[i].trim().startsWith(\"|\")) {\n tableLines.push(lines[i])\n i++\n }\n const table = parseMarkdownTable(tableLines)\n if (table) {\n blocks.push({ type: \"table\", table, pageNumber })\n }\n continue\n }\n\n // 4. 비순서 리스트: -, *, +\n const ulMatch = line.match(/^(\\s*)[-*+]\\s+(.+)$/)\n if (ulMatch) {\n blocks.push({\n type: \"list\",\n listType: \"unordered\",\n text: ulMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 5. 순서 리스트: 1.\n const olMatch = line.match(/^(\\s*)\\d+\\.\\s+(.+)$/)\n if (olMatch) {\n blocks.push({\n type: \"list\",\n listType: \"ordered\",\n text: olMatch[2].trim(),\n pageNumber,\n })\n i++\n continue\n }\n\n // 6. 일반 텍스트 — 구조적 행이 나올 때까지 병합\n const paraLines: string[] = []\n while (i < lines.length && lines[i].trim() !== \"\" && !isStructuralLine(lines[i])) {\n paraLines.push(lines[i].trim())\n i++\n }\n if (paraLines.length > 0) {\n blocks.push({\n type: \"paragraph\",\n text: paraLines.join(\"\\n\"),\n pageNumber,\n })\n }\n }\n\n return blocks\n}\n\n/**\n * 구조적 행 판별 — paragraph 병합 중단 트리거.\n */\nfunction isStructuralLine(line: string): boolean {\n if (/^#{1,6}\\s+/.test(line)) return true\n if (line.trim().startsWith(\"|\")) return true\n if (/^[-*_]{3,}\\s*$/.test(line.trim())) return true\n if (/^\\s*[-*+]\\s+/.test(line)) return true\n if (/^\\s*\\d+\\.\\s+/.test(line)) return true\n return false\n}\n\n/**\n * Markdown 테이블 행 배열을 IRTable로 변환.\n *\n * 구분선 행(|---|---|)은 제거 후 데이터 행만 파싱.\n * hasHeader: 구분선이 있었으면 true.\n */\nfunction parseMarkdownTable(lines: string[]): IRTable | null {\n const hasSeparator = lines.some(line => /^\\|[\\s:|-]+\\|$/.test(line.trim()))\n\n const rows: IRCell[][] = []\n let maxCols = 0\n\n for (const line of lines) {\n // 구분선 행 스킵: |---|---| 패턴\n if (/^\\|\\s*:?-+:?\\s*(\\|\\s*:?-+:?\\s*)+\\|?\\s*$/.test(line.trim())) continue\n\n const parts = line.split(\"|\")\n // 앞뒤 빈 요소 제거 (| 로 시작/종료하는 행)\n const cells: IRCell[] = parts\n .slice(1, parts[parts.length - 1].trim() === \"\" ? -1 : undefined)\n .map(cell => ({\n text: cell.trim(),\n colSpan: 1,\n rowSpan: 1,\n }))\n\n if (cells.length > 0) {\n rows.push(cells)\n maxCols = Math.max(maxCols, cells.length)\n }\n }\n\n if (rows.length === 0) return null\n\n // 열 수 통일 (부족한 셀은 빈 셀로 채움)\n for (const row of rows) {\n while (row.length < maxCols) {\n row.push({ text: \"\", colSpan: 1, rowSpan: 1 })\n }\n }\n\n return {\n rows: rows.length,\n cols: maxCols,\n cells: rows,\n hasHeader: hasSeparator && rows.length > 1,\n }\n}\n"],"mappings":";;;AAoBO,SAAS,iBAAiB,UAAkB,YAA+B;AAChF,QAAM,SAAoB,CAAC;AAC3B,QAAM,QAAQ,SAAS,MAAM,IAAI;AACjC,MAAI,IAAI;AAER,SAAO,IAAI,MAAM,QAAQ;AACvB,UAAM,OAAO,MAAM,CAAC;AAGpB,QAAI,KAAK,KAAK,MAAM,IAAI;AACtB;AACA;AAAA,IACF;AAGA,UAAM,eAAe,KAAK,MAAM,mBAAmB;AACnD,QAAI,cAAc;AAChB,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,OAAO,aAAa,CAAC,EAAE;AAAA,QACvB,MAAM,aAAa,CAAC,EAAE,KAAK;AAAA,QAC3B;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,QAAI,iBAAiB,KAAK,KAAK,KAAK,CAAC,GAAG;AACtC,aAAO,KAAK,EAAE,MAAM,aAAa,WAAW,CAAC;AAC7C;AACA;AAAA,IACF;AAGA,QAAI,KAAK,KAAK,EAAE,WAAW,GAAG,GAAG;AAC/B,YAAM,aAAuB,CAAC;AAC9B,aAAO,IAAI,MAAM,UAAU,MAAM,CAAC,EAAE,KAAK,EAAE,WAAW,GAAG,GAAG;AAC1D,mBAAW,KAAK,MAAM,CAAC,CAAC;AACxB;AAAA,MACF;AACA,YAAM,QAAQ,mBAAmB,UAAU;AAC3C,UAAI,OAAO;AACT,eAAO,KAAK,EAAE,MAAM,SAAS,OAAO,WAAW,CAAC;AAAA,MAClD;AACA;AAAA,IACF;AAGA,UAAM,UAAU,KAAK,MAAM,qBAAqB;AAChD,QAAI,SAAS;AACX,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,UAAU;AAAA,QACV,MAAM,QAAQ,CAAC,EAAE,KAAK;AAAA,QACtB;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,UAAM,UAAU,KAAK,MAAM,qBAAqB;AAChD,QAAI,SAAS;AACX,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,UAAU;AAAA,QACV,MAAM,QAAQ,CAAC,EAAE,KAAK;AAAA,QACtB;AAAA,MACF,CAAC;AACD;AACA;AAAA,IACF;AAGA,UAAM,YAAsB,CAAC;AAC7B,WAAO,IAAI,MAAM,UAAU,MAAM,CAAC,EAAE,KAAK,MAAM,MAAM,CAAC,iBAAiB,MAAM,CAAC,CAAC,GAAG;AAChF,gBAAU,KAAK,MAAM,CAAC,EAAE,KAAK,CAAC;AAC9B;AAAA,IACF;AACA,QAAI,UAAU,SAAS,GAAG;AACxB,aAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,MAAM,UAAU,KAAK,IAAI;AAAA,QACzB;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO;AACT;AAKA,SAAS,iBAAiB,MAAuB;AAC/C,MAAI,aAAa,KAAK,IAAI,EAAG,QAAO;AACpC,MAAI,KAAK,KAAK,EAAE,WAAW,GAAG,EAAG,QAAO;AACxC,MAAI,iBAAiB,KAAK,KAAK,KAAK,CAAC,EAAG,QAAO;AAC/C,MAAI,eAAe,KAAK,IAAI,EAAG,QAAO;AACtC,MAAI,eAAe,KAAK,IAAI,EAAG,QAAO;AACtC,SAAO;AACT;AAQA,SAAS,mBAAmB,OAAiC;AAC3D,QAAM,eAAe,MAAM,KAAK,UAAQ,iBAAiB,KAAK,KAAK,KAAK,CAAC,CAAC;AAE1E,QAAM,OAAmB,CAAC;AAC1B,MAAI,UAAU;AAEd,aAAW,QAAQ,OAAO;AAExB,QAAI,0CAA0C,KAAK,KAAK,KAAK,CAAC,EAAG;AAEjE,UAAM,QAAQ,KAAK,MAAM,GAAG;AAE5B,UAAM,QAAkB,MACrB,MAAM,GAAG,MAAM,MAAM,SAAS,CAAC,EAAE,KAAK,MAAM,KAAK,KAAK,MAAS,EAC/D,IAAI,WAAS;AAAA,MACZ,MAAM,KAAK,KAAK;AAAA,MAChB,SAAS;AAAA,MACT,SAAS;AAAA,IACX,EAAE;AAEJ,QAAI,MAAM,SAAS,GAAG;AACpB,WAAK,KAAK,KAAK;AACf,gBAAU,KAAK,IAAI,SAAS,MAAM,MAAM;AAAA,IAC1C;AAAA,EACF;AAEA,MAAI,KAAK,WAAW,EAAG,QAAO;AAG9B,aAAW,OAAO,MAAM;AACtB,WAAO,IAAI,SAAS,SAAS;AAC3B,UAAI,KAAK,EAAE,MAAM,IAAI,SAAS,GAAG,SAAS,EAAE,CAAC;AAAA,IAC/C;AAAA,EACF;AAEA,SAAO;AAAA,IACL,MAAM,KAAK;AAAA,IACX,MAAM;AAAA,IACN,OAAO;AAAA,IACP,WAAW,gBAAgB,KAAK,SAAS;AAAA,EAC3C;AACF;","names":[]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/ocr/cli-provider.ts"],"sourcesContent":["/**\n * CLI 기반 OCR 프로바이더\n *\n * gemini / claude / codex / ollama CLI를 subprocess로 호출하여\n * PDF 페이지 이미지를 Markdown으로 변환.\n *\n * 이미지 전달 방식:\n * - gemini: -p \"프롬프트 @이미지경로\" (@ 파일 참조)\n * - claude: -p \"프롬프트 @이미지경로\" (@ 파일 참조, --print 모드)\n * - codex: exec -i 이미지경로 \"프롬프트\" (-i/--image 플래그)\n * - ollama: REST API (localhost:11434) — CLI는 이미지 입력 미지원\n */\n\nimport { spawnSync } from \"child_process\"\nimport { writeFileSync, readFileSync, unlinkSync, mkdirSync } from \"fs\"\nimport { join } from \"path\"\nimport { tmpdir } from \"os\"\nimport type { OcrMode, StructuredOcrResult } from \"../types.js\"\n\n/** OCR 프롬프트 — 모든 CLI 공통 */\nconst OCR_PROMPT = `이 PDF 페이지 이미지에서 텍스트와 테이블을 추출하여 순수 Markdown으로 변환하세요.\n규칙:\n- 테이블은 Markdown 테이블 문법 사용 (| 구분, |---|---| 헤더 구분선 포함)\n- 병합된 셀은 해당 위치에 내용 기재\n- 헤딩은 글자 크기에 따라 ## ~ ###### 사용\n- 리스트는 - 또는 1. 사용\n- 이미지, 도형 등 비텍스트 요소는 무시\n- 원문의 읽기 순서와 구조를 유지\n- \\`\\`\\`로 감싸지 말고 순수 Markdown만 출력`\n\n/** 임시 디렉토리 (프로세스당 1회 생성)\n *\n * gemini CLI는 /tmp/ 등 시스템 임시 디렉토리를 워크스페이스 외부로 간주하여\n * @파일참조 시 접근을 거부할 수 있음. cwd 하위 폴더를 사용하면 모든 CLI에서 접근 가능.\n *\n * ⚠️ .gitignore에 포함된 경로(예: .kordoc-tmp/)는 gemini CLI가 무시하므로\n * 반드시 gitignore되지 않는 이름 사용. 파일은 try/finally로 즉시 정리.\n */\nlet _tempDir: string | null = null\nfunction getTempDir(): string {\n if (!_tempDir) {\n _tempDir = join(process.cwd(), \"_kordoc_ocr_tmp\")\n mkdirSync(_tempDir, { recursive: true })\n }\n return _tempDir\n}\n\n/**\n * CLI OcrProvider 생성.\n *\n * @param mode - 사용할 CLI (gemini, claude, codex, ollama)\n * @returns OcrProvider 함수 (StructuredOcrResult 반환)\n */\nexport function createCliOcrProvider(\n mode: Exclude<OcrMode, \"auto\" | \"off\" | \"tesseract\">\n): (pageImage: Uint8Array, pageNumber: number, mimeType: \"image/png\") => Promise<StructuredOcrResult> {\n return async (pageImage: Uint8Array, pageNumber: number): Promise<StructuredOcrResult> => {\n const tempPath = join(getTempDir(), `page-${pageNumber}.png`)\n\n try {\n writeFileSync(tempPath, pageImage)\n\n let output: string\n if (mode === \"ollama\") {\n output = await callOllamaApi(tempPath)\n } else {\n output = callCli(mode, tempPath)\n }\n\n return { markdown: stripCodeFence(output.trim()) }\n } finally {\n try { unlinkSync(tempPath) } catch { /* 임시 파일 정리 실패 무시 */ }\n }\n }\n}\n\n/**\n * CLI 실행 — gemini / claude / codex\n *\n * @throws CLI 실행 실패 또는 타임아웃(180초) 시 Error\n */\nfunction callCli(mode: string, imagePath: string): string {\n // codex는 --output-last-message로 대화 헤더 없는 깔끔한 출력 사용\n if (mode === \"codex\") {\n return callCodexCli(imagePath)\n }\n\n const args = buildCliArgs(mode, imagePath)\n\n const result = spawnSync(mode, args, {\n encoding: \"utf-8\",\n timeout: 600_000,\n maxBuffer: 10 * 1024 * 1024,\n // claude: /tmp에서 실행하여 프로젝트 CLAUDE.md의 규칙 간섭 방지\n ...(mode === \"claude\" ? { cwd: tmpdir() } : {}),\n })\n\n if (result.error) {\n throw new Error(`${mode} CLI 실행 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`${mode} OCR 실패: ${errMsg}`)\n }\n\n return result.stdout || \"\"\n}\n\n/**\n * codex exec 실행 — --output-last-message로 대화 헤더 없는 깔끔한 출력.\n * 인자 순서: `codex exec <prompt> --image <file> --output-last-message <outfile>`\n */\nfunction callCodexCli(imagePath: string): string {\n // 출력 파일은 /tmp/ 사용 — codex sandbox는 cwd 내 쓰기를 막을 수 있음\n const outPath = join(tmpdir(), `kordoc-codex-out-${Date.now()}.txt`)\n try {\n const args = [\"exec\", OCR_PROMPT, \"--image\", imagePath, \"--output-last-message\", outPath]\n const model = process.env.KORDOC_CODEX_MODEL\n if (model) args.push(\"--model\", model)\n\n const result = spawnSync(\"codex\", args, {\n encoding: \"utf-8\",\n timeout: 180_000,\n maxBuffer: 10 * 1024 * 1024,\n input: \"\", // stdin EOF 즉시 전달 (대화형 입력 차단)\n })\n\n if (result.error) {\n throw new Error(`codex CLI 실행 실패: ${result.error.message}`)\n }\n if (result.status !== 0) {\n const errMsg = result.stderr?.trim() || `exit code ${result.status}`\n throw new Error(`codex OCR 실패: ${errMsg}`)\n }\n\n // --output-last-message 파일에서 읽기 (없으면 stdout 폴백)\n try {\n return readFileSync(outPath, \"utf-8\")\n } catch {\n return result.stdout || \"\"\n }\n } finally {\n try { unlinkSync(outPath) } catch { /* 무시 */ }\n }\n}\n\n/**\n * CLI별 인자 배열 생성.\n *\n * gemini: [\"--prompt\", \"프롬프트 @이미지경로\", \"--yolo\"]\n * - -y/--yolo: 자동 승인 (OCR은 도구 사용 없으므로 실질적 영향 없음)\n * - @ 파일 참조로 이미지를 컨텍스트에 포함\n *\n * claude: [\"--print\", \"프롬프트 @이미지경로\"]\n * - --print(-p): 비대화형 출력 모드\n * - @ 파일 참조로 이미지를 컨텍스트에 포함\n *\n * codex: callCodexCli()에서 별도 처리\n * - `codex exec <prompt> --image <file> --output-last-message <outfile>`\n * - 프롬프트가 --image보다 앞에 위치해야 함 (인자 순서 중요)\n *\n * ⚠️ CLI 버전에 따라 문법이 다를 수 있음. 업데이트 시 --help 재확인 필요.\n */\nfunction buildCliArgs(mode: string, imagePath: string): string[] {\n const promptWithImage = `${OCR_PROMPT}\n\n이미지: @${imagePath}`\n\n switch (mode) {\n case \"gemini\": {\n const args = [\"--prompt\", promptWithImage, \"--yolo\"]\n const model = process.env.KORDOC_GEMINI_MODEL\n if (model) args.push(\"--model\", model)\n return args\n }\n\n case \"claude\": {\n const args = [\"--print\", promptWithImage]\n const model = process.env.KORDOC_CLAUDE_MODEL\n if (model) args.push(\"--model\", model)\n return args\n }\n\n default:\n throw new Error(`지원하지 않는 CLI: ${mode}`)\n }\n}\n\n/**\n * Ollama REST API 호출 — CLI는 이미지 입력을 지원하지 않으므로 API 직접 사용.\n *\n * 기본 모델: KORDOC_OLLAMA_MODEL 환경변수 또는 \"gemma4:27b\"\n * 기본 호스트: KORDOC_OLLAMA_HOST 환경변수 또는 \"http://localhost:11434\"\n *\n * @throws Ollama 서버 미실행 또는 응답 오류 시 Error\n */\nasync function callOllamaApi(imagePath: string): Promise<string> {\n const { readFileSync } = await import(\"fs\")\n const imageBase64 = readFileSync(imagePath).toString(\"base64\")\n\n const model = process.env.KORDOC_OLLAMA_MODEL || \"qwen3-vl:8b\"\n const host = process.env.KORDOC_OLLAMA_HOST || \"http://localhost:11434\"\n const timeoutMs = Number(process.env.KORDOC_OLLAMA_TIMEOUT) || 120_000\n\n const response = await fetch(`${host}/api/chat`, {\n method: \"POST\",\n headers: { \"Content-Type\": \"application/json\" },\n body: JSON.stringify({\n model,\n messages: [{\n role: \"user\",\n content: OCR_PROMPT,\n images: [imageBase64],\n }],\n stream: false,\n }),\n signal: AbortSignal.timeout(timeoutMs),\n })\n\n if (!response.ok) {\n throw new Error(`Ollama API 오류: ${response.status} ${response.statusText}`)\n }\n\n const data = await response.json() as { message?: { content?: string } }\n return data.message?.content || \"\"\n}\n\n/**\n * LLM 출력에서 코드 펜스 제거.\n * LLM이 가끔 결과를 ```markdown ... ``` 으로 감싸는 경우 처리.\n */\nfunction stripCodeFence(text: string): string {\n const match = text.match(/^```(?:markdown|md)?\\s*([\\s\\S]*?)```\\s*$/m)\n return match ? match[1].trim() : text\n}\n"],"mappings":";;;AAaA,SAAS,iBAAiB;AAC1B,SAAS,eAAe,cAAc,YAAY,iBAAiB;AACnE,SAAS,YAAY;AACrB,SAAS,cAAc;AAIvB,IAAM,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAkBnB,IAAI,WAA0B;AAC9B,SAAS,aAAqB;AAC5B,MAAI,CAAC,UAAU;AACb,eAAW,KAAK,QAAQ,IAAI,GAAG,iBAAiB;AAChD,cAAU,UAAU,EAAE,WAAW,KAAK,CAAC;AAAA,EACzC;AACA,SAAO;AACT;AAQO,SAAS,qBACd,MACoG;AACpG,SAAO,OAAO,WAAuB,eAAqD;AACxF,UAAM,WAAW,KAAK,WAAW,GAAG,QAAQ,UAAU,MAAM;AAE5D,QAAI;AACF,oBAAc,UAAU,SAAS;AAEjC,UAAI;AACJ,UAAI,SAAS,UAAU;AACrB,iBAAS,MAAM,cAAc,QAAQ;AAAA,MACvC,OAAO;AACL,iBAAS,QAAQ,MAAM,QAAQ;AAAA,MACjC;AAEA,aAAO,EAAE,UAAU,eAAe,OAAO,KAAK,CAAC,EAAE;AAAA,IACnD,UAAE;AACA,UAAI;AAAE,mBAAW,QAAQ;AAAA,MAAE,QAAQ;AAAA,MAAuB;AAAA,IAC5D;AAAA,EACF;AACF;AAOA,SAAS,QAAQ,MAAc,WAA2B;AAExD,MAAI,SAAS,SAAS;AACpB,WAAO,aAAa,SAAS;AAAA,EAC/B;AAEA,QAAM,OAAO,aAAa,MAAM,SAAS;AAEzC,QAAM,SAAS,UAAU,MAAM,MAAM;AAAA,IACnC,UAAU;AAAA,IACV,SAAS;AAAA,IACT,WAAW,KAAK,OAAO;AAAA;AAAA,IAEvB,GAAI,SAAS,WAAW,EAAE,KAAK,OAAO,EAAE,IAAI,CAAC;AAAA,EAC/C,CAAC;AAED,MAAI,OAAO,OAAO;AAChB,UAAM,IAAI,MAAM,GAAG,IAAI,mCAAe,OAAO,MAAM,OAAO,EAAE;AAAA,EAC9D;AACA,MAAI,OAAO,WAAW,GAAG;AACvB,UAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,UAAM,IAAI,MAAM,GAAG,IAAI,sBAAY,MAAM,EAAE;AAAA,EAC7C;AAEA,SAAO,OAAO,UAAU;AAC1B;AAMA,SAAS,aAAa,WAA2B;AAE/C,QAAM,UAAU,KAAK,OAAO,GAAG,oBAAoB,KAAK,IAAI,CAAC,MAAM;AACnE,MAAI;AACF,UAAM,OAAO,CAAC,QAAQ,YAAY,WAAW,WAAW,yBAAyB,OAAO;AACxF,UAAM,QAAQ,QAAQ,IAAI;AAC1B,QAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AAErC,UAAM,SAAS,UAAU,SAAS,MAAM;AAAA,MACtC,UAAU;AAAA,MACV,SAAS;AAAA,MACT,WAAW,KAAK,OAAO;AAAA,MACvB,OAAO;AAAA;AAAA,IACT,CAAC;AAED,QAAI,OAAO,OAAO;AAChB,YAAM,IAAI,MAAM,wCAAoB,OAAO,MAAM,OAAO,EAAE;AAAA,IAC5D;AACA,QAAI,OAAO,WAAW,GAAG;AACvB,YAAM,SAAS,OAAO,QAAQ,KAAK,KAAK,aAAa,OAAO,MAAM;AAClE,YAAM,IAAI,MAAM,2BAAiB,MAAM,EAAE;AAAA,IAC3C;AAGA,QAAI;AACF,aAAO,aAAa,SAAS,OAAO;AAAA,IACtC,QAAQ;AACN,aAAO,OAAO,UAAU;AAAA,IAC1B;AAAA,EACF,UAAE;AACA,QAAI;AAAE,iBAAW,OAAO;AAAA,IAAE,QAAQ;AAAA,IAAW;AAAA,EAC/C;AACF;AAmBA,SAAS,aAAa,MAAc,WAA6B;AAC/D,QAAM,kBAAkB,GAAG,UAAU;AAAA;AAAA,uBAE/B,SAAS;AAEf,UAAQ,MAAM;AAAA,IACZ,KAAK,UAAU;AACb,YAAM,OAAO,CAAC,YAAY,iBAAiB,QAAQ;AACnD,YAAM,QAAQ,QAAQ,IAAI;AAC1B,UAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AACrC,aAAO;AAAA,IACT;AAAA,IAEA,KAAK,UAAU;AACb,YAAM,OAAO,CAAC,WAAW,eAAe;AACxC,YAAM,QAAQ,QAAQ,IAAI;AAC1B,UAAI,MAAO,MAAK,KAAK,WAAW,KAAK;AACrC,aAAO;AAAA,IACT;AAAA,IAEA;AACE,YAAM,IAAI,MAAM,8CAAgB,IAAI,EAAE;AAAA,EAC1C;AACF;AAUA,eAAe,cAAc,WAAoC;AAC/D,QAAM,EAAE,cAAAA,cAAa,IAAI,MAAM,OAAO,IAAI;AAC1C,QAAM,cAAcA,cAAa,SAAS,EAAE,SAAS,QAAQ;AAE7D,QAAM,QAAQ,QAAQ,IAAI,uBAAuB;AACjD,QAAM,OAAO,QAAQ,IAAI,sBAAsB;AAC/C,QAAM,YAAY,OAAO,QAAQ,IAAI,qBAAqB,KAAK;AAE/D,QAAM,WAAW,MAAM,MAAM,GAAG,IAAI,aAAa;AAAA,IAC/C,QAAQ;AAAA,IACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,IAC9C,MAAM,KAAK,UAAU;AAAA,MACnB;AAAA,MACA,UAAU,CAAC;AAAA,QACT,MAAM;AAAA,QACN,SAAS;AAAA,QACT,QAAQ,CAAC,WAAW;AAAA,MACtB,CAAC;AAAA,MACD,QAAQ;AAAA,IACV,CAAC;AAAA,IACD,QAAQ,YAAY,QAAQ,SAAS;AAAA,EACvC,CAAC;AAED,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,IAAI,MAAM,4BAAkB,SAAS,MAAM,IAAI,SAAS,UAAU,EAAE;AAAA,EAC5E;AAEA,QAAM,OAAO,MAAM,SAAS,KAAK;AACjC,SAAO,KAAK,SAAS,WAAW;AAClC;AAMA,SAAS,eAAe,MAAsB;AAC5C,QAAM,QAAQ,KAAK,MAAM,2CAA2C;AACpE,SAAO,QAAQ,MAAM,CAAC,EAAE,KAAK,IAAI;AACnC;","names":["readFileSync"]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/watch.ts"],"sourcesContent":["/** 디렉토리 감시 모드 — 새 문서 자동 변환 + Webhook 알림 */\n\nimport { watch, readFileSync, writeFileSync, mkdirSync, statSync, existsSync } from \"fs\"\nimport { basename, resolve, extname } from \"path\"\nimport { parse, detectFormat } from \"./index.js\"\nimport { toArrayBuffer } from \"./utils.js\"\nimport type { WatchOptions } from \"./types.js\"\n\nconst SUPPORTED_EXTENSIONS = new Set([\".hwp\", \".hwpx\", \".pdf\", \".xlsx\", \".docx\"])\nconst DEBOUNCE_MS = 1000\n/** 파일 쓰기 완료 판정: 연속 2회 동일 크기 확인 간격 */\nconst STABLE_CHECK_MS = 300\nconst MAX_FILE_SIZE = 500 * 1024 * 1024\n\n/**\n * 디렉토리를 감시하여 새 문서 파일을 자동 변환.\n *\n * @example\n * ```bash\n * kordoc watch ./incoming -d ./output --webhook https://api.example.com/docs\n * ```\n */\nexport async function watchDirectory(options: WatchOptions): Promise<void> {\n const { dir, outDir, webhook, format = \"markdown\", pages, silent } = options\n\n if (!existsSync(dir)) throw new Error(`디렉토리를 찾을 수 없습니다: ${dir}`)\n if (webhook) validateWebhookUrl(webhook)\n if (outDir) mkdirSync(outDir, { recursive: true })\n\n const log = silent ? () => {} : (msg: string) => process.stderr.write(msg + \"\\n\")\n log(`[kordoc watch] 감시 시작: ${resolve(dir)}`)\n if (outDir) log(`[kordoc watch] 출력: ${resolve(outDir)}`)\n if (webhook) log(`[kordoc watch] 웹훅: ${webhook}`)\n\n // 디바운스 맵\n const pending = new Map<string, ReturnType<typeof setTimeout>>()\n\n /** 파일 크기가 안정화될 때까지 대기 (쓰기 완료 감지) */\n const waitForStableSize = async (absPath: string): Promise<number> => {\n let prevSize = statSync(absPath).size\n await new Promise(r => setTimeout(r, STABLE_CHECK_MS))\n if (!existsSync(absPath)) return 0\n const currSize = statSync(absPath).size\n if (currSize !== prevSize) {\n // 크기가 변했으면 한 번 더 대기\n await new Promise(r => setTimeout(r, STABLE_CHECK_MS))\n if (!existsSync(absPath)) return 0\n return statSync(absPath).size\n }\n return currSize\n }\n\n const processFile = async (filePath: string) => {\n const ext = extname(filePath).toLowerCase()\n if (!SUPPORTED_EXTENSIONS.has(ext)) return\n\n const fileName = basename(filePath)\n try {\n const absPath = resolve(dir, filePath)\n // 경로 순회 방지 — 감시 디렉토리 외부 파일 차단\n const realDir = resolve(dir)\n if (!absPath.startsWith(realDir)) return\n if (!existsSync(absPath)) return\n\n const fileSize = await waitForStableSize(absPath)\n if (fileSize > MAX_FILE_SIZE || fileSize === 0) return\n\n log(`[kordoc watch] 변환 중: ${fileName}`)\n\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const parseOptions = pages ? { pages } : undefined\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n log(`[kordoc watch] 실패: ${fileName} — ${result.error}`)\n await sendWebhook(webhook, { file: fileName, format: detectFormat(arrayBuffer), success: false, error: result.error })\n return\n }\n\n const output = format === \"json\" ? JSON.stringify(result, null, 2) : result.markdown\n\n if (outDir) {\n const outExt = format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n log(`[kordoc watch] 완료: ${fileName} → ${basename(outPath)}`)\n } else {\n process.stdout.write(output + \"\\n\")\n }\n\n await sendWebhook(webhook, {\n file: fileName,\n format: result.fileType,\n success: true,\n markdown: format === \"markdown\" ? output.substring(0, 1000) : undefined,\n })\n } catch (err) {\n log(`[kordoc watch] 에러: ${fileName} — ${err instanceof Error ? err.message : err}`)\n }\n }\n\n // fs.watch recursive (Node 18+ Windows/macOS, Node 19+ Linux)\n watch(dir, { recursive: true }, (event, filename) => {\n if (!filename) return\n const filePath = filename.toString()\n\n // 디바운스\n const existing = pending.get(filePath)\n if (existing) clearTimeout(existing)\n pending.set(filePath, setTimeout(() => {\n pending.delete(filePath)\n processFile(filePath).catch(() => {})\n }, DEBOUNCE_MS))\n })\n\n // 프로세스 종료 방지 (Ctrl+C로 종료)\n return new Promise(() => {})\n}\n\n/** Webhook URL 검증 — SSRF 방지: http/https만 허용, localhost/private IP 차단 */\nfunction validateWebhookUrl(url: string): void {\n let parsed: URL\n try {\n parsed = new URL(url)\n } catch {\n throw new Error(`유효하지 않은 webhook URL: ${url}`)\n }\n if (parsed.protocol !== \"http:\" && parsed.protocol !== \"https:\") {\n throw new Error(`허용되지 않는 webhook 프로토콜: ${parsed.protocol}`)\n }\n const hostname = parsed.hostname.toLowerCase()\n if (\n hostname === \"localhost\" ||\n hostname === \"[::1]\" ||\n hostname.startsWith(\"127.\") ||\n hostname.startsWith(\"10.\") ||\n hostname.startsWith(\"192.168.\") ||\n /^172\\.(1[6-9]|2\\d|3[01])\\./.test(hostname) ||\n hostname === \"0.0.0.0\" ||\n hostname.startsWith(\"169.254.\") ||\n hostname.endsWith(\".local\") ||\n // IPv6 사설 대역\n hostname.startsWith(\"[fc\") ||\n hostname.startsWith(\"[fd\") ||\n hostname.startsWith(\"[fe80:\") ||\n hostname === \"[::0]\" ||\n hostname === \"[::]\" ||\n // 클라우드 메타데이터 엔드포인트\n hostname === \"metadata.google.internal\" ||\n hostname === \"metadata.google\" ||\n // 16진수/8진수 IP 인코딩 우회 방지\n /^0x[0-9a-f]+$/i.test(hostname) ||\n /^0[0-7]+$/.test(hostname)\n ) {\n throw new Error(`내부 네트워크 대상 webhook은 허용되지 않습니다: ${hostname}`)\n }\n}\n\nasync function sendWebhook(url: string | undefined, payload: Record<string, unknown>): Promise<void> {\n if (!url) return\n try {\n validateWebhookUrl(url)\n await fetch(url, {\n method: \"POST\",\n headers: { \"Content-Type\": \"application/json\" },\n body: JSON.stringify({ ...payload, timestamp: new Date().toISOString() }),\n })\n } catch {\n // webhook 실패는 조용히 무시\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;AAEA,SAAS,OAAO,cAAc,eAAe,WAAW,UAAU,kBAAkB;AACpF,SAAS,UAAU,SAAS,eAAe;AAK3C,IAAM,uBAAuB,oBAAI,IAAI,CAAC,QAAQ,SAAS,QAAQ,SAAS,OAAO,CAAC;AAChF,IAAM,cAAc;AAEpB,IAAM,kBAAkB;AACxB,IAAM,gBAAgB,MAAM,OAAO;AAUnC,eAAsB,eAAe,SAAsC;AACzE,QAAM,EAAE,KAAK,QAAQ,SAAS,SAAS,YAAY,OAAO,OAAO,IAAI;AAErE,MAAI,CAAC,WAAW,GAAG,EAAG,OAAM,IAAI,MAAM,gFAAoB,GAAG,EAAE;AAC/D,MAAI,QAAS,oBAAmB,OAAO;AACvC,MAAI,OAAQ,WAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AAEjD,QAAM,MAAM,SAAS,MAAM;AAAA,EAAC,IAAI,CAAC,QAAgB,QAAQ,OAAO,MAAM,MAAM,IAAI;AAChF,MAAI,6CAAyB,QAAQ,GAAG,CAAC,EAAE;AAC3C,MAAI,OAAQ,KAAI,gCAAsB,QAAQ,MAAM,CAAC,EAAE;AACvD,MAAI,QAAS,KAAI,gCAAsB,OAAO,EAAE;AAGhD,QAAM,UAAU,oBAAI,IAA2C;AAG/D,QAAM,oBAAoB,OAAO,YAAqC;AACpE,QAAI,WAAW,SAAS,OAAO,EAAE;AACjC,UAAM,IAAI,QAAQ,OAAK,WAAW,GAAG,eAAe,CAAC;AACrD,QAAI,CAAC,WAAW,OAAO,EAAG,QAAO;AACjC,UAAM,WAAW,SAAS,OAAO,EAAE;AACnC,QAAI,aAAa,UAAU;AAEzB,YAAM,IAAI,QAAQ,OAAK,WAAW,GAAG,eAAe,CAAC;AACrD,UAAI,CAAC,WAAW,OAAO,EAAG,QAAO;AACjC,aAAO,SAAS,OAAO,EAAE;AAAA,IAC3B;AACA,WAAO;AAAA,EACT;AAEA,QAAM,cAAc,OAAO,aAAqB;AAC9C,UAAM,MAAM,QAAQ,QAAQ,EAAE,YAAY;AAC1C,QAAI,CAAC,qBAAqB,IAAI,GAAG,EAAG;AAEpC,UAAM,WAAW,SAAS,QAAQ;AAClC,QAAI;AACF,YAAM,UAAU,QAAQ,KAAK,QAAQ;AAErC,YAAM,UAAU,QAAQ,GAAG;AAC3B,UAAI,CAAC,QAAQ,WAAW,OAAO,EAAG;AAClC,UAAI,CAAC,WAAW,OAAO,EAAG;AAE1B,YAAM,WAAW,MAAM,kBAAkB,OAAO;AAChD,UAAI,WAAW,iBAAiB,aAAa,EAAG;AAEhD,UAAI,uCAAwB,QAAQ,EAAE;AAEtC,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,eAAe,QAAQ,EAAE,MAAM,IAAI;AACzC,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,YAAI,gCAAsB,QAAQ,WAAM,OAAO,KAAK,EAAE;AACtD,cAAM,YAAY,SAAS,EAAE,MAAM,UAAU,QAAQ,aAAa,WAAW,GAAG,SAAS,OAAO,OAAO,OAAO,MAAM,CAAC;AACrH;AAAA,MACF;AAEA,YAAM,SAAS,WAAW,SAAS,KAAK,UAAU,QAAQ,MAAM,CAAC,IAAI,OAAO;AAE5E,UAAI,QAAQ;AACV,cAAM,SAAS,WAAW,SAAS,UAAU;AAC7C,cAAM,UAAU,QAAQ,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACpE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,gCAAsB,QAAQ,WAAM,SAAS,OAAO,CAAC,EAAE;AAAA,MAC7D,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAEA,YAAM,YAAY,SAAS;AAAA,QACzB,MAAM;AAAA,QACN,QAAQ,OAAO;AAAA,QACf,SAAS;AAAA,QACT,UAAU,WAAW,aAAa,OAAO,UAAU,GAAG,GAAI,IAAI;AAAA,MAChE,CAAC;AAAA,IACH,SAAS,KAAK;AACZ,UAAI,gCAAsB,QAAQ,WAAM,eAAe,QAAQ,IAAI,UAAU,GAAG,EAAE;AAAA,IACpF;AAAA,EACF;AAGA,QAAM,KAAK,EAAE,WAAW,KAAK,GAAG,CAAC,OAAO,aAAa;AACnD,QAAI,CAAC,SAAU;AACf,UAAM,WAAW,SAAS,SAAS;AAGnC,UAAM,WAAW,QAAQ,IAAI,QAAQ;AACrC,QAAI,SAAU,cAAa,QAAQ;AACnC,YAAQ,IAAI,UAAU,WAAW,MAAM;AACrC,cAAQ,OAAO,QAAQ;AACvB,kBAAY,QAAQ,EAAE,MAAM,MAAM;AAAA,MAAC,CAAC;AAAA,IACtC,GAAG,WAAW,CAAC;AAAA,EACjB,CAAC;AAGD,SAAO,IAAI,QAAQ,MAAM;AAAA,EAAC,CAAC;AAC7B;AAGA,SAAS,mBAAmB,KAAmB;AAC7C,MAAI;AACJ,MAAI;AACF,aAAS,IAAI,IAAI,GAAG;AAAA,EACtB,QAAQ;AACN,UAAM,IAAI,MAAM,sDAAwB,GAAG,EAAE;AAAA,EAC/C;AACA,MAAI,OAAO,aAAa,WAAW,OAAO,aAAa,UAAU;AAC/D,UAAM,IAAI,MAAM,2EAAyB,OAAO,QAAQ,EAAE;AAAA,EAC5D;AACA,QAAM,WAAW,OAAO,SAAS,YAAY;AAC7C,MACE,aAAa,eACb,aAAa,WACb,SAAS,WAAW,MAAM,KAC1B,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,UAAU,KAC9B,6BAA6B,KAAK,QAAQ,KAC1C,aAAa,aACb,SAAS,WAAW,UAAU,KAC9B,SAAS,SAAS,QAAQ;AAAA,EAE1B,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,KAAK,KACzB,SAAS,WAAW,QAAQ,KAC5B,aAAa,WACb,aAAa;AAAA,EAEb,aAAa,8BACb,aAAa;AAAA,EAEb,iBAAiB,KAAK,QAAQ,KAC9B,YAAY,KAAK,QAAQ,GACzB;AACA,UAAM,IAAI,MAAM,uHAAkC,QAAQ,EAAE;AAAA,EAC9D;AACF;AAEA,eAAe,YAAY,KAAyB,SAAiD;AACnG,MAAI,CAAC,IAAK;AACV,MAAI;AACF,uBAAmB,GAAG;AACtB,UAAM,MAAM,KAAK;AAAA,MACf,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,EAAE,GAAG,SAAS,YAAW,oBAAI,KAAK,GAAE,YAAY,EAAE,CAAC;AAAA,IAC1E,CAAC;AAAA,EACH,QAAQ;AAAA,EAER;AACF;","names":[]}