@clazic/kordoc 2.4.17 → 2.4.19

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/dist/{auto-detect-2YGFYQCN.js → auto-detect-CBYICI6B.js} +4 -4
  2. package/dist/{chunk-WM3XI23V.js → chunk-463YQ2WL.js} +38 -25
  3. package/dist/chunk-463YQ2WL.js.map +1 -0
  4. package/dist/{chunk-7NOZFYH6.js → chunk-CLK4PNZ7.js} +7 -8
  5. package/dist/chunk-CLK4PNZ7.js.map +1 -0
  6. package/dist/{chunk-W2KDIKDF.js → chunk-MZN7PLTZ.js} +2 -2
  7. package/dist/{chunk-34WIGIQC.js → chunk-Y4WFKJ5P.js} +1 -1
  8. package/dist/chunk-Y4WFKJ5P.js.map +1 -0
  9. package/dist/cli.js +9 -13
  10. package/dist/cli.js.map +1 -1
  11. package/dist/index.cjs +49 -191
  12. package/dist/index.cjs.map +1 -1
  13. package/dist/index.d.cts +5 -6
  14. package/dist/index.d.ts +5 -6
  15. package/dist/index.js +49 -190
  16. package/dist/index.js.map +1 -1
  17. package/dist/mcp.js +5 -6
  18. package/dist/mcp.js.map +1 -1
  19. package/dist/{resolve-673XFZQ6.js → resolve-XWYJYKKH.js} +15 -36
  20. package/dist/resolve-XWYJYKKH.js.map +1 -0
  21. package/dist/{utils-DHOODYKU.js → utils-YUAT7LFD.js} +2 -2
  22. package/dist/{watch-RM4VNOL4.js → watch-WEOFVVDO.js} +5 -6
  23. package/dist/{watch-RM4VNOL4.js.map → watch-WEOFVVDO.js.map} +1 -1
  24. package/package.json +1 -2
  25. package/dist/chunk-34WIGIQC.js.map +0 -1
  26. package/dist/chunk-7FMKAV4P.js +0 -56
  27. package/dist/chunk-7FMKAV4P.js.map +0 -1
  28. package/dist/chunk-7NOZFYH6.js.map +0 -1
  29. package/dist/chunk-WM3XI23V.js.map +0 -1
  30. package/dist/resolve-673XFZQ6.js.map +0 -1
  31. package/dist/tesseract-provider-MNMZPSGF.js +0 -11
  32. package/dist/utils-DHOODYKU.js.map +0 -1
  33. /package/dist/{auto-detect-2YGFYQCN.js.map → auto-detect-CBYICI6B.js.map} +0 -0
  34. /package/dist/{chunk-W2KDIKDF.js.map → chunk-MZN7PLTZ.js.map} +0 -0
  35. /package/dist/{tesseract-provider-MNMZPSGF.js.map → utils-YUAT7LFD.js.map} +0 -0
@@ -2,14 +2,14 @@
2
2
  import {
3
3
  detectAvailableOcr,
4
4
  getAutoFallbackChain,
5
- getTesseractFallbackMessage,
5
+ getNoCliMessage,
6
6
  validateOcrMode
7
- } from "./chunk-7NOZFYH6.js";
7
+ } from "./chunk-CLK4PNZ7.js";
8
8
  import "./chunk-ZWE3DS7E.js";
9
9
  export {
10
10
  detectAvailableOcr,
11
11
  getAutoFallbackChain,
12
- getTesseractFallbackMessage,
12
+ getNoCliMessage,
13
13
  validateOcrMode
14
14
  };
15
- //# sourceMappingURL=auto-detect-2YGFYQCN.js.map
15
+ //# sourceMappingURL=auto-detect-CBYICI6B.js.map
@@ -1,7 +1,4 @@
1
1
  #!/usr/bin/env node
2
- import {
3
- markdownToBlocks
4
- } from "./chunk-YW5G6BCJ.js";
5
2
  import {
6
3
  KordocError,
7
4
  classifyError,
@@ -10,16 +7,16 @@ import {
10
7
  precheckZipSize,
11
8
  sanitizeHref,
12
9
  toArrayBuffer
13
- } from "./chunk-W2KDIKDF.js";
10
+ } from "./chunk-MZN7PLTZ.js";
14
11
  import {
15
12
  parsePageRange
16
13
  } from "./chunk-MOL7MDBG.js";
17
- import {
18
- createTesseractProvider
19
- } from "./chunk-7FMKAV4P.js";
20
14
  import {
21
15
  createCliOcrProvider
22
- } from "./chunk-34WIGIQC.js";
16
+ } from "./chunk-Y4WFKJ5P.js";
17
+ import {
18
+ markdownToBlocks
19
+ } from "./chunk-YW5G6BCJ.js";
23
20
  import {
24
21
  createLoggerFromEnv,
25
22
  generateRunId
@@ -5688,7 +5685,7 @@ async function parsePdfDocument(buffer, options) {
5688
5685
  if (ocrMode === "off") {
5689
5686
  throw Object.assign(new KordocError(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`), { isImageBased: true });
5690
5687
  }
5691
- const { resolveOcrProvider } = await import("./resolve-673XFZQ6.js");
5688
+ const { resolveOcrProvider } = await import("./resolve-XWYJYKKH.js");
5692
5689
  const { ocrPages } = await import("./provider-T2D5XRTI.js");
5693
5690
  const tryProvider = async (provider, filter) => {
5694
5691
  try {
@@ -5709,7 +5706,7 @@ async function parsePdfDocument(buffer, options) {
5709
5706
  if (options?.ocr) {
5710
5707
  ocrBlocks = await tryProvider(options.ocr, pageFilter);
5711
5708
  } else if (ocrMode === "auto") {
5712
- const { getAutoFallbackChain } = await import("./auto-detect-2YGFYQCN.js");
5709
+ const { getAutoFallbackChain } = await import("./auto-detect-CBYICI6B.js");
5713
5710
  const pendingPages = /* @__PURE__ */ new Set();
5714
5711
  for (let i = 1; i <= effectivePageCount; i++) {
5715
5712
  if (!pageFilter || pageFilter.has(i)) pendingPages.add(i);
@@ -9828,13 +9825,36 @@ async function markdownToXlsx(markdown, options) {
9828
9825
  import { performance } from "perf_hooks";
9829
9826
  import libre from "libreoffice-convert";
9830
9827
  var libreConvert = libre.convert;
9831
- var PROOFREAD_PROMPT = [
9832
- "Perform non-destructive proofreading only on the Markdown below.",
9833
- "Rules:",
9834
- "- Do not add, remove, or infer any facts",
9835
- "- Do not change numbers, units, or proper nouns",
9836
- "- Correct only typos, spacing, line breaks, and Markdown structure",
9837
- "- Output the corrected Markdown body only"
9828
+ var OCR_PROMPT = [
9829
+ "\uC774 PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0\uC5D0\uC11C \uD14D\uC2A4\uD2B8\uC640 \uD45C\uB97C \uCD94\uCD9C\uD558\uC5EC Markdown\uC73C\uB85C \uBCC0\uD658\uD558\uACE0, OCR \uC624\uC778\uC2DD \uC624\uB958\uB97C \uC989\uC2DC \uAD50\uC815\uD558\uC5EC \uCD5C\uC885 \uACB0\uACFC\uBB3C\uC744 \uCD9C\uB825\uD558\uC138\uC694.",
9830
+ "",
9831
+ "\uCD94\uCD9C \uADDC\uCE59:",
9832
+ "- \uD14D\uC2A4\uD2B8, \uD45C, \uC81C\uBAA9, \uB9AC\uC2A4\uD2B8\uB97C \uC6D0\uBB38 \uAD6C\uC870 \uADF8\uB300\uB85C Markdown\uC73C\uB85C \uBCC0\uD658",
9833
+ "- \uD45C\uB294 Markdown \uD14C\uC774\uBE14 \uBB38\uBC95 \uC0AC\uC6A9 (| \uAD6C\uBD84, |---|---| \uD5E4\uB354 \uAD6C\uBD84\uC120 \uD3EC\uD568)",
9834
+ "- \uD5E4\uB529\uC740 \uC2DC\uAC01\uC801 \uD06C\uAE30\uC5D0 \uB530\uB77C # ~ ###### \uC0AC\uC6A9",
9835
+ "- \uB9AC\uC2A4\uD2B8\uB294 - \uB610\uB294 1. \uC0AC\uC6A9",
9836
+ "- \uC774\uBBF8\uC9C0, \uB3C4\uD615 \uB4F1 \uBE44\uD14D\uC2A4\uD2B8 \uC694\uC18C\uB294 \uBB34\uC2DC",
9837
+ "- \uC6D0\uBB38\uC758 \uC77D\uAE30 \uC21C\uC11C\uC640 \uAD6C\uC870\uB97C \uC720\uC9C0",
9838
+ "",
9839
+ "\uC808\uB300 \uAE08\uC9C0 \uC0AC\uD56D:",
9840
+ "- \uBB38\uC7A5\xB7\uB2E8\uB77D\xB7\uD56D\uBAA9\uC744 \uCD94\uAC00\uD558\uAC70\uB098 \uC0AD\uC81C\uD558\uC9C0 \uB9D0 \uAC83",
9841
+ "- \uC22B\uC790, \uD37C\uC13C\uD2B8, \uB0A0\uC9DC, \uB2E8\uC704, \uAE08\uC561\uC744 \uC808\uB300 \uBCC0\uACBD\uD558\uC9C0 \uB9D0 \uAC83",
9842
+ "- \uACE0\uC720\uBA85\uC0AC, \uAE30\uAD00\uBA85, \uBC95\uB839\uBA85, \uC9C0\uBA85\uC744 \uBCC0\uACBD\uD558\uC9C0 \uB9D0 \uAC83",
9843
+ "- \uD45C\uC758 \uC81C\uBAA9\uC744 \uBCC0\uACBD \uB610\uB294 \uC0AD\uC81C\uD558\uC9C0 \uB9D0 \uAC83",
9844
+ "- \uD45C\uC758 \uD589\xB7\uC5F4 \uC218, \uC140 \uB0B4\uC6A9, \uD5E4\uB354\uB97C \uBCC0\uACBD\uD558\uC9C0 \uB9D0 \uAC83",
9845
+ "- \uC81C\uBAA9 \uC218\uC900(#, ##, ### \uB4F1)\uC744 \uC784\uC758\uB85C \uBC14\uAFB8\uC9C0 \uB9D0 \uAC83",
9846
+ "- \uC6D0\uBB38\uC5D0 \uC5C6\uB294 \uB0B4\uC6A9\uC744 \uC694\uC57D\xB7\uBCF4\uC644\xB7\uCD94\uB860\uD558\uC9C0 \uB9D0 \uAC83",
9847
+ "- ` ``` `\uB85C \uAC10\uC2F8\uAC70\uB098 \uC124\uBA85 \uD14D\uC2A4\uD2B8\uB97C \uCD94\uAC00\uD558\uC9C0 \uB9D0 \uAC83",
9848
+ "",
9849
+ "\uD5C8\uC6A9\uB418\uB294 \uAD50\uC815 \uBC94\uC704 (OCR \uC624\uC778\uC2DD \uC218\uC815):",
9850
+ "- \uBA85\uBC31\uD55C \uAE00\uC790 \uC624\uC778\uC2DD \uC218\uC815 (\uC608: '0' \u2192 'O', 'l' \u2192 '1' \uB4F1 \uB9E5\uB77D\uC0C1 \uBA85\uD655\uD55C \uACBD\uC6B0\uB9CC)",
9851
+ "- \uB2E8\uC5B4 \uC911\uAC04\uC5D0 \uC798\uBABB \uC0BD\uC785\uB41C \uACF5\uBC31 \uC81C\uAC70",
9852
+ "- \uC904\uBC14\uAFC8 \uC624\uB958\uB85C \uBD84\uB9AC\uB41C \uBB38\uC7A5 \uBCD1\uD569 (\uC758\uBBF8 \uB2E8\uC704 \uAE30\uC900)",
9853
+ "- Markdown \uBB38\uBC95 \uC624\uB958 \uC218\uC815 (\uD45C \uAD6C\uBD84\uC120 \uB204\uB77D, \uB9AC\uC2A4\uD2B8 \uB4E4\uC5EC\uC4F0\uAE30 \uB4F1)",
9854
+ "",
9855
+ "\uCD9C\uB825 \uADDC\uCE59:",
9856
+ "- \uBCC0\uD658\uB41C Markdown \uBCF8\uBB38\uB9CC \uCD9C\uB825\uD560 \uAC83 (\uC124\uBA85, \uC8FC\uC11D, \uBA54\uD0C0 \uD14D\uC2A4\uD2B8 \uC5C6\uC774)",
9857
+ "- \uD655\uC2E4\uD558\uC9C0 \uC54A\uC73C\uBA74 \uC6D0\uBB38\uC744 \uADF8\uB300\uB85C \uC720\uC9C0\uD560 \uAC83"
9838
9858
  ].join("\n");
9839
9859
 
9840
9860
  // src/index.ts
@@ -9908,9 +9928,6 @@ async function parseImage(buffer, options) {
9908
9928
  if (ocrMode === "gemini" || ocrMode === "claude" || ocrMode === "codex" || ocrMode === "ollama") {
9909
9929
  ocrProvider = createCliOcrProvider(ocrMode);
9910
9930
  actualOcrMode = ocrMode;
9911
- } else if (ocrMode === "tesseract") {
9912
- ocrProvider = await createTesseractProvider();
9913
- actualOcrMode = ocrMode;
9914
9931
  } else if (ocrMode === "auto") {
9915
9932
  const modesToTry = ["gemini", "claude", "codex", "ollama"];
9916
9933
  for (const mode of modesToTry) {
@@ -9922,10 +9939,6 @@ async function parseImage(buffer, options) {
9922
9939
  console.warn(`[kordoc] OCR auto-detection: ${mode} CLI not available or failed. Trying next.`, e);
9923
9940
  }
9924
9941
  }
9925
- if (!ocrProvider) {
9926
- ocrProvider = await createTesseractProvider();
9927
- actualOcrMode = "tesseract";
9928
- }
9929
9942
  }
9930
9943
  if (!ocrProvider) {
9931
9944
  return { success: false, fileType: "image", error: "\uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uD504\uB85C\uBC14\uC774\uB354\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.", code: "PARSE_ERROR" };
@@ -10193,4 +10206,4 @@ export {
10193
10206
  cfb/cfb.js:
10194
10207
  (*! crc32.js (C) 2014-present SheetJS -- http://sheetjs.com *)
10195
10208
  */
10196
- //# sourceMappingURL=chunk-WM3XI23V.js.map
10209
+ //# sourceMappingURL=chunk-463YQ2WL.js.map