@clazic/kordoc 2.4.17 → 2.4.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{auto-detect-2YGFYQCN.js → auto-detect-CBYICI6B.js} +4 -4
- package/dist/{chunk-WM3XI23V.js → chunk-463YQ2WL.js} +38 -25
- package/dist/chunk-463YQ2WL.js.map +1 -0
- package/dist/{chunk-7NOZFYH6.js → chunk-CLK4PNZ7.js} +7 -8
- package/dist/chunk-CLK4PNZ7.js.map +1 -0
- package/dist/{chunk-W2KDIKDF.js → chunk-MZN7PLTZ.js} +2 -2
- package/dist/{chunk-34WIGIQC.js → chunk-Y4WFKJ5P.js} +1 -1
- package/dist/chunk-Y4WFKJ5P.js.map +1 -0
- package/dist/cli.js +9 -13
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +49 -191
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +5 -6
- package/dist/index.d.ts +5 -6
- package/dist/index.js +49 -190
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +5 -6
- package/dist/mcp.js.map +1 -1
- package/dist/{resolve-673XFZQ6.js → resolve-XWYJYKKH.js} +15 -36
- package/dist/resolve-XWYJYKKH.js.map +1 -0
- package/dist/{utils-DHOODYKU.js → utils-YUAT7LFD.js} +2 -2
- package/dist/{watch-RM4VNOL4.js → watch-WEOFVVDO.js} +5 -6
- package/dist/{watch-RM4VNOL4.js.map → watch-WEOFVVDO.js.map} +1 -1
- package/package.json +1 -2
- package/dist/chunk-34WIGIQC.js.map +0 -1
- package/dist/chunk-7FMKAV4P.js +0 -56
- package/dist/chunk-7FMKAV4P.js.map +0 -1
- package/dist/chunk-7NOZFYH6.js.map +0 -1
- package/dist/chunk-WM3XI23V.js.map +0 -1
- package/dist/resolve-673XFZQ6.js.map +0 -1
- package/dist/tesseract-provider-MNMZPSGF.js +0 -11
- package/dist/utils-DHOODYKU.js.map +0 -1
- /package/dist/{auto-detect-2YGFYQCN.js.map → auto-detect-CBYICI6B.js.map} +0 -0
- /package/dist/{chunk-W2KDIKDF.js.map → chunk-MZN7PLTZ.js.map} +0 -0
- /package/dist/{tesseract-provider-MNMZPSGF.js.map → utils-YUAT7LFD.js.map} +0 -0
|
@@ -2,14 +2,14 @@
|
|
|
2
2
|
import {
|
|
3
3
|
detectAvailableOcr,
|
|
4
4
|
getAutoFallbackChain,
|
|
5
|
-
|
|
5
|
+
getNoCliMessage,
|
|
6
6
|
validateOcrMode
|
|
7
|
-
} from "./chunk-7NOZFYH6.js";
|
|
7
|
+
} from "./chunk-CLK4PNZ7.js";
|
|
8
8
|
import "./chunk-ZWE3DS7E.js";
|
|
9
9
|
export {
|
|
10
10
|
detectAvailableOcr,
|
|
11
11
|
getAutoFallbackChain,
|
|
12
|
-
|
|
12
|
+
getNoCliMessage,
|
|
13
13
|
validateOcrMode
|
|
14
14
|
};
|
|
15
|
-
//# sourceMappingURL=auto-detect-2YGFYQCN.js.map
|
|
15
|
+
//# sourceMappingURL=auto-detect-CBYICI6B.js.map
|
|
@@ -1,7 +1,4 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import {
|
|
3
|
-
markdownToBlocks
|
|
4
|
-
} from "./chunk-YW5G6BCJ.js";
|
|
5
2
|
import {
|
|
6
3
|
KordocError,
|
|
7
4
|
classifyError,
|
|
@@ -10,16 +7,16 @@ import {
|
|
|
10
7
|
precheckZipSize,
|
|
11
8
|
sanitizeHref,
|
|
12
9
|
toArrayBuffer
|
|
13
|
-
} from "./chunk-W2KDIKDF.js";
|
|
10
|
+
} from "./chunk-MZN7PLTZ.js";
|
|
14
11
|
import {
|
|
15
12
|
parsePageRange
|
|
16
13
|
} from "./chunk-MOL7MDBG.js";
|
|
17
|
-
import {
|
|
18
|
-
createTesseractProvider
|
|
19
|
-
} from "./chunk-7FMKAV4P.js";
|
|
20
14
|
import {
|
|
21
15
|
createCliOcrProvider
|
|
22
|
-
} from "./chunk-34WIGIQC.js";
|
|
16
|
+
} from "./chunk-Y4WFKJ5P.js";
|
|
17
|
+
import {
|
|
18
|
+
markdownToBlocks
|
|
19
|
+
} from "./chunk-YW5G6BCJ.js";
|
|
23
20
|
import {
|
|
24
21
|
createLoggerFromEnv,
|
|
25
22
|
generateRunId
|
|
@@ -5688,7 +5685,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
5688
5685
|
if (ocrMode === "off") {
|
|
5689
5686
|
throw Object.assign(new KordocError(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`), { isImageBased: true });
|
|
5690
5687
|
}
|
|
5691
|
-
const { resolveOcrProvider } = await import("./resolve-673XFZQ6.js");
|
|
5688
|
+
const { resolveOcrProvider } = await import("./resolve-XWYJYKKH.js");
|
|
5692
5689
|
const { ocrPages } = await import("./provider-T2D5XRTI.js");
|
|
5693
5690
|
const tryProvider = async (provider, filter) => {
|
|
5694
5691
|
try {
|
|
@@ -5709,7 +5706,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
5709
5706
|
if (options?.ocr) {
|
|
5710
5707
|
ocrBlocks = await tryProvider(options.ocr, pageFilter);
|
|
5711
5708
|
} else if (ocrMode === "auto") {
|
|
5712
|
-
const { getAutoFallbackChain } = await import("./auto-detect-2YGFYQCN.js");
|
|
5709
|
+
const { getAutoFallbackChain } = await import("./auto-detect-CBYICI6B.js");
|
|
5713
5710
|
const pendingPages = /* @__PURE__ */ new Set();
|
|
5714
5711
|
for (let i = 1; i <= effectivePageCount; i++) {
|
|
5715
5712
|
if (!pageFilter || pageFilter.has(i)) pendingPages.add(i);
|
|
@@ -9828,13 +9825,36 @@ async function markdownToXlsx(markdown, options) {
|
|
|
9828
9825
|
import { performance } from "perf_hooks";
|
|
9829
9826
|
import libre from "libreoffice-convert";
|
|
9830
9827
|
var libreConvert = libre.convert;
|
|
9831
|
-
var
|
|
9832
|
-
"
|
|
9833
|
-
"
|
|
9834
|
-
"
|
|
9835
|
-
"-
|
|
9836
|
-
"-
|
|
9837
|
-
"-
|
|
9828
|
+
var OCR_PROMPT = [
|
|
9829
|
+
"\uC774 PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0\uC5D0\uC11C \uD14D\uC2A4\uD2B8\uC640 \uD45C\uB97C \uCD94\uCD9C\uD558\uC5EC Markdown\uC73C\uB85C \uBCC0\uD658\uD558\uACE0, OCR \uC624\uC778\uC2DD \uC624\uB958\uB97C \uC989\uC2DC \uAD50\uC815\uD558\uC5EC \uCD5C\uC885 \uACB0\uACFC\uBB3C\uC744 \uCD9C\uB825\uD558\uC138\uC694.",
|
|
9830
|
+
"",
|
|
9831
|
+
"\uCD94\uCD9C \uADDC\uCE59:",
|
|
9832
|
+
"- \uD14D\uC2A4\uD2B8, \uD45C, \uC81C\uBAA9, \uB9AC\uC2A4\uD2B8\uB97C \uC6D0\uBB38 \uAD6C\uC870 \uADF8\uB300\uB85C Markdown\uC73C\uB85C \uBCC0\uD658",
|
|
9833
|
+
"- \uD45C\uB294 Markdown \uD14C\uC774\uBE14 \uBB38\uBC95 \uC0AC\uC6A9 (| \uAD6C\uBD84, |---|---| \uD5E4\uB354 \uAD6C\uBD84\uC120 \uD3EC\uD568)",
|
|
9834
|
+
"- \uD5E4\uB529\uC740 \uC2DC\uAC01\uC801 \uD06C\uAE30\uC5D0 \uB530\uB77C # ~ ###### \uC0AC\uC6A9",
|
|
9835
|
+
"- \uB9AC\uC2A4\uD2B8\uB294 - \uB610\uB294 1. \uC0AC\uC6A9",
|
|
9836
|
+
"- \uC774\uBBF8\uC9C0, \uB3C4\uD615 \uB4F1 \uBE44\uD14D\uC2A4\uD2B8 \uC694\uC18C\uB294 \uBB34\uC2DC",
|
|
9837
|
+
"- \uC6D0\uBB38\uC758 \uC77D\uAE30 \uC21C\uC11C\uC640 \uAD6C\uC870\uB97C \uC720\uC9C0",
|
|
9838
|
+
"",
|
|
9839
|
+
"\uC808\uB300 \uAE08\uC9C0 \uC0AC\uD56D:",
|
|
9840
|
+
"- \uBB38\uC7A5\xB7\uB2E8\uB77D\xB7\uD56D\uBAA9\uC744 \uCD94\uAC00\uD558\uAC70\uB098 \uC0AD\uC81C\uD558\uC9C0 \uB9D0 \uAC83",
|
|
9841
|
+
"- \uC22B\uC790, \uD37C\uC13C\uD2B8, \uB0A0\uC9DC, \uB2E8\uC704, \uAE08\uC561\uC744 \uC808\uB300 \uBCC0\uACBD\uD558\uC9C0 \uB9D0 \uAC83",
|
|
9842
|
+
"- \uACE0\uC720\uBA85\uC0AC, \uAE30\uAD00\uBA85, \uBC95\uB839\uBA85, \uC9C0\uBA85\uC744 \uBCC0\uACBD\uD558\uC9C0 \uB9D0 \uAC83",
|
|
9843
|
+
"- \uD45C\uC758 \uC81C\uBAA9\uC744 \uBCC0\uACBD \uB610\uB294 \uC0AD\uC81C\uD558\uC9C0 \uB9D0 \uAC83",
|
|
9844
|
+
"- \uD45C\uC758 \uD589\xB7\uC5F4 \uC218, \uC140 \uB0B4\uC6A9, \uD5E4\uB354\uB97C \uBCC0\uACBD\uD558\uC9C0 \uB9D0 \uAC83",
|
|
9845
|
+
"- \uC81C\uBAA9 \uC218\uC900(#, ##, ### \uB4F1)\uC744 \uC784\uC758\uB85C \uBC14\uAFB8\uC9C0 \uB9D0 \uAC83",
|
|
9846
|
+
"- \uC6D0\uBB38\uC5D0 \uC5C6\uB294 \uB0B4\uC6A9\uC744 \uC694\uC57D\xB7\uBCF4\uC644\xB7\uCD94\uB860\uD558\uC9C0 \uB9D0 \uAC83",
|
|
9847
|
+
"- ` ``` `\uB85C \uAC10\uC2F8\uAC70\uB098 \uC124\uBA85 \uD14D\uC2A4\uD2B8\uB97C \uCD94\uAC00\uD558\uC9C0 \uB9D0 \uAC83",
|
|
9848
|
+
"",
|
|
9849
|
+
"\uD5C8\uC6A9\uB418\uB294 \uAD50\uC815 \uBC94\uC704 (OCR \uC624\uC778\uC2DD \uC218\uC815):",
|
|
9850
|
+
"- \uBA85\uBC31\uD55C \uAE00\uC790 \uC624\uC778\uC2DD \uC218\uC815 (\uC608: '0' \u2192 'O', 'l' \u2192 '1' \uB4F1 \uB9E5\uB77D\uC0C1 \uBA85\uD655\uD55C \uACBD\uC6B0\uB9CC)",
|
|
9851
|
+
"- \uB2E8\uC5B4 \uC911\uAC04\uC5D0 \uC798\uBABB \uC0BD\uC785\uB41C \uACF5\uBC31 \uC81C\uAC70",
|
|
9852
|
+
"- \uC904\uBC14\uAFC8 \uC624\uB958\uB85C \uBD84\uB9AC\uB41C \uBB38\uC7A5 \uBCD1\uD569 (\uC758\uBBF8 \uB2E8\uC704 \uAE30\uC900)",
|
|
9853
|
+
"- Markdown \uBB38\uBC95 \uC624\uB958 \uC218\uC815 (\uD45C \uAD6C\uBD84\uC120 \uB204\uB77D, \uB9AC\uC2A4\uD2B8 \uB4E4\uC5EC\uC4F0\uAE30 \uB4F1)",
|
|
9854
|
+
"",
|
|
9855
|
+
"\uCD9C\uB825 \uADDC\uCE59:",
|
|
9856
|
+
"- \uBCC0\uD658\uB41C Markdown \uBCF8\uBB38\uB9CC \uCD9C\uB825\uD560 \uAC83 (\uC124\uBA85, \uC8FC\uC11D, \uBA54\uD0C0 \uD14D\uC2A4\uD2B8 \uC5C6\uC774)",
|
|
9857
|
+
"- \uD655\uC2E4\uD558\uC9C0 \uC54A\uC73C\uBA74 \uC6D0\uBB38\uC744 \uADF8\uB300\uB85C \uC720\uC9C0\uD560 \uAC83"
|
|
9838
9858
|
].join("\n");
|
|
9839
9859
|
|
|
9840
9860
|
// src/index.ts
|
|
@@ -9908,9 +9928,6 @@ async function parseImage(buffer, options) {
|
|
|
9908
9928
|
if (ocrMode === "gemini" || ocrMode === "claude" || ocrMode === "codex" || ocrMode === "ollama") {
|
|
9909
9929
|
ocrProvider = createCliOcrProvider(ocrMode);
|
|
9910
9930
|
actualOcrMode = ocrMode;
|
|
9911
|
-
} else if (ocrMode === "tesseract") {
|
|
9912
|
-
ocrProvider = await createTesseractProvider();
|
|
9913
|
-
actualOcrMode = ocrMode;
|
|
9914
9931
|
} else if (ocrMode === "auto") {
|
|
9915
9932
|
const modesToTry = ["gemini", "claude", "codex", "ollama"];
|
|
9916
9933
|
for (const mode of modesToTry) {
|
|
@@ -9922,10 +9939,6 @@ async function parseImage(buffer, options) {
|
|
|
9922
9939
|
console.warn(`[kordoc] OCR auto-detection: ${mode} CLI not available or failed. Trying next.`, e);
|
|
9923
9940
|
}
|
|
9924
9941
|
}
|
|
9925
|
-
if (!ocrProvider) {
|
|
9926
|
-
ocrProvider = await createTesseractProvider();
|
|
9927
|
-
actualOcrMode = "tesseract";
|
|
9928
|
-
}
|
|
9929
9942
|
}
|
|
9930
9943
|
if (!ocrProvider) {
|
|
9931
9944
|
return { success: false, fileType: "image", error: "\uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uD504\uB85C\uBC14\uC774\uB354\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4.", code: "PARSE_ERROR" };
|
|
@@ -10193,4 +10206,4 @@ export {
|
|
|
10193
10206
|
cfb/cfb.js:
|
|
10194
10207
|
(*! crc32.js (C) 2014-present SheetJS -- http://sheetjs.com *)
|
|
10195
10208
|
*/
|
|
10196
|
-
//# sourceMappingURL=chunk-WM3XI23V.js.map
|
|
10209
|
+
//# sourceMappingURL=chunk-463YQ2WL.js.map
|