@clazic/kordoc 2.4.4 → 2.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-KEDUF24M.js → chunk-A2FNPGBS.js} +2 -2
- package/dist/{chunk-5AXJRBBK.js → chunk-L2CLLZ4S.js} +29 -11
- package/dist/chunk-L2CLLZ4S.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/index.cjs +58 -25
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +58 -25
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{provider-HE727F7Z.js → provider-7F7NEDTN.js} +32 -17
- package/dist/provider-7F7NEDTN.js.map +1 -0
- package/dist/{utils-BB2CDSTB.js → utils-RQ4S2RVN.js} +2 -2
- package/dist/{watch-6QVK32X7.js → watch-3EIG5EVL.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-5AXJRBBK.js.map +0 -1
- package/dist/provider-HE727F7Z.js.map +0 -1
- /package/dist/{chunk-KEDUF24M.js.map → chunk-A2FNPGBS.js.map} +0 -0
- /package/dist/{utils-BB2CDSTB.js.map → utils-RQ4S2RVN.js.map} +0 -0
- /package/dist/{watch-6QVK32X7.js.map → watch-3EIG5EVL.js.map} +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/utils.ts
|
|
4
|
-
var VERSION = true ? "2.4.4" : "0.0.0-dev";
|
|
4
|
+
var VERSION = true ? "2.4.6" : "0.0.0-dev";
|
|
5
5
|
function toArrayBuffer(buf) {
|
|
6
6
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
7
7
|
return buf.buffer;
|
|
@@ -90,4 +90,4 @@ export {
|
|
|
90
90
|
sanitizeHref,
|
|
91
91
|
classifyError
|
|
92
92
|
};
|
|
93
|
-
//# sourceMappingURL=chunk-KEDUF24M.js.map
|
|
93
|
+
//# sourceMappingURL=chunk-A2FNPGBS.js.map
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
precheckZipSize,
|
|
10
10
|
sanitizeHref,
|
|
11
11
|
toArrayBuffer
|
|
12
|
-
} from "./chunk-KEDUF24M.js";
|
|
12
|
+
} from "./chunk-A2FNPGBS.js";
|
|
13
13
|
import {
|
|
14
14
|
parsePageRange
|
|
15
15
|
} from "./chunk-MOL7MDBG.js";
|
|
@@ -5463,10 +5463,10 @@ async function parsePdfDocument(buffer, options) {
|
|
|
5463
5463
|
throw Object.assign(new KordocError(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`), { isImageBased: true });
|
|
5464
5464
|
}
|
|
5465
5465
|
const { resolveOcrProvider } = await import("./resolve-TZVGVOVD.js");
|
|
5466
|
-
const { ocrPages } = await import("./provider-HE727F7Z.js");
|
|
5467
|
-
const tryProvider = async (provider) => {
|
|
5466
|
+
const { ocrPages } = await import("./provider-7F7NEDTN.js");
|
|
5467
|
+
const tryProvider = async (provider, filter) => {
|
|
5468
5468
|
try {
|
|
5469
|
-
return await ocrPages(doc, provider, pageFilter, effectivePageCount, warnings, concurrency, options?.onProgress);
|
|
5469
|
+
return await ocrPages(doc, provider, filter, effectivePageCount, warnings, concurrency, options?.onProgress);
|
|
5470
5470
|
} catch {
|
|
5471
5471
|
return [];
|
|
5472
5472
|
} finally {
|
|
@@ -5479,25 +5479,43 @@ async function parsePdfDocument(buffer, options) {
|
|
|
5479
5479
|
};
|
|
5480
5480
|
let ocrBlocks = [];
|
|
5481
5481
|
if (options?.ocr) {
|
|
5482
|
-
ocrBlocks = await tryProvider(options.ocr);
|
|
5482
|
+
ocrBlocks = await tryProvider(options.ocr, pageFilter);
|
|
5483
5483
|
} else if (ocrMode === "auto") {
|
|
5484
5484
|
const { getAutoFallbackChain } = await import("./auto-detect-2YGFYQCN.js");
|
|
5485
|
+
const pendingPages = /* @__PURE__ */ new Set();
|
|
5486
|
+
for (let i = 1; i <= effectivePageCount; i++) {
|
|
5487
|
+
if (!pageFilter || pageFilter.has(i)) pendingPages.add(i);
|
|
5488
|
+
}
|
|
5489
|
+
const allOcrBlocks = [];
|
|
5485
5490
|
for (const mode of getAutoFallbackChain()) {
|
|
5491
|
+
if (pendingPages.size === 0) break;
|
|
5486
5492
|
try {
|
|
5493
|
+
const modeFilter = pendingPages.size < effectivePageCount ? new Set(pendingPages) : pageFilter;
|
|
5487
5494
|
const provider = await resolveOcrProvider(mode, warnings, concurrency, batchSize);
|
|
5488
|
-
const blocks2 = await tryProvider(provider);
|
|
5495
|
+
const blocks2 = await tryProvider(provider, modeFilter);
|
|
5489
5496
|
if (blocks2.length > 0) {
|
|
5490
|
-
|
|
5491
|
-
|
|
5497
|
+
for (const b of blocks2) {
|
|
5498
|
+
if (b.pageNumber !== void 0) pendingPages.delete(b.pageNumber);
|
|
5499
|
+
}
|
|
5500
|
+
for (const b of blocks2) allOcrBlocks.push(b);
|
|
5501
|
+
if (pendingPages.size > 0) {
|
|
5502
|
+
warnings.push({
|
|
5503
|
+
message: `OCR: '${mode}' \uC644\uB8CC (${pendingPages.size}\uD398\uC774\uC9C0 \uBBF8\uCC98\uB9AC \u2192 \uB2E4\uC74C \uC5D4\uC9C4\uC73C\uB85C \uC7AC\uC2DC\uB3C4)`,
|
|
5504
|
+
code: "OCR_CLI_FALLBACK"
|
|
5505
|
+
});
|
|
5506
|
+
}
|
|
5507
|
+
} else {
|
|
5508
|
+
warnings.push({ message: `OCR: '${mode}' \uACB0\uACFC \uC5C6\uC74C, \uB2E4\uC74C \uC5D4\uC9C4\uC73C\uB85C \uC2DC\uB3C4`, code: "OCR_CLI_FALLBACK" });
|
|
5492
5509
|
}
|
|
5493
|
-
warnings.push({ message: `OCR: '${mode}' \uACB0\uACFC \uC5C6\uC74C, \uB2E4\uC74C \uC5D4\uC9C4\uC73C\uB85C \uC2DC\uB3C4`, code: "OCR_CLI_FALLBACK" });
|
|
5494
5510
|
} catch {
|
|
5495
5511
|
}
|
|
5496
5512
|
}
|
|
5513
|
+
allOcrBlocks.sort((a, b) => (a.pageNumber ?? 0) - (b.pageNumber ?? 0));
|
|
5514
|
+
ocrBlocks = allOcrBlocks;
|
|
5497
5515
|
} else {
|
|
5498
5516
|
try {
|
|
5499
5517
|
const provider = await resolveOcrProvider(ocrMode, warnings, concurrency, batchSize);
|
|
5500
|
-
ocrBlocks = await tryProvider(provider);
|
|
5518
|
+
ocrBlocks = await tryProvider(provider, pageFilter);
|
|
5501
5519
|
} catch (resolveErr) {
|
|
5502
5520
|
throw Object.assign(
|
|
5503
5521
|
new KordocError(resolveErr instanceof Error ? resolveErr.message : "OCR \uD504\uB85C\uBC14\uC774\uB354 \uCD08\uAE30\uD654 \uC2E4\uD328"),
|
|
@@ -9795,4 +9813,4 @@ export {
|
|
|
9795
9813
|
cfb/cfb.js:
|
|
9796
9814
|
(*! crc32.js (C) 2014-present SheetJS -- http://sheetjs.com *)
|
|
9797
9815
|
*/
|
|
9798
|
-
//# sourceMappingURL=chunk-5AXJRBBK.js.map
|
|
9816
|
+
//# sourceMappingURL=chunk-L2CLLZ4S.js.map
|