@clazic/kordoc 2.4.4 → 2.4.6

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/utils.ts
4
- var VERSION = true ? "2.4.4" : "0.0.0-dev";
4
+ var VERSION = true ? "2.4.6" : "0.0.0-dev";
5
5
  function toArrayBuffer(buf) {
6
6
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
7
7
  return buf.buffer;
@@ -90,4 +90,4 @@ export {
90
90
  sanitizeHref,
91
91
  classifyError
92
92
  };
93
- //# sourceMappingURL=chunk-KEDUF24M.js.map
93
+ //# sourceMappingURL=chunk-A2FNPGBS.js.map
@@ -9,7 +9,7 @@ import {
9
9
  precheckZipSize,
10
10
  sanitizeHref,
11
11
  toArrayBuffer
12
- } from "./chunk-KEDUF24M.js";
12
+ } from "./chunk-A2FNPGBS.js";
13
13
  import {
14
14
  parsePageRange
15
15
  } from "./chunk-MOL7MDBG.js";
@@ -5463,10 +5463,10 @@ async function parsePdfDocument(buffer, options) {
5463
5463
  throw Object.assign(new KordocError(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`), { isImageBased: true });
5464
5464
  }
5465
5465
  const { resolveOcrProvider } = await import("./resolve-TZVGVOVD.js");
5466
- const { ocrPages } = await import("./provider-HE727F7Z.js");
5467
- const tryProvider = async (provider) => {
5466
+ const { ocrPages } = await import("./provider-7F7NEDTN.js");
5467
+ const tryProvider = async (provider, filter) => {
5468
5468
  try {
5469
- return await ocrPages(doc, provider, pageFilter, effectivePageCount, warnings, concurrency, options?.onProgress);
5469
+ return await ocrPages(doc, provider, filter, effectivePageCount, warnings, concurrency, options?.onProgress);
5470
5470
  } catch {
5471
5471
  return [];
5472
5472
  } finally {
@@ -5479,25 +5479,43 @@ async function parsePdfDocument(buffer, options) {
5479
5479
  };
5480
5480
  let ocrBlocks = [];
5481
5481
  if (options?.ocr) {
5482
- ocrBlocks = await tryProvider(options.ocr);
5482
+ ocrBlocks = await tryProvider(options.ocr, pageFilter);
5483
5483
  } else if (ocrMode === "auto") {
5484
5484
  const { getAutoFallbackChain } = await import("./auto-detect-2YGFYQCN.js");
5485
+ const pendingPages = /* @__PURE__ */ new Set();
5486
+ for (let i = 1; i <= effectivePageCount; i++) {
5487
+ if (!pageFilter || pageFilter.has(i)) pendingPages.add(i);
5488
+ }
5489
+ const allOcrBlocks = [];
5485
5490
  for (const mode of getAutoFallbackChain()) {
5491
+ if (pendingPages.size === 0) break;
5486
5492
  try {
5493
+ const modeFilter = pendingPages.size < effectivePageCount ? new Set(pendingPages) : pageFilter;
5487
5494
  const provider = await resolveOcrProvider(mode, warnings, concurrency, batchSize);
5488
- const blocks2 = await tryProvider(provider);
5495
+ const blocks2 = await tryProvider(provider, modeFilter);
5489
5496
  if (blocks2.length > 0) {
5490
- ocrBlocks = blocks2;
5491
- break;
5497
+ for (const b of blocks2) {
5498
+ if (b.pageNumber !== void 0) pendingPages.delete(b.pageNumber);
5499
+ }
5500
+ for (const b of blocks2) allOcrBlocks.push(b);
5501
+ if (pendingPages.size > 0) {
5502
+ warnings.push({
5503
+ message: `OCR: '${mode}' \uC644\uB8CC (${pendingPages.size}\uD398\uC774\uC9C0 \uBBF8\uCC98\uB9AC \u2192 \uB2E4\uC74C \uC5D4\uC9C4\uC73C\uB85C \uC7AC\uC2DC\uB3C4)`,
5504
+ code: "OCR_CLI_FALLBACK"
5505
+ });
5506
+ }
5507
+ } else {
5508
+ warnings.push({ message: `OCR: '${mode}' \uACB0\uACFC \uC5C6\uC74C, \uB2E4\uC74C \uC5D4\uC9C4\uC73C\uB85C \uC2DC\uB3C4`, code: "OCR_CLI_FALLBACK" });
5492
5509
  }
5493
- warnings.push({ message: `OCR: '${mode}' \uACB0\uACFC \uC5C6\uC74C, \uB2E4\uC74C \uC5D4\uC9C4\uC73C\uB85C \uC2DC\uB3C4`, code: "OCR_CLI_FALLBACK" });
5494
5510
  } catch {
5495
5511
  }
5496
5512
  }
5513
+ allOcrBlocks.sort((a, b) => (a.pageNumber ?? 0) - (b.pageNumber ?? 0));
5514
+ ocrBlocks = allOcrBlocks;
5497
5515
  } else {
5498
5516
  try {
5499
5517
  const provider = await resolveOcrProvider(ocrMode, warnings, concurrency, batchSize);
5500
- ocrBlocks = await tryProvider(provider);
5518
+ ocrBlocks = await tryProvider(provider, pageFilter);
5501
5519
  } catch (resolveErr) {
5502
5520
  throw Object.assign(
5503
5521
  new KordocError(resolveErr instanceof Error ? resolveErr.message : "OCR \uD504\uB85C\uBC14\uC774\uB354 \uCD08\uAE30\uD654 \uC2E4\uD328"),
@@ -9795,4 +9813,4 @@ export {
9795
9813
  cfb/cfb.js:
9796
9814
  (*! crc32.js (C) 2014-present SheetJS -- http://sheetjs.com *)
9797
9815
  */
9798
- //# sourceMappingURL=chunk-5AXJRBBK.js.map
9816
+ //# sourceMappingURL=chunk-L2CLLZ4S.js.map