@clazic/kordoc 2.3.2 → 2.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-STIKJGEA.js → chunk-NU3KFVVZ.js} +2 -2
- package/dist/{chunk-2GFJFTKS.js → chunk-UDFKY7CH.js} +19 -8
- package/dist/chunk-UDFKY7CH.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/index.cjs +19 -8
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +19 -8
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{resolve-QA3VACUP.js → resolve-UOAOPQ4H.js} +3 -3
- package/dist/{resolve-QA3VACUP.js.map → resolve-UOAOPQ4H.js.map} +1 -1
- package/dist/{utils-FFUQJTTI.js → utils-STJT6CFC.js} +2 -2
- package/dist/{watch-2O32L6IF.js → watch-PRQGLOW3.js} +3 -3
- package/package.json +8 -8
- package/dist/chunk-2GFJFTKS.js.map +0 -1
- /package/dist/{chunk-STIKJGEA.js.map → chunk-NU3KFVVZ.js.map} +0 -0
- /package/dist/{utils-FFUQJTTI.js.map → utils-STJT6CFC.js.map} +0 -0
- /package/dist/{watch-2O32L6IF.js.map → watch-PRQGLOW3.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -2425,7 +2425,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
2425
2425
|
return createCliOcrProvider(mode);
|
|
2426
2426
|
}
|
|
2427
2427
|
const detected = detectAvailableOcr();
|
|
2428
|
-
if (detected !== "
|
|
2428
|
+
if (detected !== "codex") {
|
|
2429
2429
|
if (detected === "tesseract") {
|
|
2430
2430
|
warnings?.push({
|
|
2431
2431
|
message: getTesseractFallbackMessage(),
|
|
@@ -2433,7 +2433,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
2433
2433
|
});
|
|
2434
2434
|
} else {
|
|
2435
2435
|
warnings?.push({
|
|
2436
|
-
message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (
|
|
2436
|
+
message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (codex CLI\uAC00 \uC5C6\uC5B4 fallback). \uB354 \uB098\uC740 \uD488\uC9C8\uC744 \uC704\uD574 codex CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.`,
|
|
2437
2437
|
code: "OCR_CLI_FALLBACK"
|
|
2438
2438
|
});
|
|
2439
2439
|
}
|
|
@@ -2790,7 +2790,7 @@ import JSZip2 from "jszip";
|
|
|
2790
2790
|
import { DOMParser } from "@xmldom/xmldom";
|
|
2791
2791
|
|
|
2792
2792
|
// src/utils.ts
|
|
2793
|
-
var VERSION = true ? "2.3.2" : "0.0.0-dev";
|
|
2793
|
+
var VERSION = true ? "2.3.3" : "0.0.0-dev";
|
|
2794
2794
|
function toArrayBuffer(buf) {
|
|
2795
2795
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
2796
2796
|
return buf.buffer;
|
|
@@ -6318,15 +6318,26 @@ async function parsePdfDocument(buffer, options) {
|
|
|
6318
6318
|
warnings.push({ page: i, message: `\uD398\uC774\uC9C0 ${i} \uD30C\uC2F1 \uC2E4\uD328: ${pageErr instanceof Error ? pageErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
6319
6319
|
}
|
|
6320
6320
|
};
|
|
6321
|
-
const
|
|
6322
|
-
|
|
6321
|
+
const SAMPLE_SIZE = Math.min(10, targetPageNums.length);
|
|
6322
|
+
const sampledIndices = /* @__PURE__ */ new Set();
|
|
6323
|
+
if (targetPageNums.length <= SAMPLE_SIZE) {
|
|
6324
|
+
for (let i = 0; i < targetPageNums.length; i++) sampledIndices.add(i);
|
|
6325
|
+
} else {
|
|
6326
|
+
for (let i = 0; i < SAMPLE_SIZE; i++) {
|
|
6327
|
+
const idx = Math.round(i * (targetPageNums.length - 1) / (SAMPLE_SIZE - 1));
|
|
6328
|
+
sampledIndices.add(idx);
|
|
6329
|
+
}
|
|
6330
|
+
}
|
|
6331
|
+
for (const si of sampledIndices) {
|
|
6323
6332
|
await parseSinglePage(targetPageNums[si]);
|
|
6324
6333
|
}
|
|
6325
|
-
const sampleParsed = parsedPages ||
|
|
6334
|
+
const sampleParsed = parsedPages || sampledIndices.size;
|
|
6326
6335
|
const isImageBased = totalChars / Math.max(sampleParsed, 1) < 10;
|
|
6327
6336
|
if (!isImageBased) {
|
|
6328
|
-
for (let si =
|
|
6329
|
-
|
|
6337
|
+
for (let si = 0; si < targetPageNums.length; si++) {
|
|
6338
|
+
if (!sampledIndices.has(si)) {
|
|
6339
|
+
await parseSinglePage(targetPageNums[si]);
|
|
6340
|
+
}
|
|
6330
6341
|
}
|
|
6331
6342
|
}
|
|
6332
6343
|
const parsedPageCount = parsedPages || (pageFilter ? pageFilter.size : effectivePageCount);
|