@clazic/kordoc 2.3.2 → 2.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-STIKJGEA.js → chunk-NU3KFVVZ.js} +2 -2
- package/dist/{chunk-2GFJFTKS.js → chunk-UDFKY7CH.js} +19 -8
- package/dist/chunk-UDFKY7CH.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/index.cjs +19 -8
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +19 -8
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{resolve-QA3VACUP.js → resolve-UOAOPQ4H.js} +3 -3
- package/dist/{resolve-QA3VACUP.js.map → resolve-UOAOPQ4H.js.map} +1 -1
- package/dist/{utils-FFUQJTTI.js → utils-STJT6CFC.js} +2 -2
- package/dist/{watch-2O32L6IF.js → watch-PRQGLOW3.js} +3 -3
- package/package.json +8 -8
- package/dist/chunk-2GFJFTKS.js.map +0 -1
- /package/dist/{chunk-STIKJGEA.js.map → chunk-NU3KFVVZ.js.map} +0 -0
- /package/dist/{utils-FFUQJTTI.js.map → utils-STJT6CFC.js.map} +0 -0
- /package/dist/{watch-2O32L6IF.js.map → watch-PRQGLOW3.js.map} +0 -0
package/dist/cli.js
CHANGED
|
@@ -4,11 +4,11 @@ import {
|
|
|
4
4
|
markdownToHwpx,
|
|
5
5
|
markdownToXlsx,
|
|
6
6
|
parse
|
|
7
|
-
} from "./chunk-2GFJFTKS.js";
|
|
7
|
+
} from "./chunk-UDFKY7CH.js";
|
|
8
8
|
import {
|
|
9
9
|
VERSION,
|
|
10
10
|
toArrayBuffer
|
|
11
|
-
} from "./chunk-STIKJGEA.js";
|
|
11
|
+
} from "./chunk-NU3KFVVZ.js";
|
|
12
12
|
import "./chunk-MOL7MDBG.js";
|
|
13
13
|
import "./chunk-7FMKAV4P.js";
|
|
14
14
|
import "./chunk-JOGAFNIL.js";
|
|
@@ -137,7 +137,7 @@ async function runParse(files, opts) {
|
|
|
137
137
|
saveImages(absPath);
|
|
138
138
|
}
|
|
139
139
|
} catch (err) {
|
|
140
|
-
const { sanitizeError } = await import("./utils-FFUQJTTI.js");
|
|
140
|
+
const { sanitizeError } = await import("./utils-STJT6CFC.js");
|
|
141
141
|
process.stderr.write(`
|
|
142
142
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
143
143
|
`);
|
|
@@ -221,7 +221,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
221
221
|
`));
|
|
222
222
|
}
|
|
223
223
|
} catch (err) {
|
|
224
|
-
const { sanitizeError } = await import("./utils-FFUQJTTI.js");
|
|
224
|
+
const { sanitizeError } = await import("./utils-STJT6CFC.js");
|
|
225
225
|
process.stderr.write(` FAIL
|
|
226
226
|
`);
|
|
227
227
|
process.stderr.write(` \u2192 ${sanitizeError(err)}
|
|
@@ -230,7 +230,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
230
230
|
}
|
|
231
231
|
});
|
|
232
232
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
233
|
-
const { watchDirectory } = await import("./watch-2O32L6IF.js");
|
|
233
|
+
const { watchDirectory } = await import("./watch-PRQGLOW3.js");
|
|
234
234
|
await watchDirectory({
|
|
235
235
|
dir,
|
|
236
236
|
outDir: opts.outDir,
|
package/dist/index.cjs
CHANGED
|
@@ -2422,7 +2422,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
2422
2422
|
return createCliOcrProvider(mode);
|
|
2423
2423
|
}
|
|
2424
2424
|
const detected = detectAvailableOcr();
|
|
2425
|
-
if (detected !== "
|
|
2425
|
+
if (detected !== "codex") {
|
|
2426
2426
|
if (detected === "tesseract") {
|
|
2427
2427
|
warnings?.push({
|
|
2428
2428
|
message: getTesseractFallbackMessage(),
|
|
@@ -2430,7 +2430,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
|
|
|
2430
2430
|
});
|
|
2431
2431
|
} else {
|
|
2432
2432
|
warnings?.push({
|
|
2433
|
-
message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (
|
|
2433
|
+
message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (codex CLI\uAC00 \uC5C6\uC5B4 fallback). \uB354 \uB098\uC740 \uD488\uC9C8\uC744 \uC704\uD574 codex CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.`,
|
|
2434
2434
|
code: "OCR_CLI_FALLBACK"
|
|
2435
2435
|
});
|
|
2436
2436
|
}
|
|
@@ -2810,7 +2810,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
2810
2810
|
var import_xmldom = require("@xmldom/xmldom");
|
|
2811
2811
|
|
|
2812
2812
|
// src/utils.ts
|
|
2813
|
-
var VERSION = true ? "2.3.2" : "0.0.0-dev";
|
|
2813
|
+
var VERSION = true ? "2.3.3" : "0.0.0-dev";
|
|
2814
2814
|
function toArrayBuffer(buf) {
|
|
2815
2815
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
2816
2816
|
return buf.buffer;
|
|
@@ -6338,15 +6338,26 @@ async function parsePdfDocument(buffer, options) {
|
|
|
6338
6338
|
warnings.push({ page: i, message: `\uD398\uC774\uC9C0 ${i} \uD30C\uC2F1 \uC2E4\uD328: ${pageErr instanceof Error ? pageErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
|
|
6339
6339
|
}
|
|
6340
6340
|
};
|
|
6341
|
-
const
|
|
6342
|
-
|
|
6341
|
+
const SAMPLE_SIZE = Math.min(10, targetPageNums.length);
|
|
6342
|
+
const sampledIndices = /* @__PURE__ */ new Set();
|
|
6343
|
+
if (targetPageNums.length <= SAMPLE_SIZE) {
|
|
6344
|
+
for (let i = 0; i < targetPageNums.length; i++) sampledIndices.add(i);
|
|
6345
|
+
} else {
|
|
6346
|
+
for (let i = 0; i < SAMPLE_SIZE; i++) {
|
|
6347
|
+
const idx = Math.round(i * (targetPageNums.length - 1) / (SAMPLE_SIZE - 1));
|
|
6348
|
+
sampledIndices.add(idx);
|
|
6349
|
+
}
|
|
6350
|
+
}
|
|
6351
|
+
for (const si of sampledIndices) {
|
|
6343
6352
|
await parseSinglePage(targetPageNums[si]);
|
|
6344
6353
|
}
|
|
6345
|
-
const sampleParsed = parsedPages ||
|
|
6354
|
+
const sampleParsed = parsedPages || sampledIndices.size;
|
|
6346
6355
|
const isImageBased = totalChars / Math.max(sampleParsed, 1) < 10;
|
|
6347
6356
|
if (!isImageBased) {
|
|
6348
|
-
for (let si =
|
|
6349
|
-
|
|
6357
|
+
for (let si = 0; si < targetPageNums.length; si++) {
|
|
6358
|
+
if (!sampledIndices.has(si)) {
|
|
6359
|
+
await parseSinglePage(targetPageNums[si]);
|
|
6360
|
+
}
|
|
6350
6361
|
}
|
|
6351
6362
|
}
|
|
6352
6363
|
const parsedPageCount = parsedPages || (pageFilter ? pageFilter.size : effectivePageCount);
|