@clazic/kordoc 2.3.2 → 2.3.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -4,11 +4,11 @@ import {
4
4
  markdownToHwpx,
5
5
  markdownToXlsx,
6
6
  parse
7
- } from "./chunk-2GFJFTKS.js";
7
+ } from "./chunk-UDFKY7CH.js";
8
8
  import {
9
9
  VERSION,
10
10
  toArrayBuffer
11
- } from "./chunk-STIKJGEA.js";
11
+ } from "./chunk-NU3KFVVZ.js";
12
12
  import "./chunk-MOL7MDBG.js";
13
13
  import "./chunk-7FMKAV4P.js";
14
14
  import "./chunk-JOGAFNIL.js";
@@ -137,7 +137,7 @@ async function runParse(files, opts) {
137
137
  saveImages(absPath);
138
138
  }
139
139
  } catch (err) {
140
- const { sanitizeError } = await import("./utils-FFUQJTTI.js");
140
+ const { sanitizeError } = await import("./utils-STJT6CFC.js");
141
141
  process.stderr.write(`
142
142
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
143
143
  `);
@@ -221,7 +221,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
221
221
  `));
222
222
  }
223
223
  } catch (err) {
224
- const { sanitizeError } = await import("./utils-FFUQJTTI.js");
224
+ const { sanitizeError } = await import("./utils-STJT6CFC.js");
225
225
  process.stderr.write(` FAIL
226
226
  `);
227
227
  process.stderr.write(` \u2192 ${sanitizeError(err)}
@@ -230,7 +230,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
230
230
  }
231
231
  });
232
232
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
233
- const { watchDirectory } = await import("./watch-2O32L6IF.js");
233
+ const { watchDirectory } = await import("./watch-PRQGLOW3.js");
234
234
  await watchDirectory({
235
235
  dir,
236
236
  outDir: opts.outDir,
package/dist/index.cjs CHANGED
@@ -2422,7 +2422,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
2422
2422
  return createCliOcrProvider(mode);
2423
2423
  }
2424
2424
  const detected = detectAvailableOcr();
2425
- if (detected !== "gemini") {
2425
+ if (detected !== "codex") {
2426
2426
  if (detected === "tesseract") {
2427
2427
  warnings?.push({
2428
2428
  message: getTesseractFallbackMessage(),
@@ -2430,7 +2430,7 @@ async function resolveOcrProvider(mode, warnings, concurrency, batchSize) {
2430
2430
  });
2431
2431
  } else {
2432
2432
  warnings?.push({
2433
- message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (gemini CLI\uAC00 \uC5C6\uC5B4 fallback). \uB354 \uB098\uC740 \uD488\uC9C8\uC744 \uC704\uD574 gemini CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.`,
2433
+ message: `OCR: '${detected}' \uC0AC\uC6A9 \uC911 (codex CLI\uAC00 \uC5C6\uC5B4 fallback). \uB354 \uB098\uC740 \uD488\uC9C8\uC744 \uC704\uD574 codex CLI \uC124\uCE58\uB97C \uAD8C\uC7A5\uD569\uB2C8\uB2E4.`,
2434
2434
  code: "OCR_CLI_FALLBACK"
2435
2435
  });
2436
2436
  }
@@ -2810,7 +2810,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
2810
2810
  var import_xmldom = require("@xmldom/xmldom");
2811
2811
 
2812
2812
  // src/utils.ts
2813
- var VERSION = true ? "2.3.2" : "0.0.0-dev";
2813
+ var VERSION = true ? "2.3.3" : "0.0.0-dev";
2814
2814
  function toArrayBuffer(buf) {
2815
2815
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
2816
2816
  return buf.buffer;
@@ -6338,15 +6338,26 @@ async function parsePdfDocument(buffer, options) {
6338
6338
  warnings.push({ page: i, message: `\uD398\uC774\uC9C0 ${i} \uD30C\uC2F1 \uC2E4\uD328: ${pageErr instanceof Error ? pageErr.message : "\uC54C \uC218 \uC5C6\uB294 \uC624\uB958"}`, code: "PARTIAL_PARSE" });
6339
6339
  }
6340
6340
  };
6341
- const sampleCount = Math.min(5, targetPageNums.length);
6342
- for (let si = 0; si < sampleCount; si++) {
6341
+ const SAMPLE_SIZE = Math.min(10, targetPageNums.length);
6342
+ const sampledIndices = /* @__PURE__ */ new Set();
6343
+ if (targetPageNums.length <= SAMPLE_SIZE) {
6344
+ for (let i = 0; i < targetPageNums.length; i++) sampledIndices.add(i);
6345
+ } else {
6346
+ for (let i = 0; i < SAMPLE_SIZE; i++) {
6347
+ const idx = Math.round(i * (targetPageNums.length - 1) / (SAMPLE_SIZE - 1));
6348
+ sampledIndices.add(idx);
6349
+ }
6350
+ }
6351
+ for (const si of sampledIndices) {
6343
6352
  await parseSinglePage(targetPageNums[si]);
6344
6353
  }
6345
- const sampleParsed = parsedPages || sampleCount;
6354
+ const sampleParsed = parsedPages || sampledIndices.size;
6346
6355
  const isImageBased = totalChars / Math.max(sampleParsed, 1) < 10;
6347
6356
  if (!isImageBased) {
6348
- for (let si = sampleCount; si < targetPageNums.length; si++) {
6349
- await parseSinglePage(targetPageNums[si]);
6357
+ for (let si = 0; si < targetPageNums.length; si++) {
6358
+ if (!sampledIndices.has(si)) {
6359
+ await parseSinglePage(targetPageNums[si]);
6360
+ }
6350
6361
  }
6351
6362
  }
6352
6363
  const parsedPageCount = parsedPages || (pageFilter ? pageFilter.size : effectivePageCount);