@clazic/kordoc 2.4.13 → 2.4.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/utils.ts
4
- var VERSION = true ? "2.4.12" : "0.0.0-dev";
4
+ var VERSION = true ? "2.4.13" : "0.0.0-dev";
5
5
  function toArrayBuffer(buf) {
6
6
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
7
7
  return buf.buffer;
@@ -105,4 +105,4 @@ export {
105
105
  classifyError,
106
106
  normalizeKordocError
107
107
  };
108
- //# sourceMappingURL=chunk-5R37N6KE.js.map
108
+ //# sourceMappingURL=chunk-X6NIA6BK.js.map
package/dist/cli.js CHANGED
@@ -4,12 +4,12 @@ import {
4
4
  markdownToHwpx,
5
5
  markdownToXlsx,
6
6
  parse
7
- } from "./chunk-UX75CBUO.js";
7
+ } from "./chunk-6CADPLGJ.js";
8
8
  import "./chunk-YW5G6BCJ.js";
9
9
  import {
10
10
  VERSION,
11
11
  toArrayBuffer
12
- } from "./chunk-5R37N6KE.js";
12
+ } from "./chunk-X6NIA6BK.js";
13
13
  import "./chunk-MOL7MDBG.js";
14
14
  import "./chunk-7FMKAV4P.js";
15
15
  import "./chunk-34WIGIQC.js";
@@ -177,7 +177,7 @@ async function runParse(files, opts) {
177
177
  saveImages(absPath);
178
178
  }
179
179
  } catch (err) {
180
- const { sanitizeError } = await import("./utils-XLLXVB7V.js");
180
+ const { sanitizeError } = await import("./utils-EK3CPEZG.js");
181
181
  process.stderr.write(`
182
182
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
183
183
  `);
@@ -259,7 +259,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
259
259
  `));
260
260
  }
261
261
  } catch (err) {
262
- const { sanitizeError } = await import("./utils-XLLXVB7V.js");
262
+ const { sanitizeError } = await import("./utils-EK3CPEZG.js");
263
263
  process.stderr.write(` FAIL
264
264
  `);
265
265
  process.stderr.write(` \u2192 ${sanitizeError(err)}
@@ -291,7 +291,7 @@ program.command("init-env").description("kordoc\uC6A9 .env \uD15C\uD50C\uB9BF \u
291
291
  }
292
292
  });
293
293
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
294
- const { watchDirectory } = await import("./watch-3MTAXFEA.js");
294
+ const { watchDirectory } = await import("./watch-NSWBVKQZ.js");
295
295
  await watchDirectory({
296
296
  dir,
297
297
  outDir: opts.outDir,
package/dist/index.cjs CHANGED
@@ -3138,7 +3138,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
3138
3138
  var import_xmldom = require("@xmldom/xmldom");
3139
3139
 
3140
3140
  // src/utils.ts
3141
- var VERSION = true ? "2.4.12" : "0.0.0-dev";
3141
+ var VERSION = true ? "2.4.13" : "0.0.0-dev";
3142
3142
  function toArrayBuffer(buf) {
3143
3143
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3144
3144
  return buf.buffer;
@@ -11450,47 +11450,31 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11450
11450
  logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
11451
11451
  const probeImage = await pickRepresentativeImage(images);
11452
11452
  let probeDone = 0;
11453
- const probeResultsByIndex = await mapWithConcurrency(models, probeConcurrency, async (model, index) => {
11454
- const t0 = Date.now();
11455
- try {
11456
- await ocrImageViaNim({
11457
- imagePath: probeImage,
11458
- prompt: OCR_PROMPT2,
11459
- model,
11460
- maxTokens: modelMaxTokens[model] ?? 8192,
11461
- baseUrl,
11462
- keyPool,
11463
- timeoutMs,
11464
- maxRetries: 2,
11465
- logger,
11466
- stage: "probe"
11467
- });
11468
- const result = { model, durationMs: Date.now() - t0, success: true };
11469
- probeDone += 1;
11470
- markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11471
- logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11472
- return result;
11473
- } catch (err) {
11474
- const result = {
11475
- model,
11476
- durationMs: Date.now() - t0,
11477
- success: false,
11478
- error: err instanceof Error ? err.message : String(err)
11479
- };
11453
+ const probeRuns = startParallelProbeRuns({
11454
+ models,
11455
+ probeConcurrency,
11456
+ probeImage,
11457
+ modelMaxTokens,
11458
+ baseUrl,
11459
+ keyPool,
11460
+ timeoutMs,
11461
+ logger,
11462
+ onProbeResult: ({ index, model, result }) => {
11480
11463
  probeDone += 1;
11481
11464
  markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11482
11465
  logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11483
- return result;
11484
11466
  }
11485
11467
  });
11486
- const probeResults = probeResultsByIndex;
11487
- const selectedModel = chooseFastestModel(probeResults);
11488
- if (!selectedModel) throw new UnifiedOcrError("PROBE_FAILED", "probe", "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.");
11489
- const fallbackModelOrder = probeResults.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs).map((r) => r.model);
11468
+ const selected = await probeRuns.firstSuccess;
11469
+ const selectedModel = selected.selectedModel;
11470
+ const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
11490
11471
  timingsMs.probe = Date.now() - probeStart;
11491
- await updateModelCache(modelCachePath, probeResults);
11492
11472
  markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
11493
- logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC", { selectedModel, probeResults, elapsedMs: timingsMs.probe, modelCachePath });
11473
+ logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
11474
+ const probeResultsPromise = probeRuns.allResults.then(async (results) => {
11475
+ await updateModelCache(modelCachePath, results);
11476
+ return results;
11477
+ });
11494
11478
  const ocrStart = Date.now();
11495
11479
  currentStage = "ocr";
11496
11480
  markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
@@ -11572,7 +11556,7 @@ ${rawMd}
11572
11556
  workspaceDir,
11573
11557
  selectedModel,
11574
11558
  probeImage,
11575
- probeResults,
11559
+ probeResults: await probeResultsPromise,
11576
11560
  pageCount: images.length,
11577
11561
  keyHealth: keyPool.snapshot(),
11578
11562
  timingsMs,
@@ -11713,9 +11697,56 @@ async function mapWithConcurrency(items, concurrency, mapper) {
11713
11697
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
11714
11698
  return results;
11715
11699
  }
11716
- function chooseFastestModel(results) {
11717
- const ok = results.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs);
11718
- return ok[0]?.model ?? null;
11700
/**
 * Launches a probe OCR request for every candidate model and exposes two
 * promises: one that settles as soon as any model succeeds, and one that
 * settles once every probe has finished.
 *
 * @param {object} input
 * @param {string[]} input.models - Candidate model identifiers to probe.
 * @param {number} input.probeConcurrency - Forwarded to `mapWithConcurrency`.
 * @param {string} input.probeImage - Image path sent with every probe request.
 * @param {Object<string, number>} input.modelMaxTokens - Per-model token limit;
 *   models without an entry use 8192.
 * @param {string} input.baseUrl - Forwarded to `ocrImageViaNim`.
 * @param {*} input.keyPool - Forwarded to `ocrImageViaNim`.
 * @param {number} input.timeoutMs - Forwarded to `ocrImageViaNim`.
 * @param {*} input.logger - Forwarded to `ocrImageViaNim`.
 * @param {(event: {index: number, model: string, result: object}) => void} [input.onProbeResult]
 *   Called exactly once per finished probe, for successes and failures alike.
 * @returns {{firstSuccess: Promise<{selectedModel: string, firstDurationMs: number}>, allResults: Promise<object[]>}}
 *   `firstSuccess` resolves with the first model whose probe succeeded and
 *   rejects with a `PROBE_FAILED` `UnifiedOcrError` when no model can succeed
 *   (every probe failed, or `models` is empty). `allResults` is the promise
 *   returned by `mapWithConcurrency` over the per-model result records.
 */
function startParallelProbeRuns(input) {
  const {
    models,
    probeConcurrency,
    probeImage,
    modelMaxTokens,
    baseUrl,
    keyPool,
    timeoutMs,
    logger,
    onProbeResult
  } = input;
  const noModelMessage = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
  let firstResolved = false;
  let doneCount = 0;
  // Message of the most recently failed probe; stays undefined until one fails.
  let lastErr;
  let resolveFirst;
  let rejectFirst;
  const firstSuccess = new Promise((resolve, reject) => {
    resolveFirst = resolve;
    rejectFirst = reject;
  });
  const rejectNoModel = () => {
    // The generic text already carries the prefix, so only add it to a concrete error.
    const message = lastErr === undefined ? noModelMessage : `\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: ${lastErr}`;
    rejectFirst(new UnifiedOcrError("PROBE_FAILED", "probe", message));
  };
  if (models.length === 0) {
    // With nothing to probe the mapper below never runs, so without this
    // guard `firstSuccess` would stay pending forever.
    rejectNoModel();
  }
  const allResults = mapWithConcurrency(models, probeConcurrency, async (model, index) => {
    const t0 = Date.now();
    let result;
    // Only the OCR call is guarded: an exception thrown by `onProbeResult`
    // must not turn a successful probe into a failed one.
    try {
      await ocrImageViaNim({
        imagePath: probeImage,
        prompt: OCR_PROMPT2,
        model,
        maxTokens: modelMaxTokens[model] ?? 8192,
        baseUrl,
        keyPool,
        timeoutMs,
        maxRetries: 2,
        logger,
        stage: "probe"
      });
      result = { model, durationMs: Date.now() - t0, success: true };
    } catch (err) {
      result = {
        model,
        durationMs: Date.now() - t0,
        success: false,
        error: err instanceof Error ? err.message : String(err)
      };
      lastErr = result.error ?? lastErr;
    }
    try {
      onProbeResult?.({ index, model, result });
    } finally {
      // Settle `firstSuccess` even when the callback throws, so callers
      // awaiting it can never be left hanging.
      doneCount += 1;
      if (result.success && !firstResolved) {
        firstResolved = true;
        resolveFirst({ selectedModel: model, firstDurationMs: result.durationMs });
      } else if (doneCount === models.length && !firstResolved) {
        rejectNoModel();
      }
    }
    return result;
  });
  return { firstSuccess, allResults };
}
11720
11751
  async function loadModelCache(path) {
11721
11752
  try {