@clazic/kordoc 2.4.12 → 2.4.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/utils.ts
4
- var VERSION = true ? "2.4.12" : "0.0.0-dev";
4
+ var VERSION = true ? "2.4.13" : "0.0.0-dev";
5
5
  function toArrayBuffer(buf) {
6
6
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
7
7
  return buf.buffer;
@@ -105,4 +105,4 @@ export {
105
105
  classifyError,
106
106
  normalizeKordocError
107
107
  };
108
- //# sourceMappingURL=chunk-5R37N6KE.js.map
108
+ //# sourceMappingURL=chunk-X6NIA6BK.js.map
package/dist/cli.js CHANGED
@@ -4,12 +4,12 @@ import {
4
4
  markdownToHwpx,
5
5
  markdownToXlsx,
6
6
  parse
7
- } from "./chunk-KJEZPVEK.js";
7
+ } from "./chunk-6CADPLGJ.js";
8
8
  import "./chunk-YW5G6BCJ.js";
9
9
  import {
10
10
  VERSION,
11
11
  toArrayBuffer
12
- } from "./chunk-5R37N6KE.js";
12
+ } from "./chunk-X6NIA6BK.js";
13
13
  import "./chunk-MOL7MDBG.js";
14
14
  import "./chunk-7FMKAV4P.js";
15
15
  import "./chunk-34WIGIQC.js";
@@ -177,7 +177,7 @@ async function runParse(files, opts) {
177
177
  saveImages(absPath);
178
178
  }
179
179
  } catch (err) {
180
- const { sanitizeError } = await import("./utils-XLLXVB7V.js");
180
+ const { sanitizeError } = await import("./utils-EK3CPEZG.js");
181
181
  process.stderr.write(`
182
182
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
183
183
  `);
@@ -259,7 +259,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
259
259
  `));
260
260
  }
261
261
  } catch (err) {
262
- const { sanitizeError } = await import("./utils-XLLXVB7V.js");
262
+ const { sanitizeError } = await import("./utils-EK3CPEZG.js");
263
263
  process.stderr.write(` FAIL
264
264
  `);
265
265
  process.stderr.write(` \u2192 ${sanitizeError(err)}
@@ -291,7 +291,7 @@ program.command("init-env").description("kordoc\uC6A9 .env \uD15C\uD50C\uB9BF \u
291
291
  }
292
292
  });
293
293
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
294
- const { watchDirectory } = await import("./watch-SOMS2KR7.js");
294
+ const { watchDirectory } = await import("./watch-NSWBVKQZ.js");
295
295
  await watchDirectory({
296
296
  dir,
297
297
  outDir: opts.outDir,
package/dist/index.cjs CHANGED
@@ -3138,7 +3138,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
3138
3138
  var import_xmldom = require("@xmldom/xmldom");
3139
3139
 
3140
3140
  // src/utils.ts
3141
- var VERSION = true ? "2.4.12" : "0.0.0-dev";
3141
+ var VERSION = true ? "2.4.13" : "0.0.0-dev";
3142
3142
  function toArrayBuffer(buf) {
3143
3143
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3144
3144
  return buf.buffer;
@@ -11396,6 +11396,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11396
11396
  const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
11397
11397
  const dpi = options.dpi ?? 300;
11398
11398
  const modelsInput = options.modelCandidates?.length ? options.modelCandidates : DEFAULT_MODELS;
11399
+ const probeConcurrency = Math.max(1, Math.floor(options.probeConcurrency ?? Math.min(3, modelsInput.length)));
11399
11400
  const modelCache = await loadModelCache(modelCachePath);
11400
11401
  const models = sortModelsByCache(modelsInput, modelCache);
11401
11402
  const modelMaxTokens = { ...DEFAULT_MODEL_MAX_TOKENS, ...options.modelMaxTokens ?? {} };
@@ -11446,44 +11447,34 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11446
11447
  const probeStart = Date.now();
11447
11448
  currentStage = "probe";
11448
11449
  markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
11449
- logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models });
11450
+ logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
11450
11451
  const probeImage = await pickRepresentativeImage(images);
11451
- const probeResults = [];
11452
- for (let i = 0; i < models.length; i++) {
11453
- const model = models[i];
11454
- const t0 = Date.now();
11455
- try {
11456
- await ocrImageViaNim({
11457
- imagePath: probeImage,
11458
- prompt: OCR_PROMPT2,
11459
- model,
11460
- maxTokens: modelMaxTokens[model] ?? 8192,
11461
- baseUrl,
11462
- keyPool,
11463
- timeoutMs,
11464
- maxRetries: 2,
11465
- logger,
11466
- stage: "probe"
11467
- });
11468
- probeResults.push({ model, durationMs: Date.now() - t0, success: true });
11469
- } catch (err) {
11470
- probeResults.push({
11471
- model,
11472
- durationMs: Date.now() - t0,
11473
- success: false,
11474
- error: err instanceof Error ? err.message : String(err)
11475
- });
11452
+ let probeDone = 0;
11453
+ const probeRuns = startParallelProbeRuns({
11454
+ models,
11455
+ probeConcurrency,
11456
+ probeImage,
11457
+ modelMaxTokens,
11458
+ baseUrl,
11459
+ keyPool,
11460
+ timeoutMs,
11461
+ logger,
11462
+ onProbeResult: ({ index, model, result }) => {
11463
+ probeDone += 1;
11464
+ markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11465
+ logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11476
11466
  }
11477
- markStageProgress("probe", Math.round((i + 1) / models.length * 100), i + 1, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${i + 1}/${models.length}`);
11478
- logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: i + 1, total: models.length, model, result: probeResults.at(-1) });
11479
- }
11480
- const selectedModel = chooseFastestModel(probeResults);
11481
- if (!selectedModel) throw new UnifiedOcrError("PROBE_FAILED", "probe", "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.");
11482
- const fallbackModelOrder = probeResults.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs).map((r) => r.model);
11467
+ });
11468
+ const selected = await probeRuns.firstSuccess;
11469
+ const selectedModel = selected.selectedModel;
11470
+ const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
11483
11471
  timingsMs.probe = Date.now() - probeStart;
11484
- await updateModelCache(modelCachePath, probeResults);
11485
11472
  markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
11486
- logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC", { selectedModel, probeResults, elapsedMs: timingsMs.probe, modelCachePath });
11473
+ logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
11474
+ const probeResultsPromise = probeRuns.allResults.then(async (results) => {
11475
+ await updateModelCache(modelCachePath, results);
11476
+ return results;
11477
+ });
11487
11478
  const ocrStart = Date.now();
11488
11479
  currentStage = "ocr";
11489
11480
  markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
@@ -11565,7 +11556,7 @@ ${rawMd}
11565
11556
  workspaceDir,
11566
11557
  selectedModel,
11567
11558
  probeImage,
11568
- probeResults,
11559
+ probeResults: await probeResultsPromise,
11569
11560
  pageCount: images.length,
11570
11561
  keyHealth: keyPool.snapshot(),
11571
11562
  timingsMs,
@@ -11691,9 +11682,71 @@ async function pickRepresentativeImage(images) {
11691
11682
  use.sort((a, b) => a.size - b.size);
11692
11683
  return use[Math.floor(use.length / 2)].path;
11693
11684
  }
11694
- function chooseFastestModel(results) {
11695
- const ok = results.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs);
11696
- return ok[0]?.model ?? null;
11685
/**
 * Maps `items` through an async `mapper`, keeping at most `concurrency`
 * mapper calls in flight at a time.
 *
 * Each worker repeatedly claims the next unclaimed index from a shared counter,
 * so every result is stored at the index of its input regardless of the order
 * in which the mapper calls finish.
 *
 * @param {Array} items - Inputs to map.
 * @param {number} concurrency - Requested number of parallel workers. It is
 *   floored and clamped to the range [1, items.length]. A value that is not a
 *   number (NaN, undefined) falls back to a single worker.
 * @param {(item: any, index: number) => any} mapper - Called once per item; its
 *   awaited value becomes the result for that index.
 * @returns {Promise<Array>} Results in input order. Rejects with the first
 *   mapper rejection (via Promise.all).
 */
async function mapWithConcurrency(items, concurrency, mapper) {
  const results = new Array(items.length);
  let nextIndex = 0;

  const worker = async () => {
    // The claim (read + increment) happens synchronously, before any await,
    // so two workers can never take the same index.
    while (nextIndex < items.length) {
      const idx = nextIndex;
      nextIndex += 1;
      results[idx] = await mapper(items[idx], idx);
    }
  };

  // Math.max/Math.min propagate NaN and Array.from({ length: NaN }) is empty,
  // which would start zero workers and silently skip every item. Treat a
  // non-numeric request as "one worker" instead.
  const requested = Number(concurrency);
  const workerCount = Number.isNaN(requested)
    ? 1
    : Math.max(1, Math.min(Math.floor(requested), items.length));

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
11700
/**
 * Starts a probe OCR call for every candidate model (bounded by
 * `input.probeConcurrency`) and returns two promises describing the outcome.
 *
 * @param {object} input
 * @param {string[]} input.models - Candidate model names, probed in this order.
 * @param {number} input.probeConcurrency - Passed to mapWithConcurrency as the
 *   parallelism limit.
 * @param {string} input.probeImage - Image path handed to ocrImageViaNim.
 * @param {object} input.modelMaxTokens - Per-model max token map; models
 *   missing from it use 8192.
 * @param {*} input.baseUrl - Forwarded to ocrImageViaNim unchanged.
 * @param {*} input.keyPool - Forwarded to ocrImageViaNim unchanged.
 * @param {*} input.timeoutMs - Forwarded to ocrImageViaNim unchanged.
 * @param {*} input.logger - Forwarded to ocrImageViaNim unchanged.
 * @param {Function} [input.onProbeResult] - Optional observer invoked with
 *   `{ index, model, result }` after each probe finishes (success or failure).
 * @returns {{ firstSuccess: Promise<{selectedModel: string, firstDurationMs: number}>,
 *            allResults: Promise<Array<object>> }}
 *   `firstSuccess` resolves as soon as any probe succeeds and rejects with a
 *   `UnifiedOcrError("PROBE_FAILED", "probe", ...)` once probing has finished
 *   without a single success (including when `models` is empty).
 *   `allResults` resolves with one `{ model, durationMs, success, error? }`
 *   entry per model, in `models` order.
 */
function startParallelProbeRuns(input) {
  let firstResolved = false;
  let resolveFirst;
  let rejectFirst;
  const firstSuccess = new Promise((resolve4, reject) => {
    resolveFirst = resolve4;
    rejectFirst = reject;
  });
  // Message of the most recent failed probe; stays undefined when no probe
  // failed (e.g. nothing was probed at all).
  let lastErr;

  const allResults = mapWithConcurrency(input.models, input.probeConcurrency, async (model, index) => {
    const t0 = Date.now();
    try {
      await ocrImageViaNim({
        imagePath: input.probeImage,
        prompt: OCR_PROMPT2,
        model,
        maxTokens: input.modelMaxTokens[model] ?? 8192,
        baseUrl: input.baseUrl,
        keyPool: input.keyPool,
        timeoutMs: input.timeoutMs,
        maxRetries: 2,
        logger: input.logger,
        stage: "probe"
      });
      const result = { model, durationMs: Date.now() - t0, success: true };
      input.onProbeResult?.({ index, model, result });
      if (!firstResolved) {
        firstResolved = true;
        resolveFirst({ selectedModel: model, firstDurationMs: result.durationMs });
      }
      return result;
    } catch (err) {
      const result = {
        model,
        durationMs: Date.now() - t0,
        success: false,
        error: err instanceof Error ? err.message : String(err)
      };
      lastErr = result.error;
      input.onProbeResult?.({ index, model, result });
      return result;
    }
  });

  // Settle firstSuccess from the completion of the whole run rather than from
  // a per-probe counter: a counter never reaches models.length when the list
  // is empty or when a worker stops early, which left firstSuccess pending
  // forever. Rejecting after resolveFirst has run is a no-op.
  allResults.then(
    () => {
      if (firstResolved) return;
      const message = lastErr === undefined
        ? "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4."
        : `\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: ${lastErr}`;
      rejectFirst(new UnifiedOcrError("PROBE_FAILED", "probe", message));
    },
    (err) => {
      // The run itself failed (e.g. the observer threw while reporting a
      // failure); surface that instead of leaving firstSuccess pending.
      rejectFirst(err);
    }
  );

  return { firstSuccess, allResults };
}
11698
11751
  async function loadModelCache(path) {
11699
11752
  try {