@clazic/kordoc 2.4.12 → 2.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -433,6 +433,7 @@ interface UnifiedOcrOptions {
433
433
  stageWeights?: Partial<Record<UnifiedStage, number>>;
434
434
  timeoutMs?: number;
435
435
  maxRetriesPerPage?: number;
436
+ probeConcurrency?: number;
436
437
  logger?: Logger;
437
438
  runId?: string;
438
439
  }
package/dist/index.d.ts CHANGED
@@ -433,6 +433,7 @@ interface UnifiedOcrOptions {
433
433
  stageWeights?: Partial<Record<UnifiedStage, number>>;
434
434
  timeoutMs?: number;
435
435
  maxRetriesPerPage?: number;
436
+ probeConcurrency?: number;
436
437
  logger?: Logger;
437
438
  runId?: string;
438
439
  }
package/dist/index.js CHANGED
@@ -11373,6 +11373,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11373
11373
  const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
11374
11374
  const dpi = options.dpi ?? 300;
11375
11375
  const modelsInput = options.modelCandidates?.length ? options.modelCandidates : DEFAULT_MODELS;
11376
+ const probeConcurrency = Math.max(1, Math.floor(options.probeConcurrency ?? Math.min(3, modelsInput.length)));
11376
11377
  const modelCache = await loadModelCache(modelCachePath);
11377
11378
  const models = sortModelsByCache(modelsInput, modelCache);
11378
11379
  const modelMaxTokens = { ...DEFAULT_MODEL_MAX_TOKENS, ...options.modelMaxTokens ?? {} };
@@ -11423,11 +11424,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11423
11424
  const probeStart = Date.now();
11424
11425
  currentStage = "probe";
11425
11426
  markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
11426
- logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models });
11427
+ logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
11427
11428
  const probeImage = await pickRepresentativeImage(images);
11428
- const probeResults = [];
11429
- for (let i = 0; i < models.length; i++) {
11430
- const model = models[i];
11429
+ let probeDone = 0;
11430
+ const probeResultsByIndex = await mapWithConcurrency(models, probeConcurrency, async (model, index) => {
11431
11431
  const t0 = Date.now();
11432
11432
  try {
11433
11433
  await ocrImageViaNim({
@@ -11442,18 +11442,25 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11442
11442
  logger,
11443
11443
  stage: "probe"
11444
11444
  });
11445
- probeResults.push({ model, durationMs: Date.now() - t0, success: true });
11445
+ const result = { model, durationMs: Date.now() - t0, success: true };
11446
+ probeDone += 1;
11447
+ markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11448
+ logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11449
+ return result;
11446
11450
  } catch (err) {
11447
- probeResults.push({
11451
+ const result = {
11448
11452
  model,
11449
11453
  durationMs: Date.now() - t0,
11450
11454
  success: false,
11451
11455
  error: err instanceof Error ? err.message : String(err)
11452
- });
11456
+ };
11457
+ probeDone += 1;
11458
+ markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11459
+ logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11460
+ return result;
11453
11461
  }
11454
- markStageProgress("probe", Math.round((i + 1) / models.length * 100), i + 1, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${i + 1}/${models.length}`);
11455
- logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: i + 1, total: models.length, model, result: probeResults.at(-1) });
11456
- }
11462
+ });
11463
+ const probeResults = probeResultsByIndex;
11457
11464
  const selectedModel = chooseFastestModel(probeResults);
11458
11465
  if (!selectedModel) throw new UnifiedOcrError("PROBE_FAILED", "probe", "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.");
11459
11466
  const fallbackModelOrder = probeResults.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs).map((r) => r.model);
@@ -11668,6 +11675,21 @@ async function pickRepresentativeImage(images) {
11668
11675
  use.sort((a, b) => a.size - b.size);
11669
11676
  return use[Math.floor(use.length / 2)].path;
11670
11677
  }
11678
/**
 * Runs `mapper` over every element of `items` with a bounded number of
 * mapper calls in flight at once, and resolves with the mapped values in
 * the same order as `items`.
 *
 * @param {Array} items - Values to map; each one is passed to `mapper`.
 * @param {number} concurrency - Upper bound on simultaneous mapper calls.
 *   Values below 1 are raised to 1, values above `items.length` are capped,
 *   and a value that is not a number (NaN, undefined, non-numeric string)
 *   falls back to 1.
 * @param {(item: any, index: number) => Promise<any>} mapper - Async
 *   callback invoked as `mapper(items[index], index)`.
 * @returns {Promise<Array>} Array where slot `i` holds the result for `items[i]`.
 * @throws Rejects with the first mapper rejection (via `Promise.all`); the
 *   remaining workers are not cancelled and keep consuming items.
 */
async function mapWithConcurrency(items, concurrency, mapper) {
  const results = new Array(items.length);

  // A NaN limit would propagate through Math.min/Math.max and make
  // Array.from({ length: NaN }) create zero workers, so the function would
  // resolve with an array of holes without ever calling the mapper.
  const requested = Math.floor(Number(concurrency));
  const limit = Number.isNaN(requested) ? 1 : requested;
  const workerCount = Math.max(1, Math.min(limit, items.length));

  // Shared cursor: each worker claims the next unprocessed index. Claiming
  // happens synchronously (no await between read and increment), so no two
  // workers ever take the same index.
  let nextIndex = 0;

  async function worker() {
    while (nextIndex < items.length) {
      const idx = nextIndex;
      nextIndex += 1;
      results[idx] = await mapper(items[idx], idx);
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
11671
11693
  function chooseFastestModel(results) {
11672
11694
  const ok = results.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs);
11673
11695
  return ok[0]?.model ?? null;