@clazic/kordoc 2.4.12 → 2.4.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -433,6 +433,7 @@ interface UnifiedOcrOptions {
433
433
  stageWeights?: Partial<Record<UnifiedStage, number>>;
434
434
  timeoutMs?: number;
435
435
  maxRetriesPerPage?: number;
436
+ probeConcurrency?: number;
436
437
  logger?: Logger;
437
438
  runId?: string;
438
439
  }
package/dist/index.d.ts CHANGED
@@ -433,6 +433,7 @@ interface UnifiedOcrOptions {
433
433
  stageWeights?: Partial<Record<UnifiedStage, number>>;
434
434
  timeoutMs?: number;
435
435
  maxRetriesPerPage?: number;
436
+ probeConcurrency?: number;
436
437
  logger?: Logger;
437
438
  runId?: string;
438
439
  }
package/dist/index.js CHANGED
@@ -3115,7 +3115,7 @@ import JSZip2 from "jszip";
3115
3115
  import { DOMParser } from "@xmldom/xmldom";
3116
3116
 
3117
3117
  // src/utils.ts
3118
- var VERSION = true ? "2.4.12" : "0.0.0-dev";
3118
+ var VERSION = true ? "2.4.13" : "0.0.0-dev";
3119
3119
  function toArrayBuffer(buf) {
3120
3120
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3121
3121
  return buf.buffer;
@@ -11373,6 +11373,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11373
11373
  const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
11374
11374
  const dpi = options.dpi ?? 300;
11375
11375
  const modelsInput = options.modelCandidates?.length ? options.modelCandidates : DEFAULT_MODELS;
11376
+ const probeConcurrency = Math.max(1, Math.floor(options.probeConcurrency ?? Math.min(3, modelsInput.length)));
11376
11377
  const modelCache = await loadModelCache(modelCachePath);
11377
11378
  const models = sortModelsByCache(modelsInput, modelCache);
11378
11379
  const modelMaxTokens = { ...DEFAULT_MODEL_MAX_TOKENS, ...options.modelMaxTokens ?? {} };
@@ -11423,44 +11424,34 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11423
11424
  const probeStart = Date.now();
11424
11425
  currentStage = "probe";
11425
11426
  markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
11426
- logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models });
11427
+ logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
11427
11428
  const probeImage = await pickRepresentativeImage(images);
11428
- const probeResults = [];
11429
- for (let i = 0; i < models.length; i++) {
11430
- const model = models[i];
11431
- const t0 = Date.now();
11432
- try {
11433
- await ocrImageViaNim({
11434
- imagePath: probeImage,
11435
- prompt: OCR_PROMPT2,
11436
- model,
11437
- maxTokens: modelMaxTokens[model] ?? 8192,
11438
- baseUrl,
11439
- keyPool,
11440
- timeoutMs,
11441
- maxRetries: 2,
11442
- logger,
11443
- stage: "probe"
11444
- });
11445
- probeResults.push({ model, durationMs: Date.now() - t0, success: true });
11446
- } catch (err) {
11447
- probeResults.push({
11448
- model,
11449
- durationMs: Date.now() - t0,
11450
- success: false,
11451
- error: err instanceof Error ? err.message : String(err)
11452
- });
11429
+ let probeDone = 0;
11430
+ const probeRuns = startParallelProbeRuns({
11431
+ models,
11432
+ probeConcurrency,
11433
+ probeImage,
11434
+ modelMaxTokens,
11435
+ baseUrl,
11436
+ keyPool,
11437
+ timeoutMs,
11438
+ logger,
11439
+ onProbeResult: ({ index, model, result }) => {
11440
+ probeDone += 1;
11441
+ markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11442
+ logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11453
11443
  }
11454
- markStageProgress("probe", Math.round((i + 1) / models.length * 100), i + 1, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${i + 1}/${models.length}`);
11455
- logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: i + 1, total: models.length, model, result: probeResults.at(-1) });
11456
- }
11457
- const selectedModel = chooseFastestModel(probeResults);
11458
- if (!selectedModel) throw new UnifiedOcrError("PROBE_FAILED", "probe", "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.");
11459
- const fallbackModelOrder = probeResults.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs).map((r) => r.model);
11444
+ });
11445
+ const selected = await probeRuns.firstSuccess;
11446
+ const selectedModel = selected.selectedModel;
11447
+ const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
11460
11448
  timingsMs.probe = Date.now() - probeStart;
11461
- await updateModelCache(modelCachePath, probeResults);
11462
11449
  markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
11463
- logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC", { selectedModel, probeResults, elapsedMs: timingsMs.probe, modelCachePath });
11450
+ logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
11451
+ const probeResultsPromise = probeRuns.allResults.then(async (results) => {
11452
+ await updateModelCache(modelCachePath, results);
11453
+ return results;
11454
+ });
11464
11455
  const ocrStart = Date.now();
11465
11456
  currentStage = "ocr";
11466
11457
  markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
@@ -11542,7 +11533,7 @@ ${rawMd}
11542
11533
  workspaceDir,
11543
11534
  selectedModel,
11544
11535
  probeImage,
11545
- probeResults,
11536
+ probeResults: await probeResultsPromise,
11546
11537
  pageCount: images.length,
11547
11538
  keyHealth: keyPool.snapshot(),
11548
11539
  timingsMs,
@@ -11668,9 +11659,71 @@ async function pickRepresentativeImage(images) {
11668
11659
  use.sort((a, b) => a.size - b.size);
11669
11660
  return use[Math.floor(use.length / 2)].path;
11670
11661
  }
11671
- function chooseFastestModel(results) {
11672
- const ok = results.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs);
11673
- return ok[0]?.model ?? null;
11662
+ async function mapWithConcurrency(items, concurrency, mapper) {
11663
+ const results = new Array(items.length);
11664
+ let nextIndex = 0;
11665
+ async function worker() {
11666
+ while (true) {
11667
+ const idx = nextIndex;
11668
+ if (idx >= items.length) return;
11669
+ nextIndex += 1;
11670
+ results[idx] = await mapper(items[idx], idx);
11671
+ }
11672
+ }
11673
+ const workerCount = Math.max(1, Math.min(concurrency, items.length));
11674
+ await Promise.all(Array.from({ length: workerCount }, () => worker()));
11675
+ return results;
11676
+ }
11677
+ function startParallelProbeRuns(input) {
11678
+ let firstResolved = false;
11679
+ let doneCount = 0;
11680
+ let resolveFirst;
11681
+ let rejectFirst;
11682
+ const firstSuccess = new Promise((resolve4, reject) => {
11683
+ resolveFirst = resolve4;
11684
+ rejectFirst = reject;
11685
+ });
11686
+ let lastErr = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
11687
+ const allResults = mapWithConcurrency(input.models, input.probeConcurrency, async (model, index) => {
11688
+ const t0 = Date.now();
11689
+ try {
11690
+ await ocrImageViaNim({
11691
+ imagePath: input.probeImage,
11692
+ prompt: OCR_PROMPT2,
11693
+ model,
11694
+ maxTokens: input.modelMaxTokens[model] ?? 8192,
11695
+ baseUrl: input.baseUrl,
11696
+ keyPool: input.keyPool,
11697
+ timeoutMs: input.timeoutMs,
11698
+ maxRetries: 2,
11699
+ logger: input.logger,
11700
+ stage: "probe"
11701
+ });
11702
+ const result = { model, durationMs: Date.now() - t0, success: true };
11703
+ input.onProbeResult?.({ index, model, result });
11704
+ if (!firstResolved) {
11705
+ firstResolved = true;
11706
+ resolveFirst?.({ selectedModel: model, firstDurationMs: result.durationMs });
11707
+ }
11708
+ return result;
11709
+ } catch (err) {
11710
+ const result = {
11711
+ model,
11712
+ durationMs: Date.now() - t0,
11713
+ success: false,
11714
+ error: err instanceof Error ? err.message : String(err)
11715
+ };
11716
+ lastErr = result.error ?? lastErr;
11717
+ input.onProbeResult?.({ index, model, result });
11718
+ return result;
11719
+ } finally {
11720
+ doneCount += 1;
11721
+ if (doneCount === input.models.length && !firstResolved) {
11722
+ rejectFirst?.(new UnifiedOcrError("PROBE_FAILED", "probe", `\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: ${lastErr}`));
11723
+ }
11724
+ }
11725
+ });
11726
+ return { firstSuccess, allResults };
11674
11727
  }
11675
11728
  async function loadModelCache(path) {
11676
11729
  try {