@clazic/kordoc 2.4.13 → 2.4.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3115,7 +3115,7 @@ import JSZip2 from "jszip";
3115
3115
  import { DOMParser } from "@xmldom/xmldom";
3116
3116
 
3117
3117
  // src/utils.ts
3118
- var VERSION = true ? "2.4.12" : "0.0.0-dev";
3118
+ var VERSION = true ? "2.4.13" : "0.0.0-dev";
3119
3119
  function toArrayBuffer(buf) {
3120
3120
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3121
3121
  return buf.buffer;
@@ -11427,47 +11427,31 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11427
11427
  logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
11428
11428
  const probeImage = await pickRepresentativeImage(images);
11429
11429
  let probeDone = 0;
11430
- const probeResultsByIndex = await mapWithConcurrency(models, probeConcurrency, async (model, index) => {
11431
- const t0 = Date.now();
11432
- try {
11433
- await ocrImageViaNim({
11434
- imagePath: probeImage,
11435
- prompt: OCR_PROMPT2,
11436
- model,
11437
- maxTokens: modelMaxTokens[model] ?? 8192,
11438
- baseUrl,
11439
- keyPool,
11440
- timeoutMs,
11441
- maxRetries: 2,
11442
- logger,
11443
- stage: "probe"
11444
- });
11445
- const result = { model, durationMs: Date.now() - t0, success: true };
11446
- probeDone += 1;
11447
- markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11448
- logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11449
- return result;
11450
- } catch (err) {
11451
- const result = {
11452
- model,
11453
- durationMs: Date.now() - t0,
11454
- success: false,
11455
- error: err instanceof Error ? err.message : String(err)
11456
- };
11430
+ const probeRuns = startParallelProbeRuns({
11431
+ models,
11432
+ probeConcurrency,
11433
+ probeImage,
11434
+ modelMaxTokens,
11435
+ baseUrl,
11436
+ keyPool,
11437
+ timeoutMs,
11438
+ logger,
11439
+ onProbeResult: ({ index, model, result }) => {
11457
11440
  probeDone += 1;
11458
11441
  markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11459
11442
  logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11460
- return result;
11461
11443
  }
11462
11444
  });
11463
- const probeResults = probeResultsByIndex;
11464
- const selectedModel = chooseFastestModel(probeResults);
11465
- if (!selectedModel) throw new UnifiedOcrError("PROBE_FAILED", "probe", "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.");
11466
- const fallbackModelOrder = probeResults.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs).map((r) => r.model);
11445
+ const selected = await probeRuns.firstSuccess;
11446
+ const selectedModel = selected.selectedModel;
11447
+ const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
11467
11448
  timingsMs.probe = Date.now() - probeStart;
11468
- await updateModelCache(modelCachePath, probeResults);
11469
11449
  markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
11470
- logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC", { selectedModel, probeResults, elapsedMs: timingsMs.probe, modelCachePath });
11450
+ logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
11451
+ const probeResultsPromise = probeRuns.allResults.then(async (results) => {
11452
+ await updateModelCache(modelCachePath, results);
11453
+ return results;
11454
+ });
11471
11455
  const ocrStart = Date.now();
11472
11456
  currentStage = "ocr";
11473
11457
  markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
@@ -11549,7 +11533,7 @@ ${rawMd}
11549
11533
  workspaceDir,
11550
11534
  selectedModel,
11551
11535
  probeImage,
11552
- probeResults,
11536
+ probeResults: await probeResultsPromise,
11553
11537
  pageCount: images.length,
11554
11538
  keyHealth: keyPool.snapshot(),
11555
11539
  timingsMs,
@@ -11690,9 +11674,56 @@ async function mapWithConcurrency(items, concurrency, mapper) {
11690
11674
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
11691
11675
  return results;
11692
11676
  }
11693
- function chooseFastestModel(results) {
11694
- const ok = results.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs);
11695
- return ok[0]?.model ?? null;
11677
/**
 * Starts an OCR probe for every candidate model and exposes two views of the run.
 *
 * Each model is probed through `mapWithConcurrency`, calling `ocrImageViaNim` on
 * `input.probeImage` with `maxRetries: 2` and `stage: "probe"`.
 *
 * @param {object} input
 * @param {string[]} input.models - Candidate model identifiers to probe.
 * @param {number} input.probeConcurrency - Concurrency passed to `mapWithConcurrency`.
 * @param {string} input.probeImage - Image path handed to `ocrImageViaNim`.
 * @param {Record<string, number>} input.modelMaxTokens - Per-model `maxTokens`; 8192 when absent.
 * @param {*} input.baseUrl - Forwarded to `ocrImageViaNim` unchanged.
 * @param {*} input.keyPool - Forwarded to `ocrImageViaNim` unchanged.
 * @param {*} input.timeoutMs - Forwarded to `ocrImageViaNim` unchanged.
 * @param {*} input.logger - Forwarded to `ocrImageViaNim` unchanged.
 * @param {(event: { index: number, model: string, result: object }) => void} [input.onProbeResult]
 *   Optional observer invoked once per finished probe (success or failure).
 * @returns {{ firstSuccess: Promise<{ selectedModel: string, firstDurationMs: number }>, allResults: Promise<object[]> }}
 *   `firstSuccess` fulfills with the first model whose probe succeeds and rejects with a
 *   `UnifiedOcrError("PROBE_FAILED", "probe", ...)` when every probe fails or when there
 *   are no models at all. `allResults` is the promise returned by `mapWithConcurrency`,
 *   carrying one `{ model, durationMs, success, error? }` record per model.
 */
function startParallelProbeRuns(input) {
  const { models, onProbeResult } = input;
  const failurePrefix = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: ";
  // Used verbatim only when no probe ever ran; otherwise replaced by the latest probe error.
  let lastErr = "\uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
  let firstResolved = false;
  let doneCount = 0;
  let resolveFirst;
  let rejectFirst;
  const firstSuccess = new Promise((resolve, reject) => {
    resolveFirst = resolve;
    rejectFirst = reject;
  });

  // Reporting is best-effort: an exception thrown by the observer must neither turn a
  // successful probe into a failed one nor reject `allResults`.
  const notify = (index, model, result) => {
    try {
      onProbeResult?.({ index, model, result });
    } catch {
      // Deliberately ignored; the probe outcome is decided by the OCR call alone.
    }
  };

  const allResults = mapWithConcurrency(models, input.probeConcurrency, async (model, index) => {
    const startedAt = Date.now();
    let result;
    try {
      await ocrImageViaNim({
        imagePath: input.probeImage,
        prompt: OCR_PROMPT2,
        model,
        maxTokens: input.modelMaxTokens[model] ?? 8192,
        baseUrl: input.baseUrl,
        keyPool: input.keyPool,
        timeoutMs: input.timeoutMs,
        maxRetries: 2,
        logger: input.logger,
        stage: "probe"
      });
      result = { model, durationMs: Date.now() - startedAt, success: true };
    } catch (err) {
      result = {
        model,
        durationMs: Date.now() - startedAt,
        success: false,
        error: err instanceof Error ? err.message : String(err)
      };
      lastErr = result.error;
    }

    notify(index, model, result);

    if (result.success && !firstResolved) {
      firstResolved = true;
      resolveFirst({ selectedModel: model, firstDurationMs: result.durationMs });
    }

    // Once the last probe has finished without any success, nothing can still resolve.
    doneCount += 1;
    if (doneCount === models.length && !firstResolved) {
      rejectFirst(new UnifiedOcrError("PROBE_FAILED", "probe", `${failurePrefix}${lastErr}`));
    }
    return result;
  });

  // With no models the mapper above never runs, so settle here instead of leaving
  // `firstSuccess` pending forever.
  if (models.length === 0) {
    rejectFirst(new UnifiedOcrError("PROBE_FAILED", "probe", `${failurePrefix}${lastErr}`));
  }

  return { firstSuccess, allResults };
}
11697
11728
  async function loadModelCache(path) {
11698
11729
  try {