@clazic/kordoc 2.4.13 → 2.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3115,7 +3115,7 @@ import JSZip2 from "jszip";
3115
3115
  import { DOMParser } from "@xmldom/xmldom";
3116
3116
 
3117
3117
  // src/utils.ts
3118
- var VERSION = true ? "2.4.12" : "0.0.0-dev";
3118
+ var VERSION = true ? "2.4.14" : "0.0.0-dev";
3119
3119
  function toArrayBuffer(buf) {
3120
3120
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3121
3121
  return buf.buffer;
@@ -11309,6 +11309,7 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
11309
11309
  import { mkdir, readdir, readFile, stat, writeFile } from "fs/promises";
11310
11310
  import { basename as basename2, dirname as dirname3, extname, join as join4, resolve as resolve3 } from "path";
11311
11311
  import { spawn as spawn2 } from "child_process";
11312
+ import { performance } from "perf_hooks";
11312
11313
  import libre from "libreoffice-convert";
11313
11314
  init_logger();
11314
11315
  var libreConvert = libre.convert;
@@ -11357,6 +11358,9 @@ var PROOFREAD_PROMPT = [
11357
11358
  "- \uC624\uD0C8\uC790, \uB744\uC5B4\uC4F0\uAE30, \uC904\uBC14\uAFC8, Markdown \uAD6C\uC870\uB9CC \uAD50\uC815",
11358
11359
  "- \uACB0\uACFC\uB294 Markdown \uBCF8\uBB38\uB9CC \uCD9C\uB825"
11359
11360
  ].join("\n");
11361
+ function elapsedMs(startAt) {
11362
+ return Math.round(performance.now() - startAt);
11363
+ }
11360
11364
  async function runUnifiedOcrPipeline(inputPath, options = {}) {
11361
11365
  const absInput = resolve3(inputPath);
11362
11366
  const stem = basename2(absInput, extname(absInput));
@@ -11396,7 +11400,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11396
11400
  try {
11397
11401
  ensureSupportedInput(absInput);
11398
11402
  let workingPdfPath = absInput;
11399
- const convertStart = Date.now();
11403
+ const convertStart = performance.now();
11400
11404
  currentStage = "convert";
11401
11405
  markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
11402
11406
  logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
@@ -11407,10 +11411,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11407
11411
  const out = await convertWithLibreOffice(inputBuffer, ".pdf");
11408
11412
  await writeFile(workingPdfPath, out);
11409
11413
  }
11410
- timingsMs.convert = Date.now() - convertStart;
11414
+ timingsMs.convert = elapsedMs(convertStart);
11411
11415
  markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
11412
11416
  logStage("info", "convert", "done", "PDF \uBCC0\uD658 \uC644\uB8CC", { elapsedMs: timingsMs.convert });
11413
- const renderStart = Date.now();
11417
+ const renderStart = performance.now();
11414
11418
  currentStage = "render";
11415
11419
  markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
11416
11420
  logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi });
@@ -11418,57 +11422,41 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11418
11422
  const images = await listPageImages(imagesDir);
11419
11423
  if (images.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328: \uACB0\uACFC \uC774\uBBF8\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
11420
11424
  markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
11421
- timingsMs.render = Date.now() - renderStart;
11425
+ timingsMs.render = elapsedMs(renderStart);
11422
11426
  markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
11423
11427
  logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", { pages: images.length, elapsedMs: timingsMs.render });
11424
- const probeStart = Date.now();
11428
+ const probeStart = performance.now();
11425
11429
  currentStage = "probe";
11426
11430
  markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
11427
11431
  logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
11428
11432
  const probeImage = await pickRepresentativeImage(images);
11429
11433
  let probeDone = 0;
11430
- const probeResultsByIndex = await mapWithConcurrency(models, probeConcurrency, async (model, index) => {
11431
- const t0 = Date.now();
11432
- try {
11433
- await ocrImageViaNim({
11434
- imagePath: probeImage,
11435
- prompt: OCR_PROMPT2,
11436
- model,
11437
- maxTokens: modelMaxTokens[model] ?? 8192,
11438
- baseUrl,
11439
- keyPool,
11440
- timeoutMs,
11441
- maxRetries: 2,
11442
- logger,
11443
- stage: "probe"
11444
- });
11445
- const result = { model, durationMs: Date.now() - t0, success: true };
11446
- probeDone += 1;
11447
- markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11448
- logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11449
- return result;
11450
- } catch (err) {
11451
- const result = {
11452
- model,
11453
- durationMs: Date.now() - t0,
11454
- success: false,
11455
- error: err instanceof Error ? err.message : String(err)
11456
- };
11434
+ const probeRuns = startParallelProbeRuns({
11435
+ models,
11436
+ probeConcurrency,
11437
+ probeImage,
11438
+ modelMaxTokens,
11439
+ baseUrl,
11440
+ keyPool,
11441
+ timeoutMs,
11442
+ logger,
11443
+ onProbeResult: ({ index, model, result }) => {
11457
11444
  probeDone += 1;
11458
11445
  markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11459
11446
  logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11460
- return result;
11461
11447
  }
11462
11448
  });
11463
- const probeResults = probeResultsByIndex;
11464
- const selectedModel = chooseFastestModel(probeResults);
11465
- if (!selectedModel) throw new UnifiedOcrError("PROBE_FAILED", "probe", "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.");
11466
- const fallbackModelOrder = probeResults.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs).map((r) => r.model);
11467
- timingsMs.probe = Date.now() - probeStart;
11468
- await updateModelCache(modelCachePath, probeResults);
11449
+ const selected = await probeRuns.firstSuccess;
11450
+ const selectedModel = selected.selectedModel;
11451
+ const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
11452
+ timingsMs.probe = elapsedMs(probeStart);
11469
11453
  markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
11470
- logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC", { selectedModel, probeResults, elapsedMs: timingsMs.probe, modelCachePath });
11471
- const ocrStart = Date.now();
11454
+ logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
11455
+ const probeResultsPromise = probeRuns.allResults.then(async (results) => {
11456
+ await updateModelCache(modelCachePath, results);
11457
+ return results;
11458
+ });
11459
+ const ocrStart = performance.now();
11472
11460
  currentStage = "ocr";
11473
11461
  markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
11474
11462
  logStage("info", "ocr", "start", "\uD398\uC774\uC9C0 OCR \uC2DC\uC791", { selectedModel, pageCount: images.length });
@@ -11492,10 +11480,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11492
11480
  markStageProgress("ocr", Math.round((i + 1) / images.length * 100), i + 1, images.length, `OCR ${i + 1}/${images.length}`);
11493
11481
  logStage("debug", "ocr", "progress", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { page: i + 1, total: images.length });
11494
11482
  }
11495
- timingsMs.ocr = Date.now() - ocrStart;
11483
+ timingsMs.ocr = elapsedMs(ocrStart);
11496
11484
  markStageDone("ocr", "OCR \uC644\uB8CC");
11497
11485
  logStage("info", "ocr", "done", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { elapsedMs: timingsMs.ocr });
11498
- const proofStart = Date.now();
11486
+ const proofStart = performance.now();
11499
11487
  currentStage = "proofread";
11500
11488
  markStageStart("proofread", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC9C4\uD589 \uC911");
11501
11489
  logStage("info", "proofread", "start", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC2DC\uC791", { pages: rawPagePaths.length });
@@ -11531,16 +11519,16 @@ ${rawMd}
11531
11519
  markStageProgress("proofread", Math.round((i + 1) / rawPagePaths.length * 100), i + 1, rawPagePaths.length, `\uAD50\uC815 ${i + 1}/${rawPagePaths.length}`);
11532
11520
  logStage("debug", "proofread", "progress", "\uD398\uC774\uC9C0 \uAD50\uC815 \uC644\uB8CC", { page: i + 1, total: rawPagePaths.length });
11533
11521
  }
11534
- timingsMs.proofread = Date.now() - proofStart;
11522
+ timingsMs.proofread = elapsedMs(proofStart);
11535
11523
  markStageDone("proofread", "\uAD50\uC815 \uC644\uB8CC");
11536
11524
  logStage("info", "proofread", "done", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC644\uB8CC", { elapsedMs: timingsMs.proofread });
11537
- const mergeStart = Date.now();
11525
+ const mergeStart = performance.now();
11538
11526
  currentStage = "merge";
11539
11527
  markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
11540
11528
  logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: proofedPaths.length });
11541
11529
  const merged = await mergeMarkdownPages(proofedPaths);
11542
11530
  await writeFile(outputPath, merged, "utf-8");
11543
- timingsMs.merge = Date.now() - mergeStart;
11531
+ timingsMs.merge = elapsedMs(mergeStart);
11544
11532
  markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
11545
11533
  logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
11546
11534
  const report = {
@@ -11549,7 +11537,7 @@ ${rawMd}
11549
11537
  workspaceDir,
11550
11538
  selectedModel,
11551
11539
  probeImage,
11552
- probeResults,
11540
+ probeResults: await probeResultsPromise,
11553
11541
  pageCount: images.length,
11554
11542
  keyHealth: keyPool.snapshot(),
11555
11543
  timingsMs,
@@ -11690,9 +11678,56 @@ async function mapWithConcurrency(items, concurrency, mapper) {
11690
11678
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
11691
11679
  return results;
11692
11680
  }
11693
- function chooseFastestModel(results) {
11694
- const ok = results.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs);
11695
- return ok[0]?.model ?? null;
11681
+ function startParallelProbeRuns(input) {
11682
+ let firstResolved = false;
11683
+ let doneCount = 0;
11684
+ let resolveFirst;
11685
+ let rejectFirst;
11686
+ const firstSuccess = new Promise((resolve4, reject) => {
11687
+ resolveFirst = resolve4;
11688
+ rejectFirst = reject;
11689
+ });
11690
+ let lastErr = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
11691
+ const allResults = mapWithConcurrency(input.models, input.probeConcurrency, async (model, index) => {
11692
+ const t0 = performance.now();
11693
+ try {
11694
+ await ocrImageViaNim({
11695
+ imagePath: input.probeImage,
11696
+ prompt: OCR_PROMPT2,
11697
+ model,
11698
+ maxTokens: input.modelMaxTokens[model] ?? 8192,
11699
+ baseUrl: input.baseUrl,
11700
+ keyPool: input.keyPool,
11701
+ timeoutMs: input.timeoutMs,
11702
+ maxRetries: 2,
11703
+ logger: input.logger,
11704
+ stage: "probe"
11705
+ });
11706
+ const result = { model, durationMs: elapsedMs(t0), success: true };
11707
+ input.onProbeResult?.({ index, model, result });
11708
+ if (!firstResolved) {
11709
+ firstResolved = true;
11710
+ resolveFirst?.({ selectedModel: model, firstDurationMs: result.durationMs });
11711
+ }
11712
+ return result;
11713
+ } catch (err) {
11714
+ const result = {
11715
+ model,
11716
+ durationMs: elapsedMs(t0),
11717
+ success: false,
11718
+ error: err instanceof Error ? err.message : String(err)
11719
+ };
11720
+ lastErr = result.error ?? lastErr;
11721
+ input.onProbeResult?.({ index, model, result });
11722
+ return result;
11723
+ } finally {
11724
+ doneCount += 1;
11725
+ if (doneCount === input.models.length && !firstResolved) {
11726
+ rejectFirst?.(new UnifiedOcrError("PROBE_FAILED", "probe", `\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: ${lastErr}`));
11727
+ }
11728
+ }
11729
+ });
11730
+ return { firstSuccess, allResults };
11696
11731
  }
11697
11732
  async function loadModelCache(path) {
11698
11733
  try {