@clazic/kordoc 2.4.13 → 2.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -4,12 +4,12 @@ import {
4
4
  markdownToHwpx,
5
5
  markdownToXlsx,
6
6
  parse
7
- } from "./chunk-UX75CBUO.js";
7
+ } from "./chunk-ZER7GYXK.js";
8
8
  import "./chunk-YW5G6BCJ.js";
9
9
  import {
10
10
  VERSION,
11
11
  toArrayBuffer
12
- } from "./chunk-5R37N6KE.js";
12
+ } from "./chunk-YHPNDX7A.js";
13
13
  import "./chunk-MOL7MDBG.js";
14
14
  import "./chunk-7FMKAV4P.js";
15
15
  import "./chunk-34WIGIQC.js";
@@ -177,7 +177,7 @@ async function runParse(files, opts) {
177
177
  saveImages(absPath);
178
178
  }
179
179
  } catch (err) {
180
- const { sanitizeError } = await import("./utils-XLLXVB7V.js");
180
+ const { sanitizeError } = await import("./utils-ZQA6RCXN.js");
181
181
  process.stderr.write(`
182
182
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
183
183
  `);
@@ -259,7 +259,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
259
259
  `));
260
260
  }
261
261
  } catch (err) {
262
- const { sanitizeError } = await import("./utils-XLLXVB7V.js");
262
+ const { sanitizeError } = await import("./utils-ZQA6RCXN.js");
263
263
  process.stderr.write(` FAIL
264
264
  `);
265
265
  process.stderr.write(` \u2192 ${sanitizeError(err)}
@@ -291,7 +291,7 @@ program.command("init-env").description("kordoc\uC6A9 .env \uD15C\uD50C\uB9BF \u
291
291
  }
292
292
  });
293
293
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
294
- const { watchDirectory } = await import("./watch-3MTAXFEA.js");
294
+ const { watchDirectory } = await import("./watch-ULSOWHFE.js");
295
295
  await watchDirectory({
296
296
  dir,
297
297
  outDir: opts.outDir,
package/dist/index.cjs CHANGED
@@ -3138,7 +3138,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
3138
3138
  var import_xmldom = require("@xmldom/xmldom");
3139
3139
 
3140
3140
  // src/utils.ts
3141
- var VERSION = true ? "2.4.12" : "0.0.0-dev";
3141
+ var VERSION = true ? "2.4.14" : "0.0.0-dev";
3142
3142
  function toArrayBuffer(buf) {
3143
3143
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3144
3144
  return buf.buffer;
@@ -11332,6 +11332,7 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
11332
11332
  var import_promises2 = require("fs/promises");
11333
11333
  var import_path5 = require("path");
11334
11334
  var import_child_process4 = require("child_process");
11335
+ var import_node_perf_hooks = require("perf_hooks");
11335
11336
  var import_libreoffice_convert = __toESM(require("libreoffice-convert"), 1);
11336
11337
  init_logger();
11337
11338
  var libreConvert = import_libreoffice_convert.default.convert;
@@ -11380,6 +11381,9 @@ var PROOFREAD_PROMPT = [
11380
11381
  "- \uC624\uD0C8\uC790, \uB744\uC5B4\uC4F0\uAE30, \uC904\uBC14\uAFC8, Markdown \uAD6C\uC870\uB9CC \uAD50\uC815",
11381
11382
  "- \uACB0\uACFC\uB294 Markdown \uBCF8\uBB38\uB9CC \uCD9C\uB825"
11382
11383
  ].join("\n");
/**
 * Whole milliseconds elapsed since a `performance.now()` reading.
 *
 * @param {number} startAt - Earlier value obtained from `performance.now()`.
 * @returns {number} Difference between the current reading and `startAt`,
 *   rounded to the nearest integer.
 */
function elapsedMs(startAt) {
  const now = import_node_perf_hooks.performance.now();
  const delta = now - startAt;
  return Math.round(delta);
}
11383
11387
  async function runUnifiedOcrPipeline(inputPath, options = {}) {
11384
11388
  const absInput = (0, import_path5.resolve)(inputPath);
11385
11389
  const stem = (0, import_path5.basename)(absInput, (0, import_path5.extname)(absInput));
@@ -11419,7 +11423,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11419
11423
  try {
11420
11424
  ensureSupportedInput(absInput);
11421
11425
  let workingPdfPath = absInput;
11422
- const convertStart = Date.now();
11426
+ const convertStart = import_node_perf_hooks.performance.now();
11423
11427
  currentStage = "convert";
11424
11428
  markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
11425
11429
  logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
@@ -11430,10 +11434,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11430
11434
  const out = await convertWithLibreOffice(inputBuffer, ".pdf");
11431
11435
  await (0, import_promises2.writeFile)(workingPdfPath, out);
11432
11436
  }
11433
- timingsMs.convert = Date.now() - convertStart;
11437
+ timingsMs.convert = elapsedMs(convertStart);
11434
11438
  markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
11435
11439
  logStage("info", "convert", "done", "PDF \uBCC0\uD658 \uC644\uB8CC", { elapsedMs: timingsMs.convert });
11436
- const renderStart = Date.now();
11440
+ const renderStart = import_node_perf_hooks.performance.now();
11437
11441
  currentStage = "render";
11438
11442
  markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
11439
11443
  logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi });
@@ -11441,57 +11445,41 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11441
11445
  const images = await listPageImages(imagesDir);
11442
11446
  if (images.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328: \uACB0\uACFC \uC774\uBBF8\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
11443
11447
  markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
11444
- timingsMs.render = Date.now() - renderStart;
11448
+ timingsMs.render = elapsedMs(renderStart);
11445
11449
  markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
11446
11450
  logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", { pages: images.length, elapsedMs: timingsMs.render });
11447
- const probeStart = Date.now();
11451
+ const probeStart = import_node_perf_hooks.performance.now();
11448
11452
  currentStage = "probe";
11449
11453
  markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
11450
11454
  logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
11451
11455
  const probeImage = await pickRepresentativeImage(images);
11452
11456
  let probeDone = 0;
11453
- const probeResultsByIndex = await mapWithConcurrency(models, probeConcurrency, async (model, index) => {
11454
- const t0 = Date.now();
11455
- try {
11456
- await ocrImageViaNim({
11457
- imagePath: probeImage,
11458
- prompt: OCR_PROMPT2,
11459
- model,
11460
- maxTokens: modelMaxTokens[model] ?? 8192,
11461
- baseUrl,
11462
- keyPool,
11463
- timeoutMs,
11464
- maxRetries: 2,
11465
- logger,
11466
- stage: "probe"
11467
- });
11468
- const result = { model, durationMs: Date.now() - t0, success: true };
11469
- probeDone += 1;
11470
- markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11471
- logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11472
- return result;
11473
- } catch (err) {
11474
- const result = {
11475
- model,
11476
- durationMs: Date.now() - t0,
11477
- success: false,
11478
- error: err instanceof Error ? err.message : String(err)
11479
- };
11457
+ const probeRuns = startParallelProbeRuns({
11458
+ models,
11459
+ probeConcurrency,
11460
+ probeImage,
11461
+ modelMaxTokens,
11462
+ baseUrl,
11463
+ keyPool,
11464
+ timeoutMs,
11465
+ logger,
11466
+ onProbeResult: ({ index, model, result }) => {
11480
11467
  probeDone += 1;
11481
11468
  markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
11482
11469
  logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
11483
- return result;
11484
11470
  }
11485
11471
  });
11486
- const probeResults = probeResultsByIndex;
11487
- const selectedModel = chooseFastestModel(probeResults);
11488
- if (!selectedModel) throw new UnifiedOcrError("PROBE_FAILED", "probe", "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.");
11489
- const fallbackModelOrder = probeResults.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs).map((r) => r.model);
11490
- timingsMs.probe = Date.now() - probeStart;
11491
- await updateModelCache(modelCachePath, probeResults);
11472
+ const selected = await probeRuns.firstSuccess;
11473
+ const selectedModel = selected.selectedModel;
11474
+ const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
11475
+ timingsMs.probe = elapsedMs(probeStart);
11492
11476
  markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
11493
- logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC", { selectedModel, probeResults, elapsedMs: timingsMs.probe, modelCachePath });
11494
- const ocrStart = Date.now();
11477
+ logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
11478
+ const probeResultsPromise = probeRuns.allResults.then(async (results) => {
11479
+ await updateModelCache(modelCachePath, results);
11480
+ return results;
11481
+ });
11482
+ const ocrStart = import_node_perf_hooks.performance.now();
11495
11483
  currentStage = "ocr";
11496
11484
  markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
11497
11485
  logStage("info", "ocr", "start", "\uD398\uC774\uC9C0 OCR \uC2DC\uC791", { selectedModel, pageCount: images.length });
@@ -11515,10 +11503,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11515
11503
  markStageProgress("ocr", Math.round((i + 1) / images.length * 100), i + 1, images.length, `OCR ${i + 1}/${images.length}`);
11516
11504
  logStage("debug", "ocr", "progress", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { page: i + 1, total: images.length });
11517
11505
  }
11518
- timingsMs.ocr = Date.now() - ocrStart;
11506
+ timingsMs.ocr = elapsedMs(ocrStart);
11519
11507
  markStageDone("ocr", "OCR \uC644\uB8CC");
11520
11508
  logStage("info", "ocr", "done", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { elapsedMs: timingsMs.ocr });
11521
- const proofStart = Date.now();
11509
+ const proofStart = import_node_perf_hooks.performance.now();
11522
11510
  currentStage = "proofread";
11523
11511
  markStageStart("proofread", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC9C4\uD589 \uC911");
11524
11512
  logStage("info", "proofread", "start", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC2DC\uC791", { pages: rawPagePaths.length });
@@ -11554,16 +11542,16 @@ ${rawMd}
11554
11542
  markStageProgress("proofread", Math.round((i + 1) / rawPagePaths.length * 100), i + 1, rawPagePaths.length, `\uAD50\uC815 ${i + 1}/${rawPagePaths.length}`);
11555
11543
  logStage("debug", "proofread", "progress", "\uD398\uC774\uC9C0 \uAD50\uC815 \uC644\uB8CC", { page: i + 1, total: rawPagePaths.length });
11556
11544
  }
11557
- timingsMs.proofread = Date.now() - proofStart;
11545
+ timingsMs.proofread = elapsedMs(proofStart);
11558
11546
  markStageDone("proofread", "\uAD50\uC815 \uC644\uB8CC");
11559
11547
  logStage("info", "proofread", "done", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC644\uB8CC", { elapsedMs: timingsMs.proofread });
11560
- const mergeStart = Date.now();
11548
+ const mergeStart = import_node_perf_hooks.performance.now();
11561
11549
  currentStage = "merge";
11562
11550
  markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
11563
11551
  logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: proofedPaths.length });
11564
11552
  const merged = await mergeMarkdownPages(proofedPaths);
11565
11553
  await (0, import_promises2.writeFile)(outputPath, merged, "utf-8");
11566
- timingsMs.merge = Date.now() - mergeStart;
11554
+ timingsMs.merge = elapsedMs(mergeStart);
11567
11555
  markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
11568
11556
  logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
11569
11557
  const report = {
@@ -11572,7 +11560,7 @@ ${rawMd}
11572
11560
  workspaceDir,
11573
11561
  selectedModel,
11574
11562
  probeImage,
11575
- probeResults,
11563
+ probeResults: await probeResultsPromise,
11576
11564
  pageCount: images.length,
11577
11565
  keyHealth: keyPool.snapshot(),
11578
11566
  timingsMs,
@@ -11713,9 +11701,56 @@ async function mapWithConcurrency(items, concurrency, mapper) {
11713
11701
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
11714
11702
  return results;
11715
11703
  }
11716
- function chooseFastestModel(results) {
11717
- const ok = results.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs);
11718
- return ok[0]?.model ?? null;
/**
 * Probes every candidate OCR model against one sample image and exposes two
 * views of the outcome: the first model that answers successfully, and the
 * full list of per-model results.
 *
 * @param {object} input
 * @param {Array} input.models - Candidate model identifiers; each is also used
 *   as a key into `modelMaxTokens`.
 * @param {number} input.probeConcurrency - Concurrency passed to `mapWithConcurrency`.
 * @param {*} input.probeImage - Image path sent to every model.
 * @param {object} input.modelMaxTokens - Per-model `maxTokens`; 8192 is used
 *   when a model has no entry.
 * @param {*} input.baseUrl - Forwarded unchanged to `ocrImageViaNim`.
 * @param {*} input.keyPool - Forwarded unchanged to `ocrImageViaNim`.
 * @param {*} input.timeoutMs - Forwarded unchanged to `ocrImageViaNim`.
 * @param {*} input.logger - Forwarded unchanged to `ocrImageViaNim`.
 * @param {Function} [input.onProbeResult] - Called exactly once per finished
 *   probe with `{ index, model, result }`.
 * @returns {{ firstSuccess: Promise<object>, allResults: Promise<Array> }}
 *   `firstSuccess` resolves with `{ selectedModel, firstDurationMs }` as soon
 *   as any probe succeeds. It rejects with a `PROBE_FAILED` `UnifiedOcrError`
 *   when the run finishes without a success (including an empty `models`
 *   list), or with the underlying error if the run itself fails.
 *   `allResults` is the promise returned by `mapWithConcurrency`.
 */
function startParallelProbeRuns(input) {
  const failurePrefix = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: ";
  // Detail used when no probe reported an error of its own (e.g. no models).
  let lastErr = "\uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
  let firstResolved = false;
  let resolveFirst;
  let rejectFirst;
  const firstSuccess = new Promise((fulfill, fail) => {
    resolveFirst = fulfill;
    rejectFirst = fail;
  });
  const allResults = mapWithConcurrency(input.models, input.probeConcurrency, async (model, index) => {
    const t0 = import_node_perf_hooks.performance.now();
    let result;
    try {
      await ocrImageViaNim({
        imagePath: input.probeImage,
        prompt: OCR_PROMPT2,
        model,
        maxTokens: input.modelMaxTokens[model] ?? 8192,
        baseUrl: input.baseUrl,
        keyPool: input.keyPool,
        timeoutMs: input.timeoutMs,
        maxRetries: 2,
        logger: input.logger,
        stage: "probe"
      });
      result = { model, durationMs: elapsedMs(t0), success: true };
    } catch (err) {
      result = {
        model,
        durationMs: elapsedMs(t0),
        success: false,
        error: err instanceof Error ? err.message : String(err)
      };
      lastErr = result.error || lastErr;
    }
    // Reported outside the try/catch so that a throwing callback can neither
    // turn a successful probe into a failed one nor be invoked twice.
    input.onProbeResult?.({ index, model, result });
    if (result.success && !firstResolved) {
      firstResolved = true;
      resolveFirst({ selectedModel: model, firstDurationMs: result.durationMs });
    }
    return result;
  });
  // Settle `firstSuccess` from the completion of the whole run rather than
  // from inside the mapper: with an empty model list the mapper never runs,
  // and the promise would otherwise stay pending forever.
  allResults.then(
    () => {
      if (!firstResolved) {
        rejectFirst(new UnifiedOcrError("PROBE_FAILED", "probe", `${failurePrefix}${lastErr}`));
      }
    },
    (err) => {
      if (!firstResolved) {
        rejectFirst(err);
      }
    }
  );
  return { firstSuccess, allResults };
}
11720
11755
  async function loadModelCache(path) {
11721
11756
  try {