@clazic/kordoc 2.4.14 → 2.4.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3115,7 +3115,7 @@ import JSZip2 from "jszip";
3115
3115
  import { DOMParser } from "@xmldom/xmldom";
3116
3116
 
3117
3117
  // src/utils.ts
3118
- var VERSION = true ? "2.4.13" : "0.0.0-dev";
3118
+ var VERSION = true ? "2.4.15" : "0.0.0-dev";
3119
3119
  function toArrayBuffer(buf) {
3120
3120
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3121
3121
  return buf.buffer;
@@ -11309,6 +11309,7 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
11309
11309
  import { mkdir, readdir, readFile, stat, writeFile } from "fs/promises";
11310
11310
  import { basename as basename2, dirname as dirname3, extname, join as join4, resolve as resolve3 } from "path";
11311
11311
  import { spawn as spawn2 } from "child_process";
11312
+ import { performance } from "perf_hooks";
11312
11313
  import libre from "libreoffice-convert";
11313
11314
  init_logger();
11314
11315
  var libreConvert = libre.convert;
@@ -11357,6 +11358,9 @@ var PROOFREAD_PROMPT = [
11357
11358
  "- \uC624\uD0C8\uC790, \uB744\uC5B4\uC4F0\uAE30, \uC904\uBC14\uAFC8, Markdown \uAD6C\uC870\uB9CC \uAD50\uC815",
11358
11359
  "- \uACB0\uACFC\uB294 Markdown \uBCF8\uBB38\uB9CC \uCD9C\uB825"
11359
11360
  ].join("\n");
11361
/**
 * Returns the time elapsed since a `performance.now()` reading, rounded to
 * the nearest whole millisecond.
 *
 * @param {number} startAt - Earlier timestamp obtained from `performance.now()`.
 * @returns {number} Rounded difference between the current reading and `startAt`.
 */
function elapsedMs(startAt) {
  const deltaMs = performance.now() - startAt;
  return Math.round(deltaMs);
}
11360
11364
  async function runUnifiedOcrPipeline(inputPath, options = {}) {
11361
11365
  const absInput = resolve3(inputPath);
11362
11366
  const stem = basename2(absInput, extname(absInput));
@@ -11396,7 +11400,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11396
11400
  try {
11397
11401
  ensureSupportedInput(absInput);
11398
11402
  let workingPdfPath = absInput;
11399
- const convertStart = Date.now();
11403
+ const convertStart = performance.now();
11400
11404
  currentStage = "convert";
11401
11405
  markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
11402
11406
  logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
@@ -11407,21 +11411,40 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11407
11411
  const out = await convertWithLibreOffice(inputBuffer, ".pdf");
11408
11412
  await writeFile(workingPdfPath, out);
11409
11413
  }
11410
- timingsMs.convert = Date.now() - convertStart;
11414
+ timingsMs.convert = elapsedMs(convertStart);
11411
11415
  markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
11412
11416
  logStage("info", "convert", "done", "PDF \uBCC0\uD658 \uC644\uB8CC", { elapsedMs: timingsMs.convert });
11413
- const renderStart = Date.now();
11417
+ const renderStart = performance.now();
11414
11418
  currentStage = "render";
11415
11419
  markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
11416
11420
  logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi });
11417
- await renderPdfToPng(workingPdfPath, join4(imagesDir, "page"), dpi);
11421
+ const renderWithProgress = await renderPdfToPngWithProgress(
11422
+ workingPdfPath,
11423
+ join4(imagesDir, "page"),
11424
+ dpi,
11425
+ (current, total) => {
11426
+ markStageProgress(
11427
+ "render",
11428
+ Math.round(current / total * 100),
11429
+ current,
11430
+ total,
11431
+ `\uD398\uC774\uC9C0 ${current}/${total} \uB80C\uB354\uB9C1`
11432
+ );
11433
+ }
11434
+ );
11418
11435
  const images = await listPageImages(imagesDir);
11419
11436
  if (images.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328: \uACB0\uACFC \uC774\uBBF8\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
11420
- markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
11421
- timingsMs.render = Date.now() - renderStart;
11437
+ if (!renderWithProgress.emittedPerPageProgress) {
11438
+ markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
11439
+ }
11440
+ timingsMs.render = elapsedMs(renderStart);
11422
11441
  markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
11423
- logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", { pages: images.length, elapsedMs: timingsMs.render });
11424
- const probeStart = Date.now();
11442
+ logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", {
11443
+ pages: images.length,
11444
+ elapsedMs: timingsMs.render,
11445
+ pageCountSource: renderWithProgress.pageCountSource
11446
+ });
11447
+ const probeStart = performance.now();
11425
11448
  currentStage = "probe";
11426
11449
  markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
11427
11450
  logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
@@ -11445,14 +11468,14 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11445
11468
  const selected = await probeRuns.firstSuccess;
11446
11469
  const selectedModel = selected.selectedModel;
11447
11470
  const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
11448
- timingsMs.probe = Date.now() - probeStart;
11471
+ timingsMs.probe = elapsedMs(probeStart);
11449
11472
  markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
11450
11473
  logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
11451
11474
  const probeResultsPromise = probeRuns.allResults.then(async (results) => {
11452
11475
  await updateModelCache(modelCachePath, results);
11453
11476
  return results;
11454
11477
  });
11455
- const ocrStart = Date.now();
11478
+ const ocrStart = performance.now();
11456
11479
  currentStage = "ocr";
11457
11480
  markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
11458
11481
  logStage("info", "ocr", "start", "\uD398\uC774\uC9C0 OCR \uC2DC\uC791", { selectedModel, pageCount: images.length });
@@ -11476,10 +11499,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11476
11499
  markStageProgress("ocr", Math.round((i + 1) / images.length * 100), i + 1, images.length, `OCR ${i + 1}/${images.length}`);
11477
11500
  logStage("debug", "ocr", "progress", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { page: i + 1, total: images.length });
11478
11501
  }
11479
- timingsMs.ocr = Date.now() - ocrStart;
11502
+ timingsMs.ocr = elapsedMs(ocrStart);
11480
11503
  markStageDone("ocr", "OCR \uC644\uB8CC");
11481
11504
  logStage("info", "ocr", "done", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { elapsedMs: timingsMs.ocr });
11482
- const proofStart = Date.now();
11505
+ const proofStart = performance.now();
11483
11506
  currentStage = "proofread";
11484
11507
  markStageStart("proofread", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC9C4\uD589 \uC911");
11485
11508
  logStage("info", "proofread", "start", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC2DC\uC791", { pages: rawPagePaths.length });
@@ -11515,16 +11538,16 @@ ${rawMd}
11515
11538
  markStageProgress("proofread", Math.round((i + 1) / rawPagePaths.length * 100), i + 1, rawPagePaths.length, `\uAD50\uC815 ${i + 1}/${rawPagePaths.length}`);
11516
11539
  logStage("debug", "proofread", "progress", "\uD398\uC774\uC9C0 \uAD50\uC815 \uC644\uB8CC", { page: i + 1, total: rawPagePaths.length });
11517
11540
  }
11518
- timingsMs.proofread = Date.now() - proofStart;
11541
+ timingsMs.proofread = elapsedMs(proofStart);
11519
11542
  markStageDone("proofread", "\uAD50\uC815 \uC644\uB8CC");
11520
11543
  logStage("info", "proofread", "done", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC644\uB8CC", { elapsedMs: timingsMs.proofread });
11521
- const mergeStart = Date.now();
11544
+ const mergeStart = performance.now();
11522
11545
  currentStage = "merge";
11523
11546
  markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
11524
11547
  logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: proofedPaths.length });
11525
11548
  const merged = await mergeMarkdownPages(proofedPaths);
11526
11549
  await writeFile(outputPath, merged, "utf-8");
11527
- timingsMs.merge = Date.now() - mergeStart;
11550
+ timingsMs.merge = elapsedMs(mergeStart);
11528
11551
  markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
11529
11552
  logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
11530
11553
  const report = {
@@ -11618,6 +11641,49 @@ async function renderPdfToPng(pdfPath, prefixPath, dpi) {
11618
11641
  throw new UnifiedOcrError("RENDER_FAILED", "render", err instanceof Error ? err.message : String(err));
11619
11642
  }
11620
11643
  }
11644
/**
 * Reads the page count of a PDF by running `pdfinfo` and parsing the
 * `Pages:` line of its standard output.
 *
 * @param {string} pdfPath - Path handed to `pdfinfo` as its only argument.
 * @returns {Promise<number>} The positive page count reported by `pdfinfo`.
 * @throws {Error} When the command fails, when no `Pages:` line is present,
 *   or when the parsed value is not a finite number greater than zero.
 */
async function getPdfPageCount(pdfPath) {
  const pdfInfoOutput = await runCommandWithStdout("pdfinfo", [pdfPath]);

  // Multiline + case-insensitive: the "Pages:" entry is one line among many.
  const pagesLine = /^\s*Pages:\s*(\d+)\s*$/mi.exec(pdfInfoOutput);
  if (pagesLine === null) {
    throw new Error("pdfinfo \uCD9C\uB825\uC5D0\uC11C \uD398\uC774\uC9C0 \uC218\uB97C \uCC3E\uC9C0 \uBABB\uD588\uC2B5\uB2C8\uB2E4.");
  }

  const [, rawCount] = pagesLine;
  const pageCount = Number(rawCount);
  const isUsableCount = Number.isFinite(pageCount) && pageCount > 0;
  if (!isUsableCount) {
    throw new Error(`\uC798\uBABB\uB41C \uD398\uC774\uC9C0 \uC218: ${rawCount}`);
  }
  return pageCount;
}
11656
/**
 * Renders a PDF to PNG images, reporting progress after each page when the
 * page count can be determined.
 *
 * The page count is requested from `getPdfPageCount`. When a positive count
 * is available, `pdftoppm` is run once per page (`-f N -l N`) so that the
 * progress callback can fire after every page. When the count is not
 * available, the whole document is rendered with a single `renderPdfToPng`
 * call and no per-page progress is emitted.
 *
 * @param {string} pdfPath - PDF file passed to `pdftoppm`.
 * @param {string} prefixPath - Output prefix passed to `pdftoppm`.
 * @param {number} dpi - Resolution passed to `pdftoppm` via `-r`.
 * @param {(page: number, totalPages: number) => void} [onPageDone] - Optional
 *   callback invoked after each page has been rendered in per-page mode.
 * @returns {Promise<{emittedPerPageProgress: boolean, pageCountSource: string}>}
 *   `{ true, "pdfinfo" }` for per-page mode, `{ false, "fallback" }` otherwise.
 * @throws {UnifiedOcrError} `RENDER_FAILED` when a per-page `pdftoppm` run or
 *   the progress callback throws; errors from `renderPdfToPng` propagate as-is.
 */
async function renderPdfToPngWithProgress(pdfPath, prefixPath, dpi, onPageDone) {
  let totalPages = 0;
  try {
    totalPages = await getPdfPageCount(pdfPath);
  } catch {
    // Deliberate best effort: an unknown page count only disables per-page
    // progress, the document is still rendered through the fallback below.
    totalPages = 0;
  }

  if (totalPages > 0) {
    try {
      for (let page = 1; page <= totalPages; page++) {
        const pageArg = String(page);
        await runCommand("pdftoppm", [
          "-png",
          "-r",
          String(dpi),
          "-f",
          pageArg,
          "-l",
          pageArg,
          pdfPath,
          prefixPath
        ]);
        // The callback is optional: rendering must not fail just because the
        // caller is not interested in progress notifications.
        onPageDone?.(page, totalPages);
      }
      return { emittedPerPageProgress: true, pageCountSource: "pdfinfo" };
    } catch (err) {
      throw new UnifiedOcrError("RENDER_FAILED", "render", err instanceof Error ? err.message : String(err));
    }
  }

  await renderPdfToPng(pdfPath, prefixPath, dpi);
  return { emittedPerPageProgress: false, pageCountSource: "fallback" };
}
11621
11687
  async function runCommand(cmd, args) {
11622
11688
  await new Promise((resolvePromise, reject) => {
11623
11689
  const child = spawn2(cmd, args, { stdio: "pipe" });
@@ -11632,6 +11698,24 @@ async function runCommand(cmd, args) {
11632
11698
  });
11633
11699
  });
11634
11700
  }
11701
/**
 * Spawns a command and resolves with everything it wrote to standard output.
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {Promise<string>} Collected stdout when the process exits with code 0.
 * @throws {Error} When the process cannot be spawned, or when it closes with
 *   a non-zero code (the message carries the code and the trimmed stderr).
 */
async function runCommandWithStdout(cmd, args) {
  return new Promise((resolvePromise, reject) => {
    const child = spawn2(cmd, args, { stdio: "pipe" });
    let stdout = "";
    let stderr = "";

    // Decode at the stream level so that a multi-byte UTF-8 character split
    // across two chunks is reassembled instead of turning into U+FFFD, which
    // is what converting each Buffer chunk to a string on its own would do.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");

    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", reject);
    child.on("close", (code) => {
      if (code === 0) resolvePromise(stdout);
      else reject(new Error(`${cmd} \uC2E4\uD328 (code=${code}): ${stderr.trim()}`));
    });
  });
}
11635
11719
  async function assertSofficeAvailable() {
11636
11720
  try {
11637
11721
  await runCommand("soffice", ["--version"]);
@@ -11685,7 +11769,7 @@ function startParallelProbeRuns(input) {
11685
11769
  });
11686
11770
  let lastErr = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
11687
11771
  const allResults = mapWithConcurrency(input.models, input.probeConcurrency, async (model, index) => {
11688
- const t0 = Date.now();
11772
+ const t0 = performance.now();
11689
11773
  try {
11690
11774
  await ocrImageViaNim({
11691
11775
  imagePath: input.probeImage,
@@ -11699,7 +11783,7 @@ function startParallelProbeRuns(input) {
11699
11783
  logger: input.logger,
11700
11784
  stage: "probe"
11701
11785
  });
11702
- const result = { model, durationMs: Date.now() - t0, success: true };
11786
+ const result = { model, durationMs: elapsedMs(t0), success: true };
11703
11787
  input.onProbeResult?.({ index, model, result });
11704
11788
  if (!firstResolved) {
11705
11789
  firstResolved = true;
@@ -11709,7 +11793,7 @@ function startParallelProbeRuns(input) {
11709
11793
  } catch (err) {
11710
11794
  const result = {
11711
11795
  model,
11712
- durationMs: Date.now() - t0,
11796
+ durationMs: elapsedMs(t0),
11713
11797
  success: false,
11714
11798
  error: err instanceof Error ? err.message : String(err)
11715
11799
  };