@clazic/kordoc 2.4.15 → 2.4.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3115,7 +3115,7 @@ import JSZip2 from "jszip";
3115
3115
  import { DOMParser } from "@xmldom/xmldom";
3116
3116
 
3117
3117
  // src/utils.ts
3118
- var VERSION = true ? "2.4.14" : "0.0.0-dev";
3118
+ var VERSION = true ? "2.4.15" : "0.0.0-dev";
3119
3119
  function toArrayBuffer(buf) {
3120
3120
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3121
3121
  return buf.buffer;
@@ -11418,13 +11418,32 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11418
11418
  currentStage = "render";
11419
11419
  markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
11420
11420
  logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi });
11421
- await renderPdfToPng(workingPdfPath, join4(imagesDir, "page"), dpi);
11421
+ const renderWithProgress = await renderPdfToPngWithProgress(
11422
+ workingPdfPath,
11423
+ join4(imagesDir, "page"),
11424
+ dpi,
11425
+ (current, total) => {
11426
+ markStageProgress(
11427
+ "render",
11428
+ Math.round(current / total * 100),
11429
+ current,
11430
+ total,
11431
+ `\uD398\uC774\uC9C0 ${current}/${total} \uB80C\uB354\uB9C1`
11432
+ );
11433
+ }
11434
+ );
11422
11435
  const images = await listPageImages(imagesDir);
11423
11436
  if (images.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328: \uACB0\uACFC \uC774\uBBF8\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
11424
- markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
11437
+ if (!renderWithProgress.emittedPerPageProgress) {
11438
+ markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
11439
+ }
11425
11440
  timingsMs.render = elapsedMs(renderStart);
11426
11441
  markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
11427
- logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", { pages: images.length, elapsedMs: timingsMs.render });
11442
+ logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", {
11443
+ pages: images.length,
11444
+ elapsedMs: timingsMs.render,
11445
+ pageCountSource: renderWithProgress.pageCountSource
11446
+ });
11428
11447
  const probeStart = performance.now();
11429
11448
  currentStage = "probe";
11430
11449
  markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
@@ -11622,6 +11641,49 @@ async function renderPdfToPng(pdfPath, prefixPath, dpi) {
11622
11641
  throw new UnifiedOcrError("RENDER_FAILED", "render", err instanceof Error ? err.message : String(err));
11623
11642
  }
11624
11643
  }
/**
 * Reads the page count of a PDF by running `pdfinfo` on it.
 *
 * The count is taken from the `Pages: <digits>` line of the command's
 * stdout (matched case-insensitively, one line at a time).
 *
 * @param {string} pdfPath - Path handed to `pdfinfo` as its only argument.
 * @returns {Promise<number>} The positive page count.
 * @throws {Error} When no `Pages:` line is present or the parsed value is
 *   not a finite number greater than zero. Rejections from
 *   `runCommandWithStdout` propagate unchanged.
 */
async function getPdfPageCount(pdfPath) {
  const pagesLinePattern = /^\s*Pages:\s*(\d+)\s*$/mi;
  const infoText = await runCommandWithStdout("pdfinfo", [pdfPath]);
  const found = infoText.match(pagesLinePattern);
  if (found === null) {
    throw new Error("pdfinfo \uCD9C\uB825\uC5D0\uC11C \uD398\uC774\uC9C0 \uC218\uB97C \uCC3E\uC9C0 \uBABB\uD588\uC2B5\uB2C8\uB2E4.");
  }
  const digits = found[1];
  const pageCount = Number(digits);
  const isUsableCount = Number.isFinite(pageCount) && pageCount > 0;
  if (isUsableCount) {
    return pageCount;
  }
  throw new Error(`\uC798\uBABB\uB41C \uD398\uC774\uC9C0 \uC218: ${digits}`);
}
/**
 * Renders a PDF to PNG page images, reporting progress page by page when
 * the page count can be determined up front.
 *
 * Strategy:
 *  1. Ask `getPdfPageCount` for the number of pages. Any failure there is
 *     treated as "unknown" rather than as a render failure.
 *  2. If the count is known, run `pdftoppm` once per page (`-f N -l N`) and
 *     call `onPageDone(page, totalPages)` after each page finishes.
 *  3. Otherwise delegate to `renderPdfToPng` for a single whole-document
 *     render with no per-page progress.
 *
 * @param {string} pdfPath - PDF to render.
 * @param {string} prefixPath - Output prefix passed to `pdftoppm` as its last argument.
 * @param {number} dpi - Resolution passed to `pdftoppm -r`.
 * @param {(page: number, totalPages: number) => void} [onPageDone] - Optional
 *   progress callback. When omitted (or not a function) pages are still
 *   rendered one by one, but no progress is reported.
 * @returns {Promise<{emittedPerPageProgress: boolean, pageCountSource: "pdfinfo" | "fallback"}>}
 *   `emittedPerPageProgress` is true only when the callback was actually
 *   invoked per page, so callers know whether they still need to emit a
 *   final progress event themselves.
 * @throws {UnifiedOcrError} `RENDER_FAILED` (stage `render`) when a per-page
 *   `pdftoppm` run or the progress callback throws. Errors from the
 *   `renderPdfToPng` fallback propagate as thrown by that function.
 */
async function renderPdfToPngWithProgress(pdfPath, prefixPath, dpi, onPageDone) {
  // Treat the callback as optional: calling a missing callback used to abort
  // the render after the first page with a misleading RENDER_FAILED error.
  const reportPage = typeof onPageDone === "function" ? onPageDone : null;

  let totalPages = 0;
  try {
    totalPages = await getPdfPageCount(pdfPath);
  } catch {
    // Deliberate best effort: without a page count we cannot report
    // per-page progress, so fall through to the whole-document render below.
    totalPages = 0;
  }

  if (totalPages > 0) {
    try {
      for (let page = 1; page <= totalPages; page++) {
        // Restrict pdftoppm to exactly one page so completion of the process
        // marks completion of that page.
        await runCommand("pdftoppm", [
          "-png",
          "-r",
          String(dpi),
          "-f",
          String(page),
          "-l",
          String(page),
          pdfPath,
          prefixPath
        ]);
        if (reportPage !== null) {
          reportPage(page, totalPages);
        }
      }
      return { emittedPerPageProgress: reportPage !== null, pageCountSource: "pdfinfo" };
    } catch (err) {
      throw new UnifiedOcrError("RENDER_FAILED", "render", err instanceof Error ? err.message : String(err));
    }
  }

  await renderPdfToPng(pdfPath, prefixPath, dpi);
  return { emittedPerPageProgress: false, pageCountSource: "fallback" };
}
11625
11687
  async function runCommand(cmd, args) {
11626
11688
  await new Promise((resolvePromise, reject) => {
11627
11689
  const child = spawn2(cmd, args, { stdio: "pipe" });
@@ -11636,6 +11698,24 @@ async function runCommand(cmd, args) {
11636
11698
  });
11637
11699
  });
11638
11700
  }
/**
 * Runs an external command and resolves with everything it wrote to stdout.
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {Promise<string>} The command's full stdout, decoded as UTF-8,
 *   once it has closed with exit code 0.
 * @throws {Error} Rejects with the spawn error if the process cannot be
 *   started, or with an error carrying the exit code and trimmed stderr
 *   when the command closes with any code other than 0.
 */
async function runCommandWithStdout(cmd, args) {
  return await new Promise((resolvePromise, reject) => {
    const child = spawn2(cmd, args, { stdio: "pipe" });
    let stdout = "";
    let stderr = "";
    // Decode at the stream level. Converting each Buffer chunk on its own
    // corrupts any multi-byte UTF-8 character that straddles a chunk
    // boundary (it becomes U+FFFD); setEncoding buffers the partial bytes
    // until the rest of the character arrives.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", reject);
    child.on("close", (code) => {
      if (code === 0) resolvePromise(stdout);
      else reject(new Error(`${cmd} \uC2E4\uD328 (code=${code}): ${stderr.trim()}`));
    });
  });
}
11639
11719
  async function assertSofficeAvailable() {
11640
11720
  try {
11641
11721
  await runCommand("soffice", ["--version"]);