@clazic/kordoc 2.4.14 → 2.4.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -4,12 +4,12 @@ import {
4
4
  markdownToHwpx,
5
5
  markdownToXlsx,
6
6
  parse
7
- } from "./chunk-6CADPLGJ.js";
7
+ } from "./chunk-RH6IBTHH.js";
8
8
  import "./chunk-YW5G6BCJ.js";
9
9
  import {
10
10
  VERSION,
11
11
  toArrayBuffer
12
- } from "./chunk-X6NIA6BK.js";
12
+ } from "./chunk-QR27D67R.js";
13
13
  import "./chunk-MOL7MDBG.js";
14
14
  import "./chunk-7FMKAV4P.js";
15
15
  import "./chunk-34WIGIQC.js";
@@ -177,7 +177,7 @@ async function runParse(files, opts) {
177
177
  saveImages(absPath);
178
178
  }
179
179
  } catch (err) {
180
- const { sanitizeError } = await import("./utils-EK3CPEZG.js");
180
+ const { sanitizeError } = await import("./utils-HHJDSSR6.js");
181
181
  process.stderr.write(`
182
182
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
183
183
  `);
@@ -259,7 +259,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
259
259
  `));
260
260
  }
261
261
  } catch (err) {
262
- const { sanitizeError } = await import("./utils-EK3CPEZG.js");
262
+ const { sanitizeError } = await import("./utils-HHJDSSR6.js");
263
263
  process.stderr.write(` FAIL
264
264
  `);
265
265
  process.stderr.write(` \u2192 ${sanitizeError(err)}
@@ -291,7 +291,7 @@ program.command("init-env").description("kordoc\uC6A9 .env \uD15C\uD50C\uB9BF \u
291
291
  }
292
292
  });
293
293
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
294
- const { watchDirectory } = await import("./watch-NSWBVKQZ.js");
294
+ const { watchDirectory } = await import("./watch-YAILKKKP.js");
295
295
  await watchDirectory({
296
296
  dir,
297
297
  outDir: opts.outDir,
package/dist/index.cjs CHANGED
@@ -3138,7 +3138,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
3138
3138
  var import_xmldom = require("@xmldom/xmldom");
3139
3139
 
3140
3140
  // src/utils.ts
3141
- var VERSION = true ? "2.4.13" : "0.0.0-dev";
3141
+ var VERSION = true ? "2.4.15" : "0.0.0-dev";
3142
3142
  function toArrayBuffer(buf) {
3143
3143
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
3144
3144
  return buf.buffer;
@@ -11332,6 +11332,7 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
11332
11332
  var import_promises2 = require("fs/promises");
11333
11333
  var import_path5 = require("path");
11334
11334
  var import_child_process4 = require("child_process");
11335
+ var import_node_perf_hooks = require("perf_hooks");
11335
11336
  var import_libreoffice_convert = __toESM(require("libreoffice-convert"), 1);
11336
11337
  init_logger();
11337
11338
  var libreConvert = import_libreoffice_convert.default.convert;
@@ -11380,6 +11381,9 @@ var PROOFREAD_PROMPT = [
11380
11381
  "- \uC624\uD0C8\uC790, \uB744\uC5B4\uC4F0\uAE30, \uC904\uBC14\uAFC8, Markdown \uAD6C\uC870\uB9CC \uAD50\uC815",
11381
11382
  "- \uACB0\uACFC\uB294 Markdown \uBCF8\uBB38\uB9CC \uCD9C\uB825"
11382
11383
  ].join("\n");
11384
/**
 * Whole milliseconds elapsed since an earlier `performance.now()` reading.
 *
 * @param {number} startAt - Value previously returned by `performance.now()`.
 * @returns {number} Difference between the current reading and `startAt`,
 *   rounded to the nearest integer.
 */
function elapsedMs(startAt) {
  const { performance: clock } = import_node_perf_hooks;
  const rawDelta = clock.now() - startAt;
  return Math.round(rawDelta);
}
11383
11387
  async function runUnifiedOcrPipeline(inputPath, options = {}) {
11384
11388
  const absInput = (0, import_path5.resolve)(inputPath);
11385
11389
  const stem = (0, import_path5.basename)(absInput, (0, import_path5.extname)(absInput));
@@ -11419,7 +11423,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11419
11423
  try {
11420
11424
  ensureSupportedInput(absInput);
11421
11425
  let workingPdfPath = absInput;
11422
- const convertStart = Date.now();
11426
+ const convertStart = import_node_perf_hooks.performance.now();
11423
11427
  currentStage = "convert";
11424
11428
  markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
11425
11429
  logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
@@ -11430,21 +11434,40 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11430
11434
  const out = await convertWithLibreOffice(inputBuffer, ".pdf");
11431
11435
  await (0, import_promises2.writeFile)(workingPdfPath, out);
11432
11436
  }
11433
- timingsMs.convert = Date.now() - convertStart;
11437
+ timingsMs.convert = elapsedMs(convertStart);
11434
11438
  markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
11435
11439
  logStage("info", "convert", "done", "PDF \uBCC0\uD658 \uC644\uB8CC", { elapsedMs: timingsMs.convert });
11436
- const renderStart = Date.now();
11440
+ const renderStart = import_node_perf_hooks.performance.now();
11437
11441
  currentStage = "render";
11438
11442
  markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
11439
11443
  logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi });
11440
- await renderPdfToPng(workingPdfPath, (0, import_path5.join)(imagesDir, "page"), dpi);
11444
+ const renderWithProgress = await renderPdfToPngWithProgress(
11445
+ workingPdfPath,
11446
+ (0, import_path5.join)(imagesDir, "page"),
11447
+ dpi,
11448
+ (current, total) => {
11449
+ markStageProgress(
11450
+ "render",
11451
+ Math.round(current / total * 100),
11452
+ current,
11453
+ total,
11454
+ `\uD398\uC774\uC9C0 ${current}/${total} \uB80C\uB354\uB9C1`
11455
+ );
11456
+ }
11457
+ );
11441
11458
  const images = await listPageImages(imagesDir);
11442
11459
  if (images.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328: \uACB0\uACFC \uC774\uBBF8\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
11443
- markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
11444
- timingsMs.render = Date.now() - renderStart;
11460
+ if (!renderWithProgress.emittedPerPageProgress) {
11461
+ markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
11462
+ }
11463
+ timingsMs.render = elapsedMs(renderStart);
11445
11464
  markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
11446
- logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", { pages: images.length, elapsedMs: timingsMs.render });
11447
- const probeStart = Date.now();
11465
+ logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", {
11466
+ pages: images.length,
11467
+ elapsedMs: timingsMs.render,
11468
+ pageCountSource: renderWithProgress.pageCountSource
11469
+ });
11470
+ const probeStart = import_node_perf_hooks.performance.now();
11448
11471
  currentStage = "probe";
11449
11472
  markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
11450
11473
  logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
@@ -11468,14 +11491,14 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11468
11491
  const selected = await probeRuns.firstSuccess;
11469
11492
  const selectedModel = selected.selectedModel;
11470
11493
  const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
11471
- timingsMs.probe = Date.now() - probeStart;
11494
+ timingsMs.probe = elapsedMs(probeStart);
11472
11495
  markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
11473
11496
  logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
11474
11497
  const probeResultsPromise = probeRuns.allResults.then(async (results) => {
11475
11498
  await updateModelCache(modelCachePath, results);
11476
11499
  return results;
11477
11500
  });
11478
- const ocrStart = Date.now();
11501
+ const ocrStart = import_node_perf_hooks.performance.now();
11479
11502
  currentStage = "ocr";
11480
11503
  markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
11481
11504
  logStage("info", "ocr", "start", "\uD398\uC774\uC9C0 OCR \uC2DC\uC791", { selectedModel, pageCount: images.length });
@@ -11499,10 +11522,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
11499
11522
  markStageProgress("ocr", Math.round((i + 1) / images.length * 100), i + 1, images.length, `OCR ${i + 1}/${images.length}`);
11500
11523
  logStage("debug", "ocr", "progress", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { page: i + 1, total: images.length });
11501
11524
  }
11502
- timingsMs.ocr = Date.now() - ocrStart;
11525
+ timingsMs.ocr = elapsedMs(ocrStart);
11503
11526
  markStageDone("ocr", "OCR \uC644\uB8CC");
11504
11527
  logStage("info", "ocr", "done", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { elapsedMs: timingsMs.ocr });
11505
- const proofStart = Date.now();
11528
+ const proofStart = import_node_perf_hooks.performance.now();
11506
11529
  currentStage = "proofread";
11507
11530
  markStageStart("proofread", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC9C4\uD589 \uC911");
11508
11531
  logStage("info", "proofread", "start", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC2DC\uC791", { pages: rawPagePaths.length });
@@ -11538,16 +11561,16 @@ ${rawMd}
11538
11561
  markStageProgress("proofread", Math.round((i + 1) / rawPagePaths.length * 100), i + 1, rawPagePaths.length, `\uAD50\uC815 ${i + 1}/${rawPagePaths.length}`);
11539
11562
  logStage("debug", "proofread", "progress", "\uD398\uC774\uC9C0 \uAD50\uC815 \uC644\uB8CC", { page: i + 1, total: rawPagePaths.length });
11540
11563
  }
11541
- timingsMs.proofread = Date.now() - proofStart;
11564
+ timingsMs.proofread = elapsedMs(proofStart);
11542
11565
  markStageDone("proofread", "\uAD50\uC815 \uC644\uB8CC");
11543
11566
  logStage("info", "proofread", "done", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC644\uB8CC", { elapsedMs: timingsMs.proofread });
11544
- const mergeStart = Date.now();
11567
+ const mergeStart = import_node_perf_hooks.performance.now();
11545
11568
  currentStage = "merge";
11546
11569
  markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
11547
11570
  logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: proofedPaths.length });
11548
11571
  const merged = await mergeMarkdownPages(proofedPaths);
11549
11572
  await (0, import_promises2.writeFile)(outputPath, merged, "utf-8");
11550
- timingsMs.merge = Date.now() - mergeStart;
11573
+ timingsMs.merge = elapsedMs(mergeStart);
11551
11574
  markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
11552
11575
  logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
11553
11576
  const report = {
@@ -11641,6 +11664,49 @@ async function renderPdfToPng(pdfPath, prefixPath, dpi) {
11641
11664
  throw new UnifiedOcrError("RENDER_FAILED", "render", err instanceof Error ? err.message : String(err));
11642
11665
  }
11643
11666
  }
11667
/**
 * Read the page count of a PDF from the output of the `pdfinfo` command.
 *
 * @param {string} pdfPath - Path handed to `pdfinfo` as its only argument.
 * @returns {Promise<number>} The positive number found on the "Pages:" line.
 * @throws {Error} When the output has no "Pages:" line or the count is not a
 *   positive finite number. Rejections from `runCommandWithStdout` propagate
 *   unchanged.
 */
async function getPdfPageCount(pdfPath) {
  // Case-insensitive, multiline: the "Pages:" entry may sit on any output line.
  const pagesLine = /^\s*Pages:\s*(\d+)\s*$/mi;
  const report = await runCommandWithStdout("pdfinfo", [pdfPath]);
  const found = report.match(pagesLine);
  if (found === null) {
    throw new Error("pdfinfo \uCD9C\uB825\uC5D0\uC11C \uD398\uC774\uC9C0 \uC218\uB97C \uCC3E\uC9C0 \uBABB\uD588\uC2B5\uB2C8\uB2E4.");
  }
  const [, digits] = found;
  const pageCount = Number(digits);
  const isUsable = Number.isFinite(pageCount) && pageCount > 0;
  if (!isUsable) {
    throw new Error(`\uC798\uBABB\uB41C \uD398\uC774\uC9C0 \uC218: ${digits}`);
  }
  return pageCount;
}
11679
/**
 * Render a PDF to PNG page images, reporting progress after each page when the
 * page count can be determined up front.
 *
 * The page count is requested from `getPdfPageCount`. If that succeeds with a
 * positive number, `pdftoppm` is run once per page (`-f N -l N`) and
 * `onPageDone` is invoked after each run. Otherwise the whole document is
 * rendered in a single `renderPdfToPng` call and no per-page progress is sent.
 *
 * @param {string} pdfPath - PDF to render.
 * @param {string} prefixPath - Output prefix passed to `pdftoppm`.
 * @param {number} dpi - Resolution passed to `pdftoppm -r`.
 * @param {(current: number, total: number) => void} [onPageDone] - Optional
 *   progress callback. When it is not a function, pages are still rendered
 *   one by one but no progress is emitted.
 * @returns {Promise<{emittedPerPageProgress: boolean, pageCountSource: string}>}
 *   `emittedPerPageProgress` is true only if `onPageDone` was actually called
 *   per page; `pageCountSource` is "pdfinfo" or "fallback".
 * @throws {UnifiedOcrError} `RENDER_FAILED` when a per-page render (or the
 *   progress callback) throws.
 */
async function renderPdfToPngWithProgress(pdfPath, prefixPath, dpi, onPageDone) {
  let totalPages = 0;
  try {
    totalPages = await getPdfPageCount(pdfPath);
  } catch {
    // Deliberate best effort: without a page count we fall back to the
    // single-shot render below instead of failing the stage.
    totalPages = 0;
  }

  const hasPageCount = totalPages > 0;
  if (!hasPageCount) {
    await renderPdfToPng(pdfPath, prefixPath, dpi);
    return { emittedPerPageProgress: false, pageCountSource: "fallback" };
  }

  // An omitted callback used to raise a TypeError after the first page had
  // already been rendered; treat it as "no progress wanted" instead.
  const reportsProgress = typeof onPageDone === "function";
  try {
    for (let page = 1; page <= totalPages; page++) {
      await runCommand("pdftoppm", [
        "-png",
        "-r",
        String(dpi),
        "-f",
        String(page),
        "-l",
        String(page),
        pdfPath,
        prefixPath
      ]);
      if (reportsProgress) {
        onPageDone(page, totalPages);
      }
    }
  } catch (err) {
    throw new UnifiedOcrError("RENDER_FAILED", "render", err instanceof Error ? err.message : String(err));
  }
  // Report truthfully so a caller can still emit its own summary progress
  // when nothing was emitted per page.
  return { emittedPerPageProgress: reportsProgress, pageCountSource: "pdfinfo" };
}
11644
11710
  async function runCommand(cmd, args) {
11645
11711
  await new Promise((resolvePromise, reject) => {
11646
11712
  const child = (0, import_child_process4.spawn)(cmd, args, { stdio: "pipe" });
@@ -11655,6 +11721,24 @@ async function runCommand(cmd, args) {
11655
11721
  });
11656
11722
  });
11657
11723
  }
11724
/**
 * Spawn a command and resolve with everything it wrote to stdout.
 *
 * Both output streams are decoded as UTF-8 at the stream level, so a
 * multi-byte character that arrives split across two pipe chunks is still
 * decoded correctly (decoding each chunk separately would turn it into
 * U+FFFD replacement characters).
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {Promise<string>} Collected stdout when the process exits with code 0.
 * @throws {Error} The spawn error if the process cannot be started, or an
 *   error carrying the exit code and trimmed stderr on a non-zero exit.
 */
async function runCommandWithStdout(cmd, args) {
  return new Promise((resolvePromise, reject) => {
    const child = import_child_process4.spawn(cmd, args, { stdio: "pipe" });
    let stdout = "";
    let stderr = "";
    // Let the stream's decoder carry partial multi-byte sequences across chunks.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", reject);
    child.on("close", (code) => {
      if (code === 0) {
        resolvePromise(stdout);
        return;
      }
      reject(new Error(`${cmd} \uC2E4\uD328 (code=${code}): ${stderr.trim()}`));
    });
  });
}
11658
11742
  async function assertSofficeAvailable() {
11659
11743
  try {
11660
11744
  await runCommand("soffice", ["--version"]);
@@ -11708,7 +11792,7 @@ function startParallelProbeRuns(input) {
11708
11792
  });
11709
11793
  let lastErr = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
11710
11794
  const allResults = mapWithConcurrency(input.models, input.probeConcurrency, async (model, index) => {
11711
- const t0 = Date.now();
11795
+ const t0 = import_node_perf_hooks.performance.now();
11712
11796
  try {
11713
11797
  await ocrImageViaNim({
11714
11798
  imagePath: input.probeImage,
@@ -11722,7 +11806,7 @@ function startParallelProbeRuns(input) {
11722
11806
  logger: input.logger,
11723
11807
  stage: "probe"
11724
11808
  });
11725
- const result = { model, durationMs: Date.now() - t0, success: true };
11809
+ const result = { model, durationMs: elapsedMs(t0), success: true };
11726
11810
  input.onProbeResult?.({ index, model, result });
11727
11811
  if (!firstResolved) {
11728
11812
  firstResolved = true;
@@ -11732,7 +11816,7 @@ function startParallelProbeRuns(input) {
11732
11816
  } catch (err) {
11733
11817
  const result = {
11734
11818
  model,
11735
- durationMs: Date.now() - t0,
11819
+ durationMs: elapsedMs(t0),
11736
11820
  success: false,
11737
11821
  error: err instanceof Error ? err.message : String(err)
11738
11822
  };