@clazic/kordoc 2.4.14 → 2.4.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-X6NIA6BK.js → chunk-QR27D67R.js} +2 -2
- package/dist/{chunk-6CADPLGJ.js → chunk-RH6IBTHH.js} +3 -2
- package/dist/{chunk-6CADPLGJ.js.map → chunk-RH6IBTHH.js.map} +1 -1
- package/dist/cli.js +5 -5
- package/dist/index.cjs +103 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +103 -19
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-EK3CPEZG.js → utils-HHJDSSR6.js} +2 -2
- package/dist/{watch-NSWBVKQZ.js → watch-YAILKKKP.js} +3 -3
- package/package.json +1 -1
- /package/dist/{chunk-X6NIA6BK.js.map → chunk-QR27D67R.js.map} +0 -0
- /package/dist/{utils-EK3CPEZG.js.map → utils-HHJDSSR6.js.map} +0 -0
- /package/dist/{watch-NSWBVKQZ.js.map → watch-YAILKKKP.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -3115,7 +3115,7 @@ import JSZip2 from "jszip";
|
|
|
3115
3115
|
import { DOMParser } from "@xmldom/xmldom";
|
|
3116
3116
|
|
|
3117
3117
|
// src/utils.ts
|
|
3118
|
-
var VERSION = true ? "2.4.
|
|
3118
|
+
var VERSION = true ? "2.4.15" : "0.0.0-dev";
|
|
3119
3119
|
function toArrayBuffer(buf) {
|
|
3120
3120
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3121
3121
|
return buf.buffer;
|
|
@@ -11309,6 +11309,7 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
|
|
|
11309
11309
|
import { mkdir, readdir, readFile, stat, writeFile } from "fs/promises";
|
|
11310
11310
|
import { basename as basename2, dirname as dirname3, extname, join as join4, resolve as resolve3 } from "path";
|
|
11311
11311
|
import { spawn as spawn2 } from "child_process";
|
|
11312
|
+
import { performance } from "perf_hooks";
|
|
11312
11313
|
import libre from "libreoffice-convert";
|
|
11313
11314
|
init_logger();
|
|
11314
11315
|
var libreConvert = libre.convert;
|
|
@@ -11357,6 +11358,9 @@ var PROOFREAD_PROMPT = [
|
|
|
11357
11358
|
"- \uC624\uD0C8\uC790, \uB744\uC5B4\uC4F0\uAE30, \uC904\uBC14\uAFC8, Markdown \uAD6C\uC870\uB9CC \uAD50\uC815",
|
|
11358
11359
|
"- \uACB0\uACFC\uB294 Markdown \uBCF8\uBB38\uB9CC \uCD9C\uB825"
|
|
11359
11360
|
].join("\n");
|
|
11361
|
+
/**
 * Whole milliseconds elapsed since a `performance.now()` reading.
 *
 * @param {number} startAt - Timestamp previously obtained from `performance.now()`.
 * @returns {number} Difference between the current `performance.now()` and
 *   `startAt`, rounded to the nearest integer.
 */
function elapsedMs(startAt) {
  const deltaMs = performance.now() - startAt;
  return Math.round(deltaMs);
}
|
|
11360
11364
|
async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
11361
11365
|
const absInput = resolve3(inputPath);
|
|
11362
11366
|
const stem = basename2(absInput, extname(absInput));
|
|
@@ -11396,7 +11400,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11396
11400
|
try {
|
|
11397
11401
|
ensureSupportedInput(absInput);
|
|
11398
11402
|
let workingPdfPath = absInput;
|
|
11399
|
-
const convertStart =
|
|
11403
|
+
const convertStart = performance.now();
|
|
11400
11404
|
currentStage = "convert";
|
|
11401
11405
|
markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
|
|
11402
11406
|
logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
|
|
@@ -11407,21 +11411,40 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11407
11411
|
const out = await convertWithLibreOffice(inputBuffer, ".pdf");
|
|
11408
11412
|
await writeFile(workingPdfPath, out);
|
|
11409
11413
|
}
|
|
11410
|
-
timingsMs.convert =
|
|
11414
|
+
timingsMs.convert = elapsedMs(convertStart);
|
|
11411
11415
|
markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
|
|
11412
11416
|
logStage("info", "convert", "done", "PDF \uBCC0\uD658 \uC644\uB8CC", { elapsedMs: timingsMs.convert });
|
|
11413
|
-
const renderStart =
|
|
11417
|
+
const renderStart = performance.now();
|
|
11414
11418
|
currentStage = "render";
|
|
11415
11419
|
markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
|
|
11416
11420
|
logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi });
|
|
11417
|
-
|
|
11421
|
+
const renderWithProgress = await renderPdfToPngWithProgress(
|
|
11422
|
+
workingPdfPath,
|
|
11423
|
+
join4(imagesDir, "page"),
|
|
11424
|
+
dpi,
|
|
11425
|
+
(current, total) => {
|
|
11426
|
+
markStageProgress(
|
|
11427
|
+
"render",
|
|
11428
|
+
Math.round(current / total * 100),
|
|
11429
|
+
current,
|
|
11430
|
+
total,
|
|
11431
|
+
`\uD398\uC774\uC9C0 ${current}/${total} \uB80C\uB354\uB9C1`
|
|
11432
|
+
);
|
|
11433
|
+
}
|
|
11434
|
+
);
|
|
11418
11435
|
const images = await listPageImages(imagesDir);
|
|
11419
11436
|
if (images.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328: \uACB0\uACFC \uC774\uBBF8\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
11420
|
-
|
|
11421
|
-
|
|
11437
|
+
if (!renderWithProgress.emittedPerPageProgress) {
|
|
11438
|
+
markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
|
|
11439
|
+
}
|
|
11440
|
+
timingsMs.render = elapsedMs(renderStart);
|
|
11422
11441
|
markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
|
|
11423
|
-
logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", {
|
|
11424
|
-
|
|
11442
|
+
logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", {
|
|
11443
|
+
pages: images.length,
|
|
11444
|
+
elapsedMs: timingsMs.render,
|
|
11445
|
+
pageCountSource: renderWithProgress.pageCountSource
|
|
11446
|
+
});
|
|
11447
|
+
const probeStart = performance.now();
|
|
11425
11448
|
currentStage = "probe";
|
|
11426
11449
|
markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
|
|
11427
11450
|
logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
|
|
@@ -11445,14 +11468,14 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11445
11468
|
const selected = await probeRuns.firstSuccess;
|
|
11446
11469
|
const selectedModel = selected.selectedModel;
|
|
11447
11470
|
const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
|
|
11448
|
-
timingsMs.probe =
|
|
11471
|
+
timingsMs.probe = elapsedMs(probeStart);
|
|
11449
11472
|
markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
|
|
11450
11473
|
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
|
|
11451
11474
|
const probeResultsPromise = probeRuns.allResults.then(async (results) => {
|
|
11452
11475
|
await updateModelCache(modelCachePath, results);
|
|
11453
11476
|
return results;
|
|
11454
11477
|
});
|
|
11455
|
-
const ocrStart =
|
|
11478
|
+
const ocrStart = performance.now();
|
|
11456
11479
|
currentStage = "ocr";
|
|
11457
11480
|
markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
|
|
11458
11481
|
logStage("info", "ocr", "start", "\uD398\uC774\uC9C0 OCR \uC2DC\uC791", { selectedModel, pageCount: images.length });
|
|
@@ -11476,10 +11499,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11476
11499
|
markStageProgress("ocr", Math.round((i + 1) / images.length * 100), i + 1, images.length, `OCR ${i + 1}/${images.length}`);
|
|
11477
11500
|
logStage("debug", "ocr", "progress", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { page: i + 1, total: images.length });
|
|
11478
11501
|
}
|
|
11479
|
-
timingsMs.ocr =
|
|
11502
|
+
timingsMs.ocr = elapsedMs(ocrStart);
|
|
11480
11503
|
markStageDone("ocr", "OCR \uC644\uB8CC");
|
|
11481
11504
|
logStage("info", "ocr", "done", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { elapsedMs: timingsMs.ocr });
|
|
11482
|
-
const proofStart =
|
|
11505
|
+
const proofStart = performance.now();
|
|
11483
11506
|
currentStage = "proofread";
|
|
11484
11507
|
markStageStart("proofread", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC9C4\uD589 \uC911");
|
|
11485
11508
|
logStage("info", "proofread", "start", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC2DC\uC791", { pages: rawPagePaths.length });
|
|
@@ -11515,16 +11538,16 @@ ${rawMd}
|
|
|
11515
11538
|
markStageProgress("proofread", Math.round((i + 1) / rawPagePaths.length * 100), i + 1, rawPagePaths.length, `\uAD50\uC815 ${i + 1}/${rawPagePaths.length}`);
|
|
11516
11539
|
logStage("debug", "proofread", "progress", "\uD398\uC774\uC9C0 \uAD50\uC815 \uC644\uB8CC", { page: i + 1, total: rawPagePaths.length });
|
|
11517
11540
|
}
|
|
11518
|
-
timingsMs.proofread =
|
|
11541
|
+
timingsMs.proofread = elapsedMs(proofStart);
|
|
11519
11542
|
markStageDone("proofread", "\uAD50\uC815 \uC644\uB8CC");
|
|
11520
11543
|
logStage("info", "proofread", "done", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC644\uB8CC", { elapsedMs: timingsMs.proofread });
|
|
11521
|
-
const mergeStart =
|
|
11544
|
+
const mergeStart = performance.now();
|
|
11522
11545
|
currentStage = "merge";
|
|
11523
11546
|
markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
|
|
11524
11547
|
logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: proofedPaths.length });
|
|
11525
11548
|
const merged = await mergeMarkdownPages(proofedPaths);
|
|
11526
11549
|
await writeFile(outputPath, merged, "utf-8");
|
|
11527
|
-
timingsMs.merge =
|
|
11550
|
+
timingsMs.merge = elapsedMs(mergeStart);
|
|
11528
11551
|
markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
|
|
11529
11552
|
logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
|
|
11530
11553
|
const report = {
|
|
@@ -11618,6 +11641,49 @@ async function renderPdfToPng(pdfPath, prefixPath, dpi) {
|
|
|
11618
11641
|
throw new UnifiedOcrError("RENDER_FAILED", "render", err instanceof Error ? err.message : String(err));
|
|
11619
11642
|
}
|
|
11620
11643
|
}
|
|
11644
|
+
/**
 * Reads the page count of a PDF by running `pdfinfo` and parsing its
 * `Pages:` line.
 *
 * @param {string} pdfPath - Path handed to `pdfinfo` as its only argument.
 * @returns {Promise<number>} The positive page count reported by `pdfinfo`.
 * @throws {Error} When no `Pages:` line is present in the output, or when the
 *   parsed value is not a finite number greater than zero. Failures of the
 *   `pdfinfo` invocation itself propagate from `runCommandWithStdout`.
 */
async function getPdfPageCount(pdfPath) {
  const infoText = await runCommandWithStdout("pdfinfo", [pdfPath]);
  // Multiline + case-insensitive: the "Pages:" entry sits on its own line.
  const pagesLine = /^\s*Pages:\s*(\d+)\s*$/mi.exec(infoText);
  if (pagesLine === null) {
    throw new Error("pdfinfo \uCD9C\uB825\uC5D0\uC11C \uD398\uC774\uC9C0 \uC218\uB97C \uCC3E\uC9C0 \uBABB\uD588\uC2B5\uB2C8\uB2E4.");
  }
  const [, digits] = pagesLine;
  const pageCount = Number(digits);
  const isUsable = Number.isFinite(pageCount) && pageCount > 0;
  if (!isUsable) {
    throw new Error(`\uC798\uBABB\uB41C \uD398\uC774\uC9C0 \uC218: ${digits}`);
  }
  return pageCount;
}
|
|
11656
|
+
/**
 * Renders a PDF to PNG images, reporting progress after each page when the
 * page count can be determined.
 *
 * When `getPdfPageCount` succeeds, `pdftoppm` is invoked once per page
 * (`-f N -l N`) and `onPageDone(page, totalPages)` is called after each
 * invocation. When the page count lookup fails, the whole document is
 * rendered through `renderPdfToPng` and no per-page callback is made.
 *
 * @param {string} pdfPath - PDF file passed to `pdftoppm`.
 * @param {string} prefixPath - Output prefix passed to `pdftoppm`.
 * @param {number} dpi - Resolution passed to `pdftoppm -r`.
 * @param {(page: number, totalPages: number) => void} onPageDone - Progress callback.
 * @returns {Promise<{emittedPerPageProgress: boolean, pageCountSource: string}>}
 *   Whether per-page progress was emitted, and which path produced the images
 *   (`"pdfinfo"` or `"fallback"`).
 * @throws {UnifiedOcrError} `RENDER_FAILED` when anything inside the per-page
 *   loop throws. Errors from the fallback path propagate unchanged.
 */
async function renderPdfToPngWithProgress(pdfPath, prefixPath, dpi, onPageDone) {
  let pageCount;
  try {
    pageCount = await getPdfPageCount(pdfPath);
  } catch {
    // Page count is best-effort; an unknown count selects the fallback below.
    pageCount = 0;
  }

  if (!(pageCount > 0)) {
    await renderPdfToPng(pdfPath, prefixPath, dpi);
    return { emittedPerPageProgress: false, pageCountSource: "fallback" };
  }

  try {
    let pageNo = 0;
    while (pageNo < pageCount) {
      pageNo += 1;
      const pageArg = String(pageNo);
      const pdftoppmArgs = ["-png", "-r", String(dpi), "-f", pageArg, "-l", pageArg, pdfPath, prefixPath];
      await runCommand("pdftoppm", pdftoppmArgs);
      onPageDone(pageNo, pageCount);
    }
    return { emittedPerPageProgress: true, pageCountSource: "pdfinfo" };
  } catch (failure) {
    const detail = failure instanceof Error ? failure.message : String(failure);
    throw new UnifiedOcrError("RENDER_FAILED", "render", detail);
  }
}
|
|
11621
11687
|
async function runCommand(cmd, args) {
|
|
11622
11688
|
await new Promise((resolvePromise, reject) => {
|
|
11623
11689
|
const child = spawn2(cmd, args, { stdio: "pipe" });
|
|
@@ -11632,6 +11698,24 @@ async function runCommand(cmd, args) {
|
|
|
11632
11698
|
});
|
|
11633
11699
|
});
|
|
11634
11700
|
}
|
|
11701
|
+
/**
 * Spawns a command and resolves with everything it wrote to stdout.
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {Promise<string>} Captured stdout when the process exits with code 0.
 * @throws {Error} Rejects with the spawn error if the process cannot be
 *   started, or with an error carrying the exit code and trimmed stderr when
 *   the process exits with a non-zero code.
 */
async function runCommandWithStdout(cmd, args) {
  return await new Promise((resolvePromise, reject) => {
    const child = spawn2(cmd, args, { stdio: "pipe" });
    // Decode at the stream level: converting each Buffer chunk separately
    // corrupts multi-byte UTF-8 characters that straddle a chunk boundary.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    let stdout = "";
    let stderr = "";
    child.stdout.on("data", (text) => {
      stdout += text;
    });
    child.stderr.on("data", (text) => {
      stderr += text;
    });
    child.on("error", reject);
    child.on("close", (code) => {
      if (code === 0) resolvePromise(stdout);
      else reject(new Error(`${cmd} \uC2E4\uD328 (code=${code}): ${stderr.trim()}`));
    });
  });
}
|
|
11635
11719
|
async function assertSofficeAvailable() {
|
|
11636
11720
|
try {
|
|
11637
11721
|
await runCommand("soffice", ["--version"]);
|
|
@@ -11685,7 +11769,7 @@ function startParallelProbeRuns(input) {
|
|
|
11685
11769
|
});
|
|
11686
11770
|
let lastErr = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
|
|
11687
11771
|
const allResults = mapWithConcurrency(input.models, input.probeConcurrency, async (model, index) => {
|
|
11688
|
-
const t0 =
|
|
11772
|
+
const t0 = performance.now();
|
|
11689
11773
|
try {
|
|
11690
11774
|
await ocrImageViaNim({
|
|
11691
11775
|
imagePath: input.probeImage,
|
|
@@ -11699,7 +11783,7 @@ function startParallelProbeRuns(input) {
|
|
|
11699
11783
|
logger: input.logger,
|
|
11700
11784
|
stage: "probe"
|
|
11701
11785
|
});
|
|
11702
|
-
const result = { model, durationMs:
|
|
11786
|
+
const result = { model, durationMs: elapsedMs(t0), success: true };
|
|
11703
11787
|
input.onProbeResult?.({ index, model, result });
|
|
11704
11788
|
if (!firstResolved) {
|
|
11705
11789
|
firstResolved = true;
|
|
@@ -11709,7 +11793,7 @@ function startParallelProbeRuns(input) {
|
|
|
11709
11793
|
} catch (err) {
|
|
11710
11794
|
const result = {
|
|
11711
11795
|
model,
|
|
11712
|
-
durationMs:
|
|
11796
|
+
durationMs: elapsedMs(t0),
|
|
11713
11797
|
success: false,
|
|
11714
11798
|
error: err instanceof Error ? err.message : String(err)
|
|
11715
11799
|
};
|