@clazic/kordoc 2.4.14 → 2.4.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-X6NIA6BK.js → chunk-QR27D67R.js} +2 -2
- package/dist/{chunk-6CADPLGJ.js → chunk-RH6IBTHH.js} +3 -2
- package/dist/{chunk-6CADPLGJ.js.map → chunk-RH6IBTHH.js.map} +1 -1
- package/dist/cli.js +5 -5
- package/dist/index.cjs +103 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +103 -19
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-EK3CPEZG.js → utils-HHJDSSR6.js} +2 -2
- package/dist/{watch-NSWBVKQZ.js → watch-YAILKKKP.js} +3 -3
- package/package.json +1 -1
- /package/dist/{chunk-X6NIA6BK.js.map → chunk-QR27D67R.js.map} +0 -0
- /package/dist/{utils-EK3CPEZG.js.map → utils-HHJDSSR6.js.map} +0 -0
- /package/dist/{watch-NSWBVKQZ.js.map → watch-YAILKKKP.js.map} +0 -0
package/dist/cli.js
CHANGED
|
@@ -4,12 +4,12 @@ import {
|
|
|
4
4
|
markdownToHwpx,
|
|
5
5
|
markdownToXlsx,
|
|
6
6
|
parse
|
|
7
|
-
} from "./chunk-6CADPLGJ.js";
|
|
7
|
+
} from "./chunk-RH6IBTHH.js";
|
|
8
8
|
import "./chunk-YW5G6BCJ.js";
|
|
9
9
|
import {
|
|
10
10
|
VERSION,
|
|
11
11
|
toArrayBuffer
|
|
12
|
-
} from "./chunk-X6NIA6BK.js";
|
|
12
|
+
} from "./chunk-QR27D67R.js";
|
|
13
13
|
import "./chunk-MOL7MDBG.js";
|
|
14
14
|
import "./chunk-7FMKAV4P.js";
|
|
15
15
|
import "./chunk-34WIGIQC.js";
|
|
@@ -177,7 +177,7 @@ async function runParse(files, opts) {
|
|
|
177
177
|
saveImages(absPath);
|
|
178
178
|
}
|
|
179
179
|
} catch (err) {
|
|
180
|
-
const { sanitizeError } = await import("./utils-EK3CPEZG.js");
|
|
180
|
+
const { sanitizeError } = await import("./utils-HHJDSSR6.js");
|
|
181
181
|
process.stderr.write(`
|
|
182
182
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
183
183
|
`);
|
|
@@ -259,7 +259,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
259
259
|
`));
|
|
260
260
|
}
|
|
261
261
|
} catch (err) {
|
|
262
|
-
const { sanitizeError } = await import("./utils-EK3CPEZG.js");
|
|
262
|
+
const { sanitizeError } = await import("./utils-HHJDSSR6.js");
|
|
263
263
|
process.stderr.write(` FAIL
|
|
264
264
|
`);
|
|
265
265
|
process.stderr.write(` \u2192 ${sanitizeError(err)}
|
|
@@ -291,7 +291,7 @@ program.command("init-env").description("kordoc\uC6A9 .env \uD15C\uD50C\uB9BF \u
|
|
|
291
291
|
}
|
|
292
292
|
});
|
|
293
293
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
294
|
-
const { watchDirectory } = await import("./watch-NSWBVKQZ.js");
|
|
294
|
+
const { watchDirectory } = await import("./watch-YAILKKKP.js");
|
|
295
295
|
await watchDirectory({
|
|
296
296
|
dir,
|
|
297
297
|
outDir: opts.outDir,
|
package/dist/index.cjs
CHANGED
|
@@ -3138,7 +3138,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
3138
3138
|
var import_xmldom = require("@xmldom/xmldom");
|
|
3139
3139
|
|
|
3140
3140
|
// src/utils.ts
|
|
3141
|
-
var VERSION = true ? "2.4.
|
|
3141
|
+
var VERSION = true ? "2.4.15" : "0.0.0-dev";
|
|
3142
3142
|
function toArrayBuffer(buf) {
|
|
3143
3143
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3144
3144
|
return buf.buffer;
|
|
@@ -11332,6 +11332,7 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
|
|
|
11332
11332
|
var import_promises2 = require("fs/promises");
|
|
11333
11333
|
var import_path5 = require("path");
|
|
11334
11334
|
var import_child_process4 = require("child_process");
|
|
11335
|
+
var import_node_perf_hooks = require("perf_hooks");
|
|
11335
11336
|
var import_libreoffice_convert = __toESM(require("libreoffice-convert"), 1);
|
|
11336
11337
|
init_logger();
|
|
11337
11338
|
var libreConvert = import_libreoffice_convert.default.convert;
|
|
@@ -11380,6 +11381,9 @@ var PROOFREAD_PROMPT = [
|
|
|
11380
11381
|
"- \uC624\uD0C8\uC790, \uB744\uC5B4\uC4F0\uAE30, \uC904\uBC14\uAFC8, Markdown \uAD6C\uC870\uB9CC \uAD50\uC815",
|
|
11381
11382
|
"- \uACB0\uACFC\uB294 Markdown \uBCF8\uBB38\uB9CC \uCD9C\uB825"
|
|
11382
11383
|
].join("\n");
|
|
11384
|
+
function elapsedMs(startAt) {
|
|
11385
|
+
return Math.round(import_node_perf_hooks.performance.now() - startAt);
|
|
11386
|
+
}
|
|
11383
11387
|
async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
11384
11388
|
const absInput = (0, import_path5.resolve)(inputPath);
|
|
11385
11389
|
const stem = (0, import_path5.basename)(absInput, (0, import_path5.extname)(absInput));
|
|
@@ -11419,7 +11423,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11419
11423
|
try {
|
|
11420
11424
|
ensureSupportedInput(absInput);
|
|
11421
11425
|
let workingPdfPath = absInput;
|
|
11422
|
-
const convertStart =
|
|
11426
|
+
const convertStart = import_node_perf_hooks.performance.now();
|
|
11423
11427
|
currentStage = "convert";
|
|
11424
11428
|
markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
|
|
11425
11429
|
logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
|
|
@@ -11430,21 +11434,40 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11430
11434
|
const out = await convertWithLibreOffice(inputBuffer, ".pdf");
|
|
11431
11435
|
await (0, import_promises2.writeFile)(workingPdfPath, out);
|
|
11432
11436
|
}
|
|
11433
|
-
timingsMs.convert =
|
|
11437
|
+
timingsMs.convert = elapsedMs(convertStart);
|
|
11434
11438
|
markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
|
|
11435
11439
|
logStage("info", "convert", "done", "PDF \uBCC0\uD658 \uC644\uB8CC", { elapsedMs: timingsMs.convert });
|
|
11436
|
-
const renderStart =
|
|
11440
|
+
const renderStart = import_node_perf_hooks.performance.now();
|
|
11437
11441
|
currentStage = "render";
|
|
11438
11442
|
markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
|
|
11439
11443
|
logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi });
|
|
11440
|
-
|
|
11444
|
+
const renderWithProgress = await renderPdfToPngWithProgress(
|
|
11445
|
+
workingPdfPath,
|
|
11446
|
+
(0, import_path5.join)(imagesDir, "page"),
|
|
11447
|
+
dpi,
|
|
11448
|
+
(current, total) => {
|
|
11449
|
+
markStageProgress(
|
|
11450
|
+
"render",
|
|
11451
|
+
Math.round(current / total * 100),
|
|
11452
|
+
current,
|
|
11453
|
+
total,
|
|
11454
|
+
`\uD398\uC774\uC9C0 ${current}/${total} \uB80C\uB354\uB9C1`
|
|
11455
|
+
);
|
|
11456
|
+
}
|
|
11457
|
+
);
|
|
11441
11458
|
const images = await listPageImages(imagesDir);
|
|
11442
11459
|
if (images.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328: \uACB0\uACFC \uC774\uBBF8\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
11443
|
-
|
|
11444
|
-
|
|
11460
|
+
if (!renderWithProgress.emittedPerPageProgress) {
|
|
11461
|
+
markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
|
|
11462
|
+
}
|
|
11463
|
+
timingsMs.render = elapsedMs(renderStart);
|
|
11445
11464
|
markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
|
|
11446
|
-
logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", {
|
|
11447
|
-
|
|
11465
|
+
logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", {
|
|
11466
|
+
pages: images.length,
|
|
11467
|
+
elapsedMs: timingsMs.render,
|
|
11468
|
+
pageCountSource: renderWithProgress.pageCountSource
|
|
11469
|
+
});
|
|
11470
|
+
const probeStart = import_node_perf_hooks.performance.now();
|
|
11448
11471
|
currentStage = "probe";
|
|
11449
11472
|
markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
|
|
11450
11473
|
logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
|
|
@@ -11468,14 +11491,14 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11468
11491
|
const selected = await probeRuns.firstSuccess;
|
|
11469
11492
|
const selectedModel = selected.selectedModel;
|
|
11470
11493
|
const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
|
|
11471
|
-
timingsMs.probe =
|
|
11494
|
+
timingsMs.probe = elapsedMs(probeStart);
|
|
11472
11495
|
markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
|
|
11473
11496
|
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
|
|
11474
11497
|
const probeResultsPromise = probeRuns.allResults.then(async (results) => {
|
|
11475
11498
|
await updateModelCache(modelCachePath, results);
|
|
11476
11499
|
return results;
|
|
11477
11500
|
});
|
|
11478
|
-
const ocrStart =
|
|
11501
|
+
const ocrStart = import_node_perf_hooks.performance.now();
|
|
11479
11502
|
currentStage = "ocr";
|
|
11480
11503
|
markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
|
|
11481
11504
|
logStage("info", "ocr", "start", "\uD398\uC774\uC9C0 OCR \uC2DC\uC791", { selectedModel, pageCount: images.length });
|
|
@@ -11499,10 +11522,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11499
11522
|
markStageProgress("ocr", Math.round((i + 1) / images.length * 100), i + 1, images.length, `OCR ${i + 1}/${images.length}`);
|
|
11500
11523
|
logStage("debug", "ocr", "progress", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { page: i + 1, total: images.length });
|
|
11501
11524
|
}
|
|
11502
|
-
timingsMs.ocr =
|
|
11525
|
+
timingsMs.ocr = elapsedMs(ocrStart);
|
|
11503
11526
|
markStageDone("ocr", "OCR \uC644\uB8CC");
|
|
11504
11527
|
logStage("info", "ocr", "done", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { elapsedMs: timingsMs.ocr });
|
|
11505
|
-
const proofStart =
|
|
11528
|
+
const proofStart = import_node_perf_hooks.performance.now();
|
|
11506
11529
|
currentStage = "proofread";
|
|
11507
11530
|
markStageStart("proofread", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC9C4\uD589 \uC911");
|
|
11508
11531
|
logStage("info", "proofread", "start", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC2DC\uC791", { pages: rawPagePaths.length });
|
|
@@ -11538,16 +11561,16 @@ ${rawMd}
|
|
|
11538
11561
|
markStageProgress("proofread", Math.round((i + 1) / rawPagePaths.length * 100), i + 1, rawPagePaths.length, `\uAD50\uC815 ${i + 1}/${rawPagePaths.length}`);
|
|
11539
11562
|
logStage("debug", "proofread", "progress", "\uD398\uC774\uC9C0 \uAD50\uC815 \uC644\uB8CC", { page: i + 1, total: rawPagePaths.length });
|
|
11540
11563
|
}
|
|
11541
|
-
timingsMs.proofread =
|
|
11564
|
+
timingsMs.proofread = elapsedMs(proofStart);
|
|
11542
11565
|
markStageDone("proofread", "\uAD50\uC815 \uC644\uB8CC");
|
|
11543
11566
|
logStage("info", "proofread", "done", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC644\uB8CC", { elapsedMs: timingsMs.proofread });
|
|
11544
|
-
const mergeStart =
|
|
11567
|
+
const mergeStart = import_node_perf_hooks.performance.now();
|
|
11545
11568
|
currentStage = "merge";
|
|
11546
11569
|
markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
|
|
11547
11570
|
logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: proofedPaths.length });
|
|
11548
11571
|
const merged = await mergeMarkdownPages(proofedPaths);
|
|
11549
11572
|
await (0, import_promises2.writeFile)(outputPath, merged, "utf-8");
|
|
11550
|
-
timingsMs.merge =
|
|
11573
|
+
timingsMs.merge = elapsedMs(mergeStart);
|
|
11551
11574
|
markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
|
|
11552
11575
|
logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
|
|
11553
11576
|
const report = {
|
|
@@ -11641,6 +11664,49 @@ async function renderPdfToPng(pdfPath, prefixPath, dpi) {
|
|
|
11641
11664
|
throw new UnifiedOcrError("RENDER_FAILED", "render", err instanceof Error ? err.message : String(err));
|
|
11642
11665
|
}
|
|
11643
11666
|
}
|
|
11667
|
+
async function getPdfPageCount(pdfPath) {
|
|
11668
|
+
const stdout = await runCommandWithStdout("pdfinfo", [pdfPath]);
|
|
11669
|
+
const m = stdout.match(/^\s*Pages:\s*(\d+)\s*$/mi);
|
|
11670
|
+
if (!m) {
|
|
11671
|
+
throw new Error("pdfinfo \uCD9C\uB825\uC5D0\uC11C \uD398\uC774\uC9C0 \uC218\uB97C \uCC3E\uC9C0 \uBABB\uD588\uC2B5\uB2C8\uB2E4.");
|
|
11672
|
+
}
|
|
11673
|
+
const n = Number(m[1]);
|
|
11674
|
+
if (!Number.isFinite(n) || n <= 0) {
|
|
11675
|
+
throw new Error(`\uC798\uBABB\uB41C \uD398\uC774\uC9C0 \uC218: ${m[1]}`);
|
|
11676
|
+
}
|
|
11677
|
+
return n;
|
|
11678
|
+
}
|
|
11679
|
+
async function renderPdfToPngWithProgress(pdfPath, prefixPath, dpi, onPageDone) {
|
|
11680
|
+
let totalPages = 0;
|
|
11681
|
+
try {
|
|
11682
|
+
totalPages = await getPdfPageCount(pdfPath);
|
|
11683
|
+
} catch {
|
|
11684
|
+
totalPages = 0;
|
|
11685
|
+
}
|
|
11686
|
+
if (totalPages > 0) {
|
|
11687
|
+
try {
|
|
11688
|
+
for (let page = 1; page <= totalPages; page++) {
|
|
11689
|
+
await runCommand("pdftoppm", [
|
|
11690
|
+
"-png",
|
|
11691
|
+
"-r",
|
|
11692
|
+
String(dpi),
|
|
11693
|
+
"-f",
|
|
11694
|
+
String(page),
|
|
11695
|
+
"-l",
|
|
11696
|
+
String(page),
|
|
11697
|
+
pdfPath,
|
|
11698
|
+
prefixPath
|
|
11699
|
+
]);
|
|
11700
|
+
onPageDone(page, totalPages);
|
|
11701
|
+
}
|
|
11702
|
+
return { emittedPerPageProgress: true, pageCountSource: "pdfinfo" };
|
|
11703
|
+
} catch (err) {
|
|
11704
|
+
throw new UnifiedOcrError("RENDER_FAILED", "render", err instanceof Error ? err.message : String(err));
|
|
11705
|
+
}
|
|
11706
|
+
}
|
|
11707
|
+
await renderPdfToPng(pdfPath, prefixPath, dpi);
|
|
11708
|
+
return { emittedPerPageProgress: false, pageCountSource: "fallback" };
|
|
11709
|
+
}
|
|
11644
11710
|
async function runCommand(cmd, args) {
|
|
11645
11711
|
await new Promise((resolvePromise, reject) => {
|
|
11646
11712
|
const child = (0, import_child_process4.spawn)(cmd, args, { stdio: "pipe" });
|
|
@@ -11655,6 +11721,24 @@ async function runCommand(cmd, args) {
|
|
|
11655
11721
|
});
|
|
11656
11722
|
});
|
|
11657
11723
|
}
|
|
11724
|
+
async function runCommandWithStdout(cmd, args) {
|
|
11725
|
+
return await new Promise((resolvePromise, reject) => {
|
|
11726
|
+
const child = (0, import_child_process4.spawn)(cmd, args, { stdio: "pipe" });
|
|
11727
|
+
let stdout = "";
|
|
11728
|
+
let stderr = "";
|
|
11729
|
+
child.stdout.on("data", (d) => {
|
|
11730
|
+
stdout += String(d);
|
|
11731
|
+
});
|
|
11732
|
+
child.stderr.on("data", (d) => {
|
|
11733
|
+
stderr += String(d);
|
|
11734
|
+
});
|
|
11735
|
+
child.on("error", reject);
|
|
11736
|
+
child.on("close", (code) => {
|
|
11737
|
+
if (code === 0) resolvePromise(stdout);
|
|
11738
|
+
else reject(new Error(`${cmd} \uC2E4\uD328 (code=${code}): ${stderr.trim()}`));
|
|
11739
|
+
});
|
|
11740
|
+
});
|
|
11741
|
+
}
|
|
11658
11742
|
async function assertSofficeAvailable() {
|
|
11659
11743
|
try {
|
|
11660
11744
|
await runCommand("soffice", ["--version"]);
|
|
@@ -11708,7 +11792,7 @@ function startParallelProbeRuns(input) {
|
|
|
11708
11792
|
});
|
|
11709
11793
|
let lastErr = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
|
|
11710
11794
|
const allResults = mapWithConcurrency(input.models, input.probeConcurrency, async (model, index) => {
|
|
11711
|
-
const t0 =
|
|
11795
|
+
const t0 = import_node_perf_hooks.performance.now();
|
|
11712
11796
|
try {
|
|
11713
11797
|
await ocrImageViaNim({
|
|
11714
11798
|
imagePath: input.probeImage,
|
|
@@ -11722,7 +11806,7 @@ function startParallelProbeRuns(input) {
|
|
|
11722
11806
|
logger: input.logger,
|
|
11723
11807
|
stage: "probe"
|
|
11724
11808
|
});
|
|
11725
|
-
const result = { model, durationMs:
|
|
11809
|
+
const result = { model, durationMs: elapsedMs(t0), success: true };
|
|
11726
11810
|
input.onProbeResult?.({ index, model, result });
|
|
11727
11811
|
if (!firstResolved) {
|
|
11728
11812
|
firstResolved = true;
|
|
@@ -11732,7 +11816,7 @@ function startParallelProbeRuns(input) {
|
|
|
11732
11816
|
} catch (err) {
|
|
11733
11817
|
const result = {
|
|
11734
11818
|
model,
|
|
11735
|
-
durationMs:
|
|
11819
|
+
durationMs: elapsedMs(t0),
|
|
11736
11820
|
success: false,
|
|
11737
11821
|
error: err instanceof Error ? err.message : String(err)
|
|
11738
11822
|
};
|