@clazic/kordoc 2.4.13 → 2.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-5R37N6KE.js → chunk-YHPNDX7A.js} +2 -2
- package/dist/{chunk-UX75CBUO.js → chunk-ZER7GYXK.js} +3 -2
- package/dist/{chunk-UX75CBUO.js.map → chunk-ZER7GYXK.js.map} +1 -1
- package/dist/cli.js +5 -5
- package/dist/index.cjs +86 -51
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +86 -51
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-XLLXVB7V.js → utils-ZQA6RCXN.js} +2 -2
- package/dist/{watch-3MTAXFEA.js → watch-ULSOWHFE.js} +3 -3
- package/package.json +1 -1
- /package/dist/{chunk-5R37N6KE.js.map → chunk-YHPNDX7A.js.map} +0 -0
- /package/dist/{utils-XLLXVB7V.js.map → utils-ZQA6RCXN.js.map} +0 -0
- /package/dist/{watch-3MTAXFEA.js.map → watch-ULSOWHFE.js.map} +0 -0
package/dist/cli.js
CHANGED
|
@@ -4,12 +4,12 @@ import {
|
|
|
4
4
|
markdownToHwpx,
|
|
5
5
|
markdownToXlsx,
|
|
6
6
|
parse
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-ZER7GYXK.js";
|
|
8
8
|
import "./chunk-YW5G6BCJ.js";
|
|
9
9
|
import {
|
|
10
10
|
VERSION,
|
|
11
11
|
toArrayBuffer
|
|
12
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-YHPNDX7A.js";
|
|
13
13
|
import "./chunk-MOL7MDBG.js";
|
|
14
14
|
import "./chunk-7FMKAV4P.js";
|
|
15
15
|
import "./chunk-34WIGIQC.js";
|
|
@@ -177,7 +177,7 @@ async function runParse(files, opts) {
|
|
|
177
177
|
saveImages(absPath);
|
|
178
178
|
}
|
|
179
179
|
} catch (err) {
|
|
180
|
-
const { sanitizeError } = await import("./utils-
|
|
180
|
+
const { sanitizeError } = await import("./utils-ZQA6RCXN.js");
|
|
181
181
|
process.stderr.write(`
|
|
182
182
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
183
183
|
`);
|
|
@@ -259,7 +259,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
259
259
|
`));
|
|
260
260
|
}
|
|
261
261
|
} catch (err) {
|
|
262
|
-
const { sanitizeError } = await import("./utils-
|
|
262
|
+
const { sanitizeError } = await import("./utils-ZQA6RCXN.js");
|
|
263
263
|
process.stderr.write(` FAIL
|
|
264
264
|
`);
|
|
265
265
|
process.stderr.write(` \u2192 ${sanitizeError(err)}
|
|
@@ -291,7 +291,7 @@ program.command("init-env").description("kordoc\uC6A9 .env \uD15C\uD50C\uB9BF \u
|
|
|
291
291
|
}
|
|
292
292
|
});
|
|
293
293
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
294
|
-
const { watchDirectory } = await import("./watch-
|
|
294
|
+
const { watchDirectory } = await import("./watch-ULSOWHFE.js");
|
|
295
295
|
await watchDirectory({
|
|
296
296
|
dir,
|
|
297
297
|
outDir: opts.outDir,
|
package/dist/index.cjs
CHANGED
|
@@ -3138,7 +3138,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
3138
3138
|
var import_xmldom = require("@xmldom/xmldom");
|
|
3139
3139
|
|
|
3140
3140
|
// src/utils.ts
|
|
3141
|
-
var VERSION = true ? "2.4.
|
|
3141
|
+
var VERSION = true ? "2.4.14" : "0.0.0-dev";
|
|
3142
3142
|
function toArrayBuffer(buf) {
|
|
3143
3143
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3144
3144
|
return buf.buffer;
|
|
@@ -11332,6 +11332,7 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
|
|
|
11332
11332
|
var import_promises2 = require("fs/promises");
|
|
11333
11333
|
var import_path5 = require("path");
|
|
11334
11334
|
var import_child_process4 = require("child_process");
|
|
11335
|
+
var import_node_perf_hooks = require("perf_hooks");
|
|
11335
11336
|
var import_libreoffice_convert = __toESM(require("libreoffice-convert"), 1);
|
|
11336
11337
|
init_logger();
|
|
11337
11338
|
var libreConvert = import_libreoffice_convert.default.convert;
|
|
@@ -11380,6 +11381,9 @@ var PROOFREAD_PROMPT = [
|
|
|
11380
11381
|
"- \uC624\uD0C8\uC790, \uB744\uC5B4\uC4F0\uAE30, \uC904\uBC14\uAFC8, Markdown \uAD6C\uC870\uB9CC \uAD50\uC815",
|
|
11381
11382
|
"- \uACB0\uACFC\uB294 Markdown \uBCF8\uBB38\uB9CC \uCD9C\uB825"
|
|
11382
11383
|
].join("\n");
|
|
11384
|
+
/**
 * Whole milliseconds elapsed since an earlier `performance.now()` reading.
 *
 * @param {number} startAt - Timestamp previously taken from `performance.now()`.
 * @returns {number} Difference between the current reading and `startAt`,
 *   rounded to the nearest integer with `Math.round`.
 */
function elapsedMs(startAt) {
  const { performance: clock } = import_node_perf_hooks;
  const delta = clock.now() - startAt;
  return Math.round(delta);
}
|
|
11383
11387
|
async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
11384
11388
|
const absInput = (0, import_path5.resolve)(inputPath);
|
|
11385
11389
|
const stem = (0, import_path5.basename)(absInput, (0, import_path5.extname)(absInput));
|
|
@@ -11419,7 +11423,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11419
11423
|
try {
|
|
11420
11424
|
ensureSupportedInput(absInput);
|
|
11421
11425
|
let workingPdfPath = absInput;
|
|
11422
|
-
const convertStart =
|
|
11426
|
+
const convertStart = import_node_perf_hooks.performance.now();
|
|
11423
11427
|
currentStage = "convert";
|
|
11424
11428
|
markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
|
|
11425
11429
|
logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
|
|
@@ -11430,10 +11434,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11430
11434
|
const out = await convertWithLibreOffice(inputBuffer, ".pdf");
|
|
11431
11435
|
await (0, import_promises2.writeFile)(workingPdfPath, out);
|
|
11432
11436
|
}
|
|
11433
|
-
timingsMs.convert =
|
|
11437
|
+
timingsMs.convert = elapsedMs(convertStart);
|
|
11434
11438
|
markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
|
|
11435
11439
|
logStage("info", "convert", "done", "PDF \uBCC0\uD658 \uC644\uB8CC", { elapsedMs: timingsMs.convert });
|
|
11436
|
-
const renderStart =
|
|
11440
|
+
const renderStart = import_node_perf_hooks.performance.now();
|
|
11437
11441
|
currentStage = "render";
|
|
11438
11442
|
markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
|
|
11439
11443
|
logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi });
|
|
@@ -11441,57 +11445,41 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11441
11445
|
const images = await listPageImages(imagesDir);
|
|
11442
11446
|
if (images.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328: \uACB0\uACFC \uC774\uBBF8\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
11443
11447
|
markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
|
|
11444
|
-
timingsMs.render =
|
|
11448
|
+
timingsMs.render = elapsedMs(renderStart);
|
|
11445
11449
|
markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
|
|
11446
11450
|
logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", { pages: images.length, elapsedMs: timingsMs.render });
|
|
11447
|
-
const probeStart =
|
|
11451
|
+
const probeStart = import_node_perf_hooks.performance.now();
|
|
11448
11452
|
currentStage = "probe";
|
|
11449
11453
|
markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
|
|
11450
11454
|
logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
|
|
11451
11455
|
const probeImage = await pickRepresentativeImage(images);
|
|
11452
11456
|
let probeDone = 0;
|
|
11453
|
-
const
|
|
11454
|
-
|
|
11455
|
-
|
|
11456
|
-
|
|
11457
|
-
|
|
11458
|
-
|
|
11459
|
-
|
|
11460
|
-
|
|
11461
|
-
|
|
11462
|
-
|
|
11463
|
-
timeoutMs,
|
|
11464
|
-
maxRetries: 2,
|
|
11465
|
-
logger,
|
|
11466
|
-
stage: "probe"
|
|
11467
|
-
});
|
|
11468
|
-
const result = { model, durationMs: Date.now() - t0, success: true };
|
|
11469
|
-
probeDone += 1;
|
|
11470
|
-
markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
|
|
11471
|
-
logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
|
|
11472
|
-
return result;
|
|
11473
|
-
} catch (err) {
|
|
11474
|
-
const result = {
|
|
11475
|
-
model,
|
|
11476
|
-
durationMs: Date.now() - t0,
|
|
11477
|
-
success: false,
|
|
11478
|
-
error: err instanceof Error ? err.message : String(err)
|
|
11479
|
-
};
|
|
11457
|
+
const probeRuns = startParallelProbeRuns({
|
|
11458
|
+
models,
|
|
11459
|
+
probeConcurrency,
|
|
11460
|
+
probeImage,
|
|
11461
|
+
modelMaxTokens,
|
|
11462
|
+
baseUrl,
|
|
11463
|
+
keyPool,
|
|
11464
|
+
timeoutMs,
|
|
11465
|
+
logger,
|
|
11466
|
+
onProbeResult: ({ index, model, result }) => {
|
|
11480
11467
|
probeDone += 1;
|
|
11481
11468
|
markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
|
|
11482
11469
|
logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
|
|
11483
|
-
return result;
|
|
11484
11470
|
}
|
|
11485
11471
|
});
|
|
11486
|
-
const
|
|
11487
|
-
const selectedModel =
|
|
11488
|
-
|
|
11489
|
-
|
|
11490
|
-
timingsMs.probe = Date.now() - probeStart;
|
|
11491
|
-
await updateModelCache(modelCachePath, probeResults);
|
|
11472
|
+
const selected = await probeRuns.firstSuccess;
|
|
11473
|
+
const selectedModel = selected.selectedModel;
|
|
11474
|
+
const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
|
|
11475
|
+
timingsMs.probe = elapsedMs(probeStart);
|
|
11492
11476
|
markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
|
|
11493
|
-
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC", { selectedModel,
|
|
11494
|
-
const
|
|
11477
|
+
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
|
|
11478
|
+
const probeResultsPromise = probeRuns.allResults.then(async (results) => {
|
|
11479
|
+
await updateModelCache(modelCachePath, results);
|
|
11480
|
+
return results;
|
|
11481
|
+
});
|
|
11482
|
+
const ocrStart = import_node_perf_hooks.performance.now();
|
|
11495
11483
|
currentStage = "ocr";
|
|
11496
11484
|
markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
|
|
11497
11485
|
logStage("info", "ocr", "start", "\uD398\uC774\uC9C0 OCR \uC2DC\uC791", { selectedModel, pageCount: images.length });
|
|
@@ -11515,10 +11503,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11515
11503
|
markStageProgress("ocr", Math.round((i + 1) / images.length * 100), i + 1, images.length, `OCR ${i + 1}/${images.length}`);
|
|
11516
11504
|
logStage("debug", "ocr", "progress", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { page: i + 1, total: images.length });
|
|
11517
11505
|
}
|
|
11518
|
-
timingsMs.ocr =
|
|
11506
|
+
timingsMs.ocr = elapsedMs(ocrStart);
|
|
11519
11507
|
markStageDone("ocr", "OCR \uC644\uB8CC");
|
|
11520
11508
|
logStage("info", "ocr", "done", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { elapsedMs: timingsMs.ocr });
|
|
11521
|
-
const proofStart =
|
|
11509
|
+
const proofStart = import_node_perf_hooks.performance.now();
|
|
11522
11510
|
currentStage = "proofread";
|
|
11523
11511
|
markStageStart("proofread", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC9C4\uD589 \uC911");
|
|
11524
11512
|
logStage("info", "proofread", "start", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC2DC\uC791", { pages: rawPagePaths.length });
|
|
@@ -11554,16 +11542,16 @@ ${rawMd}
|
|
|
11554
11542
|
markStageProgress("proofread", Math.round((i + 1) / rawPagePaths.length * 100), i + 1, rawPagePaths.length, `\uAD50\uC815 ${i + 1}/${rawPagePaths.length}`);
|
|
11555
11543
|
logStage("debug", "proofread", "progress", "\uD398\uC774\uC9C0 \uAD50\uC815 \uC644\uB8CC", { page: i + 1, total: rawPagePaths.length });
|
|
11556
11544
|
}
|
|
11557
|
-
timingsMs.proofread =
|
|
11545
|
+
timingsMs.proofread = elapsedMs(proofStart);
|
|
11558
11546
|
markStageDone("proofread", "\uAD50\uC815 \uC644\uB8CC");
|
|
11559
11547
|
logStage("info", "proofread", "done", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC644\uB8CC", { elapsedMs: timingsMs.proofread });
|
|
11560
|
-
const mergeStart =
|
|
11548
|
+
const mergeStart = import_node_perf_hooks.performance.now();
|
|
11561
11549
|
currentStage = "merge";
|
|
11562
11550
|
markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
|
|
11563
11551
|
logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: proofedPaths.length });
|
|
11564
11552
|
const merged = await mergeMarkdownPages(proofedPaths);
|
|
11565
11553
|
await (0, import_promises2.writeFile)(outputPath, merged, "utf-8");
|
|
11566
|
-
timingsMs.merge =
|
|
11554
|
+
timingsMs.merge = elapsedMs(mergeStart);
|
|
11567
11555
|
markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
|
|
11568
11556
|
logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
|
|
11569
11557
|
const report = {
|
|
@@ -11572,7 +11560,7 @@ ${rawMd}
|
|
|
11572
11560
|
workspaceDir,
|
|
11573
11561
|
selectedModel,
|
|
11574
11562
|
probeImage,
|
|
11575
|
-
probeResults,
|
|
11563
|
+
probeResults: await probeResultsPromise,
|
|
11576
11564
|
pageCount: images.length,
|
|
11577
11565
|
keyHealth: keyPool.snapshot(),
|
|
11578
11566
|
timingsMs,
|
|
@@ -11713,9 +11701,56 @@ async function mapWithConcurrency(items, concurrency, mapper) {
|
|
|
11713
11701
|
await Promise.all(Array.from({ length: workerCount }, () => worker()));
|
|
11714
11702
|
return results;
|
|
11715
11703
|
}
|
|
11716
|
-
function
|
|
11717
|
-
|
|
11718
|
-
|
|
11704
|
+
/**
 * Starts one OCR speed probe per model and exposes two views of the outcome.
 *
 * Each model in `input.models` is probed by calling `ocrImageViaNim` on
 * `input.probeImage` (at most `input.probeConcurrency` at a time, as scheduled
 * by `mapWithConcurrency`). A probe never throws out of its own accord: a
 * failure is recorded as a `{ success: false, error }` result instead.
 *
 * @param {object} input
 * @param {string[]} input.models - Candidate model names to probe.
 * @param {number} input.probeConcurrency - Concurrency passed to `mapWithConcurrency`.
 * @param {string} input.probeImage - Image path handed to every probe.
 * @param {Record<string, number>} input.modelMaxTokens - Per-model token limit;
 *   models without an entry use 8192.
 * @param {string} input.baseUrl - Forwarded to `ocrImageViaNim`.
 * @param {object} input.keyPool - Forwarded to `ocrImageViaNim`.
 * @param {number} input.timeoutMs - Forwarded to `ocrImageViaNim`.
 * @param {object} [input.logger] - Forwarded to `ocrImageViaNim`.
 * @param {(event: {index: number, model: string, result: object}) => void} [input.onProbeResult]
 *   Invoked once per finished probe, for successes and failures alike.
 * @returns {{firstSuccess: Promise<{selectedModel: string, firstDurationMs: number}>, allResults: Promise<object[]>}}
 *   `firstSuccess` fulfils as soon as any probe succeeds and rejects with a
 *   `PROBE_FAILED` `UnifiedOcrError` when the run ends without a success
 *   (including when `input.models` is empty). `allResults` is the promise
 *   returned by `mapWithConcurrency` for the per-model result objects.
 */
function startParallelProbeRuns(input) {
  const noModelMessage = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
  let firstSettled = false;
  // Message of the most recent failed probe; stays null when no probe failed
  // (which, without a success, means no probe ran at all).
  let lastErr = null;
  let resolveFirst;
  let rejectFirst;
  const firstSuccess = new Promise((resolve4, reject) => {
    resolveFirst = resolve4;
    rejectFirst = reject;
  });
  const allResults = mapWithConcurrency(input.models, input.probeConcurrency, async (model, index) => {
    const t0 = import_node_perf_hooks.performance.now();
    try {
      await ocrImageViaNim({
        imagePath: input.probeImage,
        prompt: OCR_PROMPT2,
        model,
        maxTokens: input.modelMaxTokens[model] ?? 8192,
        baseUrl: input.baseUrl,
        keyPool: input.keyPool,
        timeoutMs: input.timeoutMs,
        maxRetries: 2,
        logger: input.logger,
        stage: "probe"
      });
      const result = { model, durationMs: elapsedMs(t0), success: true };
      input.onProbeResult?.({ index, model, result });
      if (!firstSettled) {
        firstSettled = true;
        resolveFirst({ selectedModel: model, firstDurationMs: result.durationMs });
      }
      return result;
    } catch (err) {
      const result = {
        model,
        durationMs: elapsedMs(t0),
        success: false,
        error: err instanceof Error ? err.message : String(err)
      };
      lastErr = result.error;
      input.onProbeResult?.({ index, model, result });
      return result;
    }
  });
  // Settle `firstSuccess` from the overall run rather than from a per-probe
  // counter: a counter is never reached when `models` is empty or when the
  // run aborts early, which would leave callers awaiting forever.
  // The derived promise is intentionally not returned; its handlers only
  // settle `firstSuccess`.
  void allResults.then(
    () => {
      if (firstSettled) return;
      firstSettled = true;
      const message = lastErr === null ? noModelMessage : `\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: ${lastErr}`;
      rejectFirst(new UnifiedOcrError("PROBE_FAILED", "probe", message));
    },
    (err) => {
      if (firstSettled) return;
      firstSettled = true;
      rejectFirst(err);
    }
  );
  return { firstSuccess, allResults };
}
|
|
11720
11755
|
async function loadModelCache(path) {
|
|
11721
11756
|
try {
|