@clazic/kordoc 2.4.13 → 2.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-5R37N6KE.js → chunk-YHPNDX7A.js} +2 -2
- package/dist/{chunk-UX75CBUO.js → chunk-ZER7GYXK.js} +3 -2
- package/dist/{chunk-UX75CBUO.js.map → chunk-ZER7GYXK.js.map} +1 -1
- package/dist/cli.js +5 -5
- package/dist/index.cjs +86 -51
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +86 -51
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-XLLXVB7V.js → utils-ZQA6RCXN.js} +2 -2
- package/dist/{watch-3MTAXFEA.js → watch-ULSOWHFE.js} +3 -3
- package/package.json +1 -1
- /package/dist/{chunk-5R37N6KE.js.map → chunk-YHPNDX7A.js.map} +0 -0
- /package/dist/{utils-XLLXVB7V.js.map → utils-ZQA6RCXN.js.map} +0 -0
- /package/dist/{watch-3MTAXFEA.js.map → watch-ULSOWHFE.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -3115,7 +3115,7 @@ import JSZip2 from "jszip";
|
|
|
3115
3115
|
import { DOMParser } from "@xmldom/xmldom";
|
|
3116
3116
|
|
|
3117
3117
|
// src/utils.ts
|
|
3118
|
-
var VERSION = true ? "2.4.
|
|
3118
|
+
var VERSION = true ? "2.4.14" : "0.0.0-dev";
|
|
3119
3119
|
function toArrayBuffer(buf) {
|
|
3120
3120
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3121
3121
|
return buf.buffer;
|
|
@@ -11309,6 +11309,7 @@ var ApiKeyRotationPool = class _ApiKeyRotationPool {
|
|
|
11309
11309
|
import { mkdir, readdir, readFile, stat, writeFile } from "fs/promises";
|
|
11310
11310
|
import { basename as basename2, dirname as dirname3, extname, join as join4, resolve as resolve3 } from "path";
|
|
11311
11311
|
import { spawn as spawn2 } from "child_process";
|
|
11312
|
+
import { performance } from "perf_hooks";
|
|
11312
11313
|
import libre from "libreoffice-convert";
|
|
11313
11314
|
init_logger();
|
|
11314
11315
|
var libreConvert = libre.convert;
|
|
@@ -11357,6 +11358,9 @@ var PROOFREAD_PROMPT = [
|
|
|
11357
11358
|
"- \uC624\uD0C8\uC790, \uB744\uC5B4\uC4F0\uAE30, \uC904\uBC14\uAFC8, Markdown \uAD6C\uC870\uB9CC \uAD50\uC815",
|
|
11358
11359
|
"- \uACB0\uACFC\uB294 Markdown \uBCF8\uBB38\uB9CC \uCD9C\uB825"
|
|
11359
11360
|
].join("\n");
|
|
11361
|
+
/**
 * Returns how many milliseconds have passed since `startAt`, rounded to the
 * nearest integer.
 *
 * @param {number} startAt - Timestamp previously read from `performance.now()`.
 * @returns {number} Whole milliseconds between `startAt` and now.
 */
function elapsedMs(startAt) {
  const delta = performance.now() - startAt;
  return Math.round(delta);
}
|
|
11360
11364
|
async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
11361
11365
|
const absInput = resolve3(inputPath);
|
|
11362
11366
|
const stem = basename2(absInput, extname(absInput));
|
|
@@ -11396,7 +11400,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11396
11400
|
try {
|
|
11397
11401
|
ensureSupportedInput(absInput);
|
|
11398
11402
|
let workingPdfPath = absInput;
|
|
11399
|
-
const convertStart =
|
|
11403
|
+
const convertStart = performance.now();
|
|
11400
11404
|
currentStage = "convert";
|
|
11401
11405
|
markStageStart("convert", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC911");
|
|
11402
11406
|
logStage("info", "convert", "start", "\uBB38\uC11C\uB97C PDF\uB85C \uBCC0\uD658 \uC2DC\uC791", { input: absInput });
|
|
@@ -11407,10 +11411,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11407
11411
|
const out = await convertWithLibreOffice(inputBuffer, ".pdf");
|
|
11408
11412
|
await writeFile(workingPdfPath, out);
|
|
11409
11413
|
}
|
|
11410
|
-
timingsMs.convert =
|
|
11414
|
+
timingsMs.convert = elapsedMs(convertStart);
|
|
11411
11415
|
markStageDone("convert", "PDF \uBCC0\uD658 \uC644\uB8CC");
|
|
11412
11416
|
logStage("info", "convert", "done", "PDF \uBCC0\uD658 \uC644\uB8CC", { elapsedMs: timingsMs.convert });
|
|
11413
|
-
const renderStart =
|
|
11417
|
+
const renderStart = performance.now();
|
|
11414
11418
|
currentStage = "render";
|
|
11415
11419
|
markStageStart("render", "PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC911");
|
|
11416
11420
|
logStage("info", "render", "start", "PDF \uD398\uC774\uC9C0 \uB80C\uB354\uB9C1 \uC2DC\uC791", { pdf: workingPdfPath, dpi });
|
|
@@ -11418,57 +11422,41 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11418
11422
|
const images = await listPageImages(imagesDir);
|
|
11419
11423
|
if (images.length === 0) throw new UnifiedOcrError("RENDER_FAILED", "render", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC2E4\uD328: \uACB0\uACFC \uC774\uBBF8\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
11420
11424
|
markStageProgress("render", 100, images.length, images.length, `\uD398\uC774\uC9C0 ${images.length}\uC7A5 \uC0DD\uC131`);
|
|
11421
|
-
timingsMs.render =
|
|
11425
|
+
timingsMs.render = elapsedMs(renderStart);
|
|
11422
11426
|
markStageDone("render", "\uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC");
|
|
11423
11427
|
logStage("info", "render", "done", "\uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0 \uC0DD\uC131 \uC644\uB8CC", { pages: images.length, elapsedMs: timingsMs.render });
|
|
11424
|
-
const probeStart =
|
|
11428
|
+
const probeStart = performance.now();
|
|
11425
11429
|
currentStage = "probe";
|
|
11426
11430
|
markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
|
|
11427
11431
|
logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
|
|
11428
11432
|
const probeImage = await pickRepresentativeImage(images);
|
|
11429
11433
|
let probeDone = 0;
|
|
11430
|
-
const
|
|
11431
|
-
|
|
11432
|
-
|
|
11433
|
-
|
|
11434
|
-
|
|
11435
|
-
|
|
11436
|
-
|
|
11437
|
-
|
|
11438
|
-
|
|
11439
|
-
|
|
11440
|
-
timeoutMs,
|
|
11441
|
-
maxRetries: 2,
|
|
11442
|
-
logger,
|
|
11443
|
-
stage: "probe"
|
|
11444
|
-
});
|
|
11445
|
-
const result = { model, durationMs: Date.now() - t0, success: true };
|
|
11446
|
-
probeDone += 1;
|
|
11447
|
-
markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
|
|
11448
|
-
logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
|
|
11449
|
-
return result;
|
|
11450
|
-
} catch (err) {
|
|
11451
|
-
const result = {
|
|
11452
|
-
model,
|
|
11453
|
-
durationMs: Date.now() - t0,
|
|
11454
|
-
success: false,
|
|
11455
|
-
error: err instanceof Error ? err.message : String(err)
|
|
11456
|
-
};
|
|
11434
|
+
const probeRuns = startParallelProbeRuns({
|
|
11435
|
+
models,
|
|
11436
|
+
probeConcurrency,
|
|
11437
|
+
probeImage,
|
|
11438
|
+
modelMaxTokens,
|
|
11439
|
+
baseUrl,
|
|
11440
|
+
keyPool,
|
|
11441
|
+
timeoutMs,
|
|
11442
|
+
logger,
|
|
11443
|
+
onProbeResult: ({ index, model, result }) => {
|
|
11457
11444
|
probeDone += 1;
|
|
11458
11445
|
markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
|
|
11459
11446
|
logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
|
|
11460
|
-
return result;
|
|
11461
11447
|
}
|
|
11462
11448
|
});
|
|
11463
|
-
const
|
|
11464
|
-
const selectedModel =
|
|
11465
|
-
|
|
11466
|
-
|
|
11467
|
-
timingsMs.probe = Date.now() - probeStart;
|
|
11468
|
-
await updateModelCache(modelCachePath, probeResults);
|
|
11449
|
+
const selected = await probeRuns.firstSuccess;
|
|
11450
|
+
const selectedModel = selected.selectedModel;
|
|
11451
|
+
const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
|
|
11452
|
+
timingsMs.probe = elapsedMs(probeStart);
|
|
11469
11453
|
markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
|
|
11470
|
-
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC", { selectedModel,
|
|
11471
|
-
const
|
|
11454
|
+
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
|
|
11455
|
+
const probeResultsPromise = probeRuns.allResults.then(async (results) => {
|
|
11456
|
+
await updateModelCache(modelCachePath, results);
|
|
11457
|
+
return results;
|
|
11458
|
+
});
|
|
11459
|
+
const ocrStart = performance.now();
|
|
11472
11460
|
currentStage = "ocr";
|
|
11473
11461
|
markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
|
|
11474
11462
|
logStage("info", "ocr", "start", "\uD398\uC774\uC9C0 OCR \uC2DC\uC791", { selectedModel, pageCount: images.length });
|
|
@@ -11492,10 +11480,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11492
11480
|
markStageProgress("ocr", Math.round((i + 1) / images.length * 100), i + 1, images.length, `OCR ${i + 1}/${images.length}`);
|
|
11493
11481
|
logStage("debug", "ocr", "progress", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { page: i + 1, total: images.length });
|
|
11494
11482
|
}
|
|
11495
|
-
timingsMs.ocr =
|
|
11483
|
+
timingsMs.ocr = elapsedMs(ocrStart);
|
|
11496
11484
|
markStageDone("ocr", "OCR \uC644\uB8CC");
|
|
11497
11485
|
logStage("info", "ocr", "done", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { elapsedMs: timingsMs.ocr });
|
|
11498
|
-
const proofStart =
|
|
11486
|
+
const proofStart = performance.now();
|
|
11499
11487
|
currentStage = "proofread";
|
|
11500
11488
|
markStageStart("proofread", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC9C4\uD589 \uC911");
|
|
11501
11489
|
logStage("info", "proofread", "start", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC2DC\uC791", { pages: rawPagePaths.length });
|
|
@@ -11531,16 +11519,16 @@ ${rawMd}
|
|
|
11531
11519
|
markStageProgress("proofread", Math.round((i + 1) / rawPagePaths.length * 100), i + 1, rawPagePaths.length, `\uAD50\uC815 ${i + 1}/${rawPagePaths.length}`);
|
|
11532
11520
|
logStage("debug", "proofread", "progress", "\uD398\uC774\uC9C0 \uAD50\uC815 \uC644\uB8CC", { page: i + 1, total: rawPagePaths.length });
|
|
11533
11521
|
}
|
|
11534
|
-
timingsMs.proofread =
|
|
11522
|
+
timingsMs.proofread = elapsedMs(proofStart);
|
|
11535
11523
|
markStageDone("proofread", "\uAD50\uC815 \uC644\uB8CC");
|
|
11536
11524
|
logStage("info", "proofread", "done", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC644\uB8CC", { elapsedMs: timingsMs.proofread });
|
|
11537
|
-
const mergeStart =
|
|
11525
|
+
const mergeStart = performance.now();
|
|
11538
11526
|
currentStage = "merge";
|
|
11539
11527
|
markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
|
|
11540
11528
|
logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: proofedPaths.length });
|
|
11541
11529
|
const merged = await mergeMarkdownPages(proofedPaths);
|
|
11542
11530
|
await writeFile(outputPath, merged, "utf-8");
|
|
11543
|
-
timingsMs.merge =
|
|
11531
|
+
timingsMs.merge = elapsedMs(mergeStart);
|
|
11544
11532
|
markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
|
|
11545
11533
|
logStage("info", "merge", "done", "\uCD5C\uC885 \uBCD1\uD569 \uC644\uB8CC", { outputPath, elapsedMs: timingsMs.merge });
|
|
11546
11534
|
const report = {
|
|
@@ -11549,7 +11537,7 @@ ${rawMd}
|
|
|
11549
11537
|
workspaceDir,
|
|
11550
11538
|
selectedModel,
|
|
11551
11539
|
probeImage,
|
|
11552
|
-
probeResults,
|
|
11540
|
+
probeResults: await probeResultsPromise,
|
|
11553
11541
|
pageCount: images.length,
|
|
11554
11542
|
keyHealth: keyPool.snapshot(),
|
|
11555
11543
|
timingsMs,
|
|
@@ -11690,9 +11678,56 @@ async function mapWithConcurrency(items, concurrency, mapper) {
|
|
|
11690
11678
|
await Promise.all(Array.from({ length: workerCount }, () => worker()));
|
|
11691
11679
|
return results;
|
|
11692
11680
|
}
|
|
11693
|
-
function
|
|
11694
|
-
|
|
11695
|
-
|
|
11681
|
+
/**
 * Starts a probe OCR call for every candidate model (bounded by
 * `probeConcurrency`) and exposes two views of the outcome so the caller can
 * continue as soon as one model works while the remaining probes finish.
 *
 * A probe's outcome is decided only by the `ocrImageViaNim` call; the
 * `onProbeResult` observer is invoked afterwards and cannot turn a successful
 * probe into a failed one.
 *
 * @param {object} input
 * @param {string[]} input.models - Model identifiers to probe.
 * @param {number} input.probeConcurrency - Concurrency limit handed to `mapWithConcurrency`.
 * @param {*} input.probeImage - Forwarded to `ocrImageViaNim` as `imagePath`.
 * @param {Object<string, number>} input.modelMaxTokens - Per-model `maxTokens`; 8192 is used when a model has no entry.
 * @param {*} input.baseUrl - Forwarded to `ocrImageViaNim`.
 * @param {*} input.keyPool - Forwarded to `ocrImageViaNim`.
 * @param {*} input.timeoutMs - Forwarded to `ocrImageViaNim`.
 * @param {*} input.logger - Forwarded to `ocrImageViaNim`.
 * @param {Function} [input.onProbeResult] - Called once per finished probe with `{ index, model, result }`.
 * @returns {{ firstSuccess: Promise<{ selectedModel: string, firstDurationMs: number }>, allResults: Promise<object[]> }}
 *   `firstSuccess` fulfills with the first model whose probe succeeded and
 *   rejects with a `PROBE_FAILED` `UnifiedOcrError` when no model succeeds
 *   (including when `models` is empty). `allResults` is the promise returned by
 *   `mapWithConcurrency` over every probe result.
 */
function startParallelProbeRuns(input) {
  const { models } = input;
  const noModelMessage = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
  let firstResolved = false;
  let doneCount = 0;
  let lastErr;
  let resolveFirst;
  let rejectFirst;
  // The executor runs synchronously, so both settle functions are assigned
  // before anything below can call them.
  const firstSuccess = new Promise((resolve, reject) => {
    resolveFirst = resolve;
    rejectFirst = reject;
  });

  // The fallback text already carries the failure prefix, so it is only
  // prepended when a concrete probe error message is available.
  const buildProbeFailure = () => new UnifiedOcrError(
    "PROBE_FAILED",
    "probe",
    lastErr === undefined ? noModelMessage : `\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: ${lastErr}`
  );

  // With no candidates the mapper below never runs; settle right away so the
  // caller's `await firstSuccess` cannot hang forever.
  if (models.length === 0) {
    rejectFirst(buildProbeFailure());
  }

  const allResults = mapWithConcurrency(models, input.probeConcurrency, async (model, index) => {
    const t0 = performance.now();
    let result;
    try {
      await ocrImageViaNim({
        imagePath: input.probeImage,
        prompt: OCR_PROMPT2,
        model,
        maxTokens: input.modelMaxTokens[model] ?? 8192,
        baseUrl: input.baseUrl,
        keyPool: input.keyPool,
        timeoutMs: input.timeoutMs,
        maxRetries: 2,
        logger: input.logger,
        stage: "probe"
      });
      result = { model, durationMs: elapsedMs(t0), success: true };
    } catch (err) {
      result = {
        model,
        durationMs: elapsedMs(t0),
        success: false,
        error: err instanceof Error ? err.message : String(err)
      };
      lastErr = result.error ?? lastErr;
    }

    doneCount += 1;
    if (result.success) {
      if (!firstResolved) {
        firstResolved = true;
        resolveFirst({ selectedModel: model, firstDurationMs: result.durationMs });
      }
    } else if (doneCount === models.length && !firstResolved) {
      // Every probe has finished and none succeeded.
      rejectFirst(buildProbeFailure());
    }

    // Observer runs after the outcome is settled; promise continuations are
    // asynchronous, so the caller still resumes only after this call returns.
    input.onProbeResult?.({ index, model, result });
    return result;
  });

  // If the mapping itself rejects (e.g. the observer threw), remaining probes
  // may never run; forward the error so `firstSuccess` cannot stay pending.
  // Rejecting an already-settled promise is a no-op.
  allResults.catch((err) => rejectFirst(err));

  return { firstSuccess, allResults };
}
|
|
11697
11732
|
async function loadModelCache(path) {
|
|
11698
11733
|
try {
|