@clazic/kordoc 2.4.12 → 2.4.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-KJEZPVEK.js → chunk-6CADPLGJ.js} +2 -2
- package/dist/{chunk-KJEZPVEK.js.map → chunk-6CADPLGJ.js.map} +1 -1
- package/dist/{chunk-5R37N6KE.js → chunk-X6NIA6BK.js} +2 -2
- package/dist/cli.js +5 -5
- package/dist/index.cjs +92 -39
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +92 -39
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-XLLXVB7V.js → utils-EK3CPEZG.js} +2 -2
- package/dist/{watch-SOMS2KR7.js → watch-NSWBVKQZ.js} +3 -3
- package/package.json +1 -1
- /package/dist/{chunk-5R37N6KE.js.map → chunk-X6NIA6BK.js.map} +0 -0
- /package/dist/{utils-XLLXVB7V.js.map → utils-EK3CPEZG.js.map} +0 -0
- /package/dist/{watch-SOMS2KR7.js.map → watch-NSWBVKQZ.js.map} +0 -0
package/dist/index.d.cts
CHANGED
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -3115,7 +3115,7 @@ import JSZip2 from "jszip";
|
|
|
3115
3115
|
import { DOMParser } from "@xmldom/xmldom";
|
|
3116
3116
|
|
|
3117
3117
|
// src/utils.ts
|
|
3118
|
-
var VERSION = true ? "2.4.
|
|
3118
|
+
var VERSION = true ? "2.4.13" : "0.0.0-dev";
|
|
3119
3119
|
function toArrayBuffer(buf) {
|
|
3120
3120
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3121
3121
|
return buf.buffer;
|
|
@@ -11373,6 +11373,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11373
11373
|
const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
|
|
11374
11374
|
const dpi = options.dpi ?? 300;
|
|
11375
11375
|
const modelsInput = options.modelCandidates?.length ? options.modelCandidates : DEFAULT_MODELS;
|
|
11376
|
+
const probeConcurrency = Math.max(1, Math.floor(options.probeConcurrency ?? Math.min(3, modelsInput.length)));
|
|
11376
11377
|
const modelCache = await loadModelCache(modelCachePath);
|
|
11377
11378
|
const models = sortModelsByCache(modelsInput, modelCache);
|
|
11378
11379
|
const modelMaxTokens = { ...DEFAULT_MODEL_MAX_TOKENS, ...options.modelMaxTokens ?? {} };
|
|
@@ -11423,44 +11424,34 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11423
11424
|
const probeStart = Date.now();
|
|
11424
11425
|
currentStage = "probe";
|
|
11425
11426
|
markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
|
|
11426
|
-
logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models });
|
|
11427
|
+
logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
|
|
11427
11428
|
const probeImage = await pickRepresentativeImage(images);
|
|
11428
|
-
|
|
11429
|
-
|
|
11430
|
-
|
|
11431
|
-
|
|
11432
|
-
|
|
11433
|
-
|
|
11434
|
-
|
|
11435
|
-
|
|
11436
|
-
|
|
11437
|
-
|
|
11438
|
-
|
|
11439
|
-
|
|
11440
|
-
|
|
11441
|
-
|
|
11442
|
-
logger,
|
|
11443
|
-
stage: "probe"
|
|
11444
|
-
});
|
|
11445
|
-
probeResults.push({ model, durationMs: Date.now() - t0, success: true });
|
|
11446
|
-
} catch (err) {
|
|
11447
|
-
probeResults.push({
|
|
11448
|
-
model,
|
|
11449
|
-
durationMs: Date.now() - t0,
|
|
11450
|
-
success: false,
|
|
11451
|
-
error: err instanceof Error ? err.message : String(err)
|
|
11452
|
-
});
|
|
11429
|
+
let probeDone = 0;
|
|
11430
|
+
const probeRuns = startParallelProbeRuns({
|
|
11431
|
+
models,
|
|
11432
|
+
probeConcurrency,
|
|
11433
|
+
probeImage,
|
|
11434
|
+
modelMaxTokens,
|
|
11435
|
+
baseUrl,
|
|
11436
|
+
keyPool,
|
|
11437
|
+
timeoutMs,
|
|
11438
|
+
logger,
|
|
11439
|
+
onProbeResult: ({ index, model, result }) => {
|
|
11440
|
+
probeDone += 1;
|
|
11441
|
+
markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
|
|
11442
|
+
logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
|
|
11453
11443
|
}
|
|
11454
|
-
|
|
11455
|
-
|
|
11456
|
-
|
|
11457
|
-
const
|
|
11458
|
-
if (!selectedModel) throw new UnifiedOcrError("PROBE_FAILED", "probe", "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
11459
|
-
const fallbackModelOrder = probeResults.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs).map((r) => r.model);
|
|
11444
|
+
});
|
|
11445
|
+
const selected = await probeRuns.firstSuccess;
|
|
11446
|
+
const selectedModel = selected.selectedModel;
|
|
11447
|
+
const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
|
|
11460
11448
|
timingsMs.probe = Date.now() - probeStart;
|
|
11461
|
-
await updateModelCache(modelCachePath, probeResults);
|
|
11462
11449
|
markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
|
|
11463
|
-
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC", { selectedModel,
|
|
11450
|
+
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
|
|
11451
|
+
const probeResultsPromise = probeRuns.allResults.then(async (results) => {
|
|
11452
|
+
await updateModelCache(modelCachePath, results);
|
|
11453
|
+
return results;
|
|
11454
|
+
});
|
|
11464
11455
|
const ocrStart = Date.now();
|
|
11465
11456
|
currentStage = "ocr";
|
|
11466
11457
|
markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
|
|
@@ -11542,7 +11533,7 @@ ${rawMd}
|
|
|
11542
11533
|
workspaceDir,
|
|
11543
11534
|
selectedModel,
|
|
11544
11535
|
probeImage,
|
|
11545
|
-
probeResults,
|
|
11536
|
+
probeResults: await probeResultsPromise,
|
|
11546
11537
|
pageCount: images.length,
|
|
11547
11538
|
keyHealth: keyPool.snapshot(),
|
|
11548
11539
|
timingsMs,
|
|
@@ -11668,9 +11659,71 @@ async function pickRepresentativeImage(images) {
|
|
|
11668
11659
|
use.sort((a, b) => a.size - b.size);
|
|
11669
11660
|
return use[Math.floor(use.length / 2)].path;
|
|
11670
11661
|
}
|
|
11671
|
-
function
|
|
11672
|
-
const
|
|
11673
|
-
|
|
11662
|
+
/**
 * Maps `items` through an async `mapper` while keeping only a bounded number
 * of mapper calls in flight.
 *
 * Each result is stored at the index of its input, so the returned array is in
 * input order regardless of the order in which the mapper calls finish.
 *
 * @param {Array} items - Values to map.
 * @param {number} concurrency - Upper bound on simultaneous mapper calls. It is
 *   clamped to the range [1, items.length]; a value that does not convert to a
 *   number (NaN, undefined, a non-numeric string) falls back to 1.
 * @param {(item: any, index: number) => any} mapper - Called once per item;
 *   its return value is awaited.
 * @returns {Promise<Array>} Mapped values, index-aligned with `items`.
 *   Rejects with the first mapper error (via `Promise.all`).
 */
async function mapWithConcurrency(items, concurrency, mapper) {
  const results = new Array(items.length);
  let nextIndex = 0;

  // Each worker claims the next unclaimed index until the list is exhausted.
  // Reading and incrementing `nextIndex` happen with no `await` in between,
  // so two workers can never claim the same index.
  async function worker() {
    while (true) {
      const idx = nextIndex;
      if (idx >= items.length) return;
      nextIndex += 1;
      results[idx] = await mapper(items[idx], idx);
    }
  }

  // Math.min/Math.max propagate NaN, and Array.from({ length: NaN }) yields an
  // empty array: without this guard a non-numeric `concurrency` would start
  // zero workers and silently return an array of holes.
  const requested = Math.floor(Number(concurrency));
  const limit = Number.isNaN(requested) ? 1 : requested;
  const workerCount = Math.max(1, Math.min(limit, items.length));

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
11677
|
+
/**
 * Starts an OCR probe for every model in `input.models` (bounded by
 * `input.probeConcurrency`) and exposes two promises over the same run.
 *
 * @param {object} input
 * @param {string[]} input.models - Model identifiers to probe.
 * @param {number} input.probeConcurrency - Passed to `mapWithConcurrency`.
 * @param {string} input.probeImage - Image path handed to `ocrImageViaNim`.
 * @param {Object<string, number>} input.modelMaxTokens - Per-model token limit;
 *   models without an entry use 8192.
 * @param {*} input.baseUrl - Forwarded to `ocrImageViaNim`.
 * @param {*} input.keyPool - Forwarded to `ocrImageViaNim`.
 * @param {*} input.timeoutMs - Forwarded to `ocrImageViaNim`.
 * @param {*} input.logger - Forwarded to `ocrImageViaNim`.
 * @param {(event: {index: number, model: string, result: object}) => void} [input.onProbeResult]
 *   Invoked once per finished probe, successful or not.
 * @returns {{firstSuccess: Promise<{selectedModel: string, firstDurationMs: number}>, allResults: Promise<object[]>}}
 *   `firstSuccess` fulfills as soon as any probe succeeds and rejects with a
 *   `UnifiedOcrError("PROBE_FAILED", "probe", ...)` when the run ends without a
 *   success. `allResults` is the promise returned by `mapWithConcurrency`,
 *   carrying one `{ model, durationMs, success, error? }` record per model.
 */
function startParallelProbeRuns(input) {
  let firstSettled = false;
  let doneCount = 0;
  let resolveFirst;
  let rejectFirst;
  const firstSuccess = new Promise((resolve, reject) => {
    resolveFirst = resolve;
    rejectFirst = reject;
  });

  // Reason used when no probe produced an error message of its own (for
  // example an empty model list). Combined with the prefix below it yields
  // the same text the pre-parallel implementation threw.
  let lastErr = "\uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";

  // Rejects `firstSuccess` unless it has already been settled.
  const failIfUnsettled = () => {
    if (firstSettled) return;
    firstSettled = true;
    rejectFirst(new UnifiedOcrError("PROBE_FAILED", "probe", `\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: ${lastErr}`));
  };

  const allResults = mapWithConcurrency(input.models, input.probeConcurrency, async (model, index) => {
    const t0 = Date.now();
    try {
      await ocrImageViaNim({
        imagePath: input.probeImage,
        prompt: OCR_PROMPT2,
        model,
        maxTokens: input.modelMaxTokens[model] ?? 8192,
        baseUrl: input.baseUrl,
        keyPool: input.keyPool,
        timeoutMs: input.timeoutMs,
        maxRetries: 2,
        logger: input.logger,
        stage: "probe"
      });
      const result = { model, durationMs: Date.now() - t0, success: true };
      input.onProbeResult?.({ index, model, result });
      if (!firstSettled) {
        // The first probe to succeed wins; later successes only show up in allResults.
        firstSettled = true;
        resolveFirst({ selectedModel: model, firstDurationMs: result.durationMs });
      }
      return result;
    } catch (err) {
      const result = {
        model,
        durationMs: Date.now() - t0,
        success: false,
        error: err instanceof Error ? err.message : String(err)
      };
      lastErr = result.error ?? lastErr;
      input.onProbeResult?.({ index, model, result });
      return result;
    } finally {
      doneCount += 1;
      // Every model has been tried and none succeeded.
      if (doneCount === input.models.length) failIfUnsettled();
    }
  });

  // Safety net: the bookkeeping above only runs inside a probe. If no probe
  // ever runs (empty model list) or the run aborts before every probe has
  // reported, `firstSuccess` would otherwise stay pending forever and hang
  // the caller awaiting it. The rejection branch fails fast, even if a probe
  // still in flight might have succeeded later.
  allResults.then(failIfUnsettled, (err) => {
    lastErr = err instanceof Error ? err.message : String(err);
    failIfUnsettled();
  });

  return { firstSuccess, allResults };
}
|
|
11675
11728
|
async function loadModelCache(path) {
|
|
11676
11729
|
try {
|