@clazic/kordoc 2.4.12 → 2.4.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-KJEZPVEK.js → chunk-6CADPLGJ.js} +2 -2
- package/dist/{chunk-KJEZPVEK.js.map → chunk-6CADPLGJ.js.map} +1 -1
- package/dist/{chunk-5R37N6KE.js → chunk-X6NIA6BK.js} +2 -2
- package/dist/cli.js +5 -5
- package/dist/index.cjs +92 -39
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +92 -39
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-XLLXVB7V.js → utils-EK3CPEZG.js} +2 -2
- package/dist/{watch-SOMS2KR7.js → watch-NSWBVKQZ.js} +3 -3
- package/package.json +1 -1
- /package/dist/{chunk-5R37N6KE.js.map → chunk-X6NIA6BK.js.map} +0 -0
- /package/dist/{utils-XLLXVB7V.js.map → utils-EK3CPEZG.js.map} +0 -0
- /package/dist/{watch-SOMS2KR7.js.map → watch-NSWBVKQZ.js.map} +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/utils.ts
|
|
4
|
-
var VERSION = true ? "2.4.
|
|
4
|
+
var VERSION = true ? "2.4.13" : "0.0.0-dev";
|
|
5
5
|
function toArrayBuffer(buf) {
|
|
6
6
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
7
7
|
return buf.buffer;
|
|
@@ -105,4 +105,4 @@ export {
|
|
|
105
105
|
classifyError,
|
|
106
106
|
normalizeKordocError
|
|
107
107
|
};
|
|
108
|
-
//# sourceMappingURL=chunk-
|
|
108
|
+
//# sourceMappingURL=chunk-X6NIA6BK.js.map
|
package/dist/cli.js
CHANGED
|
@@ -4,12 +4,12 @@ import {
|
|
|
4
4
|
markdownToHwpx,
|
|
5
5
|
markdownToXlsx,
|
|
6
6
|
parse
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-6CADPLGJ.js";
|
|
8
8
|
import "./chunk-YW5G6BCJ.js";
|
|
9
9
|
import {
|
|
10
10
|
VERSION,
|
|
11
11
|
toArrayBuffer
|
|
12
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-X6NIA6BK.js";
|
|
13
13
|
import "./chunk-MOL7MDBG.js";
|
|
14
14
|
import "./chunk-7FMKAV4P.js";
|
|
15
15
|
import "./chunk-34WIGIQC.js";
|
|
@@ -177,7 +177,7 @@ async function runParse(files, opts) {
|
|
|
177
177
|
saveImages(absPath);
|
|
178
178
|
}
|
|
179
179
|
} catch (err) {
|
|
180
|
-
const { sanitizeError } = await import("./utils-
|
|
180
|
+
const { sanitizeError } = await import("./utils-EK3CPEZG.js");
|
|
181
181
|
process.stderr.write(`
|
|
182
182
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
183
183
|
`);
|
|
@@ -259,7 +259,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
259
259
|
`));
|
|
260
260
|
}
|
|
261
261
|
} catch (err) {
|
|
262
|
-
const { sanitizeError } = await import("./utils-
|
|
262
|
+
const { sanitizeError } = await import("./utils-EK3CPEZG.js");
|
|
263
263
|
process.stderr.write(` FAIL
|
|
264
264
|
`);
|
|
265
265
|
process.stderr.write(` \u2192 ${sanitizeError(err)}
|
|
@@ -291,7 +291,7 @@ program.command("init-env").description("kordoc\uC6A9 .env \uD15C\uD50C\uB9BF \u
|
|
|
291
291
|
}
|
|
292
292
|
});
|
|
293
293
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
294
|
-
const { watchDirectory } = await import("./watch-
|
|
294
|
+
const { watchDirectory } = await import("./watch-NSWBVKQZ.js");
|
|
295
295
|
await watchDirectory({
|
|
296
296
|
dir,
|
|
297
297
|
outDir: opts.outDir,
|
package/dist/index.cjs
CHANGED
|
@@ -3138,7 +3138,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
3138
3138
|
var import_xmldom = require("@xmldom/xmldom");
|
|
3139
3139
|
|
|
3140
3140
|
// src/utils.ts
|
|
3141
|
-
var VERSION = true ? "2.4.
|
|
3141
|
+
var VERSION = true ? "2.4.13" : "0.0.0-dev";
|
|
3142
3142
|
function toArrayBuffer(buf) {
|
|
3143
3143
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3144
3144
|
return buf.buffer;
|
|
@@ -11396,6 +11396,7 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11396
11396
|
const maxRetriesPerPage = options.maxRetriesPerPage ?? 5;
|
|
11397
11397
|
const dpi = options.dpi ?? 300;
|
|
11398
11398
|
const modelsInput = options.modelCandidates?.length ? options.modelCandidates : DEFAULT_MODELS;
|
|
11399
|
+
const probeConcurrency = Math.max(1, Math.floor(options.probeConcurrency ?? Math.min(3, modelsInput.length)));
|
|
11399
11400
|
const modelCache = await loadModelCache(modelCachePath);
|
|
11400
11401
|
const models = sortModelsByCache(modelsInput, modelCache);
|
|
11401
11402
|
const modelMaxTokens = { ...DEFAULT_MODEL_MAX_TOKENS, ...options.modelMaxTokens ?? {} };
|
|
@@ -11446,44 +11447,34 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11446
11447
|
const probeStart = Date.now();
|
|
11447
11448
|
currentStage = "probe";
|
|
11448
11449
|
markStageStart("probe", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC218\uD589 \uC911");
|
|
11449
|
-
logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models });
|
|
11450
|
+
logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
|
|
11450
11451
|
const probeImage = await pickRepresentativeImage(images);
|
|
11451
|
-
|
|
11452
|
-
|
|
11453
|
-
|
|
11454
|
-
|
|
11455
|
-
|
|
11456
|
-
|
|
11457
|
-
|
|
11458
|
-
|
|
11459
|
-
|
|
11460
|
-
|
|
11461
|
-
|
|
11462
|
-
|
|
11463
|
-
|
|
11464
|
-
|
|
11465
|
-
logger,
|
|
11466
|
-
stage: "probe"
|
|
11467
|
-
});
|
|
11468
|
-
probeResults.push({ model, durationMs: Date.now() - t0, success: true });
|
|
11469
|
-
} catch (err) {
|
|
11470
|
-
probeResults.push({
|
|
11471
|
-
model,
|
|
11472
|
-
durationMs: Date.now() - t0,
|
|
11473
|
-
success: false,
|
|
11474
|
-
error: err instanceof Error ? err.message : String(err)
|
|
11475
|
-
});
|
|
11452
|
+
let probeDone = 0;
|
|
11453
|
+
const probeRuns = startParallelProbeRuns({
|
|
11454
|
+
models,
|
|
11455
|
+
probeConcurrency,
|
|
11456
|
+
probeImage,
|
|
11457
|
+
modelMaxTokens,
|
|
11458
|
+
baseUrl,
|
|
11459
|
+
keyPool,
|
|
11460
|
+
timeoutMs,
|
|
11461
|
+
logger,
|
|
11462
|
+
onProbeResult: ({ index, model, result }) => {
|
|
11463
|
+
probeDone += 1;
|
|
11464
|
+
markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
|
|
11465
|
+
logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
|
|
11476
11466
|
}
|
|
11477
|
-
|
|
11478
|
-
|
|
11479
|
-
|
|
11480
|
-
const
|
|
11481
|
-
if (!selectedModel) throw new UnifiedOcrError("PROBE_FAILED", "probe", "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.");
|
|
11482
|
-
const fallbackModelOrder = probeResults.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs).map((r) => r.model);
|
|
11467
|
+
});
|
|
11468
|
+
const selected = await probeRuns.firstSuccess;
|
|
11469
|
+
const selectedModel = selected.selectedModel;
|
|
11470
|
+
const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
|
|
11483
11471
|
timingsMs.probe = Date.now() - probeStart;
|
|
11484
|
-
await updateModelCache(modelCachePath, probeResults);
|
|
11485
11472
|
markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
|
|
11486
|
-
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC", { selectedModel,
|
|
11473
|
+
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
|
|
11474
|
+
const probeResultsPromise = probeRuns.allResults.then(async (results) => {
|
|
11475
|
+
await updateModelCache(modelCachePath, results);
|
|
11476
|
+
return results;
|
|
11477
|
+
});
|
|
11487
11478
|
const ocrStart = Date.now();
|
|
11488
11479
|
currentStage = "ocr";
|
|
11489
11480
|
markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
|
|
@@ -11565,7 +11556,7 @@ ${rawMd}
|
|
|
11565
11556
|
workspaceDir,
|
|
11566
11557
|
selectedModel,
|
|
11567
11558
|
probeImage,
|
|
11568
|
-
probeResults,
|
|
11559
|
+
probeResults: await probeResultsPromise,
|
|
11569
11560
|
pageCount: images.length,
|
|
11570
11561
|
keyHealth: keyPool.snapshot(),
|
|
11571
11562
|
timingsMs,
|
|
@@ -11691,9 +11682,71 @@ async function pickRepresentativeImage(images) {
|
|
|
11691
11682
|
use.sort((a, b) => a.size - b.size);
|
|
11692
11683
|
return use[Math.floor(use.length / 2)].path;
|
|
11693
11684
|
}
|
|
11694
|
-
function
|
|
11695
|
-
const
|
|
11696
|
-
|
|
11685
|
+
/**
 * Maps `items` through an async `mapper` while keeping at most `concurrency`
 * mapper calls in flight at any moment.
 *
 * Workers pull the next unclaimed index from a shared cursor, so results are
 * written back by index and the returned array preserves input order
 * regardless of completion order.
 *
 * @param {Array} items - Values to process.
 * @param {number} concurrency - Maximum number of simultaneous mapper calls.
 *   Values below 1 are clamped to 1; a value that is not a number (NaN,
 *   undefined, non-numeric string) falls back to 1, i.e. sequential execution.
 * @param {(item: any, index: number) => any} mapper - Called once per item;
 *   its (awaited) return value is stored at the item's index.
 * @returns {Promise<Array>} Mapper results in input order.
 * @throws Rejects with the first mapper error. Workers other than the one that
 *   failed keep draining the remaining items.
 */
async function mapWithConcurrency(items, concurrency, mapper) {
  const results = new Array(items.length);

  // Math.min/Math.max propagate NaN, and Array.from({ length: NaN }) yields an
  // empty array, which would start zero workers and return an array of holes
  // without ever calling the mapper. Normalize before sizing the pool.
  const requested = Number(concurrency);
  const limit = Number.isNaN(requested) ? 1 : Math.floor(requested);
  const workerCount = Math.max(1, Math.min(limit, items.length));

  let nextIndex = 0;
  async function worker() {
    // Claiming the index happens synchronously (no await between the check
    // and the increment), so two workers can never take the same item.
    while (nextIndex < items.length) {
      const idx = nextIndex;
      nextIndex += 1;
      results[idx] = await mapper(items[idx], idx);
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
11700
|
+
/**
 * Starts a speed probe of every candidate OCR model against one image and
 * exposes two promises: one that settles as soon as any model succeeds, and
 * one that settles when every probe has finished.
 *
 * @param {object} input
 * @param {string[]} input.models - Candidate model names, probed in this order.
 * @param {number} input.probeConcurrency - Maximum simultaneous probes; a
 *   non-numeric value falls back to 1.
 * @param {string} input.probeImage - Image path passed to `ocrImageViaNim`.
 * @param {object} input.modelMaxTokens - Per-model token limits (default 8192).
 * @param {*} input.baseUrl - Forwarded to `ocrImageViaNim`.
 * @param {*} input.keyPool - Forwarded to `ocrImageViaNim`.
 * @param {*} input.timeoutMs - Forwarded to `ocrImageViaNim`.
 * @param {*} input.logger - Forwarded to `ocrImageViaNim`.
 * @param {Function} [input.onProbeResult] - Called exactly once per model with
 *   `{ index, model, result }` after that model's probe finishes.
 * @returns {{ firstSuccess: Promise<{selectedModel: string, firstDurationMs: number}>,
 *             allResults: Promise<Array<{model: string, durationMs: number, success: boolean, error?: string}>> }}
 *   `firstSuccess` rejects with `UnifiedOcrError("PROBE_FAILED", "probe", …)`
 *   when no model succeeds. It is guaranteed to settle: an empty model list,
 *   or a probe run that ends abnormally, rejects instead of hanging.
 */
function startParallelProbeRuns(input) {
  const NO_MODEL_MESSAGE = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";

  let firstSettled = false; // true once firstSuccess has been resolved or rejected
  let doneCount = 0;
  let lastErr = null; // message of the most recent failed probe, if any
  let resolveFirst;
  let rejectFirst;
  const firstSuccess = new Promise((resolve4, reject) => {
    resolveFirst = resolve4;
    rejectFirst = reject;
  });

  // Rejects firstSuccess unless it has already settled. `cause` is the reason
  // the whole probe run ended abnormally, when there is one.
  const failFirstIfPending = (cause) => {
    if (firstSettled) return;
    firstSettled = true;
    const detail = cause === undefined ? lastErr : cause instanceof Error ? cause.message : String(cause);
    // With no probe error to report (e.g. an empty model list) use the plain
    // "no usable model" message instead of prefixing it a second time.
    const message = detail === null ? NO_MODEL_MESSAGE : `\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: ${detail}`;
    const failure = new UnifiedOcrError("PROBE_FAILED", "probe", message);
    if (cause !== undefined) failure.cause = cause;
    rejectFirst(failure);
  };

  // A NaN concurrency would make the worker pool empty, so no probe would run.
  const requestedConcurrency = Number(input.probeConcurrency);
  const concurrency = Number.isNaN(requestedConcurrency) ? 1 : requestedConcurrency;

  const allResults = mapWithConcurrency(input.models, concurrency, async (model, index) => {
    const t0 = Date.now();
    let result;
    try {
      await ocrImageViaNim({
        imagePath: input.probeImage,
        prompt: OCR_PROMPT2,
        model,
        maxTokens: input.modelMaxTokens[model] ?? 8192,
        baseUrl: input.baseUrl,
        keyPool: input.keyPool,
        timeoutMs: input.timeoutMs,
        maxRetries: 2,
        logger: input.logger,
        stage: "probe"
      });
      result = { model, durationMs: Date.now() - t0, success: true };
    } catch (err) {
      result = {
        model,
        durationMs: Date.now() - t0,
        success: false,
        error: err instanceof Error ? err.message : String(err)
      };
      lastErr = result.error;
    }

    doneCount += 1;
    if (result.success) {
      if (!firstSettled) {
        firstSettled = true;
        resolveFirst({ selectedModel: model, firstDurationMs: result.durationMs });
      }
    } else if (doneCount === input.models.length) {
      failFirstIfPending();
    }

    // Notify outside the try/catch above: a throwing callback must not turn a
    // successful probe into a failure, nor be invoked a second time.
    input.onProbeResult?.({ index, model, result });
    return result;
  });

  // Safety net: once the run is over (normally or not), firstSuccess must not
  // stay pending, otherwise the caller awaiting it would hang forever.
  allResults.then(
    () => failFirstIfPending(),
    (reason) => failFirstIfPending(reason)
  );

  return { firstSuccess, allResults };
}
|
|
11698
11751
|
async function loadModelCache(path) {
|
|
11699
11752
|
try {
|