@clazic/kordoc 2.4.13 → 2.4.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-UX75CBUO.js → chunk-6CADPLGJ.js} +2 -2
- package/dist/{chunk-UX75CBUO.js.map → chunk-6CADPLGJ.js.map} +1 -1
- package/dist/{chunk-5R37N6KE.js → chunk-X6NIA6BK.js} +2 -2
- package/dist/cli.js +5 -5
- package/dist/index.cjs +70 -39
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +70 -39
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-XLLXVB7V.js → utils-EK3CPEZG.js} +2 -2
- package/dist/{watch-3MTAXFEA.js → watch-NSWBVKQZ.js} +3 -3
- package/package.json +1 -1
- /package/dist/{chunk-5R37N6KE.js.map → chunk-X6NIA6BK.js.map} +0 -0
- /package/dist/{utils-XLLXVB7V.js.map → utils-EK3CPEZG.js.map} +0 -0
- /package/dist/{watch-3MTAXFEA.js.map → watch-NSWBVKQZ.js.map} +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/utils.ts
|
|
4
|
-
var VERSION = true ? "2.4.
|
|
4
|
+
var VERSION = true ? "2.4.13" : "0.0.0-dev";
|
|
5
5
|
function toArrayBuffer(buf) {
|
|
6
6
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
7
7
|
return buf.buffer;
|
|
@@ -105,4 +105,4 @@ export {
|
|
|
105
105
|
classifyError,
|
|
106
106
|
normalizeKordocError
|
|
107
107
|
};
|
|
108
|
-
//# sourceMappingURL=chunk-5R37N6KE.js.map
|
|
108
|
+
//# sourceMappingURL=chunk-X6NIA6BK.js.map
|
package/dist/cli.js
CHANGED
|
@@ -4,12 +4,12 @@ import {
|
|
|
4
4
|
markdownToHwpx,
|
|
5
5
|
markdownToXlsx,
|
|
6
6
|
parse
|
|
7
|
-
} from "./chunk-UX75CBUO.js";
|
|
7
|
+
} from "./chunk-6CADPLGJ.js";
|
|
8
8
|
import "./chunk-YW5G6BCJ.js";
|
|
9
9
|
import {
|
|
10
10
|
VERSION,
|
|
11
11
|
toArrayBuffer
|
|
12
|
-
} from "./chunk-5R37N6KE.js";
|
|
12
|
+
} from "./chunk-X6NIA6BK.js";
|
|
13
13
|
import "./chunk-MOL7MDBG.js";
|
|
14
14
|
import "./chunk-7FMKAV4P.js";
|
|
15
15
|
import "./chunk-34WIGIQC.js";
|
|
@@ -177,7 +177,7 @@ async function runParse(files, opts) {
|
|
|
177
177
|
saveImages(absPath);
|
|
178
178
|
}
|
|
179
179
|
} catch (err) {
|
|
180
|
-
const { sanitizeError } = await import("./utils-XLLXVB7V.js");
|
|
180
|
+
const { sanitizeError } = await import("./utils-EK3CPEZG.js");
|
|
181
181
|
process.stderr.write(`
|
|
182
182
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
183
183
|
`);
|
|
@@ -259,7 +259,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
259
259
|
`));
|
|
260
260
|
}
|
|
261
261
|
} catch (err) {
|
|
262
|
-
const { sanitizeError } = await import("./utils-XLLXVB7V.js");
|
|
262
|
+
const { sanitizeError } = await import("./utils-EK3CPEZG.js");
|
|
263
263
|
process.stderr.write(` FAIL
|
|
264
264
|
`);
|
|
265
265
|
process.stderr.write(` \u2192 ${sanitizeError(err)}
|
|
@@ -291,7 +291,7 @@ program.command("init-env").description("kordoc\uC6A9 .env \uD15C\uD50C\uB9BF \u
|
|
|
291
291
|
}
|
|
292
292
|
});
|
|
293
293
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
294
|
-
const { watchDirectory } = await import("./watch-3MTAXFEA.js");
|
|
294
|
+
const { watchDirectory } = await import("./watch-NSWBVKQZ.js");
|
|
295
295
|
await watchDirectory({
|
|
296
296
|
dir,
|
|
297
297
|
outDir: opts.outDir,
|
package/dist/index.cjs
CHANGED
|
@@ -3138,7 +3138,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
3138
3138
|
var import_xmldom = require("@xmldom/xmldom");
|
|
3139
3139
|
|
|
3140
3140
|
// src/utils.ts
|
|
3141
|
-
var VERSION = true ? "2.4.
|
|
3141
|
+
var VERSION = true ? "2.4.13" : "0.0.0-dev";
|
|
3142
3142
|
function toArrayBuffer(buf) {
|
|
3143
3143
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3144
3144
|
return buf.buffer;
|
|
@@ -11450,47 +11450,31 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11450
11450
|
logStage("info", "probe", "start", "\uBAA8\uB378 \uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2DC\uC791", { models, probeConcurrency });
|
|
11451
11451
|
const probeImage = await pickRepresentativeImage(images);
|
|
11452
11452
|
let probeDone = 0;
|
|
11453
|
-
const
|
|
11454
|
-
|
|
11455
|
-
|
|
11456
|
-
|
|
11457
|
-
|
|
11458
|
-
|
|
11459
|
-
|
|
11460
|
-
|
|
11461
|
-
|
|
11462
|
-
|
|
11463
|
-
timeoutMs,
|
|
11464
|
-
maxRetries: 2,
|
|
11465
|
-
logger,
|
|
11466
|
-
stage: "probe"
|
|
11467
|
-
});
|
|
11468
|
-
const result = { model, durationMs: Date.now() - t0, success: true };
|
|
11469
|
-
probeDone += 1;
|
|
11470
|
-
markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
|
|
11471
|
-
logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
|
|
11472
|
-
return result;
|
|
11473
|
-
} catch (err) {
|
|
11474
|
-
const result = {
|
|
11475
|
-
model,
|
|
11476
|
-
durationMs: Date.now() - t0,
|
|
11477
|
-
success: false,
|
|
11478
|
-
error: err instanceof Error ? err.message : String(err)
|
|
11479
|
-
};
|
|
11453
|
+
const probeRuns = startParallelProbeRuns({
|
|
11454
|
+
models,
|
|
11455
|
+
probeConcurrency,
|
|
11456
|
+
probeImage,
|
|
11457
|
+
modelMaxTokens,
|
|
11458
|
+
baseUrl,
|
|
11459
|
+
keyPool,
|
|
11460
|
+
timeoutMs,
|
|
11461
|
+
logger,
|
|
11462
|
+
onProbeResult: ({ index, model, result }) => {
|
|
11480
11463
|
probeDone += 1;
|
|
11481
11464
|
markStageProgress("probe", Math.round(probeDone / models.length * 100), probeDone, models.length, `\uBAA8\uB378 \uD504\uB85C\uBE0C ${probeDone}/${models.length}`);
|
|
11482
11465
|
logStage("debug", "probe", "progress", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC9C4\uD589", { index: index + 1, total: models.length, model, result });
|
|
11483
|
-
return result;
|
|
11484
11466
|
}
|
|
11485
11467
|
});
|
|
11486
|
-
const
|
|
11487
|
-
const selectedModel =
|
|
11488
|
-
|
|
11489
|
-
const fallbackModelOrder = probeResults.filter((r) => r.success).sort((a, b) => a.durationMs - b.durationMs).map((r) => r.model);
|
|
11468
|
+
const selected = await probeRuns.firstSuccess;
|
|
11469
|
+
const selectedModel = selected.selectedModel;
|
|
11470
|
+
const fallbackModelOrder = [selectedModel, ...models.filter((model) => model !== selectedModel)];
|
|
11490
11471
|
timingsMs.probe = Date.now() - probeStart;
|
|
11491
|
-
await updateModelCache(modelCachePath, probeResults);
|
|
11492
11472
|
markStageDone("probe", `\uD504\uB85C\uBE0C \uC644\uB8CC: ${selectedModel}`);
|
|
11493
|
-
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC", { selectedModel,
|
|
11473
|
+
logStage("info", "probe", "done", "\uBAA8\uB378 \uD504\uB85C\uBE0C \uC644\uB8CC(\uCCAB \uC131\uACF5 \uBAA8\uB378 \uC6B0\uC120)", { selectedModel, firstDurationMs: selected.firstDurationMs, elapsedMs: timingsMs.probe });
|
|
11474
|
+
const probeResultsPromise = probeRuns.allResults.then(async (results) => {
|
|
11475
|
+
await updateModelCache(modelCachePath, results);
|
|
11476
|
+
return results;
|
|
11477
|
+
});
|
|
11494
11478
|
const ocrStart = Date.now();
|
|
11495
11479
|
currentStage = "ocr";
|
|
11496
11480
|
markStageStart("ocr", `OCR \uC9C4\uD589 \uC911 (${selectedModel})`);
|
|
@@ -11572,7 +11556,7 @@ ${rawMd}
|
|
|
11572
11556
|
workspaceDir,
|
|
11573
11557
|
selectedModel,
|
|
11574
11558
|
probeImage,
|
|
11575
|
-
probeResults,
|
|
11559
|
+
probeResults: await probeResultsPromise,
|
|
11576
11560
|
pageCount: images.length,
|
|
11577
11561
|
keyHealth: keyPool.snapshot(),
|
|
11578
11562
|
timingsMs,
|
|
@@ -11713,9 +11697,56 @@ async function mapWithConcurrency(items, concurrency, mapper) {
|
|
|
11713
11697
|
await Promise.all(Array.from({ length: workerCount }, () => worker()));
|
|
11714
11698
|
return results;
|
|
11715
11699
|
}
|
|
11716
|
-
function
|
|
11717
|
-
|
|
11718
|
-
|
|
11700
|
+
function startParallelProbeRuns(input) {
|
|
11701
|
+
let firstResolved = false;
|
|
11702
|
+
let doneCount = 0;
|
|
11703
|
+
let resolveFirst;
|
|
11704
|
+
let rejectFirst;
|
|
11705
|
+
const firstSuccess = new Promise((resolve4, reject) => {
|
|
11706
|
+
resolveFirst = resolve4;
|
|
11707
|
+
rejectFirst = reject;
|
|
11708
|
+
});
|
|
11709
|
+
let lastErr = "\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: \uC0AC\uC6A9 \uAC00\uB2A5\uD55C OCR \uBAA8\uB378\uC774 \uC5C6\uC2B5\uB2C8\uB2E4.";
|
|
11710
|
+
const allResults = mapWithConcurrency(input.models, input.probeConcurrency, async (model, index) => {
|
|
11711
|
+
const t0 = Date.now();
|
|
11712
|
+
try {
|
|
11713
|
+
await ocrImageViaNim({
|
|
11714
|
+
imagePath: input.probeImage,
|
|
11715
|
+
prompt: OCR_PROMPT2,
|
|
11716
|
+
model,
|
|
11717
|
+
maxTokens: input.modelMaxTokens[model] ?? 8192,
|
|
11718
|
+
baseUrl: input.baseUrl,
|
|
11719
|
+
keyPool: input.keyPool,
|
|
11720
|
+
timeoutMs: input.timeoutMs,
|
|
11721
|
+
maxRetries: 2,
|
|
11722
|
+
logger: input.logger,
|
|
11723
|
+
stage: "probe"
|
|
11724
|
+
});
|
|
11725
|
+
const result = { model, durationMs: Date.now() - t0, success: true };
|
|
11726
|
+
input.onProbeResult?.({ index, model, result });
|
|
11727
|
+
if (!firstResolved) {
|
|
11728
|
+
firstResolved = true;
|
|
11729
|
+
resolveFirst?.({ selectedModel: model, firstDurationMs: result.durationMs });
|
|
11730
|
+
}
|
|
11731
|
+
return result;
|
|
11732
|
+
} catch (err) {
|
|
11733
|
+
const result = {
|
|
11734
|
+
model,
|
|
11735
|
+
durationMs: Date.now() - t0,
|
|
11736
|
+
success: false,
|
|
11737
|
+
error: err instanceof Error ? err.message : String(err)
|
|
11738
|
+
};
|
|
11739
|
+
lastErr = result.error ?? lastErr;
|
|
11740
|
+
input.onProbeResult?.({ index, model, result });
|
|
11741
|
+
return result;
|
|
11742
|
+
} finally {
|
|
11743
|
+
doneCount += 1;
|
|
11744
|
+
if (doneCount === input.models.length && !firstResolved) {
|
|
11745
|
+
rejectFirst?.(new UnifiedOcrError("PROBE_FAILED", "probe", `\uC18D\uB3C4 \uD504\uB85C\uBE0C \uC2E4\uD328: ${lastErr}`));
|
|
11746
|
+
}
|
|
11747
|
+
}
|
|
11748
|
+
});
|
|
11749
|
+
return { firstSuccess, allResults };
|
|
11719
11750
|
}
|
|
11720
11751
|
async function loadModelCache(path) {
|
|
11721
11752
|
try {
|