@clazic/kordoc 2.4.16 → 2.4.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-RH6IBTHH.js → chunk-T7EBS5XP.js} +32 -9
- package/dist/{chunk-RH6IBTHH.js.map → chunk-T7EBS5XP.js.map} +1 -1
- package/dist/{chunk-QR27D67R.js → chunk-W2KDIKDF.js} +2 -2
- package/dist/cli.js +5 -5
- package/dist/index.cjs +38 -90
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +38 -90
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +2 -2
- package/dist/{utils-HHJDSSR6.js → utils-DHOODYKU.js} +2 -2
- package/dist/{watch-YAILKKKP.js → watch-YGIU7RN7.js} +3 -3
- package/package.json +1 -1
- /package/dist/{chunk-QR27D67R.js.map → chunk-W2KDIKDF.js.map} +0 -0
- /package/dist/{utils-HHJDSSR6.js.map → utils-DHOODYKU.js.map} +0 -0
- /package/dist/{watch-YAILKKKP.js.map → watch-YGIU7RN7.js.map} +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/utils.ts
|
|
4
|
-
var VERSION = true ? "2.4.
|
|
4
|
+
var VERSION = true ? "2.4.17" : "0.0.0-dev";
|
|
5
5
|
function toArrayBuffer(buf) {
|
|
6
6
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
7
7
|
return buf.buffer;
|
|
@@ -105,4 +105,4 @@ export {
|
|
|
105
105
|
classifyError,
|
|
106
106
|
normalizeKordocError
|
|
107
107
|
};
|
|
108
|
-
//# sourceMappingURL=chunk-QR27D67R.js.map
|
|
108
|
+
//# sourceMappingURL=chunk-W2KDIKDF.js.map
|
package/dist/cli.js
CHANGED
|
@@ -4,12 +4,12 @@ import {
|
|
|
4
4
|
markdownToHwpx,
|
|
5
5
|
markdownToXlsx,
|
|
6
6
|
parse
|
|
7
|
-
} from "./chunk-RH6IBTHH.js";
|
|
7
|
+
} from "./chunk-T7EBS5XP.js";
|
|
8
8
|
import "./chunk-YW5G6BCJ.js";
|
|
9
9
|
import {
|
|
10
10
|
VERSION,
|
|
11
11
|
toArrayBuffer
|
|
12
|
-
} from "./chunk-QR27D67R.js";
|
|
12
|
+
} from "./chunk-W2KDIKDF.js";
|
|
13
13
|
import "./chunk-MOL7MDBG.js";
|
|
14
14
|
import "./chunk-7FMKAV4P.js";
|
|
15
15
|
import "./chunk-34WIGIQC.js";
|
|
@@ -177,7 +177,7 @@ async function runParse(files, opts) {
|
|
|
177
177
|
saveImages(absPath);
|
|
178
178
|
}
|
|
179
179
|
} catch (err) {
|
|
180
|
-
const { sanitizeError } = await import("./utils-HHJDSSR6.js");
|
|
180
|
+
const { sanitizeError } = await import("./utils-DHOODYKU.js");
|
|
181
181
|
process.stderr.write(`
|
|
182
182
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
183
183
|
`);
|
|
@@ -259,7 +259,7 @@ program.command("convert <input>").description("\uB9C8\uD06C\uB2E4\uC6B4 \uD30C\
|
|
|
259
259
|
`));
|
|
260
260
|
}
|
|
261
261
|
} catch (err) {
|
|
262
|
-
const { sanitizeError } = await import("./utils-HHJDSSR6.js");
|
|
262
|
+
const { sanitizeError } = await import("./utils-DHOODYKU.js");
|
|
263
263
|
process.stderr.write(` FAIL
|
|
264
264
|
`);
|
|
265
265
|
process.stderr.write(` \u2192 ${sanitizeError(err)}
|
|
@@ -291,7 +291,7 @@ program.command("init-env").description("kordoc\uC6A9 .env \uD15C\uD50C\uB9BF \u
|
|
|
291
291
|
}
|
|
292
292
|
});
|
|
293
293
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
294
|
-
const { watchDirectory } = await import("./watch-YAILKKKP.js");
|
|
294
|
+
const { watchDirectory } = await import("./watch-YGIU7RN7.js");
|
|
295
295
|
await watchDirectory({
|
|
296
296
|
dir,
|
|
297
297
|
outDir: opts.outDir,
|
package/dist/index.cjs
CHANGED
|
@@ -3138,7 +3138,7 @@ var import_jszip2 = __toESM(require("jszip"), 1);
|
|
|
3138
3138
|
var import_xmldom = require("@xmldom/xmldom");
|
|
3139
3139
|
|
|
3140
3140
|
// src/utils.ts
|
|
3141
|
-
var VERSION = true ? "2.4.
|
|
3141
|
+
var VERSION = true ? "2.4.17" : "0.0.0-dev";
|
|
3142
3142
|
function toArrayBuffer(buf) {
|
|
3143
3143
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
3144
3144
|
return buf.buffer;
|
|
@@ -11369,17 +11369,39 @@ var DEFAULT_STAGE_WEIGHTS = {
|
|
|
11369
11369
|
render: 20,
|
|
11370
11370
|
probe: 5,
|
|
11371
11371
|
ocr: 45,
|
|
11372
|
-
proofread:
|
|
11372
|
+
proofread: 0,
|
|
11373
11373
|
merge: 5
|
|
11374
11374
|
};
|
|
11375
|
-
var OCR_PROMPT2 =
|
|
11376
|
-
|
|
11377
|
-
"
|
|
11378
|
-
"\uADDC\uCE59:",
|
|
11379
|
-
"- \
|
|
11380
|
-
"- \
|
|
11381
|
-
"- \
|
|
11382
|
-
"- \
|
|
11375
|
+
var OCR_PROMPT2 = [
|
|
11376
|
+
"\uC774 PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0\uC5D0\uC11C \uD14D\uC2A4\uD2B8\uC640 \uD45C\uB97C \uCD94\uCD9C\uD558\uC5EC Markdown\uC73C\uB85C \uBCC0\uD658\uD558\uACE0, OCR \uC624\uC778\uC2DD \uC624\uB958\uB97C \uC989\uC2DC \uAD50\uC815\uD558\uC5EC \uCD5C\uC885 \uACB0\uACFC\uBB3C\uC744 \uCD9C\uB825\uD558\uC138\uC694.",
|
|
11377
|
+
"",
|
|
11378
|
+
"\uCD94\uCD9C \uADDC\uCE59:",
|
|
11379
|
+
"- \uD14D\uC2A4\uD2B8, \uD45C, \uC81C\uBAA9, \uB9AC\uC2A4\uD2B8\uB97C \uC6D0\uBB38 \uAD6C\uC870 \uADF8\uB300\uB85C Markdown\uC73C\uB85C \uBCC0\uD658",
|
|
11380
|
+
"- \uD45C\uB294 Markdown \uD14C\uC774\uBE14 \uBB38\uBC95 \uC0AC\uC6A9 (| \uAD6C\uBD84, |---|---| \uD5E4\uB354 \uAD6C\uBD84\uC120 \uD3EC\uD568)",
|
|
11381
|
+
"- \uD5E4\uB529\uC740 \uC2DC\uAC01\uC801 \uD06C\uAE30\uC5D0 \uB530\uB77C # ~ ###### \uC0AC\uC6A9",
|
|
11382
|
+
"- \uB9AC\uC2A4\uD2B8\uB294 - \uB610\uB294 1. \uC0AC\uC6A9",
|
|
11383
|
+
"- \uC774\uBBF8\uC9C0, \uB3C4\uD615 \uB4F1 \uBE44\uD14D\uC2A4\uD2B8 \uC694\uC18C\uB294 \uBB34\uC2DC",
|
|
11384
|
+
"- \uC6D0\uBB38\uC758 \uC77D\uAE30 \uC21C\uC11C\uC640 \uAD6C\uC870\uB97C \uC720\uC9C0",
|
|
11385
|
+
"",
|
|
11386
|
+
"\uC808\uB300 \uAE08\uC9C0 \uC0AC\uD56D:",
|
|
11387
|
+
"- \uBB38\uC7A5\xB7\uB2E8\uB77D\xB7\uD56D\uBAA9\uC744 \uCD94\uAC00\uD558\uAC70\uB098 \uC0AD\uC81C\uD558\uC9C0 \uB9D0 \uAC83",
|
|
11388
|
+
"- \uC22B\uC790, \uD37C\uC13C\uD2B8, \uB0A0\uC9DC, \uB2E8\uC704, \uAE08\uC561\uC744 \uC808\uB300 \uBCC0\uACBD\uD558\uC9C0 \uB9D0 \uAC83",
|
|
11389
|
+
"- \uACE0\uC720\uBA85\uC0AC, \uAE30\uAD00\uBA85, \uBC95\uB839\uBA85, \uC9C0\uBA85\uC744 \uBCC0\uACBD\uD558\uC9C0 \uB9D0 \uAC83",
|
|
11390
|
+
"- \uD45C\uC758 \uC81C\uBAA9\uC744 \uBCC0\uACBD \uB610\uB294 \uC0AD\uC81C\uD558\uC9C0 \uB9D0 \uAC83",
|
|
11391
|
+
"- \uD45C\uC758 \uD589\xB7\uC5F4 \uC218, \uC140 \uB0B4\uC6A9, \uD5E4\uB354\uB97C \uBCC0\uACBD\uD558\uC9C0 \uB9D0 \uAC83",
|
|
11392
|
+
"- \uC81C\uBAA9 \uC218\uC900(#, ##, ### \uB4F1)\uC744 \uC784\uC758\uB85C \uBC14\uAFB8\uC9C0 \uB9D0 \uAC83",
|
|
11393
|
+
"- \uC6D0\uBB38\uC5D0 \uC5C6\uB294 \uB0B4\uC6A9\uC744 \uC694\uC57D\xB7\uBCF4\uC644\xB7\uCD94\uB860\uD558\uC9C0 \uB9D0 \uAC83",
|
|
11394
|
+
"- ` ``` `\uB85C \uAC10\uC2F8\uAC70\uB098 \uC124\uBA85 \uD14D\uC2A4\uD2B8\uB97C \uCD94\uAC00\uD558\uC9C0 \uB9D0 \uAC83",
|
|
11395
|
+
"",
|
|
11396
|
+
"\uD5C8\uC6A9\uB418\uB294 \uAD50\uC815 \uBC94\uC704 (OCR \uC624\uC778\uC2DD \uC218\uC815):",
|
|
11397
|
+
"- \uBA85\uBC31\uD55C \uAE00\uC790 \uC624\uC778\uC2DD \uC218\uC815 (\uC608: '0' \u2192 'O', 'l' \u2192 '1' \uB4F1 \uB9E5\uB77D\uC0C1 \uBA85\uD655\uD55C \uACBD\uC6B0\uB9CC)",
|
|
11398
|
+
"- \uB2E8\uC5B4 \uC911\uAC04\uC5D0 \uC798\uBABB \uC0BD\uC785\uB41C \uACF5\uBC31 \uC81C\uAC70",
|
|
11399
|
+
"- \uC904\uBC14\uAFC8 \uC624\uB958\uB85C \uBD84\uB9AC\uB41C \uBB38\uC7A5 \uBCD1\uD569 (\uC758\uBBF8 \uB2E8\uC704 \uAE30\uC900)",
|
|
11400
|
+
"- Markdown \uBB38\uBC95 \uC624\uB958 \uC218\uC815 (\uD45C \uAD6C\uBD84\uC120 \uB204\uB77D, \uB9AC\uC2A4\uD2B8 \uB4E4\uC5EC\uC4F0\uAE30 \uB4F1)",
|
|
11401
|
+
"",
|
|
11402
|
+
"\uCD9C\uB825 \uADDC\uCE59:",
|
|
11403
|
+
"- \uBCC0\uD658\uB41C Markdown \uBCF8\uBB38\uB9CC \uCD9C\uB825\uD560 \uAC83 (\uC124\uBA85, \uC8FC\uC11D, \uBA54\uD0C0 \uD14D\uC2A4\uD2B8 \uC5C6\uC774)",
|
|
11404
|
+
"- \uD655\uC2E4\uD558\uC9C0 \uC54A\uC73C\uBA74 \uC6D0\uBB38\uC744 \uADF8\uB300\uB85C \uC720\uC9C0\uD560 \uAC83"
|
|
11383
11405
|
].join("\n");
|
|
11384
11406
|
function elapsedMs(startAt) {
|
|
11385
11407
|
return Math.round(import_node_perf_hooks.performance.now() - startAt);
|
|
@@ -11390,7 +11412,6 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11390
11412
|
const workspaceDir = (0, import_path5.resolve)(options.workspaceDir ?? (0, import_path5.join)((0, import_path5.dirname)(absInput), `${stem}_ocr_workspace`));
|
|
11391
11413
|
const imagesDir = (0, import_path5.join)(workspaceDir, "images");
|
|
11392
11414
|
const rawDir = (0, import_path5.join)(workspaceDir, "ocr", "raw");
|
|
11393
|
-
const proofDir = (0, import_path5.join)(workspaceDir, "ocr", "proofread");
|
|
11394
11415
|
const diffDir = (0, import_path5.join)(workspaceDir, "ocr", "diff");
|
|
11395
11416
|
const outputPath = (0, import_path5.resolve)(options.outputPath ?? (0, import_path5.join)((0, import_path5.dirname)(absInput), `${stem}.md`));
|
|
11396
11417
|
const reportPath = (0, import_path5.join)(workspaceDir, "run-report.json");
|
|
@@ -11410,11 +11431,10 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11410
11431
|
const logger = (options.logger ?? createLoggerFromEnv()).withRun(runId).child({ component: "pipeline/unified-ocr.ts" });
|
|
11411
11432
|
await (0, import_promises2.mkdir)(imagesDir, { recursive: true });
|
|
11412
11433
|
await (0, import_promises2.mkdir)(rawDir, { recursive: true });
|
|
11413
|
-
await (0, import_promises2.mkdir)(proofDir, { recursive: true });
|
|
11414
11434
|
await (0, import_promises2.mkdir)(diffDir, { recursive: true });
|
|
11415
11435
|
const timingsMs = {};
|
|
11416
11436
|
const markStageStart = (stage, message) => emitProgress(options.onEvent, stage, 0, stageWeights, { message, type: "stage_start" });
|
|
11417
|
-
const markStageProgress = (stage, stagePercent, current, total, message) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message });
|
|
11437
|
+
const markStageProgress = (stage, stagePercent, current, total, message, model) => emitProgress(options.onEvent, stage, stagePercent, stageWeights, { type: "stage_progress", current, total, message, model });
|
|
11418
11438
|
const markStageDone = (stage, message) => emitProgress(options.onEvent, stage, 100, stageWeights, { message, type: "stage_done" });
|
|
11419
11439
|
let currentStage = "convert";
|
|
11420
11440
|
const logStage = (level, stage, event, message, meta) => {
|
|
@@ -11519,56 +11539,17 @@ async function runUnifiedOcrPipeline(inputPath, options = {}) {
|
|
|
11519
11539
|
const pagePath = (0, import_path5.join)(rawDir, `page_${String(i + 1).padStart(4, "0")}.md`);
|
|
11520
11540
|
await (0, import_promises2.writeFile)(pagePath, markdown, "utf-8");
|
|
11521
11541
|
rawPagePaths.push(pagePath);
|
|
11522
|
-
markStageProgress("ocr", Math.round((i + 1) / images.length * 100), i + 1, images.length, `OCR ${i + 1}/${images.length}
|
|
11542
|
+
markStageProgress("ocr", Math.round((i + 1) / images.length * 100), i + 1, images.length, `OCR ${i + 1}/${images.length}`, selectedModel);
|
|
11523
11543
|
logStage("debug", "ocr", "progress", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { page: i + 1, total: images.length });
|
|
11524
11544
|
}
|
|
11525
11545
|
timingsMs.ocr = elapsedMs(ocrStart);
|
|
11526
11546
|
markStageDone("ocr", "OCR \uC644\uB8CC");
|
|
11527
11547
|
logStage("info", "ocr", "done", "\uD398\uC774\uC9C0 OCR \uC644\uB8CC", { elapsedMs: timingsMs.ocr });
|
|
11528
|
-
const proofStart = import_node_perf_hooks.performance.now();
|
|
11529
|
-
currentStage = "proofread";
|
|
11530
|
-
markStageStart("proofread", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC9C4\uD589 \uC911");
|
|
11531
|
-
logStage("info", "proofread", "start", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC2DC\uC791", { pages: rawPagePaths.length });
|
|
11532
|
-
const proofedPaths = [];
|
|
11533
|
-
for (let i = 0; i < rawPagePaths.length; i++) {
|
|
11534
|
-
const rawMd = await (0, import_promises2.readFile)(rawPagePaths[i], "utf-8");
|
|
11535
|
-
const prompt = `${PROOFREAD_PROMPT}
|
|
11536
|
-
|
|
11537
|
-
---
|
|
11538
|
-
${rawMd}
|
|
11539
|
-
---`;
|
|
11540
|
-
const corrected = await ocrImageViaNim({
|
|
11541
|
-
textOnlyPrompt: prompt,
|
|
11542
|
-
model: selectedModel,
|
|
11543
|
-
maxTokens: modelMaxTokens[selectedModel] ?? 8192,
|
|
11544
|
-
baseUrl,
|
|
11545
|
-
keyPool,
|
|
11546
|
-
timeoutMs,
|
|
11547
|
-
maxRetries: maxRetriesPerPage,
|
|
11548
|
-
logger,
|
|
11549
|
-
stage: "proofread"
|
|
11550
|
-
});
|
|
11551
|
-
const safeCorrected = preserveNumericIntegrity(rawMd, corrected);
|
|
11552
|
-
const taggedCorrected = addUncertainTag(rawMd, safeCorrected);
|
|
11553
|
-
const pagePath = (0, import_path5.join)(proofDir, `page_${String(i + 1).padStart(4, "0")}.md`);
|
|
11554
|
-
await (0, import_promises2.writeFile)(pagePath, taggedCorrected, "utf-8");
|
|
11555
|
-
await (0, import_promises2.writeFile)(
|
|
11556
|
-
(0, import_path5.join)(diffDir, `page_${String(i + 1).padStart(4, "0")}.json`),
|
|
11557
|
-
JSON.stringify(buildDiffSummary(rawMd, taggedCorrected), null, 2),
|
|
11558
|
-
"utf-8"
|
|
11559
|
-
);
|
|
11560
|
-
proofedPaths.push(pagePath);
|
|
11561
|
-
markStageProgress("proofread", Math.round((i + 1) / rawPagePaths.length * 100), i + 1, rawPagePaths.length, `\uAD50\uC815 ${i + 1}/${rawPagePaths.length}`);
|
|
11562
|
-
logStage("debug", "proofread", "progress", "\uD398\uC774\uC9C0 \uAD50\uC815 \uC644\uB8CC", { page: i + 1, total: rawPagePaths.length });
|
|
11563
|
-
}
|
|
11564
|
-
timingsMs.proofread = elapsedMs(proofStart);
|
|
11565
|
-
markStageDone("proofread", "\uAD50\uC815 \uC644\uB8CC");
|
|
11566
|
-
logStage("info", "proofread", "done", "\uBE44\uD30C\uAD34 \uAD50\uC815 \uC644\uB8CC", { elapsedMs: timingsMs.proofread });
|
|
11567
11548
|
const mergeStart = import_node_perf_hooks.performance.now();
|
|
11568
11549
|
currentStage = "merge";
|
|
11569
11550
|
markStageStart("merge", "\uCD5C\uC885 Markdown \uBCD1\uD569 \uC911");
|
|
11570
|
-
logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: proofedPaths.length });
|
|
11571
|
-
const merged = await mergeMarkdownPages(proofedPaths);
|
|
11551
|
+
logStage("info", "merge", "start", "\uCD5C\uC885 \uBCD1\uD569 \uC2DC\uC791", { pages: rawPagePaths.length });
|
|
11552
|
+
const merged = await mergeMarkdownPages(rawPagePaths);
|
|
11572
11553
|
await (0, import_promises2.writeFile)(outputPath, merged, "utf-8");
|
|
11573
11554
|
timingsMs.merge = elapsedMs(mergeStart);
|
|
11574
11555
|
markStageDone("merge", "\uBCD1\uD569 \uC644\uB8CC");
|
|
@@ -11643,7 +11624,8 @@ function emitProgress(cb, stage, stagePercent, weights, extra) {
|
|
|
11643
11624
|
current: extra.current,
|
|
11644
11625
|
total: extra.total,
|
|
11645
11626
|
code: extra.code,
|
|
11646
|
-
message: extra.message
|
|
11627
|
+
message: extra.message,
|
|
11628
|
+
model: extra.model
|
|
11647
11629
|
});
|
|
11648
11630
|
}
|
|
11649
11631
|
async function convertWithLibreOffice(buffer, ext) {
|
|
@@ -12026,40 +12008,6 @@ function ensureSupportedInput(path) {
|
|
|
12026
12008
|
throw new UnifiedOcrError("UNSUPPORTED_INPUT", "convert", `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uC785\uB825 \uD3EC\uB9F7: ${ext}`);
|
|
12027
12009
|
}
|
|
12028
12010
|
}
|
|
12029
|
-
function extractNumericTokens(text) {
|
|
12030
|
-
return text.match(/\d[\d,./-]*/g) ?? [];
|
|
12031
|
-
}
|
|
12032
|
-
function preserveNumericIntegrity(rawText, correctedText) {
|
|
12033
|
-
const rawTokens = extractNumericTokens(rawText);
|
|
12034
|
-
const correctedTokens = extractNumericTokens(correctedText);
|
|
12035
|
-
if (rawTokens.length !== correctedTokens.length) return rawText;
|
|
12036
|
-
for (let i = 0; i < rawTokens.length; i++) {
|
|
12037
|
-
if (rawTokens[i] !== correctedTokens[i]) return rawText;
|
|
12038
|
-
}
|
|
12039
|
-
return correctedText;
|
|
12040
|
-
}
|
|
12041
|
-
function addUncertainTag(rawText, correctedText) {
|
|
12042
|
-
if (correctedText.includes("[\uD655\uC778\uD544\uC694:")) return correctedText;
|
|
12043
|
-
const rawLen = rawText.trim().length;
|
|
12044
|
-
const corrLen = correctedText.trim().length;
|
|
12045
|
-
if (rawLen === 0 || corrLen === 0) return correctedText;
|
|
12046
|
-
const rawLines = rawText.split("\n").filter(Boolean).length;
|
|
12047
|
-
const corrLines = correctedText.split("\n").filter(Boolean).length;
|
|
12048
|
-
const rawTableLines = rawText.split("\n").filter((l) => l.includes("|")).length;
|
|
12049
|
-
const corrTableLines = correctedText.split("\n").filter((l) => l.includes("|")).length;
|
|
12050
|
-
const suspicious = corrLen < rawLen * 0.75 || corrLines < Math.max(1, Math.floor(rawLines * 0.8)) || rawTableLines >= 2 && corrTableLines < Math.floor(rawTableLines * 0.7);
|
|
12051
|
-
if (!suspicious) return correctedText;
|
|
12052
|
-
return `${correctedText}
|
|
12053
|
-
|
|
12054
|
-
[\uD655\uC778\uD544\uC694: \uAD50\uC815 \uACB0\uACFC\uAC00 \uCD95\uC57D\uB418\uC5C8\uC744 \uC218 \uC788\uC5B4 \uC6D0\uBB38\uACFC \uB300\uC870\uAC00 \uD544\uC694\uD569\uB2C8\uB2E4.]`;
|
|
12055
|
-
}
|
|
12056
|
-
function buildDiffSummary(before, after) {
|
|
12057
|
-
return {
|
|
12058
|
-
changed: before !== after,
|
|
12059
|
-
beforeLength: before.length,
|
|
12060
|
-
afterLength: after.length
|
|
12061
|
-
};
|
|
12062
|
-
}
|
|
12063
12011
|
function normalizePipelineError(err, stage) {
|
|
12064
12012
|
if (err instanceof UnifiedOcrError) return err;
|
|
12065
12013
|
const message = err instanceof Error ? err.message : String(err);
|