@clazic/kordoc 2.5.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -1
- package/dist/batch-provider-XRF6F26E.js +234 -0
- package/dist/batch-provider-XRF6F26E.js.map +1 -0
- package/dist/chunk-S7BHLD2V.js +200 -0
- package/dist/{chunk-Y4WFKJ5P.js.map → chunk-S7BHLD2V.js.map} +1 -1
- package/dist/{chunk-IJGNPAK2.js → chunk-TND4YFBV.js} +2 -2
- package/dist/{chunk-QG6BYZMR.js → chunk-TS3F57LY.js} +160 -8
- package/dist/chunk-TS3F57LY.js.map +1 -0
- package/dist/cli.js +53 -6
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +420 -145
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +71 -2
- package/dist/index.d.ts +71 -2
- package/dist/index.js +407 -135
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +44 -3
- package/dist/mcp.js.map +1 -1
- package/dist/{resolve-XWYJYKKH.js → resolve-ZSUEJK3E.js} +4 -4
- package/dist/{utils-RBXHHCLI.js → utils-F66K7PXH.js} +2 -2
- package/dist/{watch-5CCMTZ7F.js → watch-2S5ULHAM.js} +4 -4
- package/package.json +1 -1
- package/dist/batch-provider-5BFJRKAZ.js +0 -190
- package/dist/batch-provider-5BFJRKAZ.js.map +0 -1
- package/dist/chunk-QG6BYZMR.js.map +0 -1
- package/dist/chunk-Y4WFKJ5P.js +0 -167
- /package/dist/{chunk-IJGNPAK2.js.map → chunk-TND4YFBV.js.map} +0 -0
- /package/dist/{resolve-XWYJYKKH.js.map → resolve-ZSUEJK3E.js.map} +0 -0
- /package/dist/{utils-RBXHHCLI.js.map → utils-F66K7PXH.js.map} +0 -0
- /package/dist/{watch-5CCMTZ7F.js.map → watch-2S5ULHAM.js.map} +0 -0
|
@@ -7,13 +7,13 @@ import {
|
|
|
7
7
|
precheckZipSize,
|
|
8
8
|
sanitizeHref,
|
|
9
9
|
toArrayBuffer
|
|
10
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-TND4YFBV.js";
|
|
11
11
|
import {
|
|
12
12
|
parsePageRange
|
|
13
13
|
} from "./chunk-MOL7MDBG.js";
|
|
14
14
|
import {
|
|
15
15
|
createCliOcrProvider
|
|
16
|
-
} from "./chunk-
|
|
16
|
+
} from "./chunk-S7BHLD2V.js";
|
|
17
17
|
import {
|
|
18
18
|
markdownToBlocks
|
|
19
19
|
} from "./chunk-YW5G6BCJ.js";
|
|
@@ -5685,7 +5685,7 @@ async function parsePdfDocument(buffer, options) {
|
|
|
5685
5685
|
if (ocrMode === "off") {
|
|
5686
5686
|
throw Object.assign(new KordocError(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`), { isImageBased: true });
|
|
5687
5687
|
}
|
|
5688
|
-
const { resolveOcrProvider } = await import("./resolve-
|
|
5688
|
+
const { resolveOcrProvider } = await import("./resolve-ZSUEJK3E.js");
|
|
5689
5689
|
const { ocrPages } = await import("./provider-T2D5XRTI.js");
|
|
5690
5690
|
const tryProvider = async (provider, filter) => {
|
|
5691
5691
|
try {
|
|
@@ -6686,7 +6686,7 @@ function mergeKoreanLines(text) {
|
|
|
6686
6686
|
}
|
|
6687
6687
|
|
|
6688
6688
|
// src/index.ts
|
|
6689
|
-
import { readFile } from "fs/promises";
|
|
6689
|
+
import { readFile as readFile2 } from "fs/promises";
|
|
6690
6690
|
|
|
6691
6691
|
// src/xlsx/parser.ts
|
|
6692
6692
|
import JSZip3 from "jszip";
|
|
@@ -9821,10 +9821,161 @@ async function markdownToXlsx(markdown, options) {
|
|
|
9821
9821
|
return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
|
|
9822
9822
|
}
|
|
9823
9823
|
|
|
9824
|
-
// src/
|
|
9825
|
-
import {
|
|
9824
|
+
// src/convert/index.ts
|
|
9825
|
+
import { readFile } from "fs/promises";
|
|
9826
|
+
|
|
9827
|
+
// src/convert/libreoffice.ts
|
|
9826
9828
|
import libre from "libreoffice-convert";
|
|
9829
|
+
|
|
9830
|
+
// src/convert/error.ts
|
|
9831
|
+
/**
 * Error type thrown by the document-to-PDF conversion helpers.
 *
 * Instances expose a machine-readable `code` next to the human-readable
 * `message`. Codes used by the throwers visible in this file are
 * "SOFFICE_NOT_FOUND", "TIMEOUT" and "CONVERT_FAILED".
 */
var ConvertError = class extends Error {
  /**
   * @param {string} code - Machine-readable failure code, stored on `this.code`.
   * @param {string} message - Human-readable description, passed to `Error`.
   * @param {{ cause?: unknown }} [options] - Optional standard `Error` options;
   *   lets a thrower keep the underlying failure as `error.cause`. Omitting it
   *   yields exactly the same instance shape as the two-argument form.
   */
  constructor(code, message, options) {
    super(message, options);
    this.code = code;
    this.name = "ConvertError";
  }
};
|
|
9838
|
+
|
|
9839
|
+
// src/convert/libreoffice.ts
|
|
9827
9840
|
var libreConvert = libre.convert;
|
|
9841
|
+
/**
 * Verify that the `soffice` executable can be launched.
 *
 * Runs `soffice --version` through the lazily imported `runCommand` helper.
 *
 * @returns {Promise<void>} Resolves when the command completes without error.
 * @throws {ConvertError} With code "SOFFICE_NOT_FOUND" when the command fails.
 *   The original failure is kept on `cause` so callers can still inspect why
 *   the launch failed instead of only seeing the generic message.
 */
async function assertSofficeAvailable() {
  // Loaded on demand, outside the try block: a failure to load the helper
  // chunk is not a "soffice missing" condition and must propagate unchanged.
  const { runCommand } = await import("./utils-F66K7PXH.js");
  try {
    await runCommand("soffice", ["--version"]);
  } catch (err) {
    throw Object.assign(
      new ConvertError(
        "SOFFICE_NOT_FOUND",
        "soffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4. LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694."
      ),
      { cause: err }
    );
  }
}
|
|
9852
|
+
/**
 * Convert a document buffer with LibreOffice, bounded by a timeout.
 *
 * Adapts the callback-style `libreConvert` to a promise.
 *
 * @param {Buffer} buffer - Source document bytes handed to `libreConvert`.
 * @param {string} targetExt - Target extension passed to `libreConvert` (for example ".pdf").
 * @param {number} [timeoutMs=60000] - Milliseconds to wait before rejecting with "TIMEOUT".
 * @returns {Promise<Buffer>} Resolves with the converted bytes reported by the callback.
 * @throws {ConvertError} "TIMEOUT" when the timer fires first; "CONVERT_FAILED" when
 *   the callback reports an error or no output (the reported error is kept on `cause`).
 *   An error thrown synchronously by `libreConvert` is rejected as-is.
 */
async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
  return new Promise((resolve2, reject) => {
    const timer = setTimeout(() => {
      reject(
        new ConvertError("TIMEOUT", `\uBCC0\uD658 \uD0C0\uC784\uC544\uC6C3 (${timeoutMs}ms \uCD08\uACFC)`)
      );
    }, timeoutMs);
    try {
      libreConvert(buffer, targetExt, void 0, (err, done) => {
        clearTimeout(timer);
        if (err || !done) {
          const failure = new ConvertError(
            "CONVERT_FAILED",
            err?.message ?? "LibreOffice \uBCC0\uD658 \uC2E4\uD328"
          );
          reject(err ? Object.assign(failure, { cause: err }) : failure);
          return;
        }
        resolve2(done);
      });
    } catch (err) {
      // A synchronous throw means the callback will never run, so the timer
      // must be cleared here; otherwise it stays armed for the full timeout.
      clearTimeout(timer);
      reject(err);
    }
  });
}
|
|
9874
|
+
|
|
9875
|
+
// src/convert/index.ts
|
|
9876
|
+
// True while some caller holds the conversion lock.
var isConverting = false;
// FIFO of callbacks, one per waiting caller; each grants the lock to its waiter.
var queue = [];

/**
 * Build the release function handed to a lock holder.
 *
 * The returned function frees the lock and immediately grants it to the
 * oldest queued waiter, if any. It acts only on its first call: a repeated
 * call would otherwise clear the flag under the current holder and admit a
 * second waiter, letting two conversions run at once.
 *
 * @returns {() => void} One-shot release function.
 */
function createConvertLockRelease() {
  let released = false;
  return () => {
    if (released) {
      return;
    }
    released = true;
    isConverting = false;
    const next = queue.shift();
    next?.();
  };
}

/**
 * Acquire the module-wide conversion lock, waiting in FIFO order if it is held.
 *
 * @returns {Promise<() => void>} Resolves with the release function for this
 *   acquisition once the lock has been granted. The caller must invoke it
 *   (typically in `finally`) so queued callers can proceed.
 */
async function acquireConvertLock() {
  if (!isConverting) {
    isConverting = true;
    return createConvertLockRelease();
  }
  return new Promise((resolve2) => {
    queue.push(() => {
      // Runs synchronously inside the previous holder's release, so the flag
      // is set again before any other caller can observe it as free.
      isConverting = true;
      resolve2(createConvertLockRelease());
    });
  });
}
|
|
9898
|
+
/**
 * Convert an HWP/HWPX document to PDF using LibreOffice.
 *
 * Failures are reported through the returned object rather than by throwing,
 * except for a non-ConvertError raised by the soffice availability check,
 * which is rethrown.
 *
 * @param {string | Buffer | ArrayBuffer | Uint8Array} input - A file path
 *   (string), a Buffer, or any value accepted by `Buffer.from`.
 * @param {{ timeoutMs?: number, onProgress?: (percent: number, stage: string) => void }} [options]
 *   `timeoutMs` is forwarded to `convertBuffer`; `onProgress` is called with
 *   (10, "convert") before conversion and (100, "done") after it.
 * @returns {Promise<object>} On success `{ success: true, pdf: Uint8Array, sourceFormat }`;
 *   on failure `{ success: false, code, error, stage }` where `stage` is
 *   "detect", "validate" or "convert".
 */
async function convertToPdf(input, options) {
  const MAX_FILE_SIZE = 500 * 1024 * 1024;
  // Shared builder so the pre-read and post-read size checks report identically.
  const tooLarge = (byteLength) => ({
    success: false,
    code: "FILE_TOO_LARGE",
    error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(byteLength / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
    stage: "detect"
  });
  let buffer;
  try {
    if (typeof input === "string") {
      // Check the size on disk before reading: an oversized file should be
      // rejected as FILE_TOO_LARGE without first being loaded into memory
      // (and readFile itself fails on very large files, which would otherwise
      // be misreported as a read error).
      const { stat } = await import("fs/promises");
      const { size } = await stat(input);
      if (size > MAX_FILE_SIZE) {
        return tooLarge(size);
      }
      buffer = await readFile(input);
    } else if (Buffer.isBuffer(input)) {
      buffer = input;
    } else {
      buffer = Buffer.from(input);
    }
  } catch (err) {
    return {
      success: false,
      code: "PARSE_ERROR",
      error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
      stage: "detect"
    };
  }
  // Still needed for in-memory input, and for a file that grew after stat.
  if (buffer.length > MAX_FILE_SIZE) {
    return tooLarge(buffer.length);
  }
  const format = detectFormat(toArrayBuffer(buffer));
  if (format !== "hwp" && format !== "hwpx") {
    return {
      success: false,
      code: "UNSUPPORTED_FORMAT",
      error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
      stage: "detect"
    };
  }
  try {
    await assertSofficeAvailable();
  } catch (err) {
    if (err instanceof ConvertError) {
      return {
        success: false,
        code: err.code,
        error: err.message,
        stage: "validate"
      };
    }
    throw err;
  }
  // Conversions are serialized through the module-wide lock; the release in
  // `finally` runs on every exit path so queued callers are never stranded.
  const releaseLock = await acquireConvertLock();
  try {
    options?.onProgress?.(10, "convert");
    const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
    options?.onProgress?.(100, "done");
    return {
      success: true,
      pdf: new Uint8Array(pdf),
      sourceFormat: format
    };
  } catch (err) {
    if (err instanceof ConvertError) {
      return {
        success: false,
        code: err.code,
        error: err.message,
        stage: "convert"
      };
    }
    return {
      success: false,
      code: classifyError(err),
      error: err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328",
      stage: "convert"
    };
  } finally {
    releaseLock();
  }
}
|
|
9976
|
+
|
|
9977
|
+
// src/pipeline/unified-ocr.ts
|
|
9978
|
+
import { performance } from "perf_hooks";
|
|
9828
9979
|
var OCR_PROMPT = [
|
|
9829
9980
|
"\uC774 PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0\uC5D0\uC11C \uD14D\uC2A4\uD2B8\uC640 \uD45C\uB97C \uCD94\uCD9C\uD558\uC5EC Markdown\uC73C\uB85C \uBCC0\uD658\uD558\uACE0, OCR \uC624\uC778\uC2DD \uC624\uB958\uB97C \uC989\uC2DC \uAD50\uC815\uD558\uC5EC \uCD5C\uC885 \uACB0\uACFC\uBB3C\uC744 \uCD9C\uB825\uD558\uC138\uC694.",
|
|
9830
9981
|
"",
|
|
@@ -9864,7 +10015,7 @@ async function parse2(input, options) {
|
|
|
9864
10015
|
let buffer;
|
|
9865
10016
|
if (typeof input === "string") {
|
|
9866
10017
|
try {
|
|
9867
|
-
const buf = await
|
|
10018
|
+
const buf = await readFile2(input);
|
|
9868
10019
|
buffer = toArrayBuffer(buf);
|
|
9869
10020
|
} catch (err) {
|
|
9870
10021
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
@@ -10199,6 +10350,7 @@ export {
|
|
|
10199
10350
|
extractFormFields,
|
|
10200
10351
|
markdownToHwpx,
|
|
10201
10352
|
markdownToXlsx,
|
|
10353
|
+
convertToPdf,
|
|
10202
10354
|
parse2 as parse
|
|
10203
10355
|
};
|
|
10204
10356
|
/*! Bundled license information:
|
|
@@ -10206,4 +10358,4 @@ export {
|
|
|
10206
10358
|
cfb/cfb.js:
|
|
10207
10359
|
(*! crc32.js (C) 2014-present SheetJS -- http://sheetjs.com *)
|
|
10208
10360
|
*/
|
|
10209
|
-
//# sourceMappingURL=chunk-
|
|
10361
|
+
//# sourceMappingURL=chunk-TS3F57LY.js.map
|