@clazic/kordoc 2.5.1 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,13 +7,13 @@ import {
7
7
  precheckZipSize,
8
8
  sanitizeHref,
9
9
  toArrayBuffer
10
- } from "./chunk-IJGNPAK2.js";
10
+ } from "./chunk-TND4YFBV.js";
11
11
  import {
12
12
  parsePageRange
13
13
  } from "./chunk-MOL7MDBG.js";
14
14
  import {
15
15
  createCliOcrProvider
16
- } from "./chunk-Y4WFKJ5P.js";
16
+ } from "./chunk-S7BHLD2V.js";
17
17
  import {
18
18
  markdownToBlocks
19
19
  } from "./chunk-YW5G6BCJ.js";
@@ -5685,7 +5685,7 @@ async function parsePdfDocument(buffer, options) {
5685
5685
  if (ocrMode === "off") {
5686
5686
  throw Object.assign(new KordocError(`\uC774\uBBF8\uC9C0 \uAE30\uBC18 PDF (${pageCount}\uD398\uC774\uC9C0, ${totalChars}\uC790)`), { isImageBased: true });
5687
5687
  }
5688
- const { resolveOcrProvider } = await import("./resolve-XWYJYKKH.js");
5688
+ const { resolveOcrProvider } = await import("./resolve-ZSUEJK3E.js");
5689
5689
  const { ocrPages } = await import("./provider-T2D5XRTI.js");
5690
5690
  const tryProvider = async (provider, filter) => {
5691
5691
  try {
@@ -6686,7 +6686,7 @@ function mergeKoreanLines(text) {
6686
6686
  }
6687
6687
 
6688
6688
  // src/index.ts
6689
- import { readFile } from "fs/promises";
6689
+ import { readFile as readFile2 } from "fs/promises";
6690
6690
 
6691
6691
  // src/xlsx/parser.ts
6692
6692
  import JSZip3 from "jszip";
@@ -9821,10 +9821,161 @@ async function markdownToXlsx(markdown, options) {
9821
9821
  return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
9822
9822
  }
9823
9823
 
9824
- // src/pipeline/unified-ocr.ts
9825
- import { performance } from "perf_hooks";
9824
+ // src/convert/index.ts
9825
+ import { readFile } from "fs/promises";
9826
+
9827
+ // src/convert/libreoffice.ts
9826
9828
  import libre from "libreoffice-convert";
9829
+
9830
+ // src/convert/error.ts
9831
/**
 * Error raised by the document conversion helpers.
 *
 * Carries a machine-readable `code` next to the human-readable message so
 * callers can branch on the failure kind without parsing text.
 */
var ConvertError = class extends Error {
  /**
   * @param {string} code - Failure identifier stored on `this.code`.
   * @param {string} message - Human-readable description.
   * @param {{ cause?: unknown }} [options] - Optional standard `Error`
   *   options; forwarded unchanged so `{ cause }` links the underlying error.
   */
  constructor(code, message, options) {
    // Forwarding `options` is a no-op when it is omitted, so existing
    // two-argument call sites behave exactly as before.
    super(message, options);
    this.code = code;
    this.name = "ConvertError";
  }
};
9838
+
9839
+ // src/convert/libreoffice.ts
9827
9840
  var libreConvert = libre.convert;
9841
/**
 * Checks that the `soffice` executable can be launched.
 *
 * Loads `runCommand` lazily from the utils chunk and runs
 * `soffice --version` with it.
 *
 * @returns {Promise<void>} Resolves when the command completes.
 * @throws {ConvertError} With code `SOFFICE_NOT_FOUND` when `runCommand`
 *   rejects for any reason; the original failure is attached as `cause`.
 */
async function assertSofficeAvailable() {
  // Deliberately outside the try block: a failure to load the helper chunk
  // is not a "soffice missing" condition and must propagate unchanged.
  const { runCommand } = await import("./utils-F66K7PXH.js");
  try {
    await runCommand("soffice", ["--version"]);
  } catch (err) {
    // Keep the underlying error reachable so the real reason (missing
    // binary, permissions, crash) is not lost behind the generic message.
    throw Object.assign(
      new ConvertError(
        "SOFFICE_NOT_FOUND",
        "soffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4. LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694."
      ),
      { cause: err }
    );
  }
}
9852
/**
 * Converts a document buffer through `libreConvert`, bounded by a timeout.
 *
 * The timeout only rejects the returned promise; nothing here cancels the
 * conversion that `libreConvert` has already started.
 *
 * @param {Buffer} buffer - Source document bytes handed to `libreConvert`.
 * @param {string} targetExt - Target extension passed to `libreConvert`, e.g. ".pdf".
 * @param {number} [timeoutMs=60000] - Milliseconds to wait before giving up.
 * @returns {Promise<*>} Resolves with the value `libreConvert` passes to its callback.
 * @throws {ConvertError} `TIMEOUT` when the timer fires first; `CONVERT_FAILED`
 *   when the callback reports an error or no output (the reported error is
 *   attached as `cause`). An exception thrown synchronously by `libreConvert`
 *   is re-raised unchanged.
 */
async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
  return new Promise((resolve2, reject) => {
    const timer = setTimeout(() => {
      reject(
        new ConvertError("TIMEOUT", `\uBCC0\uD658 \uD0C0\uC784\uC544\uC6C3 (${timeoutMs}ms \uCD08\uACFC)`)
      );
    }, timeoutMs);
    try {
      libreConvert(buffer, targetExt, void 0, (err, done) => {
        clearTimeout(timer);
        if (err || !done) {
          const failure = new ConvertError(
            "CONVERT_FAILED",
            err?.message ?? "LibreOffice \uBCC0\uD658 \uC2E4\uD328"
          );
          reject(err ? Object.assign(failure, { cause: err }) : failure);
          return;
        }
        resolve2(done);
      });
    } catch (err) {
      // A synchronous throw already fails the promise, but without this the
      // pending timer would keep the event loop alive until it fires.
      clearTimeout(timer);
      reject(err);
    }
  });
}
9874
+
9875
+ // src/convert/index.ts
9876
// True while some caller holds the conversion lock.
var isConverting = false;
// FIFO list of callbacks, one per caller waiting for the lock.
var queue = [];

/**
 * Acquires the module-wide conversion lock so that only one conversion runs
 * at a time. Waiters are served in the order they asked.
 *
 * @returns {Promise<() => void>} Resolves with a release function once the
 *   lock is held. Calling the release function hands the lock to the next
 *   waiter, if any. Calling it more than once is a no-op, so a stale release
 *   can never free a lock that has since passed to another caller.
 */
async function acquireConvertLock() {
  // Each grant gets its own single-use release closure.
  const createRelease = () => {
    let released = false;
    return () => {
      if (released) {
        return;
      }
      released = true;
      isConverting = false;
      const next = queue.shift();
      next?.();
    };
  };

  if (!isConverting) {
    isConverting = true;
    return createRelease();
  }
  return new Promise((resolve2) => {
    queue.push(() => {
      isConverting = true;
      resolve2(createRelease());
    });
  });
}
9898
/**
 * Converts an HWP/HWPX document to PDF through LibreOffice.
 *
 * Failures are reported as `{ success: false, code, error, stage }` results
 * instead of rejections. Conversions are serialized through
 * `acquireConvertLock`, so concurrent calls run one after another.
 *
 * @param {string|Buffer|*} input - A file path (read with `readFile`), a
 *   `Buffer`, or any value accepted by `Buffer.from`.
 * @param {object} [options]
 * @param {(percent: number, stage: string) => void} [options.onProgress] -
 *   Called with `(10, "convert")` before and `(100, "done")` after conversion.
 * @param {number} [options.timeoutMs] - Forwarded to `convertBuffer`; its
 *   default applies when omitted.
 * @returns {Promise<object>} On success
 *   `{ success: true, pdf: Uint8Array, sourceFormat }`; otherwise a failure
 *   result whose `stage` is "detect", "validate" or "convert".
 */
async function convertToPdf(input, options) {
  let buffer;
  try {
    if (typeof input === "string") {
      buffer = await readFile(input);
    } else if (Buffer.isBuffer(input)) {
      buffer = input;
    } else {
      buffer = Buffer.from(input);
    }
  } catch (err) {
    return {
      success: false,
      code: "PARSE_ERROR",
      error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
      stage: "detect"
    };
  }

  const MAX_FILE_SIZE = 500 * 1024 * 1024;
  if (buffer.length > MAX_FILE_SIZE) {
    return {
      success: false,
      code: "FILE_TOO_LARGE",
      error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
      stage: "detect"
    };
  }

  // Only HWP and HWPX sources are accepted for conversion.
  const format = detectFormat(toArrayBuffer(buffer));
  if (format !== "hwp" && format !== "hwpx") {
    return {
      success: false,
      code: "UNSUPPORTED_FORMAT",
      error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
      stage: "detect"
    };
  }

  try {
    await assertSofficeAvailable();
  } catch (err) {
    if (err instanceof ConvertError) {
      return {
        success: false,
        code: err.code,
        error: err.message,
        stage: "validate"
      };
    }
    // Unexpected failures are reported the same way the convert stage
    // reports them, so callers always receive a result object rather than
    // a rejection from this one path.
    return {
      success: false,
      code: classifyError(err),
      error: err instanceof Error ? err.message : String(err),
      stage: "validate"
    };
  }

  const releaseLock = await acquireConvertLock();
  try {
    options?.onProgress?.(10, "convert");
    const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
    options?.onProgress?.(100, "done");
    return {
      success: true,
      pdf: new Uint8Array(pdf),
      sourceFormat: format
    };
  } catch (err) {
    if (err instanceof ConvertError) {
      return {
        success: false,
        code: err.code,
        error: err.message,
        stage: "convert"
      };
    }
    return {
      success: false,
      code: classifyError(err),
      error: err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328",
      stage: "convert"
    };
  } finally {
    // Always hand the lock on, whether the conversion succeeded or failed.
    releaseLock();
  }
}
9976
+
9977
+ // src/pipeline/unified-ocr.ts
9978
+ import { performance } from "perf_hooks";
9828
9979
  var OCR_PROMPT = [
9829
9980
  "\uC774 PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0\uC5D0\uC11C \uD14D\uC2A4\uD2B8\uC640 \uD45C\uB97C \uCD94\uCD9C\uD558\uC5EC Markdown\uC73C\uB85C \uBCC0\uD658\uD558\uACE0, OCR \uC624\uC778\uC2DD \uC624\uB958\uB97C \uC989\uC2DC \uAD50\uC815\uD558\uC5EC \uCD5C\uC885 \uACB0\uACFC\uBB3C\uC744 \uCD9C\uB825\uD558\uC138\uC694.",
9830
9981
  "",
@@ -9864,7 +10015,7 @@ async function parse2(input, options) {
9864
10015
  let buffer;
9865
10016
  if (typeof input === "string") {
9866
10017
  try {
9867
- const buf = await readFile(input);
10018
+ const buf = await readFile2(input);
9868
10019
  buffer = toArrayBuffer(buf);
9869
10020
  } catch (err) {
9870
10021
  const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
@@ -10199,6 +10350,7 @@ export {
10199
10350
  extractFormFields,
10200
10351
  markdownToHwpx,
10201
10352
  markdownToXlsx,
10353
+ convertToPdf,
10202
10354
  parse2 as parse
10203
10355
  };
10204
10356
  /*! Bundled license information:
@@ -10206,4 +10358,4 @@ export {
10206
10358
  cfb/cfb.js:
10207
10359
  (*! crc32.js (C) 2014-present SheetJS -- http://sheetjs.com *)
10208
10360
  */
10209
- //# sourceMappingURL=chunk-QG6BYZMR.js.map
10361
+ //# sourceMappingURL=chunk-TS3F57LY.js.map